remove cbb
This commit is contained in:
@@ -42,7 +42,7 @@ from scrape_schedules import (
|
||||
# NWSL sources
|
||||
scrape_nwsl_espn, scrape_nwsl_fbref, scrape_nwsl_nwslsoccer,
|
||||
# Utilities
|
||||
generate_stadiums_from_teams,
|
||||
scrape_all_stadiums,
|
||||
)
|
||||
from validate_data import (
|
||||
validate_games,
|
||||
@@ -129,7 +129,7 @@ def run_pipeline(
|
||||
|
||||
# Scrape stadiums
|
||||
print_section("Stadiums")
|
||||
all_stadiums = generate_stadiums_from_teams()
|
||||
all_stadiums = scrape_all_stadiums()
|
||||
print(f" Generated {len(all_stadiums)} stadiums from team data")
|
||||
|
||||
# Scrape by sport with multi-source fallback
|
||||
|
||||
@@ -12,8 +12,6 @@ This script coordinates scraping across sport-specific modules:
|
||||
- wnba.py: WNBA stadiums
|
||||
- nwsl.py: NWSL stadiums
|
||||
|
||||
CBB (College Basketball) remains inline pending extraction (350+ D1 teams).
|
||||
|
||||
Usage:
|
||||
python scrape_schedules.py --sport nba --season 2026
|
||||
python scrape_schedules.py --sport all --season 2026
|
||||
@@ -93,7 +91,6 @@ from nwsl import (
|
||||
# =============================================================================
|
||||
# NON-CORE SPORT SCRAPERS
|
||||
# NOTE: MLS, WNBA, NWSL stadiums are now imported from their respective modules
|
||||
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
|
||||
# =============================================================================
|
||||
|
||||
def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tuple[str, str]) -> list[Game]:
|
||||
@@ -106,7 +103,6 @@ def _scrape_espn_schedule(sport: str, league: str, season: int, date_range: tupl
|
||||
'wnba': 'WNBA',
|
||||
'usa.1': 'MLS',
|
||||
'usa.nwsl': 'NWSL',
|
||||
'mens-college-basketball': 'CBB'
|
||||
}.get(league, league.upper())
|
||||
|
||||
print(f"Fetching {sport_upper} {season} from ESPN API...")
|
||||
@@ -213,13 +209,6 @@ def scrape_nwsl_espn(season: int) -> list[Game]:
|
||||
return _scrape_espn_schedule('soccer', 'usa.nwsl', season, (start, end))
|
||||
|
||||
|
||||
def scrape_cbb_espn(season: int) -> list[Game]:
|
||||
"""Fetch College Basketball schedule from ESPN API (D1 only)."""
|
||||
start = f"{season-1}1101"
|
||||
end = f"{season}0415"
|
||||
return _scrape_espn_schedule('basketball', 'mens-college-basketball', season, (start, end))
|
||||
|
||||
|
||||
def scrape_wnba_basketball_reference(season: int) -> list[Game]:
|
||||
"""Scrape WNBA schedule from Basketball-Reference."""
|
||||
games = []
|
||||
@@ -339,42 +328,6 @@ def scrape_nwsl_nwslsoccer(season: int) -> list[Game]:
|
||||
return games
|
||||
|
||||
|
||||
def scrape_cbb_sports_reference(season: int) -> list[Game]:
|
||||
"""Scrape College Basketball schedule from Sports-Reference."""
|
||||
games = []
|
||||
print(f"Scraping CBB {season} from Sports-Reference...")
|
||||
# Placeholder - Sports-Reference scraping would go here
|
||||
print(f" Found {len(games)} games from Sports-Reference")
|
||||
return games
|
||||
|
||||
|
||||
def scrape_cbb_cbssports(season: int) -> list[Game]:
|
||||
"""Fetch College Basketball schedule from CBS Sports."""
|
||||
games = []
|
||||
print(f"Fetching CBB {season} from CBS Sports...")
|
||||
# Placeholder - CBS Sports scraping would go here
|
||||
print(f" Found {len(games)} games from CBS Sports")
|
||||
return games
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# NON-CORE STADIUM SCRAPERS
|
||||
# NOTE: scrape_mls_stadiums() is now imported from mls.py
|
||||
# NOTE: scrape_wnba_stadiums() is now imported from wnba.py
|
||||
# NOTE: scrape_nwsl_stadiums() is now imported from nwsl.py
|
||||
# TODO: Extract CBB to separate module (350+ D1 teams requires separate scoped phase)
|
||||
# =============================================================================
|
||||
|
||||
def scrape_cbb_stadiums() -> list[Stadium]:
|
||||
"""Fetch College Basketball arena data."""
|
||||
print("\nCBB STADIUMS")
|
||||
print("-" * 40)
|
||||
stadiums = []
|
||||
# Would include CBB arena data here
|
||||
print(f" Found {len(stadiums)} CBB arenas")
|
||||
return stadiums
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# LEGACY STADIUM FUNCTIONS
|
||||
# =============================================================================
|
||||
@@ -407,7 +360,6 @@ def scrape_all_stadiums() -> list[Stadium]:
|
||||
all_stadiums.extend(scrape_mls_stadiums())
|
||||
all_stadiums.extend(scrape_wnba_stadiums())
|
||||
all_stadiums.extend(scrape_nwsl_stadiums())
|
||||
all_stadiums.extend(scrape_cbb_stadiums())
|
||||
|
||||
return all_stadiums
|
||||
|
||||
@@ -444,7 +396,7 @@ def get_team_abbrev(team_name: str, sport: str) -> str:
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Scrape sports schedules')
|
||||
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'], default='all')
|
||||
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'], default='all')
|
||||
parser.add_argument('--season', type=int, default=2026, help='Season year (ending year)')
|
||||
parser.add_argument('--stadiums-only', action='store_true', help='Only scrape stadium data (legacy method)')
|
||||
parser.add_argument('--stadiums-update', action='store_true', help='Scrape ALL stadium data for all 8 sports (comprehensive)')
|
||||
@@ -550,20 +502,6 @@ def main():
|
||||
nwsl_games = assign_stable_ids(nwsl_games, 'NWSL', str(args.season))
|
||||
all_games.extend(nwsl_games)
|
||||
|
||||
if args.sport in ['cbb', 'all']:
|
||||
print("\n" + "="*60)
|
||||
print(f"SCRAPING CBB {args.season}")
|
||||
print("="*60)
|
||||
cbb_sources = [
|
||||
ScraperSource('ESPN', scrape_cbb_espn, priority=1, min_games=1000),
|
||||
ScraperSource('Sports-Reference', scrape_cbb_sports_reference, priority=2, min_games=500),
|
||||
ScraperSource('CBS Sports', scrape_cbb_cbssports, priority=3, min_games=300),
|
||||
]
|
||||
cbb_games = scrape_with_fallback('CBB', args.season, cbb_sources)
|
||||
cbb_season = f"{args.season-1}-{str(args.season)[2:]}"
|
||||
cbb_games = assign_stable_ids(cbb_games, 'CBB', cbb_season)
|
||||
all_games.extend(cbb_games)
|
||||
|
||||
# Export
|
||||
print("\n" + "="*60)
|
||||
print("EXPORTING DATA")
|
||||
|
||||
@@ -123,11 +123,11 @@ def get_season_and_sport(mode: str):
|
||||
print(f"\n{Colors.BOLD}Select sport:{Colors.RESET}")
|
||||
print(f" {Colors.GREEN}[1]{Colors.RESET} All Sports")
|
||||
print(f" {Colors.GREEN}[2]{Colors.RESET} MLB {Colors.GREEN}[3]{Colors.RESET} NBA {Colors.GREEN}[4]{Colors.RESET} NHL {Colors.GREEN}[5]{Colors.RESET} NFL")
|
||||
print(f" {Colors.GREEN}[6]{Colors.RESET} MLS {Colors.GREEN}[7]{Colors.RESET} WNBA {Colors.GREEN}[8]{Colors.RESET} NWSL {Colors.GREEN}[9]{Colors.RESET} CBB")
|
||||
print(f" {Colors.GREEN}[6]{Colors.RESET} MLS {Colors.GREEN}[7]{Colors.RESET} WNBA {Colors.GREEN}[8]{Colors.RESET} NWSL")
|
||||
|
||||
sport_map = {
|
||||
'1': 'all', '2': 'mlb', '3': 'nba', '4': 'nhl', '5': 'nfl',
|
||||
'6': 'mls', '7': 'wnba', '8': 'nwsl', '9': 'cbb'
|
||||
'6': 'mls', '7': 'wnba', '8': 'nwsl'
|
||||
}
|
||||
|
||||
sport_choice = input(f"{Colors.CYAN}Enter choice [1]:{Colors.RESET} ").strip()
|
||||
@@ -156,7 +156,6 @@ def scrape_submenu():
|
||||
('6', 'mls', 'MLS - Major League Soccer'),
|
||||
('7', 'wnba', 'WNBA - Women\'s National Basketball Association'),
|
||||
('8', 'nwsl', 'NWSL - National Women\'s Soccer League'),
|
||||
('9', 'cbb', 'CBB - College Basketball'),
|
||||
('b', 'back', 'Back to main menu'),
|
||||
]
|
||||
|
||||
@@ -235,7 +234,6 @@ def pipeline_submenu():
|
||||
('6', 'mls', 'MLS only'),
|
||||
('7', 'wnba', 'WNBA only'),
|
||||
('8', 'nwsl', 'NWSL only'),
|
||||
('9', 'cbb', 'CBB only'),
|
||||
('b', 'back', 'Back to main menu'),
|
||||
]
|
||||
|
||||
@@ -624,7 +622,7 @@ Examples:
|
||||
)
|
||||
scrape_parser.add_argument(
|
||||
'--sport',
|
||||
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'],
|
||||
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'],
|
||||
default='all',
|
||||
help='Sport to scrape (default: all)'
|
||||
)
|
||||
@@ -653,7 +651,7 @@ Examples:
|
||||
stadiums_update_parser = subparsers.add_parser(
|
||||
'stadiums-update',
|
||||
help='Scrape ALL stadium data for all 8 sports',
|
||||
description='Comprehensive stadium scraping for NBA, MLB, NHL, NFL, WNBA, MLS, NWSL, and CBB'
|
||||
description='Comprehensive stadium scraping for NBA, MLB, NHL, NFL, WNBA, MLS, and NWSL'
|
||||
)
|
||||
stadiums_update_parser.add_argument(
|
||||
'--output',
|
||||
@@ -893,7 +891,7 @@ Examples:
|
||||
)
|
||||
pipeline_parser.add_argument(
|
||||
'--sport',
|
||||
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'],
|
||||
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'],
|
||||
default='all',
|
||||
help='Sport to process (default: all)'
|
||||
)
|
||||
@@ -934,7 +932,7 @@ Examples:
|
||||
)
|
||||
full_pipeline_parser.add_argument(
|
||||
'--sport',
|
||||
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'],
|
||||
choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'],
|
||||
default='all',
|
||||
help='Sport to process (default: all)'
|
||||
)
|
||||
|
||||
@@ -91,8 +91,6 @@ EXPECTED_GAMES = {
|
||||
'max': 30,
|
||||
'description': 'NWSL regular season (26 games)'
|
||||
},
|
||||
# Note: CBB doesn't have fixed game counts per "team"
|
||||
# CBB teams vary widely (30+ games)
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ from nfl import scrape_nfl_espn, NFL_TEAMS
|
||||
|
||||
# Import secondary sports from scrape_schedules (stubs)
|
||||
from scrape_schedules import (
|
||||
scrape_wnba_espn, scrape_mls_espn, scrape_nwsl_espn, scrape_cbb_espn,
|
||||
scrape_wnba_espn, scrape_mls_espn, scrape_nwsl_espn,
|
||||
WNBA_TEAMS, MLS_TEAMS, NWSL_TEAMS,
|
||||
)
|
||||
|
||||
@@ -474,7 +474,7 @@ def main():
|
||||
parser.add_argument('--data-dir', type=str, default='./data', help='Data directory')
|
||||
parser.add_argument('--scrape-and-validate', action='store_true', help='Scrape fresh and validate')
|
||||
parser.add_argument('--season', type=int, default=2025, help='Season year')
|
||||
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'], default='all')
|
||||
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'], default='all')
|
||||
parser.add_argument('--output', type=str, default='./data/validation_report.json')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
Reference in New Issue
Block a user