From 1b796a604c87841341ae92eecc6a894a4805ef2a Mon Sep 17 00:00:00 2001 From: Trey t Date: Sat, 10 Jan 2026 10:56:24 -0600 Subject: [PATCH] chore: remove CBB from pipeline scripts CBB (College Basketball) was deferred in Phase 2.1 due to 350+ D1 teams requiring a separate scoped approach. Remove it from pipeline scripts. Co-Authored-By: Claude Opus 4.5 --- Scripts/run_canonicalization_pipeline.py | 13 ------------- Scripts/run_pipeline.py | 17 +---------------- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/Scripts/run_canonicalization_pipeline.py b/Scripts/run_canonicalization_pipeline.py index 19bb6fd..d656051 100644 --- a/Scripts/run_canonicalization_pipeline.py +++ b/Scripts/run_canonicalization_pipeline.py @@ -49,8 +49,6 @@ from scrape_schedules import ( scrape_mls_espn, scrape_mls_fbref, scrape_mls_mlssoccer, # NWSL sources scrape_nwsl_espn, scrape_nwsl_fbref, scrape_nwsl_nwslsoccer, - # CBB sources - scrape_cbb_espn, scrape_cbb_sports_reference, scrape_cbb_cbssports, # Utilities generate_stadiums_from_teams, ) @@ -204,17 +202,6 @@ def run_pipeline( nwsl_games = assign_stable_ids(nwsl_games, 'NWSL', str(season)) all_games.extend(nwsl_games) - print_section(f"CBB {season}") - cbb_sources = [ - ScraperSource('ESPN', scrape_cbb_espn, priority=1, min_games=1000), - ScraperSource('Sports-Reference', scrape_cbb_sports_reference, priority=2, min_games=500), - ScraperSource('CBS Sports', scrape_cbb_cbssports, priority=3, min_games=300), - ] - cbb_games = scrape_with_fallback('CBB', season, cbb_sources) - cbb_season = f"{season-1}-{str(season)[2:]}" - cbb_games = assign_stable_ids(cbb_games, 'CBB', cbb_season) - all_games.extend(cbb_games) - # Export raw data print_section("Exporting Raw Data") export_to_json(all_games, all_stadiums, output_dir) diff --git a/Scripts/run_pipeline.py b/Scripts/run_pipeline.py index a381979..c34fceb 100755 --- a/Scripts/run_pipeline.py +++ b/Scripts/run_pipeline.py @@ -41,8 +41,6 @@ from scrape_schedules import ( scrape_mls_espn, scrape_mls_fbref, scrape_mls_mlssoccer, # NWSL sources scrape_nwsl_espn, scrape_nwsl_fbref, scrape_nwsl_nwslsoccer, - # CBB sources - scrape_cbb_espn, scrape_cbb_sports_reference, scrape_cbb_cbssports, # Utilities generate_stadiums_from_teams, ) @@ -222,19 +220,6 @@ def run_pipeline( all_games.extend(nwsl_games) games_by_sport['NWSL'] = len(nwsl_games) - if sport in ['cbb', 'all']: - print_section(f"CBB {season}") - cbb_sources = [ - ScraperSource('ESPN', scrape_cbb_espn, priority=1, min_games=1000), - ScraperSource('Sports-Reference', scrape_cbb_sports_reference, priority=2, min_games=500), - ScraperSource('CBS Sports', scrape_cbb_cbssports, priority=3, min_games=300), - ] - cbb_games = scrape_with_fallback('CBB', season, cbb_sources) - cbb_season = f"{season-1}-{str(season)[2:]}" - cbb_games = assign_stable_ids(cbb_games, 'CBB', cbb_season) - all_games.extend(cbb_games) - games_by_sport['CBB'] = len(cbb_games) - # Export data print_section("Exporting Data") export_to_json(all_games, all_stadiums, output_dir) @@ -499,7 +484,7 @@ Examples: help='Season year (default: 2025)' ) parser.add_argument( - '--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'cbb', 'all'], default='all', + '--sport', choices=['nba', 'mlb', 'nhl', 'nfl', 'wnba', 'mls', 'nwsl', 'all'], default='all', help='Sport to process (default: all)' ) parser.add_argument(