From 63fb06c41ad20d52bc961cab45f07d04a760214d Mon Sep 17 00:00:00 2001 From: Trey t Date: Sat, 10 Jan 2026 10:52:13 -0600 Subject: [PATCH] fix: update pipeline imports to use sport modules After Phase 1 refactoring moved scraper functions to sport-specific modules (nba.py, mlb.py, etc.), these pipeline scripts still imported from scrape_schedules.py. - run_pipeline.py: import from core.py and sport modules - validate_data.py: import from core.py and sport modules - run_canonicalization_pipeline.py: import from core.py and sport modules Co-Authored-By: Claude Opus 4.5 --- Scripts/run_canonicalization_pipeline.py | 25 ++++++++++++------------ Scripts/run_pipeline.py | 25 ++++++++++++------------ Scripts/validate_data.py | 22 +++++++++++---------- 3 files changed, 38 insertions(+), 34 deletions(-) diff --git a/Scripts/run_canonicalization_pipeline.py b/Scripts/run_canonicalization_pipeline.py index d35711f..19bb6fd 100644 --- a/Scripts/run_canonicalization_pipeline.py +++ b/Scripts/run_canonicalization_pipeline.py @@ -29,17 +29,20 @@ from datetime import datetime from pathlib import Path from dataclasses import dataclass, asdict -# Import pipeline components -from scrape_schedules import ( +# Import from core module +from core import ( ScraperSource, scrape_with_fallback, - # NBA sources - scrape_nba_basketball_reference, scrape_nba_espn, scrape_nba_cbssports, - # MLB sources - scrape_mlb_statsapi, scrape_mlb_baseball_reference, scrape_mlb_espn, - # NHL sources - scrape_nhl_hockey_reference, scrape_nhl_espn, scrape_nhl_api, - # NFL sources - scrape_nfl_espn, scrape_nfl_pro_football_reference, scrape_nfl_cbssports, + assign_stable_ids, export_to_json, +) + +# Import from sport modules +from nba import scrape_nba_basketball_reference, scrape_nba_espn, scrape_nba_cbssports +from mlb import scrape_mlb_statsapi, scrape_mlb_baseball_reference, scrape_mlb_espn +from nhl import scrape_nhl_hockey_reference, scrape_nhl_espn, scrape_nhl_api +from nfl import scrape_nfl_espn, scrape_nfl_pro_football_reference, scrape_nfl_cbssports + +# Import secondary sports from scrape_schedules (stubs) +from scrape_schedules import ( # WNBA sources scrape_wnba_espn, scrape_wnba_basketball_reference, scrape_wnba_cbssports, # MLS sources @@ -50,8 +53,6 @@ from scrape_schedules import ( scrape_cbb_espn, scrape_cbb_sports_reference, scrape_cbb_cbssports, # Utilities generate_stadiums_from_teams, - assign_stable_ids, - export_to_json, ) from canonicalize_stadiums import ( canonicalize_stadiums, diff --git a/Scripts/run_pipeline.py b/Scripts/run_pipeline.py index d8ea178..a381979 100755 --- a/Scripts/run_pipeline.py +++ b/Scripts/run_pipeline.py @@ -21,17 +21,20 @@ from dataclasses import dataclass from typing import Optional from enum import Enum -# Import our modules -from scrape_schedules import ( +# Import from core module +from core import ( Game, Stadium, ScraperSource, scrape_with_fallback, - # NBA sources - scrape_nba_basketball_reference, scrape_nba_espn, scrape_nba_cbssports, - # MLB sources - scrape_mlb_statsapi, scrape_mlb_baseball_reference, scrape_mlb_espn, - # NHL sources - scrape_nhl_hockey_reference, scrape_nhl_espn, scrape_nhl_api, - # NFL sources - scrape_nfl_espn, scrape_nfl_pro_football_reference, scrape_nfl_cbssports, + assign_stable_ids, export_to_json, +) + +# Import from sport modules +from nba import scrape_nba_basketball_reference, scrape_nba_espn, scrape_nba_cbssports +from mlb import scrape_mlb_statsapi, scrape_mlb_baseball_reference, scrape_mlb_espn +from nhl import scrape_nhl_hockey_reference, scrape_nhl_espn, scrape_nhl_api +from nfl import scrape_nfl_espn, scrape_nfl_pro_football_reference, scrape_nfl_cbssports + +# Import secondary sports from scrape_schedules (stubs) +from scrape_schedules import ( # WNBA sources scrape_wnba_espn, scrape_wnba_basketball_reference, scrape_wnba_cbssports, # MLS sources @@ -42,8 +45,6 @@ from scrape_schedules import ( scrape_cbb_espn, scrape_cbb_sports_reference, scrape_cbb_cbssports, # Utilities generate_stadiums_from_teams, - export_to_json, - assign_stable_ids, ) from validate_data import ( validate_games, diff --git a/Scripts/validate_data.py b/Scripts/validate_data.py index 9e04e00..adb4514 100644 --- a/Scripts/validate_data.py +++ b/Scripts/validate_data.py @@ -16,17 +16,19 @@ from dataclasses import dataclass, asdict, field from typing import Optional from collections import defaultdict -# Import scrapers from main script +# Import from core module +from core import Game, Stadium, assign_stable_ids + +# Import from sport modules +from nba import scrape_nba_basketball_reference, NBA_TEAMS +from mlb import scrape_mlb_statsapi, scrape_mlb_baseball_reference, MLB_TEAMS +from nhl import scrape_nhl_hockey_reference, NHL_TEAMS +from nfl import scrape_nfl_espn, NFL_TEAMS + +# Import secondary sports from scrape_schedules (stubs) from scrape_schedules import ( - Game, Stadium, - scrape_nba_basketball_reference, - scrape_mlb_statsapi, scrape_mlb_baseball_reference, - scrape_nhl_hockey_reference, - scrape_wnba_espn, scrape_mls_espn, scrape_nwsl_espn, - scrape_nfl_espn, scrape_cbb_espn, - NBA_TEAMS, MLB_TEAMS, NHL_TEAMS, WNBA_TEAMS, MLS_TEAMS, NWSL_TEAMS, - NFL_TEAMS, - assign_stable_ids, + scrape_wnba_espn, scrape_mls_espn, scrape_nwsl_espn, scrape_cbb_espn, + WNBA_TEAMS, MLS_TEAMS, NWSL_TEAMS, )