Refactor travel segments and simplify trip options
Travel segment architecture:
- Remove departureTime/arrivalTime from TravelSegment (location-based, not date-based)
- Fix travel sections appearing after destination instead of between cities
- Fix missing travel segments when revisiting same city (consecutive grouping)
- Remove unwanted rest day at end of trip

Planning engine fixes:
- All three planners now group only consecutive games at same stadium
- Visiting A → B → A creates 3 stops with proper travel between

UI simplification:
- Remove redundant sort options (mostDriving/leastDriving, mostCities/leastCities)
- Remove unused "Find Other Sports Along Route" toggle (was dead code)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -4,8 +4,8 @@ Sports Schedule Scraper for SportsTime App
|
||||
Scrapes NBA, MLB, NHL schedules from multiple sources for cross-validation.
|
||||
|
||||
Usage:
|
||||
python scrape_schedules.py --sport nba --season 2025
|
||||
python scrape_schedules.py --sport all --season 2025
|
||||
python scrape_schedules.py --sport nba --season 2026
|
||||
python scrape_schedules.py --sport all --season 2026
|
||||
python scrape_schedules.py --stadiums-only
|
||||
"""
|
||||
|
||||
@@ -435,7 +435,7 @@ def scrape_mlb_statsapi(season: int) -> list[Game]:
|
||||
time_str = None
|
||||
|
||||
game = Game(
|
||||
id=f"mlb_{game_data.get('gamePk', '')}",
|
||||
id='', # Will be assigned by assign_stable_ids
|
||||
sport='MLB',
|
||||
season=str(season),
|
||||
date=game_date,
|
||||
@@ -786,28 +786,34 @@ def generate_stadiums_from_teams() -> list[Stadium]:
|
||||
|
||||
def assign_stable_ids(games: list[Game], sport: str, season: str) -> list[Game]:
|
||||
"""
|
||||
Assign stable IDs based on matchup + occurrence number within season.
|
||||
Format: {sport}_{season}_{away}_{home}_{num}
|
||||
Assign IDs based on matchup + date.
|
||||
Format: {sport}_{season}_{away}_{home}_{MMDD} (or {MMDD}_2 for doubleheaders)
|
||||
|
||||
This ensures IDs don't change when games are rescheduled.
|
||||
When games are rescheduled, the old ID becomes orphaned and a new one is created.
|
||||
Use --delete-all before import to clean up orphaned records.
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
# Group games by matchup (away @ home)
|
||||
matchups = defaultdict(list)
|
||||
for game in games:
|
||||
key = f"{game.away_team_abbrev}_{game.home_team_abbrev}"
|
||||
matchups[key].append(game)
|
||||
season_str = season.replace('-', '')
|
||||
|
||||
# Sort each matchup by date and assign occurrence number
|
||||
for key, matchup_games in matchups.items():
|
||||
matchup_games.sort(key=lambda g: g.date)
|
||||
for i, game in enumerate(matchup_games, 1):
|
||||
away = game.away_team_abbrev.lower()
|
||||
home = game.home_team_abbrev.lower()
|
||||
# Normalize season format by stripping hyphens (e.g., "2024-25" -> "202425", "2025" -> "2025")
|
||||
season_str = season.replace('-', '')
|
||||
game.id = f"{sport.lower()}_{season_str}_{away}_{home}_{i}"
|
||||
# Track how many times we've seen each base ID (for doubleheaders)
|
||||
id_counts = defaultdict(int)
|
||||
|
||||
for game in games:
|
||||
away = game.away_team_abbrev.lower()
|
||||
home = game.home_team_abbrev.lower()
|
||||
# Extract MMDD from date (YYYY-MM-DD)
|
||||
date_parts = game.date.split('-')
|
||||
mmdd = f"{date_parts[1]}{date_parts[2]}" if len(date_parts) == 3 else "0000"
|
||||
|
||||
base_id = f"{sport.lower()}_{season_str}_{away}_{home}_{mmdd}"
|
||||
id_counts[base_id] += 1
|
||||
|
||||
# Add suffix for doubleheaders (game 2+)
|
||||
if id_counts[base_id] > 1:
|
||||
game.id = f"{base_id}_{id_counts[base_id]}"
|
||||
else:
|
||||
game.id = base_id
|
||||
|
||||
return games
|
||||
|
||||
@@ -892,7 +898,7 @@ def export_to_json(games: list[Game], stadiums: list[Stadium], output_dir: Path)
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Scrape sports schedules')
|
||||
parser.add_argument('--sport', choices=['nba', 'mlb', 'nhl', 'all'], default='all')
|
||||
parser.add_argument('--season', type=int, default=2025, help='Season year (ending year)')
|
||||
parser.add_argument('--season', type=int, default=2026, help='Season year (ending year)')
|
||||
parser.add_argument('--stadiums-only', action='store_true', help='Only scrape stadium data')
|
||||
parser.add_argument('--output', type=str, default='./data', help='Output directory')
|
||||
|
||||
@@ -931,7 +937,7 @@ def main():
|
||||
print("="*60)
|
||||
|
||||
mlb_games_api = scrape_mlb_statsapi(args.season)
|
||||
# MLB API uses official gamePk which is already stable - no reassignment needed
|
||||
mlb_games_api = assign_stable_ids(mlb_games_api, 'MLB', str(args.season))
|
||||
all_games.extend(mlb_games_api)
|
||||
|
||||
if args.sport in ['nhl', 'all']:
|
||||
|
||||
Reference in New Issue
Block a user