diff --git a/Scripts/sportstime_parser/normalizers/stadium_resolver.py b/Scripts/sportstime_parser/normalizers/stadium_resolver.py index 51deaf7..9ba3784 100644 --- a/Scripts/sportstime_parser/normalizers/stadium_resolver.py +++ b/Scripts/sportstime_parser/normalizers/stadium_resolver.py @@ -175,6 +175,12 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = { "stadium_nfl_raymond_james_stadium": StadiumInfo("stadium_nfl_raymond_james_stadium", "Raymond James Stadium", "Tampa", "FL", "USA", "nfl", 27.9759, -82.5033), "stadium_nfl_nissan_stadium": StadiumInfo("stadium_nfl_nissan_stadium", "Nissan Stadium", "Nashville", "TN", "USA", "nfl", 36.1665, -86.7713, "America/Chicago"), "stadium_nfl_northwest_stadium": StadiumInfo("stadium_nfl_northwest_stadium", "Northwest Stadium", "Landover", "MD", "USA", "nfl", 38.9076, -76.8645), + # Special and international venues + "stadium_nfl_tom_benson_hall_of_fame_stadium": StadiumInfo("stadium_nfl_tom_benson_hall_of_fame_stadium", "Tom Benson Hall of Fame Stadium", "Canton", "OH", "USA", "nfl", 40.8209, -81.3985), + "stadium_nfl_corinthians_arena": StadiumInfo("stadium_nfl_corinthians_arena", "Corinthians Arena", "São Paulo", "SP", "Brazil", "nfl", -23.5453, -46.4742, "America/Sao_Paulo"), + "stadium_nfl_croke_park": StadiumInfo("stadium_nfl_croke_park", "Croke Park", "Dublin", "", "Ireland", "nfl", 53.3609, -6.2514, "Europe/Dublin"), + "stadium_nfl_olympic_stadium_berlin": StadiumInfo("stadium_nfl_olympic_stadium_berlin", "Olympic Stadium Berlin", "Berlin", "", "Germany", "nfl", 52.5147, 13.2395, "Europe/Berlin"), + "stadium_nfl_santiago_bernabeu": StadiumInfo("stadium_nfl_santiago_bernabeu", "Santiago Bernabéu", "Madrid", "", "Spain", "nfl", 40.4531, -3.6883, "Europe/Madrid"), }, "nhl": { "stadium_nhl_honda_center": StadiumInfo("stadium_nhl_honda_center", "Honda Center", "Anaheim", "CA", "USA", "nhl", 33.8078, -117.8765, "America/Los_Angeles"), @@ -241,6 +247,11 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = { "stadium_mls_citypark": StadiumInfo("stadium_mls_citypark", "CITYPARK", "St. Louis", "MO", "USA", "mls", 38.6316, -90.2106, "America/Chicago"), "stadium_mls_bmo_field": StadiumInfo("stadium_mls_bmo_field", "BMO Field", "Toronto", "ON", "Canada", "mls", 43.6332, -79.4186, "America/Toronto"), "stadium_mls_bc_place": StadiumInfo("stadium_mls_bc_place", "BC Place", "Vancouver", "BC", "Canada", "mls", 49.2768, -123.1118, "America/Vancouver"), + # Alternate and special venues + "stadium_mls_miami_freedom_park": StadiumInfo("stadium_mls_miami_freedom_park", "Miami Freedom Park", "Miami", "FL", "USA", "mls", 25.789, -80.237), + "stadium_mls_citi_field": StadiumInfo("stadium_mls_citi_field", "Citi Field", "New York", "NY", "USA", "mls", 40.7571, -73.8458), + "stadium_mls_los_angeles_memorial_coliseum": StadiumInfo("stadium_mls_los_angeles_memorial_coliseum", "Los Angeles Memorial Coliseum", "Los Angeles", "CA", "USA", "mls", 34.0141, -118.2879, "America/Los_Angeles"), + "stadium_mls_mandt_bank_stadium": StadiumInfo("stadium_mls_mandt_bank_stadium", "M&T Bank Stadium", "Baltimore", "MD", "USA", "mls", 39.2780, -76.6227), }, "wnba": { "stadium_wnba_gateway_center_arena": StadiumInfo("stadium_wnba_gateway_center_arena", "Gateway Center Arena", "College Park", "GA", "USA", "wnba", 33.6510, -84.4474), @@ -286,6 +297,9 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = { # Shared NFL/MLB venues "stadium_nwsl_soldier_field": StadiumInfo("stadium_nwsl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nwsl", 41.8623, -87.6167, "America/Chicago"), "stadium_nwsl_oracle_park": StadiumInfo("stadium_nwsl_oracle_park", "Oracle Park", "San Francisco", "CA", "USA", "nwsl", 37.7786, -122.3893, "America/Los_Angeles"), + # Additional alternate venues + "stadium_nwsl_northwestern_medicine_field": StadiumInfo("stadium_nwsl_northwestern_medicine_field", "Northwestern Medicine Field at Martin Stadium", "Evanston", "IL", "USA", "nwsl", 42.0598, -87.6743, "America/Chicago"), + "stadium_nwsl_one_spokane_stadium": StadiumInfo("stadium_nwsl_one_spokane_stadium", "ONE Spokane Stadium", "Spokane", "WA", "USA", "nwsl", 47.6588, -117.4101, "America/Los_Angeles"), }, } diff --git a/Scripts/sportstime_parser/scrapers/mlb.py b/Scripts/sportstime_parser/scrapers/mlb.py index 5fc9e7a..7592fb7 100644 --- a/Scripts/sportstime_parser/scrapers/mlb.py +++ b/Scripts/sportstime_parser/scrapers/mlb.py @@ -2,6 +2,7 @@ from datetime import datetime, date, timedelta from typing import Optional +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup from .base import BaseScraper, RawGameData, ScrapeResult @@ -28,9 +29,14 @@ class MLBScraper(BaseScraper): """MLB schedule scraper with multi-source fallback. Sources (in priority order): - 1. Baseball-Reference - Most reliable, complete historical data - 2. MLB Stats API - Official MLB data - 3. ESPN API - Backup option + 1. MLB Stats API - Official MLB data (primary) + 2. ESPN API - Backup option + 3. Baseball-Reference - Complete historical data + + Source Timezones: + - mlb_api: UTC - ISO 8601 format with "Z" suffix (gameDate field) + - espn: UTC - ISO 8601 format with "Z" suffix + - baseball_reference: Eastern Time (ET) - times displayed as "7:05 PM ET" """ def __init__(self, season: int, **kwargs): @@ -143,7 +149,22 @@ class MLBScraper(BaseScraper): continue try: - game = self._parse_br_game(elem, current_date, source_url) + # Extract game time from the element if present + # Baseball-Reference may have time in a span or in the text + game_time_for_row = None + time_elem = elem.find("span", class_="game_time") + if time_elem: + time_text = time_elem.get_text(strip=True) + if time_text: + try: + # Parse time like "7:05 PM ET" or "1:10 PM" + # Remove timezone suffix if present + time_clean = time_text.replace(" ET", "").replace(" PT", "").replace(" CT", "").replace(" MT", "").strip() + game_time_for_row = datetime.strptime(time_clean, "%I:%M %p") + except ValueError: + pass + + game = self._parse_br_game(elem, current_date, source_url, game_time_for_row) if game: games.append(game) except Exception as e: @@ -157,6 +178,7 @@ class MLBScraper(BaseScraper): elem, game_date: datetime, source_url: str, + game_time: Optional[datetime] = None, ) -> Optional[RawGameData]: """Parse a single Baseball-Reference game element.""" text = elem.get_text(" ", strip=True) @@ -206,8 +228,17 @@ class MLBScraper(BaseScraper): # Third link might be stadium stadium = links[2].get_text(strip=True) + # Combine date and time if time was provided, with ET timezone (Baseball-Reference uses ET) + final_game_date = game_date + if game_time: + final_game_date = game_date.replace( + hour=game_time.hour, + minute=game_time.minute, + tzinfo=ZoneInfo("America/New_York"), + ) + return RawGameData( - game_date=game_date, + game_date=final_game_date, home_team_raw=home_team, away_team_raw=away_team, stadium_raw=stadium, diff --git a/Scripts/sportstime_parser/scrapers/mls.py b/Scripts/sportstime_parser/scrapers/mls.py index 3ad7d7a..82f5bab 100644 --- a/Scripts/sportstime_parser/scrapers/mls.py +++ b/Scripts/sportstime_parser/scrapers/mls.py @@ -27,7 +27,11 @@ class MLSScraper(BaseScraper): Sources (in priority order): 1. ESPN API - Most reliable for MLS - 2. FBref - Backup option + 2. FBref - Backup option (not implemented) + + Source Timezones: + - espn: UTC - ISO 8601 format with "Z" suffix + - fbref: Not implemented """ def __init__(self, season: int, **kwargs): diff --git a/Scripts/sportstime_parser/scrapers/nba.py b/Scripts/sportstime_parser/scrapers/nba.py index 575260b..3ed7dfc 100644 --- a/Scripts/sportstime_parser/scrapers/nba.py +++ b/Scripts/sportstime_parser/scrapers/nba.py @@ -2,6 +2,7 @@ from datetime import datetime, date, timezone from typing import Optional +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup import re @@ -45,7 +46,12 @@ class NBAScraper(BaseScraper): Sources (in priority order): 1. Basketball-Reference - Most reliable, complete historical data 2. ESPN API - Good for current/future seasons - 3. CBS Sports - Backup option + 3. CBS Sports - Backup option (not implemented) + + Source Timezones: + - basketball_reference: Eastern Time (ET) - times displayed as "7:30p" + - espn: UTC - ISO 8601 format with "Z" suffix + - cbs: Not implemented """ def __init__(self, season: int, **kwargs): @@ -196,6 +202,25 @@ class NBAScraper(BaseScraper): self._logger.debug(f"Could not parse date: {date_text}") return None + # Get game start time (format: "7:30p" or "10:00p") - times are in ET + time_cell = row.find("td", {"data-stat": "game_start_time"}) + if time_cell: + time_text = time_cell.get_text(strip=True) + if time_text: + try: + # Parse time like "7:30p" or "10:00p" + # Normalize: "7:30p" -> "7:30 PM", "10:00p" -> "10:00 PM" + time_normalized = time_text.replace("p", " PM").replace("a", " AM") + game_time = datetime.strptime(time_normalized, "%I:%M %p") + # Combine date and time with ET timezone (Basketball-Reference uses ET) + game_date = game_date.replace( + hour=game_time.hour, + minute=game_time.minute, + tzinfo=ZoneInfo("America/New_York"), + ) + except ValueError: + self._logger.debug(f"Could not parse time: {time_text}, using midnight") + # Get teams away_cell = row.find("td", {"data-stat": "visitor_team_name"}) home_cell = row.find("td", {"data-stat": "home_team_name"}) diff --git a/Scripts/sportstime_parser/scrapers/nfl.py b/Scripts/sportstime_parser/scrapers/nfl.py index c0bf406..6222b07 100644 --- a/Scripts/sportstime_parser/scrapers/nfl.py +++ b/Scripts/sportstime_parser/scrapers/nfl.py @@ -2,6 +2,7 @@ from datetime import datetime, date from typing import Optional +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup from .base import BaseScraper, RawGameData, ScrapeResult @@ -33,7 +34,12 @@ class NFLScraper(BaseScraper): Sources (in priority order): 1. ESPN API - Most reliable for NFL 2. Pro-Football-Reference - Complete historical data - 3. CBS Sports - Backup option + 3. CBS Sports - Backup option (not implemented) + + Source Timezones: + - espn: UTC - ISO 8601 format with "Z" suffix + - pro_football_reference: Eastern Time (ET) - times displayed as "8:20PM" + - cbs: Not implemented """ def __init__(self, season: int, **kwargs): @@ -323,6 +329,25 @@ class NFLScraper(BaseScraper): except ValueError: return None + # Get game start time (format: "8:20PM" or "1:00PM") - times are in ET + time_cell = row.find("td", {"data-stat": "gametime"}) + if time_cell: + time_text = time_cell.get_text(strip=True) + if time_text: + try: + # Parse time like "8:20PM" or "1:00PM" + # Normalize: "8:20PM" -> "8:20 PM" + time_normalized = time_text.replace("PM", " PM").replace("AM", " AM") + game_time = datetime.strptime(time_normalized, "%I:%M %p") + # Combine date and time with ET timezone (Pro-Football-Reference uses ET) + game_date = game_date.replace( + hour=game_time.hour, + minute=game_time.minute, + tzinfo=ZoneInfo("America/New_York"), + ) + except ValueError: + self._logger.debug(f"Could not parse time: {time_text}, using midnight") + # Get teams winner_cell = row.find("td", {"data-stat": "winner"}) loser_cell = row.find("td", {"data-stat": "loser"}) diff --git a/Scripts/sportstime_parser/scrapers/nhl.py b/Scripts/sportstime_parser/scrapers/nhl.py index c6b513d..2388515 100644 --- a/Scripts/sportstime_parser/scrapers/nhl.py +++ b/Scripts/sportstime_parser/scrapers/nhl.py @@ -2,6 +2,7 @@ from datetime import datetime, date from typing import Optional +from zoneinfo import ZoneInfo from bs4 import BeautifulSoup from .base import BaseScraper, RawGameData, ScrapeResult @@ -40,6 +41,11 @@ class NHLScraper(BaseScraper): 1. Hockey-Reference - Most reliable for NHL 2. NHL API - Official NHL data 3. ESPN API - Backup option + + Source Timezones: + - hockey_reference: Eastern Time (ET) - times displayed as "7:00p" + - nhl_api: UTC - ISO 8601 format with "Z" suffix (startTimeUTC field) + - espn: UTC - ISO 8601 format with "Z" suffix """ def __init__(self, season: int, **kwargs): @@ -158,6 +164,25 @@ class NHLScraper(BaseScraper): except ValueError: return None + # Get game start time (format: "7:00p" or "10:30p") - times are in ET + time_cell = row.find("td", {"data-stat": "time_game"}) + if time_cell: + time_text = time_cell.get_text(strip=True) + if time_text: + try: + # Parse time like "7:00p" or "10:30p" + # Normalize: "7:00p" -> "7:00 PM", "10:30p" -> "10:30 PM" + time_normalized = time_text.replace("p", " PM").replace("a", " AM") + game_time = datetime.strptime(time_normalized, "%I:%M %p") + # Combine date and time with ET timezone (Hockey-Reference uses ET) + game_date = game_date.replace( + hour=game_time.hour, + minute=game_time.minute, + tzinfo=ZoneInfo("America/New_York"), + ) + except ValueError: + self._logger.debug(f"Could not parse time: {time_text}, using midnight") + # Get teams visitor_cell = row.find("td", {"data-stat": "visitor_team_name"}) home_cell = row.find("td", {"data-stat": "home_team_name"}) diff --git a/Scripts/sportstime_parser/scrapers/nwsl.py b/Scripts/sportstime_parser/scrapers/nwsl.py index 3b570e0..3405fdb 100644 --- a/Scripts/sportstime_parser/scrapers/nwsl.py +++ b/Scripts/sportstime_parser/scrapers/nwsl.py @@ -27,7 +27,9 @@ class NWSLScraper(BaseScraper): Sources (in priority order): 1. ESPN API - Most reliable for NWSL - 2. NWSL official (via ESPN) - Backup option + + Source Timezones: + - espn: UTC - ISO 8601 format with "Z" suffix """ def __init__(self, season: int, **kwargs): diff --git a/Scripts/sportstime_parser/scrapers/wnba.py b/Scripts/sportstime_parser/scrapers/wnba.py index 55f1caa..ad32099 100644 --- a/Scripts/sportstime_parser/scrapers/wnba.py +++ b/Scripts/sportstime_parser/scrapers/wnba.py @@ -27,7 +27,9 @@ class WNBAScraper(BaseScraper): Sources (in priority order): 1. ESPN API - Most reliable for WNBA - 2. WNBA official (via ESPN) - Backup option + + Source Timezones: + - espn: UTC - ISO 8601 format with "Z" suffix """ def __init__(self, season: int, **kwargs): diff --git a/Scripts/stadium_aliases.json b/Scripts/stadium_aliases.json index 1401b37..2355caa 100644 --- a/Scripts/stadium_aliases.json +++ b/Scripts/stadium_aliases.json @@ -2032,5 +2032,89 @@ "stadium_canonical_id": "stadium_wnba_purcell_pavilion", "valid_from": null, "valid_until": null + }, + { + "alias_name": "inter miami", + "stadium_canonical_id": "stadium_mls_chase_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "inter miami cf", + "stadium_canonical_id": "stadium_mls_chase_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "miami", + "stadium_canonical_id": "stadium_mls_chase_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "mia", + "stadium_canonical_id": "stadium_mls_chase_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "nycfc", + "stadium_canonical_id": "stadium_mls_yankee_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "nyc", + "stadium_canonical_id": "stadium_mls_yankee_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "new york city fc", + "stadium_canonical_id": "stadium_mls_yankee_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "lafc", + "stadium_canonical_id": "stadium_mls_bmo_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "los angeles fc", + "stadium_canonical_id": "stadium_mls_bmo_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "dc united", + "stadium_canonical_id": "stadium_mls_audi_field", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "d.c. united", + "stadium_canonical_id": "stadium_mls_audi_field", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "chicago red stars", + "stadium_canonical_id": "stadium_nwsl_seatgeek_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "chi", + "stadium_canonical_id": "stadium_nwsl_seatgeek_stadium", + "valid_from": null, + "valid_until": null + }, + { + "alias_name": "chicago", + "stadium_canonical_id": "stadium_nwsl_seatgeek_stadium", + "valid_from": null, + "valid_until": null } ] \ No newline at end of file diff --git a/SportsTime/Core/Services/CanonicalSyncService.swift b/SportsTime/Core/Services/CanonicalSyncService.swift index 33a9d58..bedbde0 100644 --- a/SportsTime/Core/Services/CanonicalSyncService.swift +++ b/SportsTime/Core/Services/CanonicalSyncService.swift @@ -582,6 +582,7 @@ actor CanonicalSyncService { existing.yearOpened = remote.yearOpened existing.imageURL = remote.imageURL?.absoluteString existing.sport = remote.sport.rawValue + existing.timezoneIdentifier = remote.timeZoneIdentifier existing.source = .cloudKit existing.lastModified = Date() @@ -607,7 +608,8 @@ actor CanonicalSyncService { capacity: remote.capacity, yearOpened: remote.yearOpened, imageURL: remote.imageURL?.absoluteString, - sport: remote.sport.rawValue + sport: remote.sport.rawValue, + timezoneIdentifier: remote.timeZoneIdentifier ) context.insert(canonical) return .applied diff --git a/SportsTime/Features/Schedule/ViewModels/ScheduleViewModel.swift b/SportsTime/Features/Schedule/ViewModels/ScheduleViewModel.swift index a55fe9b..e1ca1e7 100644 --- a/SportsTime/Features/Schedule/ViewModels/ScheduleViewModel.swift +++ b/SportsTime/Features/Schedule/ViewModels/ScheduleViewModel.swift @@ -120,7 +120,7 @@ final class ScheduleViewModel { let nbaGames = games.filter { $0.game.sport == .nba } print("🏀 [DEBUG] All NBA games in schedule (\(nbaGames.count) total):") for game in nbaGames.sorted(by: { $0.game.dateTime < $1.game.dateTime }) { - let dateStr = game.game.dateTime.formatted(date: .abbreviated, time: .shortened) + let dateStr = game.game.dateTime.gameDateTimeString(in: game.stadium.timeZone) print("🏀 \(dateStr): \(game.awayTeam.name) @ \(game.homeTeam.name) (\(game.game.id))") }