"""Timezone conversion utilities for normalizing game times to UTC.""" import re from dataclasses import dataclass from datetime import datetime, date, time from typing import Optional from zoneinfo import ZoneInfo from dateutil import parser as dateutil_parser from dateutil.tz import gettz, tzutc from ..models.aliases import ReviewReason, ManualReviewItem # Common timezone abbreviations to IANA timezones TIMEZONE_ABBREV_MAP: dict[str, str] = { # US timezones "ET": "America/New_York", "EST": "America/New_York", "EDT": "America/New_York", "CT": "America/Chicago", "CST": "America/Chicago", "CDT": "America/Chicago", "MT": "America/Denver", "MST": "America/Denver", "MDT": "America/Denver", "PT": "America/Los_Angeles", "PST": "America/Los_Angeles", "PDT": "America/Los_Angeles", "AT": "America/Anchorage", "AKST": "America/Anchorage", "AKDT": "America/Anchorage", "HT": "Pacific/Honolulu", "HST": "Pacific/Honolulu", # Canada "AST": "America/Halifax", "ADT": "America/Halifax", "NST": "America/St_Johns", "NDT": "America/St_Johns", # Mexico "CDST": "America/Mexico_City", # UTC "UTC": "UTC", "GMT": "UTC", "Z": "UTC", } # State/region to timezone mapping for inferring timezone from location STATE_TIMEZONE_MAP: dict[str, str] = { # Eastern "CT": "America/New_York", "DE": "America/New_York", "FL": "America/New_York", # Most of Florida "GA": "America/New_York", "MA": "America/New_York", "MD": "America/New_York", "ME": "America/New_York", "MI": "America/Detroit", "NC": "America/New_York", "NH": "America/New_York", "NJ": "America/New_York", "NY": "America/New_York", "OH": "America/New_York", "PA": "America/New_York", "RI": "America/New_York", "SC": "America/New_York", "VA": "America/New_York", "VT": "America/New_York", "WV": "America/New_York", "DC": "America/New_York", # Central "AL": "America/Chicago", "AR": "America/Chicago", "IA": "America/Chicago", "IL": "America/Chicago", "IN": "America/Indiana/Indianapolis", "KS": "America/Chicago", "KY": "America/Kentucky/Louisville", "LA": "America/Chicago", "MN": "America/Chicago", "MO": "America/Chicago", "MS": "America/Chicago", "ND": "America/Chicago", "NE": "America/Chicago", "OK": "America/Chicago", "SD": "America/Chicago", "TN": "America/Chicago", "TX": "America/Chicago", "WI": "America/Chicago", # Mountain "AZ": "America/Phoenix", # No DST "CO": "America/Denver", "ID": "America/Boise", "MT": "America/Denver", "NM": "America/Denver", "UT": "America/Denver", "WY": "America/Denver", # Pacific "CA": "America/Los_Angeles", "NV": "America/Los_Angeles", "OR": "America/Los_Angeles", "WA": "America/Los_Angeles", # Alaska/Hawaii "AK": "America/Anchorage", "HI": "Pacific/Honolulu", # Canada provinces "ON": "America/Toronto", "QC": "America/Montreal", "BC": "America/Vancouver", "AB": "America/Edmonton", "MB": "America/Winnipeg", "SK": "America/Regina", "NS": "America/Halifax", "NB": "America/Moncton", "NL": "America/St_Johns", "PE": "America/Halifax", } @dataclass class TimezoneResult: """Result of timezone conversion. Attributes: datetime_utc: The datetime converted to UTC source_timezone: The timezone that was detected/used confidence: Confidence in the timezone detection ('high', 'medium', 'low') warning: Warning message if timezone was uncertain """ datetime_utc: datetime source_timezone: str confidence: str warning: Optional[str] = None def detect_timezone_from_string(time_str: str) -> Optional[str]: """Detect timezone from a time string containing a timezone abbreviation. Args: time_str: Time string that may contain timezone info (e.g., '7:00 PM ET') Returns: IANA timezone string if detected, None otherwise """ # Look for timezone abbreviation at end of string for abbrev, tz in TIMEZONE_ABBREV_MAP.items(): pattern = rf"\b{abbrev}\b" if re.search(pattern, time_str, re.IGNORECASE): return tz return None def detect_timezone_from_location( state: Optional[str] = None, city: Optional[str] = None, ) -> Optional[str]: """Detect timezone from location information. Args: state: State/province code (e.g., 'NY', 'ON') city: City name (optional, for special cases) Returns: IANA timezone string if detected, None otherwise """ if state and state.upper() in STATE_TIMEZONE_MAP: return STATE_TIMEZONE_MAP[state.upper()] return None def parse_datetime( date_str: str, time_str: Optional[str] = None, timezone_hint: Optional[str] = None, location_state: Optional[str] = None, ) -> TimezoneResult: """Parse a date/time string and convert to UTC. Attempts to detect timezone from: 1. Explicit timezone in the string 2. Provided timezone hint 3. Location-based inference 4. Default to Eastern Time with warning Args: date_str: Date string (e.g., '2025-10-21', 'October 21, 2025') time_str: Optional time string (e.g., '7:00 PM ET', '19:00') timezone_hint: Optional IANA timezone to use if not detected location_state: Optional state code for timezone inference Returns: TimezoneResult with UTC datetime and metadata """ # Parse the date try: if time_str: # Combine date and time full_str = f"{date_str} {time_str}" else: full_str = date_str parsed = dateutil_parser.parse(full_str, fuzzy=True) except (ValueError, OverflowError) as e: # If parsing fails, return a placeholder with low confidence return TimezoneResult( datetime_utc=datetime.now(tz=ZoneInfo("UTC")), source_timezone="unknown", confidence="low", warning=f"Failed to parse datetime: {e}", ) # Determine timezone detected_tz = None confidence = "high" warning = None # Check if datetime already has timezone if parsed.tzinfo is not None: detected_tz = str(parsed.tzinfo) else: # Try to detect from time string if time_str: detected_tz = detect_timezone_from_string(time_str) # Try timezone hint if not detected_tz and timezone_hint: detected_tz = timezone_hint confidence = "medium" # Try location inference if not detected_tz and location_state: detected_tz = detect_timezone_from_location(state=location_state) confidence = "medium" # Default to Eastern Time if not detected_tz: detected_tz = "America/New_York" confidence = "low" warning = "Timezone not detected, defaulting to Eastern Time" # Apply timezone and convert to UTC try: tz = ZoneInfo(detected_tz) except KeyError: # Invalid timezone, try to resolve abbreviation if detected_tz in TIMEZONE_ABBREV_MAP: tz = ZoneInfo(TIMEZONE_ABBREV_MAP[detected_tz]) detected_tz = TIMEZONE_ABBREV_MAP[detected_tz] else: tz = ZoneInfo("America/New_York") confidence = "low" warning = f"Unknown timezone '{detected_tz}', defaulting to Eastern Time" detected_tz = "America/New_York" # Apply timezone if not already set if parsed.tzinfo is None: parsed = parsed.replace(tzinfo=tz) # Convert to UTC utc_dt = parsed.astimezone(ZoneInfo("UTC")) return TimezoneResult( datetime_utc=utc_dt, source_timezone=detected_tz, confidence=confidence, warning=warning, ) def convert_to_utc( dt: datetime, source_timezone: str, ) -> datetime: """Convert a datetime from a known timezone to UTC. Args: dt: Datetime to convert (timezone-naive or timezone-aware) source_timezone: IANA timezone of the datetime Returns: Datetime in UTC """ tz = ZoneInfo(source_timezone) if dt.tzinfo is None: # Localize naive datetime dt = dt.replace(tzinfo=tz) return dt.astimezone(ZoneInfo("UTC")) def create_timezone_warning( raw_value: str, sport: str, game_date: Optional[date] = None, source_url: Optional[str] = None, ) -> ManualReviewItem: """Create a manual review item for an undetermined timezone. Args: raw_value: The original time string that couldn't be resolved sport: Sport code game_date: Date of the game source_url: URL of the source page Returns: ManualReviewItem for timezone review """ return ManualReviewItem( id=f"tz_{sport}_{raw_value[:20].replace(' ', '_')}", reason=ReviewReason.TIMEZONE_UNKNOWN, sport=sport, raw_value=raw_value, context={"issue": "Could not determine timezone for game time"}, source_url=source_url, game_date=game_date, ) def get_stadium_timezone( stadium_state: str, stadium_timezone: Optional[str] = None, ) -> str: """Get the timezone for a stadium based on its location. Args: stadium_state: State/province code stadium_timezone: Explicit timezone override from stadium data Returns: IANA timezone string """ if stadium_timezone: return stadium_timezone tz = detect_timezone_from_location(state=stadium_state) if tz: return tz # Default to Eastern return "America/New_York"