feat(scripts): complete data pipeline remediation
Scripts changes: - Add WNBA abbreviation aliases to team_resolver.py - Fix NHL stadium coordinates in stadium_resolver.py - Add validate_aliases.py script for orphan detection - Update scrapers with improved error handling - Add DATA_AUDIT.md and REMEDIATION_PLAN.md documentation - Update alias JSON files with new mappings iOS bundle updates: - Update games_canonical.json with latest scraped data - Update teams_canonical.json and stadiums_canonical.json - Sync alias files with Scripts versions All 5 remediation phases complete. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -79,6 +79,8 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = {
|
||||
"stadium_nba_scotiabank_arena": StadiumInfo("stadium_nba_scotiabank_arena", "Scotiabank Arena", "Toronto", "ON", "Canada", "nba", 43.6435, -79.3791, "America/Toronto"),
|
||||
"stadium_nba_delta_center": StadiumInfo("stadium_nba_delta_center", "Delta Center", "Salt Lake City", "UT", "USA", "nba", 40.7683, -111.9011, "America/Denver"),
|
||||
"stadium_nba_capital_one_arena": StadiumInfo("stadium_nba_capital_one_arena", "Capital One Arena", "Washington", "DC", "USA", "nba", 38.8981, -77.0209),
|
||||
# International venues
|
||||
"stadium_nba_mexico_city_arena": StadiumInfo("stadium_nba_mexico_city_arena", "Mexico City Arena", "Mexico City", "CDMX", "Mexico", "nba", 19.4042, -99.0970, "America/Mexico_City"),
|
||||
},
|
||||
"mlb": {
|
||||
"stadium_mlb_chase_field": StadiumInfo("stadium_mlb_chase_field", "Chase Field", "Phoenix", "AZ", "USA", "mlb", 33.4455, -112.0667),
|
||||
@@ -215,7 +217,7 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = {
|
||||
"stadium_mls_soldier_field": StadiumInfo("stadium_mls_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "mls", 41.8623, -87.6167),
|
||||
"stadium_mls_tql_stadium": StadiumInfo("stadium_mls_tql_stadium", "TQL Stadium", "Cincinnati", "OH", "USA", "mls", 39.1112, -84.5225),
|
||||
"stadium_mls_dicks_sporting_goods_park": StadiumInfo("stadium_mls_dicks_sporting_goods_park", "Dick's Sporting Goods Park", "Commerce City", "CO", "USA", "mls", 39.8056, -104.8922),
|
||||
"stadium_mls_lower_com_field": StadiumInfo("stadium_mls_lower_com_field", "Lower.com Field", "Columbus", "OH", "USA", "mls", 39.9689, -83.0173),
|
||||
"stadium_mls_lowercom_field": StadiumInfo("stadium_mls_lowercom_field", "Lower.com Field", "Columbus", "OH", "USA", "mls", 39.9689, -83.0173),
|
||||
"stadium_mls_toyota_stadium": StadiumInfo("stadium_mls_toyota_stadium", "Toyota Stadium", "Frisco", "TX", "USA", "mls", 33.1545, -96.8353),
|
||||
"stadium_mls_audi_field": StadiumInfo("stadium_mls_audi_field", "Audi Field", "Washington", "DC", "USA", "mls", 38.8687, -77.0128),
|
||||
"stadium_mls_shell_energy_stadium": StadiumInfo("stadium_mls_shell_energy_stadium", "Shell Energy Stadium", "Houston", "TX", "USA", "mls", 29.7522, -95.3527),
|
||||
@@ -228,7 +230,7 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = {
|
||||
"stadium_mls_gillette_stadium": StadiumInfo("stadium_mls_gillette_stadium", "Gillette Stadium", "Foxborough", "MA", "USA", "mls", 42.0909, -71.2643),
|
||||
"stadium_mls_yankee_stadium": StadiumInfo("stadium_mls_yankee_stadium", "Yankee Stadium", "Bronx", "NY", "USA", "mls", 40.8296, -73.9262),
|
||||
"stadium_mls_red_bull_arena": StadiumInfo("stadium_mls_red_bull_arena", "Red Bull Arena", "Harrison", "NJ", "USA", "mls", 40.7369, -74.1503),
|
||||
"stadium_mls_inter_co_stadium": StadiumInfo("stadium_mls_inter_co_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "mls", 28.5411, -81.3895),
|
||||
"stadium_mls_interco_stadium": StadiumInfo("stadium_mls_interco_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "mls", 28.5411, -81.3895),
|
||||
"stadium_mls_subaru_park": StadiumInfo("stadium_mls_subaru_park", "Subaru Park", "Chester", "PA", "USA", "mls", 39.8328, -75.3789),
|
||||
"stadium_mls_providence_park": StadiumInfo("stadium_mls_providence_park", "Providence Park", "Portland", "OR", "USA", "mls", 45.5216, -122.6917),
|
||||
"stadium_mls_america_first_field": StadiumInfo("stadium_mls_america_first_field", "America First Field", "Sandy", "UT", "USA", "mls", 40.5830, -111.8933),
|
||||
@@ -254,6 +256,10 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = {
|
||||
"stadium_wnba_footprint_center": StadiumInfo("stadium_wnba_footprint_center", "Footprint Center", "Phoenix", "AZ", "USA", "wnba", 33.4457, -112.0712),
|
||||
"stadium_wnba_climate_pledge_arena": StadiumInfo("stadium_wnba_climate_pledge_arena", "Climate Pledge Arena", "Seattle", "WA", "USA", "wnba", 47.6221, -122.3540),
|
||||
"stadium_wnba_entertainment_sports_arena": StadiumInfo("stadium_wnba_entertainment_sports_arena", "Entertainment & Sports Arena", "Washington", "DC", "USA", "wnba", 38.8690, -76.9745),
|
||||
"stadium_wnba_state_farm_arena": StadiumInfo("stadium_wnba_state_farm_arena", "State Farm Arena", "Atlanta", "GA", "USA", "wnba", 33.7573, -84.3963),
|
||||
"stadium_wnba_rocket_mortgage_fieldhouse": StadiumInfo("stadium_wnba_rocket_mortgage_fieldhouse", "Rocket Mortgage FieldHouse", "Cleveland", "OH", "USA", "wnba", 41.4965, -81.6882),
|
||||
"stadium_wnba_cfg_bank_arena": StadiumInfo("stadium_wnba_cfg_bank_arena", "CFG Bank Arena", "Baltimore", "MD", "USA", "wnba", 39.2825, -76.6220),
|
||||
"stadium_wnba_purcell_pavilion": StadiumInfo("stadium_wnba_purcell_pavilion", "Purcell Pavilion", "Notre Dame", "IN", "USA", "wnba", 41.6987, -86.2340),
|
||||
},
|
||||
"nwsl": {
|
||||
"stadium_nwsl_bmo_stadium": StadiumInfo("stadium_nwsl_bmo_stadium", "BMO Stadium", "Los Angeles", "CA", "USA", "nwsl", 34.0128, -118.2841),
|
||||
@@ -262,7 +268,7 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = {
|
||||
"stadium_nwsl_cpkc_stadium": StadiumInfo("stadium_nwsl_cpkc_stadium", "CPKC Stadium", "Kansas City", "MO", "USA", "nwsl", 39.1050, -94.5580),
|
||||
"stadium_nwsl_red_bull_arena": StadiumInfo("stadium_nwsl_red_bull_arena", "Red Bull Arena", "Harrison", "NJ", "USA", "nwsl", 40.7369, -74.1503),
|
||||
"stadium_nwsl_wakemed_soccer_park": StadiumInfo("stadium_nwsl_wakemed_soccer_park", "WakeMed Soccer Park", "Cary", "NC", "USA", "nwsl", 35.7879, -78.7806),
|
||||
"stadium_nwsl_inter_co_stadium": StadiumInfo("stadium_nwsl_inter_co_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "nwsl", 28.5411, -81.3895),
|
||||
"stadium_nwsl_interco_stadium": StadiumInfo("stadium_nwsl_interco_stadium", "Inter&Co Stadium", "Orlando", "FL", "USA", "nwsl", 28.5411, -81.3895),
|
||||
"stadium_nwsl_providence_park": StadiumInfo("stadium_nwsl_providence_park", "Providence Park", "Portland", "OR", "USA", "nwsl", 45.5216, -122.6917),
|
||||
"stadium_nwsl_lynn_family_stadium": StadiumInfo("stadium_nwsl_lynn_family_stadium", "Lynn Family Stadium", "Louisville", "KY", "USA", "nwsl", 38.2219, -85.7381),
|
||||
"stadium_nwsl_snapdragon_stadium": StadiumInfo("stadium_nwsl_snapdragon_stadium", "Snapdragon Stadium", "San Diego", "CA", "USA", "nwsl", 32.7837, -117.1225),
|
||||
@@ -277,6 +283,9 @@ STADIUM_MAPPINGS: dict[str, dict[str, StadiumInfo]] = {
|
||||
"stadium_nwsl_empower_field": StadiumInfo("stadium_nwsl_empower_field", "Empower Field at Mile High", "Denver", "CO", "USA", "nwsl", 39.7439, -105.0201, "America/Denver"),
|
||||
"stadium_nwsl_dicks_sporting_goods_park": StadiumInfo("stadium_nwsl_dicks_sporting_goods_park", "Dick's Sporting Goods Park", "Commerce City", "CO", "USA", "nwsl", 39.8056, -104.8922, "America/Denver"),
|
||||
"stadium_nwsl_centennial_stadium": StadiumInfo("stadium_nwsl_centennial_stadium", "Centennial Stadium", "Centennial", "CO", "USA", "nwsl", 39.6000, -104.8800, "America/Denver"),
|
||||
# Shared NFL/MLB venues
|
||||
"stadium_nwsl_soldier_field": StadiumInfo("stadium_nwsl_soldier_field", "Soldier Field", "Chicago", "IL", "USA", "nwsl", 41.8623, -87.6167),
|
||||
"stadium_nwsl_oracle_park": StadiumInfo("stadium_nwsl_oracle_park", "Oracle Park", "San Francisco", "CA", "USA", "nwsl", 37.7786, -122.3893, "America/Los_Angeles"),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -208,7 +208,7 @@ TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str, str]]] = {
|
||||
"CHI": ("team_mls_chi", "Chicago Fire", "Chicago", "stadium_mls_soldier_field"),
|
||||
"CIN": ("team_mls_cin", "FC Cincinnati", "Cincinnati", "stadium_mls_tql_stadium"),
|
||||
"COL": ("team_mls_col", "Colorado Rapids", "Colorado", "stadium_mls_dicks_sporting_goods_park"),
|
||||
"CLB": ("team_mls_clb", "Columbus Crew", "Columbus", "stadium_mls_lower_com_field"),
|
||||
"CLB": ("team_mls_clb", "Columbus Crew", "Columbus", "stadium_mls_lowercom_field"),
|
||||
"DAL": ("team_mls_dal", "FC Dallas", "Dallas", "stadium_mls_toyota_stadium"),
|
||||
"DC": ("team_mls_dc", "D.C. United", "Washington", "stadium_mls_audi_field"),
|
||||
"HOU": ("team_mls_hou", "Houston Dynamo", "Houston", "stadium_mls_shell_energy_stadium"),
|
||||
@@ -222,7 +222,7 @@ TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str, str]]] = {
|
||||
"NYC": ("team_mls_nyc", "New York City FC", "New York", "stadium_mls_yankee_stadium"),
|
||||
"RB": ("team_mls_ny", "New York Red Bulls", "New York", "stadium_mls_red_bull_arena"),
|
||||
"RBNY": ("team_mls_ny", "New York Red Bulls", "New York", "stadium_mls_red_bull_arena"),
|
||||
"ORL": ("team_mls_orl", "Orlando City", "Orlando", "stadium_mls_inter_co_stadium"),
|
||||
"ORL": ("team_mls_orl", "Orlando City", "Orlando", "stadium_mls_interco_stadium"),
|
||||
"PHI": ("team_mls_phi", "Philadelphia Union", "Philadelphia", "stadium_mls_subaru_park"),
|
||||
"POR": ("team_mls_por", "Portland Timbers", "Portland", "stadium_mls_providence_park"),
|
||||
"SLC": ("team_mls_slc", "Real Salt Lake", "Salt Lake", "stadium_mls_america_first_field"),
|
||||
@@ -237,34 +237,64 @@ TEAM_MAPPINGS: dict[str, dict[str, tuple[str, str, str, str]]] = {
|
||||
},
|
||||
"wnba": {
|
||||
"ATL": ("team_wnba_atl", "Atlanta Dream", "Atlanta", "stadium_wnba_gateway_center_arena"),
|
||||
"DREAM": ("team_wnba_atl", "Atlanta Dream", "Atlanta", "stadium_wnba_gateway_center_arena"), # alias
|
||||
"CHI": ("team_wnba_chi", "Chicago Sky", "Chicago", "stadium_wnba_wintrust_arena"),
|
||||
"SKY": ("team_wnba_chi", "Chicago Sky", "Chicago", "stadium_wnba_wintrust_arena"), # alias
|
||||
"CON": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"),
|
||||
"CONN": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"), # alias
|
||||
"SUN": ("team_wnba_con", "Connecticut Sun", "Connecticut", "stadium_wnba_mohegan_sun_arena"), # alias
|
||||
"DAL": ("team_wnba_dal", "Dallas Wings", "Dallas", "stadium_wnba_college_park_center"),
|
||||
"WINGS": ("team_wnba_dal", "Dallas Wings", "Dallas", "stadium_wnba_college_park_center"), # alias
|
||||
"GSV": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"),
|
||||
"GS": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"), # alias
|
||||
"VAL": ("team_wnba_gsv", "Golden State Valkyries", "Golden State", "stadium_wnba_chase_center"), # alias
|
||||
"IND": ("team_wnba_ind", "Indiana Fever", "Indiana", "stadium_wnba_gainbridge_fieldhouse"),
|
||||
"FEVER": ("team_wnba_ind", "Indiana Fever", "Indiana", "stadium_wnba_gainbridge_fieldhouse"), # alias
|
||||
"LV": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"),
|
||||
"LVA": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"), # alias
|
||||
"ACES": ("team_wnba_lv", "Las Vegas Aces", "Las Vegas", "stadium_wnba_michelob_ultra_arena"), # alias
|
||||
"LA": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"),
|
||||
"LAS": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"), # alias
|
||||
"SPARKS": ("team_wnba_la", "Los Angeles Sparks", "Los Angeles", "stadium_wnba_cryptocom_arena"), # alias
|
||||
"MIN": ("team_wnba_min", "Minnesota Lynx", "Minnesota", "stadium_wnba_target_center"),
|
||||
"LYNX": ("team_wnba_min", "Minnesota Lynx", "Minnesota", "stadium_wnba_target_center"), # alias
|
||||
"NY": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"),
|
||||
"NYL": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"), # alias
|
||||
"LIB": ("team_wnba_ny", "New York Liberty", "New York", "stadium_wnba_barclays_center"), # alias
|
||||
"PHX": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"),
|
||||
"PHO": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"), # alias
|
||||
"MERCURY": ("team_wnba_phx", "Phoenix Mercury", "Phoenix", "stadium_wnba_footprint_center"), # alias
|
||||
"SEA": ("team_wnba_sea", "Seattle Storm", "Seattle", "stadium_wnba_climate_pledge_arena"),
|
||||
"STORM": ("team_wnba_sea", "Seattle Storm", "Seattle", "stadium_wnba_climate_pledge_arena"), # alias
|
||||
"WAS": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"),
|
||||
"WSH": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"), # alias
|
||||
"MYSTICS": ("team_wnba_was", "Washington Mystics", "Washington", "stadium_wnba_entertainment_sports_arena"), # alias
|
||||
},
|
||||
"nwsl": {
|
||||
"ANF": ("team_nwsl_anf", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"),
|
||||
# Canonical IDs aligned with teams_canonical.json
|
||||
"ANG": ("team_nwsl_ang", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"),
|
||||
"ANF": ("team_nwsl_ang", "Angel City FC", "Los Angeles", "stadium_nwsl_bmo_stadium"), # alias
|
||||
"CHI": ("team_nwsl_chi", "Chicago Red Stars", "Chicago", "stadium_nwsl_seatgeek_stadium"),
|
||||
"HOU": ("team_nwsl_hou", "Houston Dash", "Houston", "stadium_nwsl_shell_energy_stadium"),
|
||||
"KC": ("team_nwsl_kc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"),
|
||||
"NJ": ("team_nwsl_nj", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"),
|
||||
"NC": ("team_nwsl_nc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"),
|
||||
"ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando", "stadium_nwsl_inter_co_stadium"),
|
||||
"KCC": ("team_nwsl_kcc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"),
|
||||
"KC": ("team_nwsl_kcc", "Kansas City Current", "Kansas City", "stadium_nwsl_cpkc_stadium"), # alias
|
||||
"NJY": ("team_nwsl_njy", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"),
|
||||
"NJ": ("team_nwsl_njy", "NJ/NY Gotham FC", "New Jersey", "stadium_nwsl_red_bull_arena"), # alias
|
||||
"NCC": ("team_nwsl_ncc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"),
|
||||
"NC": ("team_nwsl_ncc", "North Carolina Courage", "North Carolina", "stadium_nwsl_wakemed_soccer_park"), # alias
|
||||
"ORL": ("team_nwsl_orl", "Orlando Pride", "Orlando", "stadium_nwsl_interco_stadium"),
|
||||
"POR": ("team_nwsl_por", "Portland Thorns", "Portland", "stadium_nwsl_providence_park"),
|
||||
"RGN": ("team_nwsl_rgn", "Racing Louisville", "Louisville", "stadium_nwsl_lynn_family_stadium"),
|
||||
"SD": ("team_nwsl_sd", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"),
|
||||
"SDW": ("team_nwsl_sdw", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"),
|
||||
"SD": ("team_nwsl_sdw", "San Diego Wave", "San Diego", "stadium_nwsl_snapdragon_stadium"), # alias
|
||||
"SEA": ("team_nwsl_sea", "Seattle Reign", "Seattle", "stadium_nwsl_lumen_field"),
|
||||
"SLC": ("team_nwsl_slc", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"),
|
||||
"WAS": ("team_nwsl_was", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"),
|
||||
"BFC": ("team_nwsl_bfc", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"),
|
||||
"UTA": ("team_nwsl_uta", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"),
|
||||
"SLC": ("team_nwsl_uta", "Utah Royals", "Utah", "stadium_nwsl_america_first_field"), # alias
|
||||
"WSH": ("team_nwsl_wsh", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"),
|
||||
"WAS": ("team_nwsl_wsh", "Washington Spirit", "Washington", "stadium_nwsl_audi_field"), # alias
|
||||
"BAY": ("team_nwsl_bay", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"),
|
||||
"BFC": ("team_nwsl_bay", "Bay FC", "San Francisco", "stadium_nwsl_paypal_park"), # alias
|
||||
# Expansion teams (2026) - need to be added to teams_canonical.json
|
||||
"BOS": ("team_nwsl_bos", "Boston Legacy FC", "Boston", "stadium_nwsl_gillette_stadium"),
|
||||
"DEN": ("team_nwsl_den", "Denver Summit FC", "Denver", "stadium_nwsl_dicks_sporting_goods_park"),
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user