diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index 664dd90..90dddab 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -59,11 +59,12 @@ Plans: ### Phase 3: Alias Systems **Goal**: Implement alias systems for both stadiums and teams to handle name variations across data sources **Depends on**: Phase 2.1 -**Research**: Unlikely (internal mapping logic) -**Plans**: TBD +**Research**: No (internal mapping logic) +**Plans**: 2 plans Plans: -- [ ] 03-01: TBD +- [ ] 03-01: Add NFL to canonicalization pipeline with aliases +- [ ] 03-02: Add MLS, WNBA, NWSL to canonicalization pipeline with aliases ### Phase 4: Canonical Linking **Goal**: Ensure every game correctly links to its home/away teams and stadium via canonical IDs diff --git a/.planning/phases/03-alias-systems/03-01-PLAN.md b/.planning/phases/03-alias-systems/03-01-PLAN.md new file mode 100644 index 0000000..d7539f3 --- /dev/null +++ b/.planning/phases/03-alias-systems/03-01-PLAN.md @@ -0,0 +1,191 @@ +--- +phase: 03-alias-systems +plan: 01 +type: execute +--- + + +Add NFL to the canonicalization pipeline with complete alias support. + +Purpose: NFL is a core sport but missing from team/game canonicalization, breaking game→team→stadium linking for NFL games. +Output: NFL teams canonicalized with division structure, NFL abbreviation aliases for game resolution, NFL stadium historical aliases. 
+ + + +~/.claude/get-shit-done/workflows/execute-phase.md +~/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/phases/01-script-architecture/01-03-SUMMARY.md + +# Key source files: +@Scripts/canonicalize_teams.py +@Scripts/canonicalize_games.py +@Scripts/canonicalize_stadiums.py +@Scripts/nfl.py + +**Prior decisions:** +- NFL uses cross-calendar-year season format (2025-26) like NBA/NHL +- Each sport module exports {SPORT}_TEAMS dict with team mappings +- canonicalize_teams.py uses division mappings for conference/division assignment + +**Patterns established:** +- Team canonicalization: import {SPORT}_TEAMS, add {SPORT}_DIVISIONS dict, include in sport_mappings list +- Game resolution: TEAM_ABBREV_ALIASES dict maps alternate abbrevs to canonical team IDs +- Stadium aliases: HISTORICAL_STADIUM_ALIASES dict maps canonical_id to list of historical names with dates + + + + + + Task 1: Add NFL to canonicalize_teams.py + Scripts/canonicalize_teams.py + + 1. Add NFL_TEAMS to import statement: `from scrape_schedules import NBA_TEAMS, MLB_TEAMS, NHL_TEAMS, NFL_TEAMS` + 2. Add NFL_DIVISIONS dict with all 32 teams mapped to (conference_id, division_id): + - AFC East: BUF, MIA, NE, NYJ → ('nfl_afc', 'nfl_afc_east') + - AFC North: BAL, CIN, CLE, PIT → ('nfl_afc', 'nfl_afc_north') + - AFC South: HOU, IND, JAX, TEN → ('nfl_afc', 'nfl_afc_south') + - AFC West: DEN, KC, LV, LAC → ('nfl_afc', 'nfl_afc_west') + - NFC East: DAL, NYG, PHI, WAS → ('nfl_nfc', 'nfl_nfc_east') + - NFC North: CHI, DET, GB, MIN → ('nfl_nfc', 'nfl_nfc_north') + - NFC South: ATL, CAR, NO, TB → ('nfl_nfc', 'nfl_nfc_south') + - NFC West: ARI, LAR, SF, SEA → ('nfl_nfc', 'nfl_nfc_west') + 3. Add ('NFL', NFL_TEAMS) to sport_mappings list in canonicalize_all_teams() + 4. Update canonicalize_teams() to use 'stadium' key for NFL (same as MLB, not 'arena') + 5. 
Add NFL_DIVISIONS to division_map dict + + Note: NFL_TEAMS uses the 'stadium' key (like MLB), not 'arena' (like NBA/NHL). If the existing arena_key logic already selects 'stadium' for MLB, step 4 only needs to extend that check to cover NFL. + + python Scripts/canonicalize_teams.py --verbose 2>&1 | grep -E "NFL:|Created.*teams" + NFL teams appear in output with 32 teams, no warnings about stadium matches + + + + Task 2: Add NFL team abbreviation aliases to canonicalize_games.py + Scripts/canonicalize_games.py + + Add NFL entries to TEAM_ABBREV_ALIASES dict for common abbreviation variations: + + ```python + # NFL + ('NFL', 'JAC'): 'team_nfl_jax', # Jacksonville (JAC vs JAX) + ('NFL', 'OAK'): 'team_nfl_lv', # Oakland → Las Vegas Raiders (moved 2020) + ('NFL', 'SD'): 'team_nfl_lac', # San Diego → Los Angeles Chargers (moved 2017) + ('NFL', 'STL'): 'team_nfl_lar', # St. Louis → Los Angeles Rams (moved 2016) + ('NFL', 'GNB'): 'team_nfl_gb', # Green Bay alternate + ('NFL', 'KAN'): 'team_nfl_kc', # Kansas City alternate + ('NFL', 'NWE'): 'team_nfl_ne', # New England alternate + ('NFL', 'NOR'): 'team_nfl_no', # New Orleans alternate + ('NFL', 'TAM'): 'team_nfl_tb', # Tampa Bay alternate + ('NFL', 'SFO'): 'team_nfl_sf', # San Francisco alternate + ('NFL', 'WAS'): 'team_nfl_was', # Washington (direct match but include for completeness) + ``` + + These cover: + - Historical relocations (OAK→LV, SD→LAC, STL→LAR) + - Common 3-letter variations used by different data sources + + grep -c "NFL" Scripts/canonicalize_games.py | head -1 + NFL aliases present in TEAM_ABBREV_ALIASES dict (should show 10+ NFL entries) + + + + Task 3: Add NFL stadium historical aliases to canonicalize_stadiums.py + Scripts/canonicalize_stadiums.py + + Add NFL entries to HISTORICAL_STADIUM_ALIASES dict for sponsorship changes and renames: + + ```python + # NFL + 'stadium_nfl_sofi_stadium': [ + # SoFi Stadium opened 2020, no prior name + ], + 'stadium_nfl_allegiant_stadium': [ + # Allegiant Stadium opened 2020, no prior name (Raiders moved from Oakland Coliseum) + ], +
'stadium_nfl_caesars_superdome': [ + {'alias_name': 'mercedes-benz superdome', 'valid_from': '2011-10-01', 'valid_until': '2021-07-01'}, + {'alias_name': 'louisiana superdome', 'valid_from': '1975-08-01', 'valid_until': '2011-09-30'}, + {'alias_name': 'superdome', 'valid_from': '1975-08-01'}, + ], + 'stadium_nfl_paycor_stadium': [ + {'alias_name': 'paul brown stadium', 'valid_from': '2000-08-01', 'valid_until': '2022-09-05'}, + ], + 'stadium_nfl_empower_field_at_mile_high': [ + {'alias_name': 'broncos stadium at mile high', 'valid_from': '2018-09-01', 'valid_until': '2019-08-31'}, + {'alias_name': 'sports authority field at mile high', 'valid_from': '2011-08-01', 'valid_until': '2018-08-31'}, + {'alias_name': 'invesco field at mile high', 'valid_from': '2001-09-01', 'valid_until': '2011-07-31'}, + {'alias_name': 'mile high stadium', 'valid_from': '1960-01-01', 'valid_until': '2001-08-31'}, + ], + 'stadium_nfl_acrisure_stadium': [ + {'alias_name': 'heinz field', 'valid_from': '2001-08-01', 'valid_until': '2022-07-10'}, + ], + 'stadium_nfl_everbank_stadium': [ + {'alias_name': 'tiaa bank field', 'valid_from': '2018-01-01', 'valid_until': '2023-03-31'}, + {'alias_name': 'everbank field', 'valid_from': '2014-01-01', 'valid_until': '2017-12-31'}, + {'alias_name': 'alltel stadium', 'valid_from': '1997-06-01', 'valid_until': '2006-12-31'}, + {'alias_name': 'jacksonville municipal stadium', 'valid_from': '1995-08-01', 'valid_until': '1997-05-31'}, + ], + 'stadium_nfl_northwest_stadium': [ + {'alias_name': 'fedexfield', 'valid_from': '1999-11-01', 'valid_until': '2025-01-01'}, + {'alias_name': 'fedex field', 'valid_from': '1999-11-01', 'valid_until': '2025-01-01'}, + {'alias_name': 'jack kent cooke stadium', 'valid_from': '1997-09-01', 'valid_until': '1999-10-31'}, + ], + 'stadium_nfl_hard_rock_stadium': [ + {'alias_name': 'sun life stadium', 'valid_from': '2010-01-01', 'valid_until': '2016-07-31'}, + {'alias_name': 'land shark stadium', 'valid_from': '2009-01-01', 
'valid_until': '2009-12-31'}, + {'alias_name': 'dolphin stadium', 'valid_from': '2005-01-01', 'valid_until': '2008-12-31'}, + {'alias_name': 'pro player stadium', 'valid_from': '1996-04-01', 'valid_until': '2004-12-31'}, + {'alias_name': 'joe robbie stadium', 'valid_from': '1987-08-01', 'valid_until': '1996-03-31'}, + ], + 'stadium_nfl_highmark_stadium': [ + {'alias_name': 'bills stadium', 'valid_from': '2020-03-01', 'valid_until': '2021-03-31'}, + {'alias_name': 'new era field', 'valid_from': '2016-08-01', 'valid_until': '2020-02-29'}, + {'alias_name': 'ralph wilson stadium', 'valid_from': '1998-08-01', 'valid_until': '2016-07-31'}, + {'alias_name': 'rich stadium', 'valid_from': '1973-08-01', 'valid_until': '1998-07-31'}, + ], + 'stadium_nfl_geha_field_at_arrowhead_stadium': [ + {'alias_name': 'arrowhead stadium', 'valid_from': '1972-08-01'}, + ], + 'stadium_nfl_att_stadium': [ + {'alias_name': 'cowboys stadium', 'valid_from': '2009-05-01', 'valid_until': '2013-07-24'}, + ], + 'stadium_nfl_us_bank_stadium': [ + # Opened 2016, no prior name (Vikings moved from Metrodome) + ], + 'stadium_nfl_lumen_field': [ + {'alias_name': 'centurylink field', 'valid_from': '2011-06-01', 'valid_until': '2020-11-18'}, + {'alias_name': 'qwest field', 'valid_from': '2004-06-01', 'valid_until': '2011-05-31'}, + {'alias_name': 'seahawks stadium', 'valid_from': '2002-07-01', 'valid_until': '2004-05-31'}, + ], + ``` + + Only include stadiums with actual historical name changes. Skip stadiums like Soldier Field, Lambeau Field that have kept their names. 
+ + grep -c "stadium_nfl" Scripts/canonicalize_stadiums.py + NFL stadium aliases present in HISTORICAL_STADIUM_ALIASES (should show 10+ entries) + + + + + +Before declaring plan complete: +- [ ] `python Scripts/canonicalize_teams.py --verbose` shows 32 NFL teams with stadium matches +- [ ] `python Scripts/canonicalize_stadiums.py --verbose` runs without error +- [ ] NFL entries exist in all three canonicalization scripts + + + +- All tasks completed +- NFL teams appear in teams_canonical.json output +- NFL stadium aliases added to canonicalization +- NFL abbreviation variations covered for game resolution + + + +After completion, create `.planning/phases/03-alias-systems/03-01-SUMMARY.md` + diff --git a/.planning/phases/03-alias-systems/03-02-PLAN.md b/.planning/phases/03-alias-systems/03-02-PLAN.md new file mode 100644 index 0000000..37a278f --- /dev/null +++ b/.planning/phases/03-alias-systems/03-02-PLAN.md @@ -0,0 +1,190 @@ +--- +phase: 03-alias-systems +plan: 02 +type: execute +--- + + +Add MLS, WNBA, and NWSL to the canonicalization pipeline with alias support. + +Purpose: Secondary sports modules exist (Phase 2.1) but aren't integrated into canonicalization, preventing game→team→stadium linking. +Output: All three secondary sports canonicalized with team and stadium alias support. 
+ + + +~/.claude/get-shit-done/workflows/execute-phase.md +~/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/phases/03-alias-systems/03-01-SUMMARY.md + +# Key source files: +@Scripts/canonicalize_teams.py +@Scripts/canonicalize_games.py +@Scripts/canonicalize_stadiums.py +@Scripts/mls.py +@Scripts/wnba.py +@Scripts/nwsl.py + +**Prior decisions:** +- MLS uses soccer configuration capacities for shared NFL stadiums +- WNBA cross-references shared arena coordinates from nba.py and nhl.py +- NWSL cross-references shared stadium coordinates from mls.py + +**Patterns established (from 03-01):** +- Team canonicalization: import {SPORT}_TEAMS, add {SPORT}_DIVISIONS dict, include in sport_mappings list +- Game resolution: TEAM_ABBREV_ALIASES dict maps alternate abbrevs to canonical team IDs +- Stadium aliases: HISTORICAL_STADIUM_ALIASES dict maps canonical_id to list of historical names + + + + + + Task 1: Add MLS to canonicalization pipeline + Scripts/canonicalize_teams.py, Scripts/canonicalize_games.py, Scripts/canonicalize_stadiums.py + + **canonicalize_teams.py:** + 1. Update import: add MLS_TEAMS from mls module + `from mls import MLS_TEAMS` + 2. Add MLS_DIVISIONS dict (MLS uses conferences, not divisions; verify every abbreviation and its conference against the MLS_TEAMS keys in mls.py before copying — CIN and FCC below both appear to denote FC Cincinnati): + - Eastern Conference: ATL, CHI, CIN, CLB, CLT, DCU, FCC, MIA, MTL, NE, NYC, NYR, ORL, PHI, TOR → ('mls_eastern', None) + - Western Conference: AUS, COL, DAL, HOU, LAF, LAG, MIN, NSH, POR, RSL, SEA, SJE, SKC, STL, VAN → ('mls_western', None) + 3.
Add ('MLS', MLS_TEAMS) to sport_mappings list + + **canonicalize_games.py:** + Add MLS aliases to TEAM_ABBREV_ALIASES: + ```python + # MLS + ('MLS', 'LA'): 'team_mls_lag', # LA Galaxy + ('MLS', 'LAFC'): 'team_mls_laf', # LAFC (Los Angeles FC) + ('MLS', 'NYCFC'): 'team_mls_nyc', # NYC FC + ('MLS', 'RBNY'): 'team_mls_nyr', # NY Red Bulls + ('MLS', 'SJ'): 'team_mls_sje', # San Jose Earthquakes + ('MLS', 'KC'): 'team_mls_skc', # Sporting KC + ('MLS', 'DC'): 'team_mls_dcu', # DC United + ('MLS', 'FCD'): 'team_mls_dal', # FC Dallas + ('MLS', 'MON'): 'team_mls_mtl', # Montreal + ``` + + **canonicalize_stadiums.py:** + Add MLS stadium historical aliases (recent renames only): + ```python + # MLS + 'stadium_mls_bmw_stadium': [ + {'alias_name': 'adi stadium', 'valid_from': '2021-07-01', 'valid_until': '2024-01-01'}, + ], + 'stadium_mls_shell_energy_stadium': [ + {'alias_name': 'paypal park', 'valid_from': '2021-01-01', 'valid_until': '2024-06-01'}, + {'alias_name': 'earthquakes stadium', 'valid_from': '2015-03-01', 'valid_until': '2020-12-31'}, + {'alias_name': 'avaya stadium', 'valid_from': '2015-03-01', 'valid_until': '2020-12-31'}, + ], + 'stadium_mls_geodis_park': [ + # Opened 2022, no prior name + ], + 'stadium_mls_dignity_health_sports_park': [ + {'alias_name': 'stubhub center', 'valid_from': '2013-06-01', 'valid_until': '2019-01-31'}, + {'alias_name': 'home depot center', 'valid_from': '2003-06-01', 'valid_until': '2013-05-31'}, + ], + ``` + + python Scripts/canonicalize_teams.py --verbose 2>&1 | grep -E "MLS:|Created.*teams" + MLS teams appear in output (29-30 teams depending on expansion), no critical warnings + + + + Task 2: Add WNBA to canonicalization pipeline + Scripts/canonicalize_teams.py, Scripts/canonicalize_games.py, Scripts/canonicalize_stadiums.py + + **canonicalize_teams.py:** + 1. Update import: add WNBA_TEAMS from wnba module + `from wnba import WNBA_TEAMS` + 2. 
Add WNBA_DIVISIONS dict (WNBA has no divisions; this plan uses a single league-wide conference ID with the division set to None): + - Map every team to ('wnba', None) + - 13 teams: ATL, CHI, CON, DAL, IND, LVA, LAS, MIN, NYL, PHO, SEA, WAS, GSV + 3. Add ('WNBA', WNBA_TEAMS) to sport_mappings list + + **canonicalize_games.py:** + Add WNBA aliases to TEAM_ABBREV_ALIASES: + ```python + # WNBA + ('WNBA', 'LA'): 'team_wnba_las', # LA Sparks + ('WNBA', 'LV'): 'team_wnba_lva', # Las Vegas Aces + ('WNBA', 'NY'): 'team_wnba_nyl', # New York Liberty + ('WNBA', 'PHX'): 'team_wnba_pho', # Phoenix Mercury + ('WNBA', 'CONN'): 'team_wnba_con', # Connecticut Sun + ('WNBA', 'WSH'): 'team_wnba_was', # Washington Mystics + ``` + + **canonicalize_stadiums.py:** + WNBA shares arenas with NBA/NHL, so most aliases already exist. Add WNBA-specific entries if any: + ```python + # WNBA (most share NBA arenas, which have existing aliases) + 'stadium_wnba_gateway_center_arena': [ + # College Park Center - no historical renames + ], + ``` + + python Scripts/canonicalize_teams.py --verbose 2>&1 | grep -E "WNBA:|Created.*teams" + WNBA teams appear in output (13 teams), no critical warnings + + + + Task 3: Add NWSL to canonicalization pipeline + Scripts/canonicalize_teams.py, Scripts/canonicalize_games.py, Scripts/canonicalize_stadiums.py + + **canonicalize_teams.py:** + 1. Update import: add NWSL_TEAMS from nwsl module + `from nwsl import NWSL_TEAMS` + 2. Add NWSL_DIVISIONS dict (no divisions in NWSL): + - 14 teams all map to ('nwsl', None): ANG, CHI, HOU, KC, LOU, NCC, NJY, ORL, POR, RAC, SD, SEA, UTA, WAS + 3.
Add ('NWSL', NWSL_TEAMS) to sport_mappings list + + **canonicalize_games.py:** + Add NWSL aliases to TEAM_ABBREV_ALIASES: + ```python + # NWSL + ('NWSL', 'LA'): 'team_nwsl_ang', # Angel City FC (Los Angeles) + ('NWSL', 'NC'): 'team_nwsl_ncc', # North Carolina Courage + ('NWSL', 'GOTHAM'): 'team_nwsl_njy', # NJ/NY Gotham FC + ('NWSL', 'NY'): 'team_nwsl_njy', # NJ/NY Gotham FC alt + ('NWSL', 'LOU'): 'team_nwsl_lou', # Louisville (Racing Louisville) + ('NWSL', 'RLC'): 'team_nwsl_lou', # Racing Louisville alt + ``` + + **canonicalize_stadiums.py:** + NWSL shares stadiums with MLS, so most aliases already exist. Add NWSL-specific entries: + ```python + # NWSL + 'stadium_nwsl_cpkc_stadium': [ + # Opened 2024, no prior name (first soccer-specific stadium built for NWSL team) + ], + ``` + + python Scripts/canonicalize_teams.py --verbose 2>&1 | grep -E "NWSL:|Created.*teams" + NWSL teams appear in output (13-14 teams), no critical warnings + + + + + +Before declaring plan complete: +- [ ] `python Scripts/canonicalize_teams.py --verbose` shows MLS, WNBA, NWSL teams +- [ ] All three secondary sports have abbreviation aliases in canonicalize_games.py +- [ ] Stadium aliases added where applicable +- [ ] Total team count increased to ~180 (~124 core: NBA 30, MLB 30, NHL 32, NFL 32; ~57 secondary: MLS 29-30, WNBA 13, NWSL 13-14) + + + +- All tasks completed +- MLS, WNBA, NWSL teams appear in teams_canonical.json output +- Game resolution can handle common abbreviation variations for all sports +- Phase 3 complete (all 7 sports have alias support) + + + +After completion, create `.planning/phases/03-alias-systems/03-02-SUMMARY.md`: +Include final team count by sport, note any warnings or issues encountered. +