feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are complete:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "28",
"uid": "s:20~l:28",
"name": "National Football League",
"abbreviation": "NFL"
}
],
"season": {
"type": 2,
"year": 2025
},
"week": {
"number": 1
},
"events": [
{
"id": "401671801",
"uid": "s:20~l:28~e:401671801",
"date": "2025-09-07T20:00:00Z",
"name": "Kansas City Chiefs at Baltimore Ravens",
"shortName": "KC @ BAL",
"competitions": [
{
"id": "401671801",
"uid": "s:20~l:28~e:401671801~c:401671801",
"date": "2025-09-07T20:00:00Z",
"attendance": 71547,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3814",
"fullName": "M&T Bank Stadium",
"address": {
"city": "Baltimore",
"state": "MD"
},
"capacity": 71008,
"indoor": false
},
"competitors": [
{
"id": "33",
"uid": "s:20~l:28~t:33",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "33",
"uid": "s:20~l:28~t:33",
"location": "Baltimore",
"name": "Ravens",
"abbreviation": "BAL",
"displayName": "Baltimore Ravens"
},
"score": "20",
"winner": false
},
{
"id": "12",
"uid": "s:20~l:28~t:12",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "12",
"uid": "s:20~l:28~t:12",
"location": "Kansas City",
"name": "Chiefs",
"abbreviation": "KC",
"displayName": "Kansas City Chiefs"
},
"score": "27",
"winner": true
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401671802",
"uid": "s:20~l:28~e:401671802",
"date": "2025-09-08T17:00:00Z",
"name": "Philadelphia Eagles at Green Bay Packers",
"shortName": "PHI @ GB",
"competitions": [
{
"id": "401671802",
"uid": "s:20~l:28~e:401671802~c:401671802",
"date": "2025-09-08T17:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3798",
"fullName": "Lambeau Field",
"address": {
"city": "Green Bay",
"state": "WI"
},
"capacity": 81441,
"indoor": false
},
"competitors": [
{
"id": "9",
"uid": "s:20~l:28~t:9",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "9",
"uid": "s:20~l:28~t:9",
"location": "Green Bay",
"name": "Packers",
"abbreviation": "GB",
"displayName": "Green Bay Packers"
},
"score": "34",
"winner": true
},
{
"id": "21",
"uid": "s:20~l:28~t:21",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "21",
"uid": "s:20~l:28~t:21",
"location": "Philadelphia",
"name": "Eagles",
"abbreviation": "PHI",
"displayName": "Philadelphia Eagles"
},
"score": "29",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401671803",
"uid": "s:20~l:28~e:401671803",
"date": "2025-09-08T20:25:00Z",
"name": "Dallas Cowboys at Cleveland Browns",
"shortName": "DAL @ CLE",
"competitions": [
{
"id": "401671803",
"uid": "s:20~l:28~e:401671803~c:401671803",
"date": "2025-09-08T20:25:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3653",
"fullName": "Cleveland Browns Stadium",
"address": {
"city": "Cleveland",
"state": "OH"
},
"capacity": 67431,
"indoor": false
},
"competitors": [
{
"id": "5",
"uid": "s:20~l:28~t:5",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "5",
"uid": "s:20~l:28~t:5",
"location": "Cleveland",
"name": "Browns",
"abbreviation": "CLE",
"displayName": "Cleveland Browns"
},
"score": null,
"winner": null
},
{
"id": "6",
"uid": "s:20~l:28~t:6",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "6",
"uid": "s:20~l:28~t:6",
"location": "Dallas",
"name": "Cowboys",
"abbreviation": "DAL",
"displayName": "Dallas Cowboys"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}