feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "10",
"uid": "s:1~l:10",
"name": "Major League Baseball",
"abbreviation": "MLB"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2026-04-15T00:00:00Z"
},
"events": [
{
"id": "401584801",
"uid": "s:1~l:10~e:401584801",
"date": "2026-04-15T23:05:00Z",
"name": "New York Yankees at Boston Red Sox",
"shortName": "NYY @ BOS",
"competitions": [
{
"id": "401584801",
"uid": "s:1~l:10~e:401584801~c:401584801",
"date": "2026-04-15T23:05:00Z",
"attendance": 37435,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3",
"fullName": "Fenway Park",
"address": {
"city": "Boston",
"state": "MA"
},
"capacity": 37755,
"indoor": false
},
"competitors": [
{
"id": "2",
"uid": "s:1~l:10~t:2",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "2",
"uid": "s:1~l:10~t:2",
"location": "Boston",
"name": "Red Sox",
"abbreviation": "BOS",
"displayName": "Boston Red Sox"
},
"score": "5",
"winner": true
},
{
"id": "10",
"uid": "s:1~l:10~t:10",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "10",
"uid": "s:1~l:10~t:10",
"location": "New York",
"name": "Yankees",
"abbreviation": "NYY",
"displayName": "New York Yankees"
},
"score": "3",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 9,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401584802",
"uid": "s:1~l:10~e:401584802",
"date": "2026-04-15T20:10:00Z",
"name": "Chicago Cubs at St. Louis Cardinals",
"shortName": "CHC @ STL",
"competitions": [
{
"id": "401584802",
"uid": "s:1~l:10~e:401584802~c:401584802",
"date": "2026-04-15T20:10:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "87",
"fullName": "Busch Stadium",
"address": {
"city": "St. Louis",
"state": "MO"
},
"capacity": 45538,
"indoor": false
},
"competitors": [
{
"id": "24",
"uid": "s:1~l:10~t:24",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "24",
"uid": "s:1~l:10~t:24",
"location": "St. Louis",
"name": "Cardinals",
"abbreviation": "STL",
"displayName": "St. Louis Cardinals"
},
"score": "7",
"winner": true
},
{
"id": "16",
"uid": "s:1~l:10~t:16",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "16",
"uid": "s:1~l:10~t:16",
"location": "Chicago",
"name": "Cubs",
"abbreviation": "CHC",
"displayName": "Chicago Cubs"
},
"score": "4",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 9,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401584803",
"uid": "s:1~l:10~e:401584803",
"date": "2026-04-16T00:10:00Z",
"name": "Los Angeles Dodgers at San Francisco Giants",
"shortName": "LAD @ SF",
"competitions": [
{
"id": "401584803",
"uid": "s:1~l:10~e:401584803~c:401584803",
"date": "2026-04-16T00:10:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "116",
"fullName": "Oracle Park",
"address": {
"city": "San Francisco",
"state": "CA"
},
"capacity": 41915,
"indoor": false
},
"competitors": [
{
"id": "26",
"uid": "s:1~l:10~t:26",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "26",
"uid": "s:1~l:10~t:26",
"location": "San Francisco",
"name": "Giants",
"abbreviation": "SF",
"displayName": "San Francisco Giants"
},
"score": null,
"winner": null
},
{
"id": "19",
"uid": "s:1~l:10~t:19",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "19",
"uid": "s:1~l:10~t:19",
"location": "Los Angeles",
"name": "Dodgers",
"abbreviation": "LAD",
"displayName": "Los Angeles Dodgers"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}