feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are complete:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "19",
"uid": "s:600~l:19",
"name": "Major League Soccer",
"abbreviation": "MLS"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2026-03-15T00:00:00Z"
},
"events": [
{
"id": "401672001",
"uid": "s:600~l:19~e:401672001",
"date": "2026-03-15T22:00:00Z",
"name": "LA Galaxy at LAFC",
"shortName": "LA @ LAFC",
"competitions": [
{
"id": "401672001",
"uid": "s:600~l:19~e:401672001~c:401672001",
"date": "2026-03-15T22:00:00Z",
"attendance": 22000,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8909",
"fullName": "BMO Stadium",
"address": {
"city": "Los Angeles",
"state": "CA"
},
"capacity": 22000,
"indoor": false
},
"competitors": [
{
"id": "21295",
"uid": "s:600~l:19~t:21295",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "21295",
"uid": "s:600~l:19~t:21295",
"location": "Los Angeles",
"name": "FC",
"abbreviation": "LAFC",
"displayName": "Los Angeles FC"
},
"score": "3",
"winner": true
},
{
"id": "3610",
"uid": "s:600~l:19~t:3610",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "3610",
"uid": "s:600~l:19~t:3610",
"location": "Los Angeles",
"name": "Galaxy",
"abbreviation": "LA",
"displayName": "LA Galaxy"
},
"score": "2",
"winner": false
}
],
"status": {
"clock": 90,
"displayClock": "90'",
"period": 2,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672002",
"uid": "s:600~l:19~e:401672002",
"date": "2026-03-15T23:00:00Z",
"name": "Seattle Sounders at Portland Timbers",
"shortName": "SEA @ POR",
"competitions": [
{
"id": "401672002",
"uid": "s:600~l:19~e:401672002~c:401672002",
"date": "2026-03-15T23:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8070",
"fullName": "Providence Park",
"address": {
"city": "Portland",
"state": "OR"
},
"capacity": 25218,
"indoor": false
},
"competitors": [
{
"id": "5282",
"uid": "s:600~l:19~t:5282",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "5282",
"uid": "s:600~l:19~t:5282",
"location": "Portland",
"name": "Timbers",
"abbreviation": "POR",
"displayName": "Portland Timbers"
},
"score": "2",
"winner": false
},
{
"id": "4687",
"uid": "s:600~l:19~t:4687",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "4687",
"uid": "s:600~l:19~t:4687",
"location": "Seattle",
"name": "Sounders FC",
"abbreviation": "SEA",
"displayName": "Seattle Sounders FC"
},
"score": "2",
"winner": false
}
],
"status": {
"clock": 90,
"displayClock": "90'",
"period": 2,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672003",
"uid": "s:600~l:19~e:401672003",
"date": "2026-03-16T00:00:00Z",
"name": "New York Red Bulls at Atlanta United",
"shortName": "NY @ ATL",
"competitions": [
{
"id": "401672003",
"uid": "s:600~l:19~e:401672003~c:401672003",
"date": "2026-03-16T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8904",
"fullName": "Mercedes-Benz Stadium",
"address": {
"city": "Atlanta",
"state": "GA"
},
"capacity": 42500,
"indoor": true
},
"competitors": [
{
"id": "18626",
"uid": "s:600~l:19~t:18626",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "18626",
"uid": "s:600~l:19~t:18626",
"location": "Atlanta",
"name": "United FC",
"abbreviation": "ATL",
"displayName": "Atlanta United FC"
},
"score": null,
"winner": null
},
{
"id": "399",
"uid": "s:600~l:19~t:399",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "399",
"uid": "s:600~l:19~t:399",
"location": "New York",
"name": "Red Bulls",
"abbreviation": "NY",
"displayName": "New York Red Bulls"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0'",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}