feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of data remediation plan completed:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "90",
"uid": "s:70~l:90",
"name": "National Hockey League",
"abbreviation": "NHL"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2025-10-08T00:00:00Z"
},
"events": [
{
"id": "401671901",
"uid": "s:70~l:90~e:401671901",
"date": "2025-10-08T23:00:00Z",
"name": "Pittsburgh Penguins at Boston Bruins",
"shortName": "PIT @ BOS",
"competitions": [
{
"id": "401671901",
"uid": "s:70~l:90~e:401671901~c:401671901",
"date": "2025-10-08T23:00:00Z",
"attendance": 17850,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "1823",
"fullName": "TD Garden",
"address": {
"city": "Boston",
"state": "MA"
},
"capacity": 17850,
"indoor": true
},
"competitors": [
{
"id": "1",
"uid": "s:70~l:90~t:1",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "1",
"uid": "s:70~l:90~t:1",
"location": "Boston",
"name": "Bruins",
"abbreviation": "BOS",
"displayName": "Boston Bruins"
},
"score": "4",
"winner": true
},
{
"id": "5",
"uid": "s:70~l:90~t:5",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "5",
"uid": "s:70~l:90~t:5",
"location": "Pittsburgh",
"name": "Penguins",
"abbreviation": "PIT",
"displayName": "Pittsburgh Penguins"
},
"score": "2",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 3,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401671902",
"uid": "s:70~l:90~e:401671902",
"date": "2025-10-09T00:00:00Z",
"name": "Toronto Maple Leafs at Montreal Canadiens",
"shortName": "TOR @ MTL",
"competitions": [
{
"id": "401671902",
"uid": "s:70~l:90~e:401671902~c:401671902",
"date": "2025-10-09T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "1918",
"fullName": "Bell Centre",
"address": {
"city": "Montreal",
"state": "QC"
},
"capacity": 21302,
"indoor": true
},
"competitors": [
{
"id": "8",
"uid": "s:70~l:90~t:8",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "8",
"uid": "s:70~l:90~t:8",
"location": "Montreal",
"name": "Canadiens",
"abbreviation": "MTL",
"displayName": "Montreal Canadiens"
},
"score": "3",
"winner": false
},
{
"id": "10",
"uid": "s:70~l:90~t:10",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "10",
"uid": "s:70~l:90~t:10",
"location": "Toronto",
"name": "Maple Leafs",
"abbreviation": "TOR",
"displayName": "Toronto Maple Leafs"
},
"score": "5",
"winner": true
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 3,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401671903",
"uid": "s:70~l:90~e:401671903",
"date": "2025-10-09T02:00:00Z",
"name": "Vegas Golden Knights at Los Angeles Kings",
"shortName": "VGK @ LAK",
"competitions": [
{
"id": "401671903",
"uid": "s:70~l:90~e:401671903~c:401671903",
"date": "2025-10-09T02:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "1816",
"fullName": "Crypto.com Arena",
"address": {
"city": "Los Angeles",
"state": "CA"
},
"capacity": 18230,
"indoor": true
},
"competitors": [
{
"id": "26",
"uid": "s:70~l:90~t:26",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "26",
"uid": "s:70~l:90~t:26",
"location": "Los Angeles",
"name": "Kings",
"abbreviation": "LAK",
"displayName": "Los Angeles Kings"
},
"score": null,
"winner": null
},
{
"id": "54",
"uid": "s:70~l:90~t:54",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "54",
"uid": "s:70~l:90~t:54",
"location": "Vegas",
"name": "Golden Knights",
"abbreviation": "VGK",
"displayName": "Vegas Golden Knights"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}