feat(scripts): add sportstime-parser data pipeline

Complete Python package for scraping, normalizing, and uploading
sports schedule data to CloudKit. Includes:

- Multi-source scrapers for NBA, MLB, NFL, NHL, MLS, WNBA, NWSL
- Canonical ID system for teams, stadiums, and games
- Fuzzy matching with manual alias support
- CloudKit uploader with batch operations and deduplication
- Comprehensive test suite with fixtures
- WNBA abbreviation aliases for improved team resolution
- Alias validation script to detect orphan references

All 5 phases of the data remediation plan are complete:
- Phase 1: Alias fixes (team/stadium alias additions)
- Phase 2: NHL stadium coordinate fixes
- Phase 3: Re-scrape validation
- Phase 4: iOS bundle update
- Phase 5: Code quality improvements (WNBA aliases)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-20 18:56:25 -06:00
parent ac78042a7e
commit 52d445bca4
76 changed files with 25065 additions and 0 deletions

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "59",
"uid": "s:40~l:59",
"name": "Women's National Basketball Association",
"abbreviation": "WNBA"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2026-05-20T00:00:00Z"
},
"events": [
{
"id": "401672101",
"uid": "s:40~l:59~e:401672101",
"date": "2026-05-20T23:00:00Z",
"name": "Las Vegas Aces at New York Liberty",
"shortName": "LV @ NY",
"competitions": [
{
"id": "401672101",
"uid": "s:40~l:59~e:401672101~c:401672101",
"date": "2026-05-20T23:00:00Z",
"attendance": 17732,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "4346",
"fullName": "Barclays Center",
"address": {
"city": "Brooklyn",
"state": "NY"
},
"capacity": 17732,
"indoor": true
},
"competitors": [
{
"id": "9",
"uid": "s:40~l:59~t:9",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "9",
"uid": "s:40~l:59~t:9",
"location": "New York",
"name": "Liberty",
"abbreviation": "NY",
"displayName": "New York Liberty"
},
"score": "92",
"winner": true
},
{
"id": "20",
"uid": "s:40~l:59~t:20",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "20",
"uid": "s:40~l:59~t:20",
"location": "Las Vegas",
"name": "Aces",
"abbreviation": "LV",
"displayName": "Las Vegas Aces"
},
"score": "88",
"winner": false
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672102",
"uid": "s:40~l:59~e:401672102",
"date": "2026-05-21T00:00:00Z",
"name": "Connecticut Sun at Chicago Sky",
"shortName": "CONN @ CHI",
"competitions": [
{
"id": "401672102",
"uid": "s:40~l:59~e:401672102~c:401672102",
"date": "2026-05-21T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8086",
"fullName": "Wintrust Arena",
"address": {
"city": "Chicago",
"state": "IL"
},
"capacity": 10387,
"indoor": true
},
"competitors": [
{
"id": "6",
"uid": "s:40~l:59~t:6",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "6",
"uid": "s:40~l:59~t:6",
"location": "Chicago",
"name": "Sky",
"abbreviation": "CHI",
"displayName": "Chicago Sky"
},
"score": "78",
"winner": false
},
{
"id": "5",
"uid": "s:40~l:59~t:5",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "5",
"uid": "s:40~l:59~t:5",
"location": "Connecticut",
"name": "Sun",
"abbreviation": "CONN",
"displayName": "Connecticut Sun"
},
"score": "85",
"winner": true
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 4,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672103",
"uid": "s:40~l:59~e:401672103",
"date": "2026-05-21T02:00:00Z",
"name": "Phoenix Mercury at Seattle Storm",
"shortName": "PHX @ SEA",
"competitions": [
{
"id": "401672103",
"uid": "s:40~l:59~e:401672103~c:401672103",
"date": "2026-05-21T02:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3097",
"fullName": "Climate Pledge Arena",
"address": {
"city": "Seattle",
"state": "WA"
},
"capacity": 18100,
"indoor": true
},
"competitors": [
{
"id": "11",
"uid": "s:40~l:59~t:11",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "11",
"uid": "s:40~l:59~t:11",
"location": "Seattle",
"name": "Storm",
"abbreviation": "SEA",
"displayName": "Seattle Storm"
},
"score": null,
"winner": null
},
{
"id": "8",
"uid": "s:40~l:59~t:8",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "8",
"uid": "s:40~l:59~t:8",
"location": "Phoenix",
"name": "Mercury",
"abbreviation": "PHX",
"displayName": "Phoenix Mercury"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0:00",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}