feat(scripts): rewrite parser as modular Python CLI

Replace monolithic scraping scripts with sportstime_parser package:

- Multi-source scrapers with automatic fallback for 7 sports
- Canonical ID generation for games, teams, and stadiums
- Fuzzy matching with configurable thresholds for name resolution
- CloudKit Web Services uploader with JWT auth, diff-based updates
- Resumable uploads with checkpoint state persistence
- Validation reports with manual review items and suggested matches
- Comprehensive test suite (249 tests)

CLI: sportstime-parser scrape|validate|upload|status|retry|clear

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-01-10 21:06:12 -06:00
parent 284a10d9e1
commit eeaf900e5a
109 changed files with 18415 additions and 266211 deletions

View File

@@ -0,0 +1,245 @@
{
"leagues": [
{
"id": "761",
"uid": "s:600~l:761",
"name": "National Women's Soccer League",
"abbreviation": "NWSL"
}
],
"season": {
"type": 2,
"year": 2026
},
"day": {
"date": "2026-04-10T00:00:00Z"
},
"events": [
{
"id": "401672201",
"uid": "s:600~l:761~e:401672201",
"date": "2026-04-10T23:00:00Z",
"name": "Angel City FC at Portland Thorns",
"shortName": "LA @ POR",
"competitions": [
{
"id": "401672201",
"uid": "s:600~l:761~e:401672201~c:401672201",
"date": "2026-04-10T23:00:00Z",
"attendance": 22000,
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8070",
"fullName": "Providence Park",
"address": {
"city": "Portland",
"state": "OR"
},
"capacity": 25218,
"indoor": false
},
"competitors": [
{
"id": "15625",
"uid": "s:600~l:761~t:15625",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "15625",
"uid": "s:600~l:761~t:15625",
"location": "Portland",
"name": "Thorns FC",
"abbreviation": "POR",
"displayName": "Portland Thorns FC"
},
"score": "2",
"winner": true
},
{
"id": "19934",
"uid": "s:600~l:761~t:19934",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "19934",
"uid": "s:600~l:761~t:19934",
"location": "Los Angeles",
"name": "Angel City",
"abbreviation": "LA",
"displayName": "Angel City FC"
},
"score": "1",
"winner": false
}
],
"status": {
"clock": 90,
"displayClock": "90'",
"period": 2,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672202",
"uid": "s:600~l:761~e:401672202",
"date": "2026-04-11T00:00:00Z",
"name": "Orlando Pride at North Carolina Courage",
"shortName": "ORL @ NC",
"competitions": [
{
"id": "401672202",
"uid": "s:600~l:761~e:401672202~c:401672202",
"date": "2026-04-11T00:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "8073",
"fullName": "WakeMed Soccer Park",
"address": {
"city": "Cary",
"state": "NC"
},
"capacity": 10000,
"indoor": false
},
"competitors": [
{
"id": "15618",
"uid": "s:600~l:761~t:15618",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "15618",
"uid": "s:600~l:761~t:15618",
"location": "North Carolina",
"name": "Courage",
"abbreviation": "NC",
"displayName": "North Carolina Courage"
},
"score": "3",
"winner": true
},
{
"id": "15626",
"uid": "s:600~l:761~t:15626",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "15626",
"uid": "s:600~l:761~t:15626",
"location": "Orlando",
"name": "Pride",
"abbreviation": "ORL",
"displayName": "Orlando Pride"
},
"score": "1",
"winner": false
}
],
"status": {
"clock": 90,
"displayClock": "90'",
"period": 2,
"type": {
"id": "3",
"name": "STATUS_FINAL",
"state": "post",
"completed": true
}
}
}
]
},
{
"id": "401672203",
"uid": "s:600~l:761~e:401672203",
"date": "2026-04-11T02:00:00Z",
"name": "San Diego Wave at Bay FC",
"shortName": "SD @ BAY",
"competitions": [
{
"id": "401672203",
"uid": "s:600~l:761~e:401672203~c:401672203",
"date": "2026-04-11T02:00:00Z",
"type": {
"id": "1",
"abbreviation": "STD"
},
"venue": {
"id": "3945",
"fullName": "PayPal Park",
"address": {
"city": "San Jose",
"state": "CA"
},
"capacity": 18000,
"indoor": false
},
"competitors": [
{
"id": "25645",
"uid": "s:600~l:761~t:25645",
"type": "team",
"order": 0,
"homeAway": "home",
"team": {
"id": "25645",
"uid": "s:600~l:761~t:25645",
"location": "Bay Area",
"name": "FC",
"abbreviation": "BAY",
"displayName": "Bay FC"
},
"score": null,
"winner": null
},
{
"id": "22638",
"uid": "s:600~l:761~t:22638",
"type": "team",
"order": 1,
"homeAway": "away",
"team": {
"id": "22638",
"uid": "s:600~l:761~t:22638",
"location": "San Diego",
"name": "Wave FC",
"abbreviation": "SD",
"displayName": "San Diego Wave FC"
},
"score": null,
"winner": null
}
],
"status": {
"clock": 0,
"displayClock": "0'",
"period": 0,
"type": {
"id": "1",
"name": "STATUS_SCHEDULED",
"state": "pre",
"completed": false
}
}
}
]
}
]
}