0c9d02f7d4
The user reported 0 enrichment hits. Two root causes:
1. route-explorer's schedule endpoint only covers FUTURE flights —
verified by curl: WN1942 / 2024-01-27 → no flights; WN7 /
2026-05-27 → 1 result with equipmentIata=73H. The user's CSV is
mostly historical, so route-explorer was a dead end for ~all rows.
2. Even when route-explorer DID return data, it ships IATA aircraft
codes ("73H") while the rest of the app expects ICAO designators
("B738"). The saved string wouldn't have matched displayName or
type-filter tables anyway.
Two fixes, plus the wiring that applies both:
- FlightAwareLookup actor scrapes flightaware.com/live/flight/
<CALLSIGN> for the trackpollBootstrap JSON embedded in the page.
The activityLog.flights[] array contains 8–15 recent operations
of that flight number, each with a real ICAO aircraftType
("B738", "B38M", etc.) and the route IATA. We walk braces to
extract the JSON literal, then pick the best match:
1. Same dep/arr route → most common type on that route
2. Reverse direction (same airframe usually flies the return)
3. Fallback: most common type across the entire activity log
Verified by curl that FA isn't Cloudflare-gated and returns
ICAO codes directly. Per-callsign result cached.
- AircraftDatabase.normalizedICAO(forCode:) converts either input
form (IATA or ICAO) to canonical ICAO. New iataToICAO map covers
the common 60+ codes (737 family, A320 family, widebodies,
regionals, MD-80s). Anything missing falls through unchanged.
- Both enrichers (EnrichAircraftTypesView for existing flights,
ImportCSVView during import) now run the two-step lookup:
route-explorer first, FlightAware on miss. Result is normalized
through normalizedICAO before saving so the Aircraft Stats screen
recognizes the value.
Expected outcome: the user's 80 historical Southwest flights should
mostly get B737 / B738 / B38M codes assigned via FlightAware's
per-flight-# activity log (Southwest reuses flight numbers reliably
on the same routes).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
161 lines
6.3 KiB
Swift
161 lines
6.3 KiB
Swift
import Foundation
|
|
|
|
/// Best-effort aircraft type lookup by scraping FlightAware's
/// `/live/flight/<callsign>` page. The page source embeds a
/// `var trackpollBootstrap = {...}` JSON literal whose
/// `flights.<flightId>.activityLog.flights[]` array lists recent
/// operations of that flight number; each entry carries an
/// `aircraftType` (ICAO designator form such as B738, B38M, A21N)
/// and the route as `origin.iata` / `destination.iata`.
///
/// The request is a plain GET with a browser-shaped User-Agent and no
/// authentication. NOTE(review): this relies on FlightAware not gating
/// the page (e.g. behind Cloudflare) — true when written, re-verify if
/// lookups start missing.
///
/// Matching strategy, in order:
///   1. most common type among log entries on the same dep → arr route,
///   2. most common type on the reverse route (return leg),
///   3. most common type across the whole activity log.
/// Ties are broken in favour of the type that appears first in the log,
/// so the result is deterministic for a given page.
///
/// Caching: the parsed activity log is cached per callsign for the
/// lifetime of the actor, so looking up several routes of the same
/// flight number costs one request. Pages that were fetched but
/// contained no usable log are cached as an empty log (a miss).
/// Failed requests (transport errors, non-2xx statuses) are NOT cached
/// so a later call can retry.
actor FlightAwareLookup {
    static let shared = FlightAwareLookup()

    /// Aircraft types harvested from one page's activity log.
    struct ActivityLog: Sendable {
        /// `"DAL-HOU"` → types seen on that route, in log order.
        private(set) var typesByRoute: [String: [String]] = [:]
        /// Every type seen in the log, in log order.
        private(set) var allTypes: [String] = []

        /// An empty log; every lookup against it returns `nil`.
        init() {}

        /// Builds a log from the decoded `activityLog.flights[]` entries.
        /// Entries missing `origin.iata`, `destination.iata` or a
        /// non-empty `aircraftType` are skipped. Codes are upper-cased.
        init(entries: [[String: Any]]) {
            for entry in entries {
                guard let origin = entry["origin"] as? [String: Any],
                      let destination = entry["destination"] as? [String: Any],
                      let originIATA = (origin["iata"] as? String)?.uppercased(),
                      let destinationIATA = (destination["iata"] as? String)?.uppercased(),
                      let type = (entry["aircraftType"] as? String)?.uppercased(),
                      !type.isEmpty
                else { continue }
                typesByRoute["\(originIATA)-\(destinationIATA)", default: []].append(type)
                allTypes.append(type)
            }
        }

        /// Picks the best aircraft type for a route using the
        /// same-route → reverse-route → whole-log fallback chain.
        ///
        /// - Parameters:
        ///   - departureIATA: Departure airport IATA code (any case,
        ///     surrounding whitespace ignored).
        ///   - arrivalIATA: Arrival airport IATA code (same leniency).
        /// - Returns: The chosen type, or `nil` when the log is empty.
        func bestType(departureIATA: String, arrivalIATA: String) -> String? {
            // Log codes are stored upper-cased, so the query must be too;
            // otherwise "dal" silently falls through to the global fallback.
            let dep = Self.normalized(departureIATA)
            let arr = Self.normalized(arrivalIATA)

            if let top = FlightAwareLookup.mostCommon(typesByRoute["\(dep)-\(arr)"] ?? []) {
                return top
            }
            if let top = FlightAwareLookup.mostCommon(typesByRoute["\(arr)-\(dep)"] ?? []) {
                return top
            }
            return FlightAwareLookup.mostCommon(allTypes)
        }

        private static func normalized(_ code: String) -> String {
            code.trimmingCharacters(in: .whitespacesAndNewlines).uppercased()
        }
    }

    private let session: URLSession

    /// Upper-cased callsign → parsed activity log (empty log == miss).
    private var cache: [String: ActivityLog] = [:]

    init(session: URLSession = .shared) {
        self.session = session
    }

    /// Look up the ICAO aircraft type for one flight.
    ///
    /// - Parameters:
    ///   - callsign: ICAO carrier + number, e.g. "SWA1942". Trimmed and
    ///     upper-cased before use.
    ///   - departureIATA: Departure airport, used to find the best
    ///     route match in the activity log.
    ///   - arrivalIATA: Arrival airport, used the same way.
    /// - Returns: An aircraft type string such as "B738", or `nil` when
    ///   the callsign is empty, the request fails, or the page holds no
    ///   usable activity log.
    func lookupType(
        callsign: String,
        departureIATA: String,
        arrivalIATA: String
    ) async -> String? {
        let callsignKey = callsign
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .uppercased()
        // An empty callsign would request the bare /live/flight/ index.
        guard !callsignKey.isEmpty else { return nil }

        if let log = cache[callsignKey] {
            return log.bestType(departureIATA: departureIATA, arrivalIATA: arrivalIATA)
        }

        // Percent-encode everything except A-Z / 0-9 so a stray "/", "?"
        // or "#" in the input cannot change which page is requested.
        let allowed = CharacterSet(charactersIn: "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
        guard let pathComponent = callsignKey.addingPercentEncoding(withAllowedCharacters: allowed),
              let url = URL(string: "https://flightaware.com/live/flight/\(pathComponent)")
        else { return nil }

        var request = URLRequest(url: url)
        request.timeoutInterval = 10
        request.setValue(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
            forHTTPHeaderField: "User-Agent"
        )
        request.setValue("text/html,application/xhtml+xml", forHTTPHeaderField: "Accept")

        do {
            let (data, response) = try await session.data(for: request)
            guard let http = response as? HTTPURLResponse,
                  (200..<300).contains(http.statusCode)
            else {
                // Not cached: a non-2xx status may be transient.
                return nil
            }
            // Decode with repair: one invalid byte somewhere in a large page
            // should not discard the embedded JSON.
            let html = String(decoding: data, as: UTF8.self)
            let log = Self.parseActivityLog(fromHTML: html)
            // Another task may have filled this slot while we were
            // suspended; overwriting with an equivalent parse is harmless.
            cache[callsignKey] = log
            return log.bestType(departureIATA: departureIATA, arrivalIATA: arrivalIATA)
        } catch {
            // Not cached: transport errors (offline, timeout) should be retried.
            return nil
        }
    }

    // MARK: - Parsing

    /// Finds the `trackpollBootstrap` JSON in `html` and collects the
    /// aircraft types from its activity log.
    ///
    /// The bootstrap is `{flights: {<flightId>: {activityLog: {flights: [...]}}}}`.
    /// The flight id is not known in advance, so keys are scanned in
    /// sorted order and the first record with a non-empty activity log
    /// is used.
    ///
    /// - Returns: The parsed log, or an empty log when the bootstrap is
    ///   missing, is not valid JSON, or has no activity entries.
    static func parseActivityLog(fromHTML html: String) -> ActivityLog {
        guard let blob = extractTrackpollBootstrap(from: html),
              let root = (try? JSONSerialization.jsonObject(with: Data(blob.utf8))) as? [String: Any],
              let flights = root["flights"] as? [String: Any]
        else { return ActivityLog() }

        for flightID in flights.keys.sorted() {
            guard let flight = flights[flightID] as? [String: Any],
                  let activity = flight["activityLog"] as? [String: Any],
                  let entries = activity["flights"] as? [[String: Any]],
                  !entries.isEmpty
            else { continue }
            return ActivityLog(entries: entries)
        }
        return ActivityLog()
    }

    /// Locates `var trackpollBootstrap = {...};` in the page and returns
    /// just the brace-balanced `{...}` literal.
    ///
    /// Brace-walking is used because arbitrary JS follows the object
    /// literal and there is no reliable sentinel to search for. Braces
    /// inside double-quoted strings are ignored, and a backslash skips
    /// the following byte so `\"` does not end a string.
    ///
    /// - Returns: The literal text, or `nil` when the marker is absent
    ///   or the braces never balance.
    static func extractTrackpollBootstrap(from html: String) -> String? {
        guard let marker = html.range(of: "var trackpollBootstrap"),
              let open = html.range(of: "{", range: marker.upperBound..<html.endIndex)
        else { return nil }

        // Walk UTF-8 bytes rather than Characters: every structural token
        // is ASCII, and a quote or brace followed by a combining scalar
        // would otherwise fuse into one grapheme and go unnoticed.
        let bytes = html.utf8
        let backslash = UInt8(ascii: "\\")
        let quote = UInt8(ascii: "\"")
        let openBrace = UInt8(ascii: "{")
        let closeBrace = UInt8(ascii: "}")

        var depth = 0
        var inString = false
        var escaped = false
        var idx = open.lowerBound
        while idx < bytes.endIndex {
            let byte = bytes[idx]
            if escaped {
                escaped = false
            } else if byte == backslash {
                escaped = true
            } else if byte == quote {
                inString.toggle()
            } else if !inString {
                if byte == openBrace {
                    depth += 1
                } else if byte == closeBrace {
                    depth -= 1
                    if depth == 0 {
                        return String(decoding: bytes[open.lowerBound...idx], as: UTF8.self)
                    }
                }
            }
            idx = bytes.index(after: idx)
        }
        // Ran off the end of the page without closing the literal.
        return nil
    }

    /// Returns the most frequent element of `list`, or `nil` when it is
    /// empty. Ties go to the element that appears first in `list`, which
    /// keeps the result stable across runs (dictionary iteration order
    /// is not).
    static func mostCommon(_ list: [String]) -> String? {
        var counts: [String: Int] = [:]
        for value in list { counts[value, default: 0] += 1 }

        var best: String?
        var bestCount = 0
        // Strict `>` while walking in list order keeps the earliest winner.
        for value in list {
            let count = counts[value, default: 0]
            if count > bestCount {
                best = value
                bestCount = count
            }
        }
        return best
    }
}
|