Files
Flights/Flights/Services/FlightAwareLookup.swift
T
Trey T 0c9d02f7d4 Aircraft enrichment: FlightAware fallback + IATA→ICAO normalization
The user reported 0 enrichment hits. Two root causes:

1. route-explorer's schedule endpoint only covers FUTURE flights —
   verified by curl: WN1942 / 2024-01-27 → no flights; WN7 /
   2026-05-27 → 1 result with equipmentIata=73H. The user's CSV is
   mostly historical, so route-explorer was a dead end for ~all rows.

2. Even when route-explorer DID return data, it ships IATA aircraft
   codes ("73H") while the rest of the app expects ICAO designators
   ("B738"). The saved string wouldn't have matched displayName or
   type-filter tables anyway.

Three changes:

- FlightAwareLookup actor scrapes flightaware.com/live/flight/
  <CALLSIGN> for the trackpollBootstrap JSON embedded in the page.
  The activityLog.flights[] array contains 8–15 recent operations
  of that flight number, each with a real ICAO aircraftType
  ("B738", "B38M", etc.) and the route IATA. We walk braces to
  extract the JSON literal, then pick the best match:
    1. Same dep/arr route → most common type on that route
    2. Reverse direction (same airframe usually flies the return)
    3. Fallback: most common type across the entire activity log
  Verified by curl that FA isn't Cloudflare-gated and returns
  ICAO codes directly. Per-callsign result cached.

- AircraftDatabase.normalizedICAO(forCode:) converts either input
  form (IATA or ICAO) to canonical ICAO. New iataToICAO map covers
  the common 60+ codes (737 family, A320 family, widebodies,
  regionals, MD-80s). Anything missing falls through unchanged.

- Both enrichers (EnrichAircraftTypesView for existing flights,
  ImportCSVView during import) now run the two-step lookup:
  route-explorer first, FlightAware on miss. Result is normalized
  through normalizedICAO before saving so the Aircraft Stats screen
  recognizes the value.

Expected outcome: the user's 80 historical Southwest flights should
mostly get B737 / B738 / B38M codes assigned via FlightAware's
per-flight-# activity log (Southwest reuses flight numbers reliably
on the same routes).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 19:00:02 -05:00

161 lines
6.3 KiB
Swift

import Foundation
/// Best-effort aircraft type lookup by scraping FlightAware's
/// `/live/flight/<callsign>` page. Their server embeds a
/// `trackpollBootstrap` JSON object in the page source that contains an
/// `activityLog.flights[]` array; each entry has an `aircraftType`
/// in ICAO designator form (B738, B38M, A21N, etc.), the route as
/// IATA codes, and the scheduled gate departure timestamp (the
/// timestamp is not used by this type).
///
/// Pages are not Cloudflare-gated for direct GET requests with a
/// browser-shaped User-Agent. No auth required.
///
/// Matching strategy: prefer an activity-log entry whose route
/// matches the user's flight, then the reverse direction; otherwise
/// fall back to the most common `aircraftType` across the log (good
/// proxy because flight numbers usually keep the same equipment class
/// across many days).
actor FlightAwareLookup {
    static let shared = FlightAwareLookup()

    private let session: URLSession

    /// Memoized answers keyed by `"<CALLSIGN>-<DEP>-<ARR>"` (all normalized).
    /// A stored `nil` value means "looked up, nothing found". Note that
    /// `cache[key] = nil` would *remove* the key rather than store a miss,
    /// so misses are always written with `updateValue(nil, forKey:)`.
    private var cache: [String: String?] = [:]

    init(session: URLSession = .shared) {
        self.session = session
    }

    /// Look up the ICAO aircraft type for one flight.
    ///
    /// - Parameters:
    ///   - callsign: ICAO carrier + number, e.g. "SWA1942". Surrounding
    ///     whitespace is ignored and the value is uppercased.
    ///   - departureIATA: Departure airport IATA code (case-insensitive).
    ///   - arrivalIATA: Arrival airport IATA code (case-insensitive).
    /// - Returns: The best-matching ICAO type designator, or `nil` when the
    ///   page could not be fetched or contained no usable activity log.
    ///
    /// Results derived from a successfully fetched page (hit or miss) are
    /// cached. Transport errors and non-2xx responses are *not* cached, so
    /// a later call can retry after a transient failure. Concurrent calls
    /// for the same key are not de-duplicated and may each hit the network.
    func lookupType(
        callsign: String,
        departureIATA: String,
        arrivalIATA: String
    ) async -> String? {
        let flight = FlightAwareLookup.normalized(callsign)
        let dep = FlightAwareLookup.normalized(departureIATA)
        let arr = FlightAwareLookup.normalized(arrivalIATA)
        let key = "\(flight)-\(dep)-\(arr)"

        // `cache[key]` is `String??`: the outer optional says whether the key
        // exists, the inner one is the remembered answer (nil = known miss).
        if let cached = cache[key] { return cached }

        // Escape the callsign as a single path component so stray characters
        // (spaces, "/", "?") cannot change which URL is requested.
        var allowed = CharacterSet.urlPathAllowed
        allowed.remove(charactersIn: "/")
        guard !flight.isEmpty,
              let component = flight.addingPercentEncoding(withAllowedCharacters: allowed),
              let url = URL(string: "https://flightaware.com/live/flight/\(component)")
        else {
            // Deterministic failure for this input: remember it as a miss.
            cache.updateValue(nil, forKey: key)
            return nil
        }

        var request = URLRequest(url: url)
        request.timeoutInterval = 10
        request.setValue(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
            forHTTPHeaderField: "User-Agent"
        )
        request.setValue("text/html,application/xhtml+xml", forHTTPHeaderField: "Accept")

        do {
            let (data, response) = try await session.data(for: request)
            guard let http = response as? HTTPURLResponse,
                  (200..<300).contains(http.statusCode)
            else {
                // Possibly transient (rate limit, server error): do not cache.
                return nil
            }
            // Lossy decoding: one invalid byte elsewhere in the page should
            // not make the embedded JSON unreachable.
            let html = String(decoding: data, as: UTF8.self)
            let result = FlightAwareLookup.bestAircraftType(
                inHTML: html,
                departureIATA: dep,
                arrivalIATA: arr
            )
            cache.updateValue(result, forKey: key)
            return result
        } catch {
            // Network failure or cancellation: do not cache, allow a retry.
            return nil
        }
    }

    // MARK: - Parsing

    /// Find the `trackpollBootstrap` JSON in `html` and pick the best
    /// aircraft type from its activity log. Pure function (no network, no
    /// actor state) so it can be exercised directly in tests.
    ///
    /// Selection order:
    /// 1. most common type on the exact `departure-arrival` route,
    /// 2. most common type on the reverse route,
    /// 3. most common type across the whole activity log.
    /// Ties are broken in favour of the type that appears first in the log.
    ///
    /// - Returns: An uppercased type designator, or `nil` if the page has no
    ///   parsable bootstrap object or no entry with an `aircraftType`.
    static func bestAircraftType(
        inHTML html: String,
        departureIATA: String,
        arrivalIATA: String
    ) -> String? {
        guard let blob = extractTrackpollBootstrap(from: html),
              let json = (try? JSONSerialization.jsonObject(with: Data(blob.utf8))) as? [String: Any],
              let flights = json["flights"] as? [String: Any]
        else { return nil }

        // The bootstrap is `{flights: {<flightId>: {activityLog: {flights: [...]}}}}`.
        // The flight id is unknown, so take the first record (in sorted key
        // order, for determinism) that carries a non-empty activity log.
        var entries: [[String: Any]] = []
        for flightID in flights.keys.sorted() {
            guard let record = flights[flightID] as? [String: Any],
                  let activityLog = record["activityLog"] as? [String: Any],
                  let logged = activityLog["flights"] as? [[String: Any]],
                  !logged.isEmpty
            else { continue }
            entries = logged
            break
        }
        guard !entries.isEmpty else { return nil }

        // Collect types per route and overall, preserving log order.
        var typesByRoute: [String: [String]] = [:] // "DAL-HOU" -> ["B738", "B38M", ...]
        var allTypes: [String] = []
        for entry in entries {
            guard let rawType = entry["aircraftType"] as? String else { continue }
            let type = normalized(rawType)
            guard !type.isEmpty else { continue }
            // Every typed entry counts toward the whole-log fallback, even
            // when its route information is missing.
            allTypes.append(type)

            guard let origin = entry["origin"] as? [String: Any],
                  let destination = entry["destination"] as? [String: Any],
                  let originIATA = origin["iata"] as? String,
                  let destinationIATA = destination["iata"] as? String
            else { continue }
            let routeKey = "\(normalized(originIATA))-\(normalized(destinationIATA))"
            typesByRoute[routeKey, default: []].append(type)
        }

        // Normalize the caller's codes the same way as the log's codes so
        // "dal" matches "DAL".
        let dep = normalized(departureIATA)
        let arr = normalized(arrivalIATA)

        // 1) exact route, 2) reverse direction (return leg of same flight).
        for routeKey in ["\(dep)-\(arr)", "\(arr)-\(dep)"] {
            if let types = typesByRoute[routeKey], let top = mostCommon(types) {
                return top
            }
        }
        // 3) Most common across the entire activity log.
        return mostCommon(allTypes)
    }

    /// Locate `var trackpollBootstrap = {...};` in the page and return just
    /// the `{...}` literal, brace-balanced. Brace-walking copes with the
    /// trailing JS after the object literal (no easy regex sentinel).
    ///
    /// The walk is done over UTF-8 bytes: the delimiters are all ASCII and
    /// ASCII bytes never occur inside a multi-byte UTF-8 sequence, whereas a
    /// `Character` walk would merge a quote or brace with a following
    /// combining mark and miss the delimiter.
    private static func extractTrackpollBootstrap(from html: String) -> String? {
        guard let marker = html.range(of: "var trackpollBootstrap"),
              let openBrace = html.range(of: "{", range: marker.upperBound..<html.endIndex)
        else { return nil }

        let quote = UInt8(ascii: "\"")
        let backslash = UInt8(ascii: "\\")
        let open = UInt8(ascii: "{")
        let close = UInt8(ascii: "}")

        let bytes = html.utf8
        let start = openBrace.lowerBound
        var depth = 0
        var inString = false
        var escaped = false
        var idx = start
        while idx < bytes.endIndex {
            let byte = bytes[idx]
            if inString {
                if escaped {
                    escaped = false          // byte after a backslash is literal
                } else if byte == backslash {
                    escaped = true
                } else if byte == quote {
                    inString = false
                }
            } else if byte == quote {
                inString = true
            } else if byte == open {
                depth += 1
            } else if byte == close {
                depth -= 1
                if depth == 0 {
                    let end = bytes.index(after: idx)
                    return String(decoding: bytes[start..<end], as: UTF8.self)
                }
            }
            bytes.formIndex(after: &idx)
        }
        // Ran off the end of the page without closing the object.
        return nil
    }

    /// Most frequent element of `list`, or `nil` for an empty list. Ties go
    /// to the element that appears first, so the result never depends on
    /// dictionary iteration order.
    private static func mostCommon(_ list: [String]) -> String? {
        var counts: [String: Int] = [:]
        for value in list { counts[value, default: 0] += 1 }

        var winner: String?
        var winnerCount = 0
        for value in list {
            let count = counts[value, default: 0]
            if count > winnerCount {
                winner = value
                winnerCount = count
            }
        }
        return winner
    }

    /// Canonical form for callsigns, airport codes and type designators:
    /// surrounding whitespace removed, uppercased.
    private static func normalized(_ code: String) -> String {
        code.trimmingCharacters(in: .whitespacesAndNewlines).uppercased()
    }
}