Aircraft enrichment: FlightAware fallback + IATA→ICAO normalization

The user reported 0 enrichment hits. Two root causes:

1. route-explorer's schedule endpoint only covers FUTURE flights —
   verified by curl: WN1942 / 2024-01-27 → no flights; WN7 /
   2026-05-27 → 1 result with equipmentIata=73H. The user's CSV is
   mostly historical, so route-explorer was a dead end for ~all rows.

2. Even when route-explorer DID return data, it ships IATA aircraft
   codes ("73H") while the rest of the app expects ICAO designators
   ("B738"). The saved string wouldn't have matched displayName or
   type-filter tables anyway.

Three changes:

- FlightAwareLookup actor scrapes flightaware.com/live/flight/
  <CALLSIGN> for the trackpollBootstrap JSON embedded in the page.
  The activityLog.flights[] array contains 8–15 recent operations
  of that flight number, each with a real ICAO aircraftType
  ("B738", "B38M", etc.) and the route IATA. We walk braces to
  extract the JSON literal, then pick the best match:
    1. Same dep/arr route → most common type on that route
    2. Reverse direction (same airframe usually flies the return)
    3. Fallback: most common type across the entire activity log
  Verified by curl that FA isn't Cloudflare-gated and returns
  ICAO codes directly. Results are cached per callsign + route.

- AircraftDatabase.normalizedICAO(forCode:) converts either input
  form (IATA or ICAO) to canonical ICAO. New iataToICAO map covers
  the common 60+ codes (737 family, A320 family, widebodies,
  regionals, MD-80s). Anything missing falls through unchanged.

- Both enrichers (EnrichAircraftTypesView for existing flights,
  ImportCSVView during import) now run the two-step lookup:
  route-explorer first, FlightAware on miss. Result is normalized
  through normalizedICAO before saving so the Aircraft Stats screen
  recognizes the value.

Expected outcome: the user's 80 historical Southwest flights should
mostly get B737 / B738 / B38M codes assigned via FlightAware's
per-flight-# activity log (Southwest reuses flight numbers reliably
on the same routes).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Trey T
2026-05-29 19:00:02 -05:00
parent e5333ff965
commit 0c9d02f7d4
5 changed files with 268 additions and 14 deletions
+4
View File
@@ -86,6 +86,7 @@
HX1800001800000018000001 /* PassportView.swift in Sources */ = {isa = PBXBuildFile; fileRef = HX1800001800000018000002 /* PassportView.swift */; };
HX1900001900000019000001 /* AircraftStatsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = HX1900001900000019000002 /* AircraftStatsView.swift */; };
HX2000002000000020000001 /* EnrichAircraftTypesView.swift in Sources */ = {isa = PBXBuildFile; fileRef = HX2000002000000020000002 /* EnrichAircraftTypesView.swift */; };
HX2100002100000021000001 /* FlightAwareLookup.swift in Sources */ = {isa = PBXBuildFile; fileRef = HX2100002100000021000002 /* FlightAwareLookup.swift */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
@@ -181,6 +182,7 @@
HX1800001800000018000002 /* PassportView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PassportView.swift; sourceTree = "<group>"; };
HX1900001900000019000002 /* AircraftStatsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AircraftStatsView.swift; sourceTree = "<group>"; };
HX2000002000000020000002 /* EnrichAircraftTypesView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EnrichAircraftTypesView.swift; sourceTree = "<group>"; };
HX2100002100000021000002 /* FlightAwareLookup.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FlightAwareLookup.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@@ -326,6 +328,7 @@
HX1000001000000010000002 /* AirframeMetadataService.swift */,
HX1100001100000011000002 /* CSVFlightImporter.swift */,
HX1300001300000013000002 /* HistoryFilters.swift */,
HX2100002100000021000002 /* FlightAwareLookup.swift */,
);
path = Services;
sourceTree = "<group>";
@@ -527,6 +530,7 @@
HX1800001800000018000001 /* PassportView.swift in Sources */,
HX1900001900000019000001 /* AircraftStatsView.swift in Sources */,
HX2000002000000020000001 /* EnrichAircraftTypesView.swift in Sources */,
HX2100002100000021000001 /* FlightAwareLookup.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
+54
View File
@@ -65,6 +65,60 @@ final class AircraftDatabase: @unchecked Sendable {
Self.typeNames[code.uppercased()] ?? code
}
/// Returns the canonical ICAO type designator for an aircraft code.
///
/// Accepts either an IATA aircraft code (e.g. "73H") or an ICAO
/// designator (e.g. "B738"). The input is upper-cased first; a code
/// already present in `typeNames` is treated as ICAO and returned
/// as-is, otherwise it is translated through `iataToICAO`. Codes
/// found in neither table are returned upper-cased and otherwise
/// unchanged, so one canonical form ends up on disk.
///
/// - Parameter code: IATA or ICAO aircraft code, any letter case.
/// - Returns: The ICAO designator, or the upper-cased input when no
///   mapping is known.
func normalizedICAO(forCode code: String) -> String {
    let candidate = code.uppercased()
    // Known ICAO designators win over the IATA table.
    guard Self.typeNames[candidate] == nil else { return candidate }
    if let mapped = Self.iataToICAO[candidate] {
        return mapped
    }
    return candidate
}
/// Common IATA → ICAO mappings for aircraft codes we'll actually
/// see from schedule data. Not exhaustive — covers the bulk of
/// commercial fleet types. Anything missing falls through as-is.
///
/// Keys are IATA aircraft type codes, values are ICAO Doc 8643
/// type designators. Generic IATA family codes ("330", "340",
/// "350", "380") map to the most common variant of that family.
private static let iataToICAO: [String: String] = [
    // Airbus narrowbody ("32A"/"32B" are the sharklet variants of
    // the A320 and A321 respectively; "32S" is the generic family code)
    "319": "A319", "31N": "A19N",
    "320": "A320", "32A": "A320", "32N": "A20N", "32S": "A320",
    "321": "A321", "32B": "A321", "32Q": "A21N",
    "318": "A318",
    // Airbus widebody ("351" is the A350-1000, same airframe as "35K")
    "330": "A332", "332": "A332", "333": "A333", "338": "A338", "339": "A339",
    "340": "A343", "343": "A343", "346": "A346",
    "350": "A359", "359": "A359", "358": "A359",
    "351": "A35K", "35K": "A35K",
    "380": "A388", "388": "A388",
    // A220
    "221": "BCS1", "223": "BCS3",
    // Boeing 737 family. Winglet codes: 73W = -700, 73H = -800,
    // 73J = -900. 73Y is the 737-300 freighter.
    "73G": "B737", "73R": "B737", "73W": "B737",
    "73H": "B738", "738": "B738",
    "73J": "B739", "739": "B739",
    "732": "B732", "733": "B733", "73Y": "B733", "734": "B734", "735": "B735", "736": "B736",
    "7M7": "B37M", "7M8": "B38M", "7M9": "B39M", "7MJ": "B3XM",
    // Boeing 747/767/777/787
    "744": "B744", "748": "B748",
    "762": "B762", "763": "B763", "764": "B764",
    "772": "B772", "773": "B773", "77L": "B77L", "77W": "B77W", "77F": "B77F",
    "778": "B778", "779": "B779",
    "788": "B788", "789": "B789", "781": "B78X", "78X": "B78X", "78J": "B78X",
    // 757
    "752": "B752", "753": "B753",
    // Embraer regional
    "E70": "E170", "E75": "E175", "E7W": "E175",
    "E90": "E190", "E95": "E195", "295": "E295",
    // Bombardier / CRJ
    "CR2": "CRJ2", "CR7": "CRJ7", "CR9": "CRJ9",
    // Dash 8
    "DH4": "DH8D", "DH3": "DH8C",
    // ATR
    "AT5": "AT45", "AT7": "AT72", "ATR": "AT72",
    // MD-80 family
    "M80": "MD80", "M81": "MD81", "M82": "MD82", "M83": "MD83", "M87": "MD87", "M88": "MD88", "M90": "MD90",
]
/// Friendly names for the ~150 most common commercial type designators
/// we'd see on the map. Anything else displays as the raw 3–4 letter
/// code (still useful for filtering). Designators follow ICAO Doc 8643.
+160
View File
@@ -0,0 +1,160 @@
import Foundation
/// Best-effort aircraft type lookup by scraping FlightAware's
/// `/live/flight/<callsign>` page. Their server embeds a
/// `trackpollBootstrap` JSON in the page source that contains an
/// `activityLog.flights[]` array — each entry has an `aircraftType`
/// in ICAO designator form (B738, B38M, A21N, etc.), the route as
/// IATA codes, and the scheduled gate departure timestamp.
///
/// Pages are not Cloudflare-gated for direct GET requests with a
/// browser-shaped User-Agent. No auth required.
///
/// Matching strategy: prefer an activity-log entry whose route
/// matches the user's flight; otherwise fall back to the most common
/// `aircraftType` across the log (good proxy because flight numbers
/// usually keep the same equipment class across many days).
actor FlightAwareLookup {
    static let shared = FlightAwareLookup()

    /// Browser-shaped User-Agent sent with every page request.
    private static let userAgent =
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15"

    private let session: URLSession

    /// "<callsign>-<dep>-<arr>" -> ICAO type ("B738"), or a stored `nil`
    /// for a lookup that already missed. A key that is absent has not
    /// been looked up yet.
    private var cache: [String: String?] = [:]

    init(session: URLSession = .shared) {
        self.session = session
    }

    /// Look up the ICAO aircraft type for one flight.
    ///
    /// - Parameters:
    ///   - callsign: ICAO carrier + number, e.g. "SWA1942".
    ///   - departureIATA: Departure airport IATA code (any letter case),
    ///     used to find the best route match in the activity log.
    ///   - arrivalIATA: Arrival airport IATA code (any letter case).
    /// - Returns: The ICAO type designator, or `nil` when the page could
    ///   not be fetched or held no usable activity-log entry. Both hits
    ///   and misses are remembered for the lifetime of this actor.
    func lookupType(
        callsign: String,
        departureIATA: String,
        arrivalIATA: String
    ) async -> String? {
        let key = "\(callsign)-\(departureIATA)-\(arrivalIATA)"
        // Outer optional: "is there a cache entry"; inner: hit or miss.
        if let cached = cache[key] { return cached }

        var result: String?
        if let html = await fetchPage(for: callsign) {
            result = Self.bestType(
                inHTML: html,
                departureIATA: departureIATA,
                arrivalIATA: arrivalIATA
            )
        }
        // `cache[key] = nil` would REMOVE the key on a dictionary with an
        // optional value type, so misses would never be cached.
        // `updateValue` stores the nil as a real entry.
        cache.updateValue(result, forKey: key)
        return result
    }

    /// Fetch the flight page HTML for `callsign`. Returns `nil` for an
    /// unbuildable URL, a transport error, or a non-2xx response.
    private func fetchPage(for callsign: String) async -> String? {
        guard let url = URL(string: "https://flightaware.com/live/flight/\(callsign)") else {
            return nil
        }
        var request = URLRequest(url: url)
        request.timeoutInterval = 10
        request.setValue(Self.userAgent, forHTTPHeaderField: "User-Agent")
        request.setValue("text/html,application/xhtml+xml", forHTTPHeaderField: "Accept")
        do {
            let (data, response) = try await session.data(for: request)
            guard let http = response as? HTTPURLResponse,
                  (200..<300).contains(http.statusCode)
            else { return nil }
            // Lossy decoding: one malformed byte somewhere in a large
            // page should not discard an otherwise usable document.
            return String(decoding: data, as: UTF8.self)
        } catch {
            return nil
        }
    }

    // MARK: - Parsing

    /// Find the `trackpollBootstrap` JSON in `html` and pick the best
    /// aircraft type from its activity log. Pure function (no actor
    /// state, no I/O) so it can be exercised directly.
    ///
    /// Preference order:
    ///   1. most common type on the exact `departureIATA`→`arrivalIATA` route,
    ///   2. most common type on the reverse route,
    ///   3. most common type across the whole activity log.
    /// Ties go to the entry that appears first in the log.
    ///
    /// - Returns: Upper-cased `aircraftType`, or `nil` when the bootstrap
    ///   JSON is missing/unparseable or has no entry with origin,
    ///   destination and a non-empty type.
    static func bestType(
        inHTML html: String,
        departureIATA: String,
        arrivalIATA: String
    ) -> String? {
        guard let blob = extractTrackpollBootstrap(from: html),
              let root = try? JSONSerialization.jsonObject(with: Data(blob.utf8)) as? [String: Any],
              let flights = root["flights"] as? [String: Any]
        else { return nil }

        // The bootstrap is `{flights: {<flightId>: {activityLog: {flights: [...]}}}}`.
        // We don't know the key, so take the first flight (in sorted key
        // order, for a stable choice) that carries a non-empty log.
        var entries: [[String: Any]] = []
        for flightID in flights.keys.sorted() {
            guard let flight = flights[flightID] as? [String: Any],
                  let activityLog = flight["activityLog"] as? [String: Any],
                  let list = activityLog["flights"] as? [[String: Any]],
                  !list.isEmpty
            else { continue }
            entries = list
            break
        }
        guard !entries.isEmpty else { return nil }

        // Pull (route, type) pairs from each entry.
        var byRoute: [String: [String]] = [:]   // "DAL-HOU" -> ["B738", "B38M", ...]
        var allTypes: [String] = []
        for entry in entries {
            guard let origin = entry["origin"] as? [String: Any],
                  let destination = entry["destination"] as? [String: Any],
                  let originIATA = (origin["iata"] as? String)?.uppercased(),
                  let destinationIATA = (destination["iata"] as? String)?.uppercased(),
                  let type = (entry["aircraftType"] as? String)?.uppercased(),
                  !type.isEmpty
            else { continue }
            byRoute["\(originIATA)-\(destinationIATA)", default: []].append(type)
            allTypes.append(type)
        }

        // Log keys are upper-cased above; normalize the caller's codes
        // the same way so lowercase input still matches.
        let dep = departureIATA.uppercased()
        let arr = arrivalIATA.uppercased()

        // 1) Exact route match, 2) reverse direction (return leg of the
        // same flight number).
        for routeKey in ["\(dep)-\(arr)", "\(arr)-\(dep)"] {
            if let types = byRoute[routeKey], let top = mostCommon(types) {
                return top
            }
        }
        // 3) Most common across the entire activity log.
        return mostCommon(allTypes)
    }

    /// Locate `var trackpollBootstrap = {...};` in the page and
    /// return just the `{...}` literal, brace-balanced. Braces inside
    /// double-quoted strings (including after `\"` escapes) are ignored.
    /// Returns `nil` when the marker is absent or the braces never balance.
    private static func extractTrackpollBootstrap(from html: String) -> String? {
        guard let marker = html.range(of: "var trackpollBootstrap"),
              let openBrace = html.range(of: "{", range: marker.upperBound..<html.endIndex)
        else { return nil }

        let start = openBrace.lowerBound
        var depth = 0
        var inString = false
        var escaped = false
        for idx in html[start...].indices {
            let ch = html[idx]
            if inString {
                if escaped {
                    escaped = false          // character after a backslash is literal
                } else if ch == "\\" {
                    escaped = true
                } else if ch == "\"" {
                    inString = false
                }
                continue
            }
            switch ch {
            case "\"":
                inString = true
            case "{":
                depth += 1
            case "}":
                depth -= 1
                if depth == 0 {
                    return String(html[start...idx])
                }
            default:
                break
            }
        }
        // Ran off the end of the page without closing the literal.
        return nil
    }

    /// Most frequent element of `list`; on a tie, the one that occurs
    /// first in `list`. Returns `nil` for an empty list.
    private static func mostCommon(_ list: [String]) -> String? {
        var counts: [String: Int] = [:]
        for value in list { counts[value, default: 0] += 1 }

        // Walk the list (not the dictionary) so ties resolve by input
        // order instead of by hash order, which varies between runs.
        var best: String?
        var bestCount = 0
        for value in list {
            let count = counts[value, default: 0]
            if count > bestCount {
                best = value
                bestCount = count
            }
        }
        return best
    }
}
+29 -4
View File
@@ -138,11 +138,21 @@ struct EnrichAircraftTypesView: View {
phase = .done
}
/// Two-step lookup:
/// 1. route-explorer schedule — works for future or near-future
/// flights. Returns IATA aircraft codes ("73H").
/// 2. FlightAware activity-log scrape — works for historical
/// flights still on a current flight number. Returns ICAO
/// codes ("B738").
/// Either way we normalize to canonical ICAO via AircraftDatabase
/// before saving so the rest of the app recognizes the value.
private func lookupAircraftType(for f: LoggedFlight) async -> String? {
guard let carrier = f.carrierIATA,
let numStr = f.flightNumber,
let num = Int(numStr)
else { return nil }
// 1) route-explorer
let day = Calendar.current.startOfDay(for: f.flightDate)
let next = Calendar.current.date(byAdding: .day, value: 1, to: day) ?? day
let results = await routeExplorer.searchSchedule(
@@ -151,13 +161,28 @@ struct EnrichAircraftTypesView: View {
startDate: day,
endDate: next
)
// Prefer the result whose dep/arr matches our flight's route
// (some flight numbers fly different routes day to day).
let exact = results.first {
$0.departure.airportIata == f.departureIATA
&& $0.arrival.airportIata == f.arrivalIATA
} ?? results.first
guard let eq = exact?.equipmentIata, !eq.isEmpty else { return nil }
return eq.uppercased()
if let eq = exact?.equipmentIata, !eq.isEmpty {
return AircraftDatabase.shared.normalizedICAO(forCode: eq)
}
// 2) FlightAware fallback
// Build the ICAO callsign — FA addresses pages by ICAO carrier
// + flight number. AircraftRegistry already maps IATA→ICAO.
guard let carrierICAO = f.carrierICAO
?? AircraftRegistry.shared.lookup(iata: carrier)?.icao
else { return nil }
let callsign = "\(carrierICAO)\(num)"
if let icaoType = await FlightAwareLookup.shared.lookupType(
callsign: callsign,
departureIATA: f.departureIATA,
arrivalIATA: f.arrivalIATA
) {
return AircraftDatabase.shared.normalizedICAO(forCode: icaoType)
}
return nil
}
}
+21 -10
View File
@@ -187,10 +187,10 @@ struct ImportCSVView: View {
}
}
/// Look up the scheduled aircraft type from route-explorer for one
/// parsed row. Returns nil for old flights (no schedule data),
/// unmappable carriers, or network failures — those cases are
/// expected and we just save the flight without a type.
/// Two-step lookup. Tries route-explorer first (works for future
/// schedules, returns IATA), then FlightAware (works for
/// historical flights, returns ICAO). Normalizes the result to
/// canonical ICAO before returning.
private func lookupAircraftType(for p: CSVFlightImporter.ParsedFlight) async -> String? {
guard let carrier = p.carrierIATA,
let numStr = p.flightNumber,
@@ -204,16 +204,27 @@ struct ImportCSVView: View {
startDate: day,
endDate: next
)
// Match the route too (some flight numbers fly different
// routes on different days; we want the one matching the
// user's dep/arr).
let exact = results.first {
$0.departure.airportIata == p.departureIATA
&& $0.arrival.airportIata == p.arrivalIATA
} ?? results.first
let eq = exact?.equipmentIata
guard let eq, !eq.isEmpty else { return nil }
return eq.uppercased()
if let eq = exact?.equipmentIata, !eq.isEmpty {
return AircraftDatabase.shared.normalizedICAO(forCode: eq)
}
// FlightAware fallback for historical flights
guard let carrierICAO = p.carrierICAO
?? AircraftRegistry.shared.lookup(iata: carrier)?.icao
else { return nil }
let callsign = "\(carrierICAO)\(num)"
if let icaoType = await FlightAwareLookup.shared.lookupType(
callsign: callsign,
departureIATA: p.departureIATA,
arrivalIATA: p.arrivalIATA
) {
return AircraftDatabase.shared.normalizedICAO(forCode: icaoType)
}
return nil
}
private func runParse(url: URL) async {