Files
Flights/scripts/probe_route_explorer.py
T
Trey T ba0688a412 Search: FlightAware backbone, blob catalog, diagnostic infra
route-explorer's /api/token sits behind invisible Cloudflare Turnstile
that requires Apple's Private Access Token attestation. Third-party
iOS apps don't qualify for PAT issuance, and Linux Docker containers
can't pass it either (cross-OS fingerprint, even with patchright /
Camoufox). Migrates direct-flight search to FlightAware; multi-stop
and where-can-I-go remain via embedded SFSafariViewController.

- FlightAwareScheduleClient — scrapes route.rvt + trackpoll JSON for
  real schedules without auth. T+0..2 day window. Tests against
  captured HTML fixtures.
- BlobRouteClient — pulls the public Vercel blob route catalog
  route-explorer's frontend reads (no auth, no Turnstile).
- DiagnosticLogger + LoggingURLSessionDelegate + DiagnosticsView —
  device-shareable forensic trace. Boot header captures device, OS,
  locale, UA; share-sheet export of session logs.
- TurnstileDebugView — live WKWebView gate inspector. Used to prove
  the PAT-entitlement gap on a real device.
- RouteExplorerBrowserView — SFSafariViewController wrapper. Real
  Safari clears Turnstile naturally; the in-app browser opens at
  pre-filled search URLs. Surfaced from Search ("Open in
  route-explorer") and Settings → Tools.
- RouteExplorerTokenStore + RouteExplorerSetupView — bookmarklet
  capture flow (token round-tripped via flights://routeexplorer-token
  URL scheme). Kept dormant for future use.

backend/ — Docker proxy attempts (Playwright, patchright, Camoufox).
All fail on Linux because Cloudflare auto-denies before the Turnstile
widget renders. Documented; kept as scaffolding for a future paid-
solver integration.

scripts/probe_flightaware.py — reference algorithm for the FA path.
scripts/probe_nodriver.py — local-Mac sanity check confirming the
gate clears with real macOS Chrome (proves the blocker is
fingerprint-level, not network-level).
2026-06-06 01:09:59 -05:00

338 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Probe route-explorer.com end-to-end from outside our iOS app.
Tests, in order:
1. Plain requests.get('/api/token') with browser-shaped headers.
2. Homepage → cookies → retry /api/token (same session).
3. cloudscraper (Cloudflare-aware) if installed.
4. playwright headless Chromium → load homepage → accept cookies →
click Retry → wait for /api/token to return 200, capture cookies,
re-issue /api/token from a plain requests session using those cookies.
5. If we ever land a token: call /api/flight-search for DAL→HOU today
and dump the flight numbers + times.
6. Verify public Vercel blob data (the catalog path).
The point: prove or disprove that *anything* outside Safari-with-history
can reach /api/flight-search, and if it can, what it took.
Usage: python3 probe_route_explorer.py
"""
from __future__ import annotations
import json
import sys
import time
from datetime import date
# Origin of the site under test; the probes build their page and /api/ URLs from it.
BASE = "https://route-explorer.com"
# Public Vercel blob host queried by the catalog check (step 6).
BLOB = "https://g80l6xxwjkrjoai7.public.blob.vercel-storage.com"
# Request headers shaped like Safari on an iPhone (iOS 17.5 user agent), with
# Origin and Referer pointing back at BASE.
HEADERS_SAFARI_IPHONE = {
    "User-Agent": (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 "
        "Mobile/15E148 Safari/604.1"
    ),
    "Accept": "application/json",
    "Accept-Language": "en-US,en;q=0.9",
    "Origin": BASE,
    "Referer": BASE + "/",
}
def line(s=""):
    """Print ``s`` on its own line and flush stdout right away.

    Called without an argument it emits a blank line.
    """
    print(s, end="\n", flush=True)
def section(title: str):
    """Print a section banner.

    The banner is a blank line followed by ``title`` (prefixed with one
    space) framed above and below by a rule of 72 ``=`` characters.
    """
    rule = "=" * 72
    for text in ("", rule, f" {title}", rule):
        line(text)
# ---------------------------------------------------------------------------
def test_plain_requests():
    """Probe 1: one cookieless GET of /api/token with Safari-iPhone headers.

    Logs the HTTP status, the first 300 characters of the body and the names
    of any cookies set on the response.

    Returns:
        The ``requests`` response object.
    """
    section("1. Plain requests with browser-shaped headers")
    import requests

    token_url = f"{BASE}/api/token"
    resp = requests.get(token_url, headers=HEADERS_SAFARI_IPHONE, timeout=15)

    line(f" /api/token → HTTP {resp.status_code}")
    line(f" body: {resp.text[:300]}")
    cookie_names = [cookie.name for cookie in resp.cookies]
    line(f" set-cookies: {cookie_names}")
    return resp
def test_session_homepage_first():
    """Probe 2: load the homepage first, then ask for /api/token in the same session.

    Checks whether cookies picked up from the homepage are enough to mint a
    token. Logs status codes and the session's cookie names after each step.

    Returns:
        ``(session, token_response)``: the ``requests.Session`` used and the
        response to the /api/token request.
    """
    section("2. requests.Session: homepage → cookies → retry /api/token")
    import requests

    session = requests.Session()
    session.headers.update(HEADERS_SAFARI_IPHONE)

    def cookie_names():
        # Re-read the jar on every call so each log line shows its current state.
        return [cookie.name for cookie in session.cookies]

    home_resp = session.get(BASE + "/", timeout=15)
    line(f" GET / → HTTP {home_resp.status_code} cookies: {cookie_names()}")

    token_resp = session.get(f"{BASE}/api/token", timeout=15)
    line(f" GET /api/token→ HTTP {token_resp.status_code} body: {token_resp.text[:200]}")
    line(f" cookies after: {cookie_names()}")
    return session, token_resp
def test_cloudscraper():
    """Probe 3: request /api/token through ``cloudscraper``, if it is installed.

    Returns:
        The cloudscraper session when /api/token answered HTTP 200; ``None``
        when the package is missing or any other status came back.
    """
    section("3. cloudscraper (if installed)")
    try:
        import cloudscraper  # type: ignore
    except ImportError:
        line(" cloudscraper NOT installed. (pip install cloudscraper)")
        return None

    scraper = cloudscraper.create_scraper()
    resp = scraper.get(f"{BASE}/api/token", timeout=30)

    line(f" /api/token → HTTP {resp.status_code}")
    line(f" body: {resp.text[:300]}")
    jar_names = [cookie.name for cookie in scraper.cookies]
    line(f" cookies: {jar_names}")

    if resp.status_code != 200:
        return None
    return scraper
def test_playwright(headless: bool = True, label: str = "headless"):
    """Probe 4: drive Chromium through Playwright and try to clear the gate.

    Loads the homepage with the Safari-iPhone user agent and clicks any
    accept/agree/allow button. Then, for up to 30 one-second ticks, it clicks
    any "Retry" button and fetches /api/token from inside the page. Once a
    tick returns 200, the browser cookies are copied into a plain
    ``requests`` session and /api/token is requested again outside the
    browser.

    Args:
        headless: Launch headless Chromium when True; otherwise open a
            visible window with the ``AutomationControlled`` blink feature
            disabled.
        label: Text shown in the section banner.

    Returns:
        ``(session, token)`` when the replayed request returns HTTP 200 with
        a JSON body (``token`` is ``None`` if the body has no "token" key).
        ``None`` in every other case, including Playwright not being
        installed.
    """
    section(f"4. Playwright Chromium ({label}) — full clearance dance")
    try:
        from playwright.sync_api import sync_playwright  # type: ignore
    except ImportError:
        line(" playwright NOT installed. (pip install playwright && playwright install chromium)")
        return None

    # JavaScript snippets evaluated inside the page.
    click_accept_js = (
        "() => {\n"
        "for (const b of document.querySelectorAll('button')) {\n"
        "if (/accept|agree|allow/i.test((b.innerText||'').trim())) b.click();\n"
        "}\n"
        "}"
    )
    click_retry_js = (
        "() => {\n"
        "for (const b of document.querySelectorAll('button')) {\n"
        "if (/retry/i.test((b.innerText||'').trim())) b.click();\n"
        "}\n"
        "}"
    )
    fetch_token_js = (
        "async () => {\n"
        "try {\n"
        "const r = await fetch('/api/token', { credentials: 'include' });\n"
        "return r.status;\n"
        "} catch (e) { return -1; }\n"
        "}"
    )

    cleared = False
    cookies = []
    with sync_playwright() as p:
        # In headed mode, use the full chromium build, not the headless shell.
        if headless:
            browser = p.chromium.launch(headless=True)
        else:
            browser = p.chromium.launch(
                headless=False,
                args=["--disable-blink-features=AutomationControlled"],
            )
        try:
            ctx = browser.new_context(
                user_agent=HEADERS_SAFARI_IPHONE["User-Agent"],
            )
            page = ctx.new_page()

            # (url, status) of every same-site /api/ response the page received.
            api_responses: list[tuple[str, int]] = []

            def record_api_response(resp):
                if "/api/" in resp.url and BASE in resp.url:
                    api_responses.append((resp.url, resp.status))

            page.on("response", record_api_response)

            line(" goto homepage…")
            page.goto(BASE + "/", wait_until="domcontentloaded", timeout=30000)
            # accept cookies
            page.evaluate(click_accept_js)
            line(" accepted cookie banner")

            # tap Retry repeatedly + wait for clearance
            for tick in range(1, 31):
                page.wait_for_timeout(1000)
                page.evaluate(click_retry_js)
                try:
                    status = page.evaluate(fetch_token_js)
                except Exception:
                    # Best effort: a failed evaluate just counts as "not
                    # cleared yet" for this tick.
                    status = -1
                cookie_names = sorted(c["name"] for c in ctx.cookies())
                line(f" t+{tick:2d}s /api/token→{status} cookies={cookie_names}")
                if status == 200:
                    cleared = True
                    break

            cookies = ctx.cookies()
            line(f" final cleared={cleared} cookies={[c['name'] for c in cookies]}")
            seen_statuses = sorted({status for _, status in api_responses})
            line(f" /api/ response statuses seen by the page: {seen_statuses}")
        finally:
            # Close the browser even when a step above raised.
            browser.close()

    if not cleared:
        return None

    # Build a plain requests session pre-loaded with the cookies and
    # test whether /api/token survives outside the browser context.
    import requests

    s = requests.Session()
    s.headers.update(HEADERS_SAFARI_IPHONE)
    for c in cookies:
        s.cookies.set(c["name"], c["value"], domain=c["domain"], path=c["path"])
    r = s.get(f"{BASE}/api/token", timeout=15)
    line(f" REPLAY via requests with captured cookies → HTTP {r.status_code}")
    line(f" body: {r.text[:200]}")
    if r.status_code != 200:
        return None

    try:
        payload = r.json()
    except ValueError:
        line(" HTTP 200 but the body is not JSON — no token")
        return None
    token = payload.get("token") if isinstance(payload, dict) else None
    if token:
        line(f" TOKEN MINTED: {token[:24]}")
    else:
        line(" HTTP 200 but the JSON body carries no 'token' field")
    return s, token
def test_undetected_chromedriver():
    """Probe 4b: same clearance attempt, driven by undetected-chromedriver.

    Opens the homepage in headless Chrome, clicks any accept/agree/allow
    button, then polls /api/token from inside the page once per second for
    up to 30 ticks. On a 200 the browser cookies are replayed through a
    plain ``requests`` session.

    Returns:
        ``(session, token)`` when the replayed /api/token request returns
        HTTP 200; ``None`` when the package is missing, the gate never
        cleared, or the replay was rejected. The driver is always quit.
    """
    section("4b. undetected-chromedriver (Cloudflare-aware Selenium)")
    try:
        import undetected_chromedriver as uc  # type: ignore
    except ImportError:
        line(" undetected-chromedriver NOT installed.")
        return None

    click_accept_js = (
        "\n"
        "for (const b of document.querySelectorAll('button')) {\n"
        "if (/accept|agree|allow/i.test((b.innerText||'').trim())) b.click();\n"
        "}\n"
    )
    fetch_status_js = (
        "\n"
        "return new Promise((res) => {\n"
        "fetch('/api/token', { credentials: 'include' })\n"
        ".then(r => res(r.status))\n"
        ".catch(() => res(-1));\n"
        "});\n"
    )

    chrome_opts = uc.ChromeOptions()
    chrome_opts.add_argument("--headless=new")
    driver = uc.Chrome(options=chrome_opts, version_main=None)
    try:
        driver.get(BASE + "/")
        time.sleep(2)
        # accept cookies
        driver.execute_script(click_accept_js)

        for tick in range(1, 31):
            time.sleep(1)
            try:
                status = driver.execute_script(fetch_status_js)
            except Exception:
                status = -1
            jar_names = sorted(entry["name"] for entry in driver.get_cookies())
            line(f" t+{tick:2d}s /api/token→{status} cookies={jar_names}")
            if status == 200:
                break
        else:
            # All 30 ticks elapsed without a 200: the gate never cleared.
            return None

        import requests

        replay = requests.Session()
        replay.headers.update(HEADERS_SAFARI_IPHONE)
        for entry in driver.get_cookies():
            replay.cookies.set(
                entry["name"],
                entry["value"],
                domain=entry["domain"],
                path=entry["path"],
            )
        resp = replay.get(f"{BASE}/api/token", timeout=15)
        line(f" REPLAY via requests → HTTP {resp.status_code} body: {resp.text[:200]}")
        if resp.status_code != 200:
            return None
        return (replay, resp.json().get("token"))
    finally:
        driver.quit()
def _format_flight_leg(leg):
    """Render one flight dict as `` CCnnn DEP@time → ARR@time (equipment)``."""
    # ``or {}`` also covers an explicit JSON null, which ``.get(key, {})`` does not.
    dep = leg.get("departure") or {}
    arr = leg.get("arrival") or {}
    return (
        f" {leg.get('carrierIata')}{leg.get('flightNumber')} "
        f"{dep.get('airportIata')}@{dep.get('dateTime')} → "
        f"{arr.get('airportIata')}@{arr.get('dateTime')} "
        f"({leg.get('equipmentIata')})"
    )


def test_flight_search(session, token):
    """Probe 5: POST a non-stop DAL→HOU search for today to /api/flight-search.

    Sends the token in the ``X-API-Token`` header, then logs the HTTP status
    and the flights of the first 8 returned connections. On a non-200 it
    logs the first 400 characters of the body instead.

    Args:
        session: A ``requests``-style session carrying the clearance cookies.
        token: The token string obtained from /api/token.

    Returns:
        None. The step is skipped when ``session`` or ``token`` is falsy.
    """
    section("5. /api/flight-search for DAL→HOU today")
    if not session or not token:
        line(" no session/token → skipped")
        return

    today = date.today().isoformat()
    body = {
        "endpoint": "/route",
        "body": {
            "json": {
                "departureAirportIata": "DAL",
                "arrivalAirportIata": "HOU",
                "departureDates": [today],
                "maxStops": 0,
                "limit": 50,
                "includeAppendix": True,
            }
        }
    }
    r = session.post(
        f"{BASE}/api/flight-search",
        headers={**HEADERS_SAFARI_IPHONE, "Content-Type": "application/json", "X-API-Token": token},
        json=body, timeout=20,
    )
    line(f" /api/flight-search → HTTP {r.status_code}")
    if r.status_code != 200:
        line(f" body: {r.text[:400]}")
        return

    try:
        data = r.json()
    except ValueError:
        # A 200 with a non-JSON body should be reported, not crash the probe.
        line(f" body is not JSON: {r.text[:400]}")
        return

    payload = data.get("json") if isinstance(data, dict) else None
    conns = payload.get("connections") if isinstance(payload, dict) else None
    conns = conns if isinstance(conns, list) else []
    line(f"{len(conns)} connections")
    for conn in conns[:8]:
        legs = conn.get("flights") if isinstance(conn, dict) else None
        for leg in legs or []:
            if isinstance(leg, dict):
                line(_format_flight_leg(leg))
def test_blob_catalog():
    """Probe 6: fetch the public blob catalog files and sample the DAL→HOU route.

    GETs three catalog JSON files from ``BLOB`` and logs status and size for
    each. It then looks for an entry with ``dest == "HOU"`` under the
    ``routes`` key of the DAL file and logs the first match. The DAL
    response from the listing loop is reused rather than downloaded again.
    A failed, non-JSON or unexpectedly shaped response is reported as a log
    line instead of raising.

    Returns:
        None.
    """
    section("6. Public Vercel blob — no auth, raw route catalog")
    import requests

    dal_path = "/data/routes/DAL.json"
    paths = [
        "/data/airports-with-routes.json",
        "/data/airlines.json",
        dal_path,
    ]
    dal_resp = None
    for path in paths:
        resp = requests.get(BLOB + path, timeout=15)
        line(f" GET {path} → HTTP {resp.status_code} size={len(resp.content):,}B")
        if path == dal_path:
            dal_resp = resp

    # sample DAL→HOU from blob
    if dal_resp is None or dal_resp.status_code != 200:
        line(" DAL→HOU in blob: <catalog unavailable>")
        return
    try:
        dal = dal_resp.json()
    except ValueError:
        line(" DAL→HOU in blob: <catalog is not valid JSON>")
        return

    routes = dal.get("routes") if isinstance(dal, dict) else None
    hou = [
        route
        for route in (routes if isinstance(routes, list) else [])
        if isinstance(route, dict) and route.get("dest") == "HOU"
    ]
    line(f" DAL→HOU in blob: {hou[0] if hou else '<not found>'}")
# ---------------------------------------------------------------------------
def main():
    """Run every probe in order and print a conclusion.

    Order: plain request, session with homepage, cloudscraper, headless
    Playwright, undetected-chromedriver, then headed Playwright as a last
    resort. Each browser probe runs only while no token has been minted.
    With a token in hand the flight-search probe runs. The blob catalog
    probe always runs.

    The conclusion reports only what this function knows: whether a token
    was minted and by which probe. The flight-search outcome is in that
    step's own log lines.
    """
    sess = None
    token = None
    minted_by = None  # human-readable name of the probe that produced the token

    test_plain_requests()
    test_session_homepage_first()
    if scraper := test_cloudscraper():
        # cloudscraper only hands back a session, never a token, so the
        # browser probes below still run.
        sess, token = scraper, None

    if not (sess and token):
        if result := test_playwright(headless=True, label="headless"):
            sess, token = result
            if token:
                minted_by = "headless Playwright Chromium"
    if not (sess and token):
        if result := test_undetected_chromedriver():
            sess, token = result
            if token:
                minted_by = "undetected-chromedriver"
    if not (sess and token):
        line()
        line(">>> headless approaches all failed. Trying HEADED Chromium...")
        line(">>> (window will appear on your screen)")
        if result := test_playwright(headless=False, label="HEADED"):
            sess, token = result
            if token:
                minted_by = "headed Playwright Chromium"

    if sess and token:
        test_flight_search(sess, token)
    else:
        line()
        line("No path produced a token — /api/flight-search step skipped.")

    test_blob_catalog()

    section("CONCLUSION")
    if sess and token:
        line(f" A token was minted via {minted_by}, so the gate CAN be cleared")
        line(" programmatically. See step 5 above for the /api/flight-search result.")
        line(" Path forward: small backend that mints tokens this way and serves")
        line(" the iOS app, or pin the captured cookie into the app's WKWebView.")
    else:
        line(" No request shape outside real Safari managed to mint a token.")
        line(" The gate categorically rejects URLSession + WKWebView + headless")
        line(" Chromium without sticky cumulative session state.")
        line()
        line(" But blob catalog data IS public — browse-style UX is achievable")
        line(" without any auth.")
# Run the probes only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()