Files
Flights/backend/app.py
T
Trey T ba0688a412 Search: FlightAware backbone, blob catalog, diagnostic infra
route-explorer's /api/token sits behind invisible Cloudflare Turnstile
that requires Apple's Private Access Token attestation. Third-party
iOS apps don't qualify for PAT issuance, and Linux Docker containers
can't pass it either (cross-OS fingerprint, even with patchright /
Camoufox). Migrates direct-flight search to FlightAware; multi-stop
and where-can-I-go remain via embedded SFSafariViewController.

- FlightAwareScheduleClient — scrapes route.rvt + trackpoll JSON for
  real schedules without auth. T+0..2 day window. Tests against
  captured HTML fixtures.
- BlobRouteClient — pulls the public Vercel blob route catalog
  route-explorer's frontend reads (no auth, no Turnstile).
- DiagnosticLogger + LoggingURLSessionDelegate + DiagnosticsView —
  device-shareable forensic trace. Boot header captures device, OS,
  locale, UA; share-sheet export of session logs.
- TurnstileDebugView — live WKWebView gate inspector. Used to prove
  the PAT-entitlement gap on a real device.
- RouteExplorerBrowserView — SFSafariViewController wrapper. Real
  Safari clears Turnstile naturally; the in-app browser opens at
  pre-filled search URLs. Surfaced from Search ("Open in
  route-explorer") and Settings → Tools.
- RouteExplorerTokenStore + RouteExplorerSetupView — bookmarklet
  capture flow (token round-tripped via flights://routeexplorer-token
  URL scheme). Kept dormant for future use.

backend/ — Docker proxy attempts (Playwright, patchright, Camoufox).
All fail on Linux because Cloudflare auto-denies before the Turnstile
widget renders. Documented; kept as scaffolding for a future paid-
solver integration.

scripts/probe_flightaware.py — reference algorithm for the FA path.
scripts/probe_nodriver.py — local-Mac sanity check confirming the
gate clears with real macOS Chrome (proves the blocker is
fingerprint-level, not network-level).
2026-06-06 01:09:59 -05:00

404 lines
15 KiB
Python

"""
flights.treytartt.com — route-explorer proxy backend.
What this service does and why it exists
========================================
route-explorer.com gates `/api/token` behind Cloudflare Turnstile that
requires Apple's Private Access Token. Third-party iOS apps cannot
mint a PAT, so the iOS app can never get a token directly. This
service runs headless Chromium (via patchright) inside a Docker
container — the premise being that Chromium passes Turnstile silently
from Linux because the Cloudflare bypass relies on TLS/JS fingerprints,
not Apple-specific attestation — fetches a token, caches it, and
exposes a thin proxy that the iOS app authenticates to with a shared
bearer secret.
Endpoints
---------
GET /health — public, returns {"status": "ok", ...}
GET /api/token — returns a cached {"token": ...} (refreshes if expired)
POST /api/flight-search — forwards the JSON body to route-explorer.com
with the cached cookies + X-API-Token header
POST /api/route — alias for /api/flight-search with endpoint=/route
POST /api/departures — alias for endpoint=/departures
POST /api/schedule — alias for endpoint=/schedule
Auth
----
All `/api/*` endpoints require `Authorization: Bearer $SHARED_SECRET`.
The shared secret comes from the env var `SHARED_SECRET`. The iOS app
bundles the same value at build time.
Token cache
-----------
Tokens are minted at startup (or on the first request that needs one
if the startup mint fails) and refreshed when the in-memory expiry is
< 30 seconds away. A single asyncio.Lock serializes refresh so a
thundering herd doesn't spawn 10 browsers.
"""
import asyncio
import json
import logging
import os
import re
import time
from contextlib import asynccontextmanager
from pathlib import Path
import httpx
from fastapi import Depends, FastAPI, Header, HTTPException, Request
from fastapi.responses import JSONResponse
# Load .env from the directory that contains this file (not the current
# working directory) so launchd-managed runs pick up SHARED_SECRET
# without needing to bake it into the plist. python-dotenv is optional:
# if it is not installed, the process environment is used as-is.
try:
    from dotenv import load_dotenv
    load_dotenv(Path(__file__).parent / ".env")
except ImportError:
    pass

# Bearer secret every /api/* caller must present. Defaults to "" when the
# env var is unset; auth() then answers 500 instead of running open.
SHARED_SECRET = os.environ.get("SHARED_SECRET", "")
# Lifetime assigned to a freshly minted token before it is refreshed.
TOKEN_TTL_SECONDS = int(os.environ.get("TOKEN_TTL_SECONDS", "1500")) # 25 min
# Upstream origin; used for navigation, Origin/Referer headers and the proxy URL.
ROUTE_EXPLORER_BASE = "https://route-explorer.com"
# User-Agent sent on proxied upstream requests (an iPhone Safari string).
SAFARI_UA = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_5 like Mac OS X) "
    "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 "
    "Mobile/15E148 Safari/604.1"
)

# Module-wide logger; basicConfig sets INFO level and the timestamped format.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("flights")
class TokenCache:
"""Single-token in-memory cache with serialized refresh."""
def __init__(self) -> None:
self.token: str | None = None
self.cookies: dict[str, str] = {}
self.expires_at: float = 0.0
self.refresh_count: int = 0
self.last_refresh_at: float = 0.0
self.last_refresh_error: str | None = None
self.lock = asyncio.Lock()
async def ensure_valid(self) -> tuple[str, dict[str, str]]:
now = time.time()
if self.token and self.expires_at > now + 30:
return self.token, dict(self.cookies)
async with self.lock:
now = time.time()
if self.token and self.expires_at > now + 30:
return self.token, dict(self.cookies)
log.info("token refresh starting (cached expires=%s, now=%s)",
self.expires_at, now)
try:
token, cookies = await mint_token()
except Exception as e:
self.last_refresh_error = f"{type(e).__name__}: {e}"
log.exception("token mint failed")
raise
self.token = token
self.cookies = cookies
self.expires_at = time.time() + TOKEN_TTL_SECONDS
self.refresh_count += 1
self.last_refresh_at = time.time()
self.last_refresh_error = None
log.info("token refresh ok (token=%s..., %d cookies, expires_at=%s)",
token[:16], len(cookies), self.expires_at)
return self.token, dict(self.cookies)
def status(self) -> dict:
now = time.time()
return {
"has_token": self.token is not None,
"expires_in_seconds": max(0, int(self.expires_at - now)) if self.token else None,
"refresh_count": self.refresh_count,
"last_refresh_at": self.last_refresh_at,
"last_refresh_error": self.last_refresh_error,
"cookie_names": sorted(self.cookies.keys()),
}
cache = TokenCache()
async def mint_token(*, max_wait_seconds: int = 90) -> tuple[str, dict[str, str]]:
    """Drive headless Chromium (via patchright) through Turnstile and
    fetch ``/api/token``.

    Args:
        max_wait_seconds: Number of one-second poll ticks to wait for
            ``/api/token`` to answer 200 before giving up. Each tick
            also pays for the in-page fetch, so wall time is a little
            longer than this value.

    Returns:
        ``(token, cookies)`` where ``cookies`` maps cookie name to value
        for the route-explorer origin.

    Raises:
        RuntimeError: Turnstile never cleared within the wait budget, or
            the token endpoint answered without a usable token.

    Forensic state is logged every third tick so a failed bypass can be
    diagnosed from the logs; that logging is best-effort and can never
    abort the mint.
    """
    # Strategy: drive the page like a real user. The React SPA gates
    # Turnstile-rendering behind its own /api/token call. Polling
    # /api/token from outside the React context (as our prior attempts
    # did) never causes the SPA to render Turnstile, so it never gets
    # a chance to clear. Filling the From field + clicking Search
    # makes the SPA invoke its R() callback which fetches /api/token,
    # gets 403, then mounts the Turnstile widget — at which point
    # Cloudflare's auto-pass (or a visible solve) can run.
    from patchright.async_api import async_playwright

    log.info("mint_token: starting browser")
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-dev-shm-usage",
                # WebGL via SwiftShader is a strong automation signal, so
                # the goal is for navigator.gpu and the WebGL renderer
                # strings to look normal-ish.
                # NOTE(review): "--use-angle=swiftshader-webgl" still names
                # a SwiftShader backend, which looks at odds with that
                # goal — confirm the intended flag.
                "--use-gl=angle",
                "--use-angle=swiftshader-webgl",
            ],
        )
        try:
            context = await browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
                ),
                locale="en-US",
                timezone_id="America/Chicago",
                viewport={"width": 1280, "height": 800},
            )
            page = await context.new_page()
            log.info("mint_token: navigating to homepage")
            await page.goto(
                f"{ROUTE_EXPLORER_BASE}/",
                wait_until="domcontentloaded",
                timeout=30000,
            )

            # Spend time on page like a real user — Cloudflare's heuristics
            # care about dwell time, mouse movement, scroll signals.
            await asyncio.sleep(4)
            try:
                await page.mouse.move(640, 400)
                await page.mouse.move(700, 450, steps=8)
                await page.mouse.move(500, 600, steps=8)
            except Exception as e:
                # Cosmetic warm-up only; record it and carry on.
                log.debug("mouse warm-up failed (ignored): %s", e)

            # Trigger the SPA's own token request by filling From + To
            # and clicking Search. This invokes R() → /api/token → 403
            # → M() → Turnstile widget renders.
            try:
                await _drive_search_form(page)
            except Exception as e:
                log.warning("form drive failed (continuing with poll): %s", e)

            for tick in range(1, max_wait_seconds + 1):
                await asyncio.sleep(1)
                try:
                    probe = await page.evaluate(_PROBE_TOKEN_JS)
                except Exception as e:
                    probe = {"status": -1, "body": str(e)}
                status = probe.get("status", -1)
                if tick % 3 == 1:
                    await _log_turnstile_progress(context, page, tick, status)
                if status == 200:
                    log.info("turnstile cleared at tick=%d", tick)
                    break
            else:
                # Loop ran out without ever seeing a 200.
                raise RuntimeError(
                    f"Turnstile never cleared after {max_wait_seconds} seconds"
                )

            # The probe truncates the body for logging, so fetch it in full.
            body = await page.evaluate(_FETCH_TOKEN_BODY_JS)
            try:
                parsed = json.loads(body)
            except ValueError as e:
                raise RuntimeError(
                    f"token endpoint returned non-JSON body: {body[:160]!r}"
                ) from e
            token = parsed.get("token") if isinstance(parsed, dict) else None
            if not token:
                raise RuntimeError(f"token endpoint returned no token: {body!r}")

            raw_cookies = await context.cookies(ROUTE_EXPLORER_BASE)
            cookies = {c["name"]: c["value"] for c in raw_cookies}
            return token, cookies
        finally:
            await browser.close()


# In-page probe: fetch /api/token with cookies and report status plus a
# short body prefix; fetch errors are reported as status -1.
_PROBE_TOKEN_JS = """
async () => {
    try {
        const r = await fetch('/api/token', { credentials: 'include' });
        const t = await r.text();
        return {status: r.status, body: t.substring(0,160)};
    } catch (e) { return {status: -1, body: String(e)}; }
}
"""

# In-page check for a Cloudflare challenge iframe.
_TURNSTILE_WIDGET_JS = """
() => {
    const el = document.querySelector('iframe[src*="challenges.cloudflare.com"]');
    return el ? 'turnstile-iframe-present' : 'no-iframe';
}
"""

# In-page fetch returning the full /api/token response text.
_FETCH_TOKEN_BODY_JS = """
async () => (await (await fetch('/api/token', {credentials:'include'})).text())
"""


async def _log_turnstile_progress(context, page, tick: int, status) -> None:
    """Log cookie names and challenge-iframe presence for one poll tick.

    Purely diagnostic: a failure here is logged and swallowed so that a
    transient cookies/evaluate error cannot abort the mint.
    """
    try:
        cks = await context.cookies(ROUTE_EXPLORER_BASE)
        names = sorted({c["name"] for c in cks})
        widget = await page.evaluate(_TURNSTILE_WIDGET_JS)
    except Exception as e:
        log.warning("tick=%d status=%s diagnostics unavailable: %s", tick, status, e)
        return
    log.info("tick=%d status=%s cookies=%s widget=%s", tick, status, names, widget)
async def _drive_search_form(page) -> None:
    """Type DFW into From, AMS into To, click Search. This triggers
    the React `R` callback that fetches /api/token, which makes the
    SPA mount the Turnstile widget.

    Every step is best-effort: a step that fails is logged and the next
    one is still attempted, so this function does not raise for page
    interaction errors.

    Args:
        page: The Playwright page already navigated to the homepage.
    """
    # The pickers are keyboard-accessible, so focusing the input and
    # typing an IATA code is enough. From is the first input on the
    # page, To is the second.
    await _enter_airport(page, 0, "DFW", "From")
    await _enter_airport(page, 1, "AMS", "To")

    # Click any "Search Routes" button.
    try:
        await page.get_by_role("button", name=re.compile("search", re.I)).click(timeout=5000)
    except Exception as e:
        log.warning("search form: Search button click failed (ignored): %s", e)


async def _enter_airport(page, index: int, code: str, label: str) -> None:
    """Focus the ``index``-th input, type ``code`` and confirm with Enter.

    Failures are logged with ``label`` for context and then ignored.
    """
    try:
        field = page.locator("input").nth(index)
        await field.click(timeout=5000)
        await page.keyboard.type(code, delay=80)
        # Give the picker a moment to show its suggestion before confirming.
        await asyncio.sleep(0.5)
        await page.keyboard.press("Enter")
    except Exception as e:
        log.warning("search form: %s input (%s) failed (ignored): %s", label, code, e)
# ---------------------------------------------------------------------------
# FastAPI app
# ---------------------------------------------------------------------------
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Lifespan hook: warm the token cache, then hand control to the app.

    Nothing is torn down after the ``yield``.
    """
    await _warm_token_cache()
    yield


async def _warm_token_cache() -> None:
    """Mint a token up front so the first user search isn't slow.

    A failure is logged with its traceback and otherwise ignored; the
    cache tries again on the first request that needs a token.
    """
    try:
        await cache.ensure_valid()
    except Exception:
        log.exception("startup token mint failed; service will retry on first request")
# ASGI application object. `lifespan` is wired in so the token warm-up
# runs at startup before requests are served.
app = FastAPI(
    title="flights backend",
    description="Cloudflare-bypassing proxy for route-explorer.com",
    lifespan=lifespan,
)
def auth(authorization: str = Header(default="")) -> None:
    """Bearer-auth dependency for the protected routes.

    Args:
        authorization: Value of the ``Authorization`` request header
            (empty string when the header is absent).

    Raises:
        HTTPException: 500 when ``SHARED_SECRET`` is not configured (the
            service refuses to run open), 401 when the header is not
            exactly ``Bearer <SHARED_SECRET>``.
    """
    import hmac

    if not SHARED_SECRET:
        raise HTTPException(500, "server misconfigured: SHARED_SECRET not set")
    expected = f"Bearer {SHARED_SECRET}"
    # Constant-time comparison so response timing does not leak how much
    # of the secret a guess got right. Compare bytes: compare_digest
    # rejects str arguments containing non-ASCII characters.
    if not hmac.compare_digest(authorization.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(401, "unauthorized")
@app.get("/health")
async def health() -> dict:
    """Unauthenticated liveness probe that also reports cache status.

    The cache summary carries counters, timestamps, the last refresh
    error and cookie names only — never the token or cookie values.
    """
    report: dict = {"status": "ok"}
    report["cache"] = cache.status()
    return report
@app.get("/api/token", dependencies=[Depends(auth)])
async def get_token() -> dict:
    """Return the cached upstream token, minting or refreshing it if needed.

    Returns:
        ``{"token": ..., "expires_at": ...}`` where ``expires_at`` is the
        cache's expiry as epoch seconds.

    Raises:
        HTTPException: 503 when a token could not be minted.
    """
    try:
        token, _ = await cache.ensure_valid()
    except Exception as e:
        # Chain the cause so the original mint failure stays in tracebacks.
        raise HTTPException(503, f"token mint failed: {e}") from e
    return {"token": token, "expires_at": cache.expires_at}
async def _proxy_search(payload: bytes, override_endpoint: str | None = None) -> JSONResponse:
    """Common path for /api/flight-search and the endpoint-specific aliases.

    Args:
        payload: The raw JSON body the iOS app sent.
        override_endpoint: When given, the payload is re-wrapped as
            ``{"endpoint": override_endpoint, "body": {"json": ...}}``
            before forwarding; otherwise it is forwarded untouched.

    Returns:
        The upstream response with its status code preserved. JSON
        bodies are passed through; anything else is returned as
        ``{"raw": <text>, "content_type": <header>}``.

    Raises:
        HTTPException: 503 when no token could be minted, 502 when the
            upstream request itself failed (timeout, connection error).
    """
    try:
        token, cookies = await cache.ensure_valid()
    except Exception as e:
        raise HTTPException(503, f"token mint failed: {e}") from e

    body_bytes = payload
    if override_endpoint:
        body_bytes = _wrap_for_endpoint(payload, override_endpoint)

    cookie_header = "; ".join(f"{k}={v}" for k, v in cookies.items())
    try:
        async with httpx.AsyncClient(timeout=30) as client:
            r = await client.post(
                f"{ROUTE_EXPLORER_BASE}/api/flight-search",
                content=body_bytes,
                headers={
                    "Content-Type": "application/json",
                    "Accept": "application/json",
                    "User-Agent": SAFARI_UA,
                    "Origin": ROUTE_EXPLORER_BASE,
                    "Referer": f"{ROUTE_EXPLORER_BASE}/",
                    "Cookie": cookie_header,
                    "X-API-Token": token,
                },
            )
    except httpx.HTTPError as e:
        # Network-level failure talking to upstream: report it as a bad
        # gateway instead of letting it surface as an opaque 500.
        log.warning("upstream request failed: %s: %s", type(e).__name__, e)
        raise HTTPException(502, f"upstream request failed: {type(e).__name__}: {e}") from e

    # If upstream complains the token is stale, invalidate cache so the
    # next call refreshes. Don't try to retry inline — caller can retry.
    body_text = r.text
    if r.status_code == 403 and '"reason":"token"' in body_text:
        # Only drop the token this request actually used; a concurrent
        # request may already have refreshed the cache with a newer one.
        if cache.token == token:
            log.warning("upstream rejected cached token; invalidating")
            cache.token = None
            cache.expires_at = 0

    content_type = r.headers.get("content-type", "")
    if content_type.startswith("application/json"):
        try:
            return JSONResponse(content=r.json(), status_code=r.status_code)
        except ValueError:
            # Body was labelled JSON but could not be parsed or
            # re-serialised; fall through to the raw-text envelope.
            pass
    return JSONResponse(
        content={"raw": body_text, "content_type": content_type},
        status_code=r.status_code,
    )


def _wrap_for_endpoint(payload: bytes, endpoint: str) -> bytes:
    """Re-wrap a client payload as ``{"endpoint": ..., "body": {"json": ...}}``.

    If the client already sent the wrapped shape, its inner
    ``body.json`` value is reused; otherwise the whole decoded payload
    becomes the ``json`` value. Empty or undecodable payloads are
    treated as ``{}``. Non-object JSON (a list, a string, ...) and a
    non-object ``body`` are forwarded as-is rather than raising.
    """
    try:
        inner = json.loads(payload or b"{}")
    except ValueError:
        # Covers JSONDecodeError and UnicodeDecodeError.
        inner = {}

    query = inner
    if isinstance(inner, dict):
        nested = inner.get("body", {})
        if isinstance(nested, dict):
            query = nested.get("json", inner)

    wrapped = {"endpoint": endpoint, "body": {"json": query}}
    return json.dumps(wrapped).encode()
@app.post("/api/flight-search", dependencies=[Depends(auth)])
async def flight_search(request: Request) -> JSONResponse:
    """Proxy the caller's body to upstream without re-wrapping it."""
    raw_body = await request.body()
    return await _proxy_search(raw_body)
@app.post("/api/route", dependencies=[Depends(auth)])
async def route_search(request: Request) -> JSONResponse:
    """Proxy the caller's body with the upstream endpoint fixed to ``/route``."""
    raw_body = await request.body()
    return await _proxy_search(raw_body, "/route")
@app.post("/api/departures", dependencies=[Depends(auth)])
async def departures(request: Request) -> JSONResponse:
    """Proxy the caller's body with the upstream endpoint fixed to ``/departures``."""
    raw_body = await request.body()
    return await _proxy_search(raw_body, "/departures")
@app.post("/api/schedule", dependencies=[Depends(auth)])
async def schedule(request: Request) -> JSONResponse:
    """Proxy the caller's body with the upstream endpoint fixed to ``/schedule``."""
    raw_body = await request.body()
    return await _proxy_search(raw_body, "/schedule")