#!/usr/bin/env python3
"""
i18n_audit.py — exhaustive localization gap detector for honeyDue (KMM).

Method: enumerate EVERY prose string literal in the iOS Swift + shared Kotlin
sources (char-aware: comments and interpolation excluded), then assign each to
exactly one bucket:

  LOCALIZED  — inside a localization wrapper (String(localized:), stringResource,
               ClientStrings.t, LocalizedStringKey, NSLocalizedString,
               LocalizedStringResource/IntentDescription, ...) or the direct
               literal argument of an auto-localizing SwiftUI view init/modifier
               (Text, Label, Button, .navigationTitle, .accessibilityLabel, ...).
  ALLOWLIST  — matches an explicit, reviewable non-UI rule (logging, asset/symbol
               names, color/asset lookups, keys, URLs, routes, regex/predicate,
               date-format patterns, API-value comparisons, serialization, the
               brand name "honeyDue") OR is suppressed by an inline directive.
  GAP        — everything else. THIS BUCKET MUST BE EMPTY.

"Done" == GAP count is 0: every literal accounted for in LOCALIZED or ALLOWLIST.

High recall by design. False positives are silenced ONLY by a documented
allowlist rule or an in-source suppression directive — never by narrowing the
enumeration.

Inline suppression (auditable; reviewed by the adversarial re-scan):
    "..."  // i18n-ignore:                        skip literals on this line
    // i18n-ignore-next:                          skip the next code line
    // i18n-ignore-begin: ... // i18n-ignore-end  skip the region

Usage: python3 scripts/i18n_audit.py [--json OUT] [--all] [--allowlist]
Exit code: 0 if GAP==0 else 1 (CI gate).
"""
import argparse
import json
import os
import re
import sys

# Repository root: two directory levels above this file (the script is run as
# scripts/i18n_audit.py, so this is the directory that contains scripts/).
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Source roots to audit, relative to ROOT.
IOS_DIRS = ["iosApp/iosApp", "iosApp/HoneyDue"]
KT_DIRS = ["composeApp/src/commonMain/kotlin"]

# Files/dirs skipped entirely, each with a reason (test data, samples, analytics
# identifiers, the translation data store, dev-only screens).
SKIP_PATH_SUBSTR = {
    "/build/": "build artifacts",
    "/DerivedData/": "build artifacts",
    "ClientStringsData.kt": "translation data store",
    "/testing/": "test fixtures",
    "Fixtures.kt": "test fixtures",
    "/analytics/": "analytics event/property identifiers (non-UI)",
    "Greeting.kt": "KMM template boilerplate",
    "/dev/": "dev-only screen (Android, unreachable)",
    "AnimationTestingScreen.kt": "dev-only screen (Android, unregistered route)",
    "SimpleTaskListItem.kt": "dead/unused component",
    # NOTE: iOS /AnimationTesting/ is intentionally NOT skipped — it ships & is
    # reachable from Profile > Completion Animation (no #if DEBUG gate).
}
SKIP_PATH_REGEX = re.compile(r"(Tests?/|UITests?/|/Test[A-Z]|Tests\.swift$|/Mock)")


# ---------- char-aware literal extraction (comments + triple-strings handled) ----------
def extract_literals(line, state):
    """Return the single-line string literals found on `line`.

    Args:
        line: one physical source line (Swift or Kotlin).
        state: dict with boolean keys "block" (inside a /* ... */ comment) and
            "triple" (inside a triple-quoted string). It is mutated so both
            conditions carry over to the next line.

    Returns:
        List of (col, inner) tuples. `col` is the index of the opening quote
        within `line` as passed in; `inner` is the raw text between the quotes
        with escape sequences left untouched, so the closing quote (when the
        literal is terminated) sits at col + 1 + len(inner).

    Text inside comments and inside triple-quoted strings is not reported.
    A backslash protects only the single character after it, so a quote inside
    an interpolation (for example \\("x")) ends the literal early.
    """
    lits = []
    i, n = 0, len(line)
    if state["triple"]:
        idx = line.find('"""')
        if idx < 0:
            return lits  # still inside the multi-line string
        state["triple"] = False
        # Resume scanning after the closing delimiter WITHOUT slicing `line`,
        # so reported columns stay relative to the full line.
        i = idx + 3
    while i < n:
        if state["block"]:
            j = line.find("*/", i)
            if j < 0:
                return lits  # block comment continues on the next line
            state["block"] = False
            i = j + 2
            continue
        c = line[i]
        if c == "/" and i + 1 < n and line[i + 1] == "/":
            break  # line comment -> rest ignored
        if c == "/" and i + 1 < n and line[i + 1] == "*":
            state["block"] = True
            i += 2
            continue
        if c == "'":
            # Kotlin char literal holding a double quote ('"' or '\"'): step
            # over it so it is not mistaken for the start of a string.
            if line.startswith("'\"'", i):
                i += 3
                continue
            if line.startswith("'\\\"'", i):
                i += 4
                continue
        if c == '"':
            if line[i:i + 3] == '"""':
                k = line.find('"""', i + 3)
                if k < 0:
                    state["triple"] = True
                    return lits
                i = k + 3  # triple string opened and closed on this line
                continue
            start = i
            i += 1
            buf = []
            while i < n:
                if line[i] == "\\":
                    buf.append(line[i:i + 2])  # keep the escape pair verbatim
                    i += 2
                    continue
                if line[i] == '"':
                    break
                buf.append(line[i])
                i += 1
            lits.append((start, "".join(buf)))
            i += 1  # step over the closing quote
            continue
        i += 1
    return lits


def strip_interpolation(lit):
    """Replace Swift/Kotlin interpolation segments in `lit` with the marker "X"."""
    lit = re.sub(r"\\\([^)]*\)", "X", lit)  # swift \( ... )
    lit = re.sub(r"\$\{[^}]*\}", "X", lit)  # kotlin ${ ... }
    lit = re.sub(r"\$[A-Za-z_][A-Za-z0-9_]*", "X", lit)  # kotlin $ident
    return lit


# ---------- "prose" test ----------
def is_prose(s):
    """Return True when `s` (interpolation already stripped) looks like prose.

    Each early return below rejects one family of non-prose literals; whatever
    survives all of them is treated as prose.
    """
    st = s.strip()
    if sum(c.isalpha() for c in st) < 2:
        return False
    if re.fullmatch(r"[X\W\s]*", st):  # only interpolation residue/punct
        return False
    if re.fullmatch(r"[A-Za-z0-9]+([._\-/][A-Za-z0-9]+)+", st):  # dotted.key/snake/kebab/path
        return False
    if ("/" in st or st.startswith("http")) and " " not in st:
        return False
    if re.fullmatch(r"[%@{}\d\s.,:#x*lld$\-]*", st):  # format-spec residue
        return False
    if re.fullmatch(r"[a-z][a-z0-9]*", st):  # lowercase single token (route/key)
        return False
    if re.fullmatch(r"[A-Z][A-Z0-9_]*", st):  # ALLCAPS const / api enum
        return False
    if re.fullmatch(r"#?[0-9A-Fa-f]{3,}", st):  # hex/uuid-ish
        return False
    return True


def is_regex(s):
    """Return True when `s` contains regex/predicate syntax (\\d, [a-z], ^, MATCHES, ...)."""
    return bool(re.search(r"\\[.dwsDWSbB]|\[[A-Za-z0-9^]|\]\+|\]\*|\]\{|MATCHES|\(\?:|\^|\$$", s))


def is_date_pattern(s):
    """Return True when `s` consists solely of date-format letters and separators."""
    st = s.strip()
    if not re.search(r"[MdyHhmsaEZ]", st):
        return False
    return bool(re.fullmatch(r"[MdyHhmsaEZ:/.,'\s\-]+", st))


# Every *_PREFIX pattern is anchored with `$`: it is searched against the text
# immediately to the left of a literal's opening quote.
LOCALIZED_PREFIX = re.compile(
    r"(String\(\s*localized:\s*|LocalizedStringKey\(\s*|NSLocalizedString\(\s*"
    r"|ClientStrings\.t\(\s*|\.t\(\s*|IntentDescription\(\s*|LocalizedStringResource\(\s*"
    r"|TypeDisplayRepresentation\(\s*name:\s*|DisplayRepresentation\(\s*title:\s*"
    r"|stringResource\(\s*)$")
AUTOLOC_PREFIX = re.compile(
    r"(\bText\(\s*|\bLabel\(\s*|\bButton\(\s*|\bToggle\(\s*"
    r"|\bPicker\(\s*|\bStepper\(\s*|\bLink\(\s*|\bMenu\(\s*|\bTextField\(\s*"
    r"|\bSecureField\(\s*|\bNavigationLink\(\s*|\bSection\(\s*"
    r"|\.navigationTitle\(\s*|\.navigationBarTitle\(\s*|\.help\(\s*"
    r"|\.accessibilityLabel\(\s*|\.accessibilityHint\(\s*|\.accessibilityValue\(\s*"
    r"|\.alert\(\s*|\.confirmationDialog\(\s*|\.configurationDisplayName\(\s*|\.description\(\s*"
    # `titleKey:` is SwiftUI's documented LocalizedStringKey param. Other labeled
    # args (label:/title:/message:/...) are NOT trusted — custom components often
    # take a plain String and render it verbatim (Text(String)), bypassing the
    # catalog. Those must be explicitly wrapped, so we let them fall through to GAP.
    r"|\btitleKey:\s*)$")
ALLOWLIST_PREFIX = re.compile(
    r"(print\(|NSLog\(|os_log|Logger|\bprintln\(|\blog\.|Log\.[a-z]+\("
    r"|\.debug\(|\.error\(|\.info\(|\.warning\(|\.fault\(|\.verbose\("
    r"|systemName:\s*|\bImage\(\s*|\bUIImage\(|named:\s*|Color\(\s*|UIColor\(|themed\(\s*"
    r"|forKey:\s*|forHTTPHeaderField:\s*|setValue\(|withName:\s*|appendPathComponent\(\s*"
    r"|URL\(\s*string:\s*|accessibilityIdentifier\(\s*|\.font\(|Font\(|fontName|infoDictionary\??!?\["
    r"|navigate\(\s*|composable\(\s*|route\s*=\s*|startDestination"
    r"|@SerialName\(\s*|SerialName\(\s*|JSONDecoder|JSONEncoder|NSPredicate\(\s*format:\s*"
    r"|==\s*|!=\s*|\bcase\s+|contains\(\s*|hasPrefix\(\s*|hasSuffix\(\s*|range\(\s*of:\s*"
    r"|\.tag\(\s*|identifier:\s*|kCFBundle|Bundle\.|Notification\.Name\(\s*|previewDisplayName\(\s*"
    r"|NotificationCenter"
    # Text(verbatim:) is SwiftUI's explicit "do not localize" initializer, so it
    # is an opt-out (allowlist), never LOCALIZED.
    r"|\bText\(\s*verbatim:\s*)$")
# Unlike the *_PREFIX patterns, this one is searched against the whole line.
ALLOWLIST_LINE = re.compile(
    r"(accessibilityIdentifier|AccessibilityIdentifiers\.|UserDefaults|keychain|Keychain"
    r"|mimeType|MimeType|contentType:\s*\"|application/json|image/jpeg|text/plain"
    r"|NSPredicate\(format:|\.regularExpression|Icons\.|Notification\.Name\()")
# Line declares an iOS LocalizedStringResource / IntentDescription / AppEnum display,
# or an AppIntent @Parameter(title:) (title is a LocalizedStringResource → auto-localizes).
LOCALIZED_LINE = re.compile(
    r"(LocalizedStringResource|IntentDescription|DisplayRepresentation|TypeDisplayRepresentation|@Parameter\()")

# Marks the declaration of a SwiftUI preview (#Preview macro or PreviewProvider type).
_PREVIEW_MARKER = re.compile(r"#Preview\b|PreviewProvider|_Previews\b")


def classify(lang, prefix, inner_clean, raw, suffix):
    """Assign one literal to a bucket: "localized", "allowlist" or "gap".

    Args:
        lang: "swift" or "kotlin".
        prefix: text of the line to the left of the literal's opening quote;
            only its last 80 characters are inspected.
        inner_clean: literal content with interpolation already stripped.
        raw: the complete source line.
        suffix: text of the line to the right of the literal's closing quote.

    The checks run in a fixed order and the first match wins.
    """
    tail = prefix[-80:]
    if inner_clean.strip() == "honeyDue":
        return "allowlist"
    if is_regex(inner_clean) or is_date_pattern(inner_clean):
        return "allowlist"
    # Kotlin detection-pattern map: "incoming error substring" to "err.key"
    if lang == "kotlin" and re.match(r'\s*to\s+"', suffix):
        return "allowlist"
    if LOCALIZED_PREFIX.search(tail):
        return "localized"
    if lang == "swift" and (AUTOLOC_PREFIX.search(tail) or LOCALIZED_LINE.search(raw)):
        return "localized"
    if ALLOWLIST_PREFIX.search(tail) or ALLOWLIST_LINE.search(raw):
        return "allowlist"
    return "gap"


def _preview_step(raw, depth, armed):
    """Advance the preview-block tracker by one source line.

    Args:
        raw: the source line.
        depth: brace depth inside the current preview block (0 = outside).
        armed: True when a preview marker was seen but its opening "{" has not
            appeared yet.

    Returns:
        (skip_line, depth, armed) — skip_line is True when the line belongs to
        a preview declaration or body and must not be classified.
    """
    opens, closes = raw.count("{"), raw.count("}")
    if armed:
        if not opens:
            return True, 0, True  # still waiting for the block's opening brace
        return True, max(0, opens - closes), False
    # Look for the marker only in front of a trailing // comment, so a comment
    # that merely mentions a preview type does not start a skip.
    is_marker = bool(_PREVIEW_MARKER.search(raw.split("//", 1)[0]))
    if depth == 0 and is_marker and not opens:
        # Brace is on a later line: arm instead of faking a depth of 1, which
        # could never return to 0 and would hide the rest of the file.
        return True, 0, not closes
    if depth > 0 or is_marker:
        return True, max(0, depth + opens - closes), False
    return False, depth, False


def scan_file(path, lang):
    """Scan one source file and bucket every prose literal found in it.

    Args:
        path: path of the file to read (decoded as UTF-8).
        lang: "swift" or "kotlin", forwarded to classify().

    Returns:
        Dict with keys "localized", "allowlist" and "gap"; each value is a list
        of records {"file", "line", "text", "ctx"} (inline-suppressed records
        also carry "supp": True). A file that cannot be read yields empty
        buckets and a warning on stderr.
    """
    rel = os.path.relpath(path, ROOT)
    out = {"localized": [], "allowlist": [], "gap": []}
    try:
        with open(path, encoding="utf-8") as fh:
            lines = fh.read().splitlines()
    except (OSError, UnicodeError) as exc:
        # Best effort, but never silent: an unread file is an unaudited file.
        print(f"warning: could not read {rel}: {exc}", file=sys.stderr)
        return out
    state = {"block": False, "triple": False}
    ignore_region = False
    ignore_next = False
    preview_depth = 0  # >0 while inside a #Preview / PreviewProvider block
    preview_armed = False  # marker seen, opening brace still pending
    for ln, raw in enumerate(lines, 1):
        # i18n-ignore-next applies to the line directly after the directive.
        line_ignored = ignore_next
        ignore_next = False
        if "i18n-ignore-begin" in raw:
            ignore_region = True
        if "i18n-ignore-end" in raw:
            ignore_region = False
        if "i18n-ignore-next" in raw:
            ignore_next = True
        # Run on every line, even skipped ones, so comment/triple-string state
        # stays in sync with the file.
        lits = extract_literals(raw, state)
        # --- #Preview / PreviewProvider block skip (mock/sample data, not shipped UI) ---
        skip, preview_depth, preview_armed = _preview_step(raw, preview_depth, preview_armed)
        if skip:
            continue
        if ignore_region or line_ignored:
            continue
        line_suppress = "i18n-ignore" in raw and "i18n-ignore-next" not in raw
        ctx = raw.strip()[:160]
        for start, inner in lits:
            inner_clean = strip_interpolation(inner)
            if not is_prose(inner_clean):
                continue
            if line_suppress:
                out["allowlist"].append({"file": rel, "line": ln, "text": inner,
                                         "ctx": ctx, "supp": True})
                continue
            # `inner` is the raw text between the quotes, so the closing quote
            # sits at start + 1 + len(inner); the suffix is everything after it
            # (empty when the literal is unterminated).
            suffix = raw[start + len(inner) + 2:]
            bucket = classify(lang, raw[:start], inner_clean, raw, suffix)
            out[bucket].append({"file": rel, "line": ln, "text": inner, "ctx": ctx})
    return out


def collect():
    """Walk every configured source root and merge the per-file buckets.

    Returns:
        Dict with keys "localized", "allowlist" and "gap", each a list of the
        records produced by scan_file(). Files matching SKIP_PATH_SUBSTR or
        SKIP_PATH_REGEX are left out.
    """
    files = []
    for d in IOS_DIRS + KT_DIRS:
        base = os.path.join(ROOT, d)
        if not os.path.isdir(base):
            print(f"warning: source directory not found: {d}", file=sys.stderr)
            continue
        for r, dirs, fs in os.walk(base):
            dirs.sort()  # deterministic traversal -> stable report ordering
            for f in sorted(fs):
                if f.endswith(".swift"):
                    files.append((os.path.join(r, f), "swift"))
                elif f.endswith(".kt"):
                    files.append((os.path.join(r, f), "kotlin"))
    res = {"localized": [], "allowlist": [], "gap": []}
    for path, lang in files:
        # Match skip rules against the ROOT-relative path only: a checkout that
        # lives under e.g. ~/dev/ or /build/ must not skip the whole tree.
        rel = os.path.relpath(path, ROOT).replace(os.sep, "/")
        anchored = "/" + rel
        if any(s in anchored for s in SKIP_PATH_SUBSTR) or SKIP_PATH_REGEX.search(rel):
            continue
        r = scan_file(path, lang)
        for k in res:
            res[k].extend(r[k])
    return res


def main():
    """CLI entry point: run the audit, print the report, exit 0 only if GAP is empty."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--json")
    ap.add_argument("--all", action="store_true")  # accepted; not read below
    ap.add_argument("--allowlist", action="store_true")
    args = ap.parse_args()
    res = collect()
    g = res["gap"]
    suppressed = sum(1 for x in res["allowlist"] if x.get("supp"))
    print("\n=== i18n audit ===")
    print(f"LOCALIZED : {len(res['localized'])}")
    print(f"ALLOWLIST : {len(res['allowlist'])} (incl. {suppressed} inline-suppressed)")
    print(f"GAP : {len(g)}")
    by_file = {}
    for r in g:
        by_file.setdefault(r["file"], []).append(r)
    print(f"\n--- GAPS in {len(by_file)} files ---")
    for f in sorted(by_file):
        print(f"\n{f} ({len(by_file[f])})")
        for r in by_file[f]:
            print(f" {r['line']:>4}: \"{r['text']}\"")
    if args.json:
        # ensure_ascii=False emits raw non-ASCII text, so pin the file encoding.
        with open(args.json, "w", encoding="utf-8") as fh:
            json.dump(res, fh, ensure_ascii=False, indent=2)
        print(f"\nwrote {args.json}")
    if args.allowlist:
        print("\n--- ALLOWLIST (non-suppressed) sample ---")
        for r in [x for x in res["allowlist"] if not x.get("supp")][:60]:
            print(f" {r['file']}:{r['line']}: \"{r['text']}\"")
    sys.exit(0 if not g else 1)


if __name__ == "__main__":
    main()