#!/usr/bin/env swift
// Re-OCR the images referenced in quarantined_cards.json using Vision with
// bounding-box info, then pair lines by column position (left = Spanish,
// right = English) instead of by document read order.
//
// Usage: swift repair_quarantined.swift <quarantined.json> <imageDir> <output.json>
//
// Output: repaired_cards.json —
//   {"byImage": {"f0142-02.jpg": [{"es":..., "en":...}, ...]}}

import Foundation
import Vision
import AppKit

// MARK: - Argument parsing and input loading

guard CommandLine.arguments.count >= 4 else {
    print("Usage: swift repair_quarantined.swift <quarantined.json> <imageDir> <output.json>")
    exit(1)
}

let quarantinedURL = URL(fileURLWithPath: CommandLine.arguments[1])
let imageDir = URL(fileURLWithPath: CommandLine.arguments[2])
let outputURL = URL(fileURLWithPath: CommandLine.arguments[3])

guard let data = try? Data(contentsOf: quarantinedURL),
      let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
      let cards = json["cards"] as? [[String: Any]] else {
    print("Could not load \(quarantinedURL.path)")
    exit(1)
}

// Collect the distinct source images; each image is re-OCR'd exactly once
// even if many quarantined cards point at it.
var uniqueImages = Set<String>()
for card in cards {
    if let src = card["sourceImage"] as? String {
        uniqueImages.insert(src)
    }
}
print("Unique images to re-OCR: \(uniqueImages.count)")

// MARK: - Models

/// One OCR'd text line with its normalized center position.
struct RecognizedLine {
    let text: String
    let cx: CGFloat      // center X (normalized 0..1)
    let cy: CGFloat      // center Y (normalized 0..1, measured from the TOP)
    let confidence: Float
}

/// A Spanish/English flashcard pair recovered from one row of the page.
struct Pair: Encodable {
    var es: String
    var en: String
    var confidence: Double
}

/// Per-image OCR result: the recovered pairs plus diagnostics.
struct ImageResult: Encodable {
    var pairs: [Pair]
    var lineCount: Int
    var strategy: String
}

// MARK: - Language heuristics

/// Rough language tag for a line of text.
///
/// Returns "es" if the text contains Spanish-only characters or begins with a
/// Spanish article, "en" if it begins with a common English starter word,
/// and "?" when the heuristic cannot decide.
func classify(_ s: String) -> String {
    let lower = s.lowercased()
    // Accented vowels, ñ/ü, and inverted punctuation only occur in Spanish.
    let accentChars: Set<Character> = ["á", "é", "í", "ó", "ú", "ñ", "ü", "¿", "¡"]
    if lower.contains(where: { accentChars.contains($0) }) { return "es" }
    let first = lower.split(separator: " ").first.map(String.init) ?? ""
    let esArticles: Set<String> = ["el", "la", "los", "las", "un", "una", "unos", "unas"]
    let enStarters: Set<String> = ["the", "a", "an", "to", "my", "his", "her", "our", "their"]
    if esArticles.contains(first) { return "es" }
    if enStarters.contains(first) { return "en" }
    return "?"
}

// MARK: - OCR

/// Runs Vision text recognition on `cgImage` and returns every non-empty
/// line together with its normalized center point (top-origin Y).
/// Returns an empty array if recognition fails.
func recognizeLines(cgImage: CGImage) -> [RecognizedLine] {
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    let request = VNRecognizeTextRequest()
    request.recognitionLevel = .accurate
    request.recognitionLanguages = ["es-ES", "es", "en-US"]
    request.usesLanguageCorrection = true
    if #available(macOS 13.0, *) {
        request.automaticallyDetectsLanguage = true
    }
    do {
        try handler.perform([request])
    } catch {
        return []
    }
    var out: [RecognizedLine] = []
    for obs in request.results ?? [] {
        guard let top = obs.topCandidates(1).first else { continue }
        let s = top.string.trimmingCharacters(in: .whitespaces)
        if s.isEmpty { continue }
        // Vision's boundingBox is normalized with origin at lower-left.
        let bb = obs.boundingBox
        let cx = bb.origin.x + bb.width / 2
        let cyTop = 1.0 - (bb.origin.y + bb.height / 2) // flip to top-origin
        out.append(RecognizedLine(text: s, cx: cx, cy: cyTop, confidence: top.confidence))
    }
    return out
}

// MARK: - Column pairing

/// Pair lines by column position: left column = Spanish, right column = English.
///
/// Groups lines into rows using a fixed Y tolerance (1.5% of page height),
/// then within each row splits at the widest horizontal gap and joins each
/// side into one string. Rows with fewer than two lines are skipped.
/// Returns the pairs plus a strategy label for diagnostics.
func pairByPosition(_ lines: [RecognizedLine]) -> ([Pair], String) {
    guard !lines.isEmpty else { return ([], "empty") }

    // Cluster lines into rows by Y proximity. Fixed tolerance: 1.5% of page
    // height. (An earlier comment promised an adaptive median-gap tolerance;
    // the implementation is fixed — kept as-is.)
    let sortedByY = lines.sorted { $0.cy < $1.cy }
    var rows: [[RecognizedLine]] = []
    var current: [RecognizedLine] = []
    let rowTol: CGFloat = 0.015
    for l in sortedByY {
        if let last = current.last, abs(l.cy - last.cy) > rowTol {
            rows.append(current)
            current = [l]
        } else {
            current.append(l)
        }
    }
    if !current.isEmpty { rows.append(current) }

    var pairs: [Pair] = []
    var strategy = "row-pair"
    for row in rows {
        guard row.count >= 2 else { continue }
        // Sort row by X; split at the widest x-gap: left = Spanish, right = English.
        let sortedX = row.sorted { $0.cx < $1.cx }
        var maxGap: CGFloat = 0
        var splitIdx = 1
        for i in 1..<sortedX.count {
            let gap = sortedX[i].cx - sortedX[i - 1].cx
            if gap > maxGap {
                maxGap = gap
                splitIdx = i
            }
        }
        // NOTE(review): the remainder of this function was reconstructed from a
        // truncated source — verify against the original script if available.
        let leftLines = Array(sortedX[0..<splitIdx])
        let rightLines = Array(sortedX[splitIdx...])
        var es = leftLines.map { $0.text }.joined(separator: " ")
        var en = rightLines.map { $0.text }.joined(separator: " ")
        // If the language heuristic says both sides are swapped, trust it.
        if classify(es) == "en" && classify(en) == "es" {
            swap(&es, &en)
            strategy = "row-pair+swap"
        }
        let conf = row.reduce(0.0) { $0 + Double($1.confidence) } / Double(row.count)
        pairs.append(Pair(es: es, en: en, confidence: conf))
    }
    return (pairs, strategy)
}

// MARK: - Main loop

// NOTE(review): this driver loop was reconstructed from a truncated source
// (only the per-image print statement survived) — confirm against the original.
var results: [String: ImageResult] = [:]
for name in uniqueImages.sorted() {
    let imageURL = imageDir.appendingPathComponent(name)
    guard let nsImage = NSImage(contentsOf: imageURL),
          let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
        print("Could not load image \(imageURL.path)")
        continue
    }
    let lines = recognizeLines(cgImage: cgImage)
    let (pairs, strategy) = pairByPosition(lines)
    results[name] = ImageResult(pairs: pairs, lineCount: lines.count, strategy: strategy)
    print("\(name): \(lines.count) lines -> \(pairs.count) pairs via \(strategy)")
}

// MARK: - Output

/// Top-level JSON payload written to `outputURL`.
struct Output: Encodable {
    var byImage: [String: ImageResult]
    var totalPairs: Int
}

let output = Output(
    byImage: results,
    totalPairs: results.values.reduce(0) { $0 + $1.pairs.count }
)
let enc = JSONEncoder()
enc.outputFormatting = [.prettyPrinted, .sortedKeys]
try enc.encode(output).write(to: outputURL)
print("Wrote \(output.totalPairs) repaired pairs to \(outputURL.path)")