Switch audio session to .record-only, use nil tap format so the system picks a compatible format, and route through AVAudioEngine with a 4096 buffer. Avoids the mDataByteSize(0) assertion seen on some devices. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
154 lines
5.4 KiB
Swift
import Foundation
|
|
import Speech
|
|
import AVFoundation
|
|
|
|
/// Captures microphone audio and streams it to `SFSpeechRecognizer` for live
/// Spanish ("es-ES") pronunciation transcription.
///
/// Audio is routed through `AVAudioEngine` with a nil tap format so the system
/// picks a compatible format — this avoids the mDataByteSize(0) assertion seen
/// on some devices when a mismatched format is forced on the input tap.
@MainActor
@Observable
final class PronunciationService {

    /// True while the engine is capturing and a recognition task is live.
    var isRecording = false

    /// Latest (partial or final) transcription of the current recording.
    var transcript = ""

    /// Whether speech-recognition authorization has been granted.
    var isAuthorized = false

    private var recognizer: SFSpeechRecognizer?
    private var audioEngine: AVAudioEngine?
    private var request: SFSpeechAudioBufferRecognitionRequest?
    private var task: SFSpeechRecognitionTask?

    // Guards one-time recognizer creation: SFSpeechRecognizer(locale:) is not
    // free and may return nil for unsupported locales, so we resolve it once.
    private var recognizerResolved = false

    /// Requests speech-recognition authorization if it is not yet determined,
    /// updating `isAuthorized` on the main queue. No-op on the simulator.
    func requestAuthorization() {
        #if targetEnvironment(simulator)
        print("[PronunciationService] skipping speech auth on simulator")
        return
        #else
        // Check current status first to avoid an unnecessary system prompt.
        let currentStatus = SFSpeechRecognizer.authorizationStatus()
        if currentStatus == .authorized {
            isAuthorized = true
            return
        }
        if currentStatus == .denied || currentStatus == .restricted {
            isAuthorized = false
            return
        }

        // Only request if not determined yet — do it on a background queue
        // to avoid blocking the main thread, then update state on main.
        DispatchQueue.global(qos: .userInitiated).async {
            SFSpeechRecognizer.requestAuthorization { status in
                DispatchQueue.main.async { [weak self] in
                    self?.isAuthorized = (status == .authorized)
                    print("[PronunciationService] authorization status: \(status.rawValue)")
                }
            }
        }
        #endif
    }

    /// Lazily creates the es-ES recognizer exactly once (it may be nil if the
    /// locale is unsupported on this device).
    private func resolveRecognizerIfNeeded() {
        guard !recognizerResolved else { return }
        recognizerResolved = true
        recognizer = SFSpeechRecognizer(locale: Locale(identifier: "es-ES"))
    }

    /// Starts capturing microphone audio and streaming it to the recognizer.
    /// Partial transcripts are published to `transcript` as they arrive; the
    /// session tears itself down on error or when a final result is delivered.
    func startRecording() {
        guard isAuthorized else {
            print("[PronunciationService] not authorized")
            return
        }
        resolveRecognizerIfNeeded()
        guard let recognizer, recognizer.isAvailable else {
            print("[PronunciationService] recognizer unavailable")
            return
        }

        // Tear down any previous session before starting a new one.
        stopRecording()

        do {
            let audioSession = AVAudioSession.sharedInstance()
            // Record-only category with .measurement mode minimizes system
            // signal processing; .duckOthers lowers other apps' audio.
            try audioSession.setCategory(.record, mode: .measurement, options: [.duckOthers])
            try audioSession.setActive(true, options: .notifyOthersOnDeactivation)

            let request = SFSpeechAudioBufferRecognitionRequest()
            request.shouldReportPartialResults = true
            // Prefer on-device recognition when the device supports it.
            request.requiresOnDeviceRecognition = recognizer.supportsOnDeviceRecognition
            self.request = request

            let engine = AVAudioEngine()
            self.audioEngine = engine

            // Use a nil tap format — lets the system pick a compatible format
            // and avoids the mDataByteSize(0) assertion from format mismatches.
            engine.inputNode.installTap(onBus: 0, bufferSize: 4096, format: nil) { buffer, _ in
                request.append(buffer)
            }

            engine.prepare()
            try engine.start()

            transcript = ""
            isRecording = true

            task = recognizer.recognitionTask(with: request) { [weak self] result, error in
                DispatchQueue.main.async {
                    if let result {
                        self?.transcript = result.bestTranscription.formattedString
                    }
                    // Stop on any error or once the recognizer finalizes.
                    if error != nil || (result?.isFinal == true) {
                        self?.stopRecording()
                    }
                }
            }
        } catch {
            print("[PronunciationService] startRecording failed: \(error)")
            stopRecording()
        }
    }

    /// Stops capture, ends the recognition request, and releases all audio
    /// resources. Safe to call repeatedly; also invoked from the recognition
    /// callback on error/final result and defensively before each start.
    func stopRecording() {
        audioEngine?.stop()
        audioEngine?.inputNode.removeTap(onBus: 0)
        request?.endAudio()
        task?.cancel()
        task = nil
        request = nil
        audioEngine = nil
        // Release the record-only session so other apps' audio can resume
        // (deactivating an already-inactive session is a harmless no-op).
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
        isRecording = false
    }

    /// Compare spoken transcript against expected text, returns matched word ratio (0.0-1.0).
    ///
    /// Words are lowercased, split on whitespace, and stripped of leading/
    /// trailing punctuation. Matching is set-based: a repeated expected word
    /// counts as matched if it appears at least once anywhere in `spoken`.
    static func scoreMatch(expected: String, spoken: String) -> (score: Double, matches: [WordMatch]) {
        let expectedWords = expected.lowercased()
            .components(separatedBy: .whitespacesAndNewlines)
            .map { $0.trimmingCharacters(in: .punctuationCharacters) }
            .filter { !$0.isEmpty }

        let spokenWords = spoken.lowercased()
            .components(separatedBy: .whitespacesAndNewlines)
            .map { $0.trimmingCharacters(in: .punctuationCharacters) }
            .filter { !$0.isEmpty }

        let spokenSet = Set(spokenWords)
        var matches: [WordMatch] = []

        for word in expectedWords {
            matches.append(WordMatch(word: word, matched: spokenSet.contains(word)))
        }

        let matchCount = matches.filter(\.matched).count
        let score = expectedWords.isEmpty ? 0 : Double(matchCount) / Double(expectedWords.count)
        return (score, matches)
    }

    /// Per-word match result for UI highlighting. `id` is the word itself, so
    /// duplicate expected words share an identity — acceptable for display.
    struct WordMatch: Identifiable {
        let word: String
        let matched: Bool
        var id: String { word }
    }
}
|