Optimize AI generation speed and add richer insight data

Speed optimizations:
- Add session.prewarm() in InsightsViewModel and ReportsViewModel init
  for 40% faster first-token latency
- Cap maximumResponseTokens on all 8 AI respond() calls (100-600 per use case)
- Add prompt brevity constraints ("1-2 sentences", "2 sentences")
- Reduce report batch concurrency from 4 to 2 to prevent device contention
- Pre-fetch health data once and share across all 3 insight periods
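The concurrency cap above can be sketched with a throwing task group that keeps at most two generations in flight at once; `ReportSection`, `SectionInsight`, and `generateSectionInsight` are hypothetical stand-ins for the report pipeline, not names from this diff:

```swift
// Sketch: cap in-flight AI generations at 2 to reduce on-device contention.
func generateReportSections(_ sections: [ReportSection]) async throws -> [SectionInsight] {
    try await withThrowingTaskGroup(of: SectionInsight.self) { group in
        var results: [SectionInsight] = []
        var iterator = sections.makeIterator()

        // Seed the group with at most 2 concurrent tasks.
        for _ in 0..<2 {
            if let section = iterator.next() {
                group.addTask { try await generateSectionInsight(for: section) }
            }
        }

        // Each time a task finishes, start the next pending section,
        // so no more than 2 generations ever run concurrently.
        while let result = try await group.next() {
            results.append(result)
            if let section = iterator.next() {
                group.addTask { try await generateSectionInsight(for: section) }
            }
        }
        return results
    }
}
```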

Richer insight data in MoodDataSummarizer:
- Tag-mood correlations: overall frequency + good day vs bad day tag breakdown
- Weather-mood correlations: avg mood by condition and temperature range
- Absence pattern detection: logging gap count with pre/post-gap mood averages
- Entry source breakdown: % of entries from App, Widget, Watch, Siri, etc.
- Update insight prompt to leverage tags, weather, and gap data when available
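A rough sketch of the good-day vs bad-day tag breakdown described above (the entry shape and thresholds here are assumed simplifications, not the app's actual models):

```swift
struct TagMoodStats {
    var total = 0          // overall frequency of the tag
    var goodDayCount = 0   // entries at or above the good-day threshold
    var badDayCount = 0    // entries at or below the bad-day threshold
}

// Assumed simplified entry shape: a numeric mood score plus theme tags.
func tagBreakdown(entries: [(mood: Double, tags: [String])],
                  goodThreshold: Double = 4.0,
                  badThreshold: Double = 2.0) -> [String: TagMoodStats] {
    var stats: [String: TagMoodStats] = [:]
    for entry in entries {
        for tag in entry.tags {
            var s = stats[tag, default: TagMoodStats()]
            s.total += 1
            if entry.mood >= goodThreshold { s.goodDayCount += 1 }
            if entry.mood <= badThreshold { s.badDayCount += 1 }
            stats[tag] = s
        }
    }
    return stats
}
```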

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: Trey t
Date: 2026-04-04 11:52:14 -05:00
Commit: 70400b7790 (parent: 329fb7c671)
7 changed files with 302 additions and 53 deletions


@@ -84,7 +84,13 @@ class FoundationModelsInsightService: ObservableObject {
         }
     }
 
-    /// Creates a new session for each request to allow concurrent generation
+    /// Prewarm the language model to reduce first-generation latency
+    func prewarm() {
+        let session = LanguageModelSession(instructions: systemInstructions)
+        session.prewarm()
+    }
+
+    /// Creates a fresh session per request (sessions accumulate transcript, so reuse causes context overflow)
     private func createSession() -> LanguageModelSession {
         LanguageModelSession(instructions: systemInstructions)
     }
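Per the commit message, the view models call this new method at init so the model is warm before the first request. A minimal sketch of that call site (the `InsightsViewModel` shape shown here is assumed, not copied from the diff):

```swift
@MainActor
final class InsightsViewModel: ObservableObject {
    private let insightService: FoundationModelsInsightService

    init(insightService: FoundationModelsInsightService) {
        self.insightService = insightService
        // Warm the on-device model so the first generation starts faster.
        insightService.prewarm()
    }
}
```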
@@ -213,8 +219,7 @@ class FoundationModelsInsightService: ObservableObject {
             throw InsightGenerationError.modelUnavailable(reason: lastError?.localizedDescription ?? "Model not available")
         }
 
-        // Create a new session for this request to allow concurrent generation
-        let session = createSession()
+        let activeSession = createSession()
 
         // Filter valid entries
         let validEntries = entries.filter { ![.missing, .placeholder].contains($0.mood) }
@@ -231,9 +236,10 @@ class FoundationModelsInsightService: ObservableObject {
let prompt = buildPrompt(from: summary, count: count)
do {
let response = try await session.respond(
let response = try await activeSession.respond(
to: prompt,
generating: AIInsightsResponse.self
generating: AIInsightsResponse.self,
options: GenerationOptions(maximumResponseTokens: 600)
)
let insights = response.content.insights.map { $0.toInsight() }
@@ -263,7 +269,7 @@ class FoundationModelsInsightService: ObservableObject {
 
         \(dataSection)
 
-        Include: 1 pattern, 1 advice, 1 prediction, and other varied insights. Reference specific data points.
+        Include: 1 pattern, 1 advice, 1 prediction, and other varied insights. Reference specific data points. Keep each insight to 1-2 sentences. If theme tags are available, identify what good days and bad days have in common. If weather data is available, note weather-mood correlations. If logging gaps exist, comment on what happens around breaks in tracking.
         """
     }