diff --git a/.gitignore b/.gitignore index c209bd0..9be782c 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,16 @@ scrape/ *.webm *.mp4 *.mkv + +# Third-party textbook sources (not redistributable) +*.pdf +*.epub +epub_extract/ + +# Textbook extraction artifacts — regenerate locally via run_pipeline.sh. +# Scripts are committed; their generated outputs are not. +Conjuga/Scripts/textbook/*.json +Conjuga/Scripts/textbook/review.html +# App-bundle copies of the textbook content +Conjuga/Conjuga/textbook_data.json +Conjuga/Conjuga/textbook_vocab.json diff --git a/Conjuga/Conjuga.xcodeproj/project.pbxproj b/Conjuga/Conjuga.xcodeproj/project.pbxproj index e880ee1..2099abe 100644 --- a/Conjuga/Conjuga.xcodeproj/project.pbxproj +++ b/Conjuga/Conjuga.xcodeproj/project.pbxproj @@ -9,8 +9,10 @@ /* Begin PBXBuildFile section */ 00BEC0BDBB49198022D9852E /* WordOfDayWidget.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8E9BCDBB9BC24F5C8117767E /* WordOfDayWidget.swift */; }; 0A89DCC82BE11605CB866DEF /* TenseInfo.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3BC3247457109FC6BF00D85B /* TenseInfo.swift */; }; + 12D2C9311D5C4764B48B1754 /* StoryQuizView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E292A183ABB24FFE9CB719C8 /* StoryQuizView.swift */; }; 13F29AD5745FB532709FA28A /* OnboardingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E972AA745F44586EF0B1B0C8 /* OnboardingView.swift */; }; 1A230C01A045F0C095BFBD35 /* PracticeView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1EA0FA4F9149B9D8E197ADE9 /* PracticeView.swift */; }; + 1B0B3B2C771AD72E25B3493C /* StemChangeToggleTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8F08E1DC6932D9EA1D380913 /* StemChangeToggleTests.swift */; }; 1C2636790E70B6BC7FFCC904 /* DailyLog.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0313D24F96E6A0039C34341F /* DailyLog.swift */; }; 218E982FC4267949F82AABAD /* SharedModels in Frameworks */ = {isa = PBXBuildFile; productRef = 
4A4D7B02884EBA9ACD93F0FD /* SharedModels */; }; 261E582449BED6EF41881B04 /* AdaptiveContainer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3B16FF4C52457CD8CD703532 /* AdaptiveContainer.swift */; }; @@ -21,44 +23,65 @@ 33E885EB38C3BB0CB058871A /* HandwritingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1F842EB5E566C74658D918BB /* HandwritingView.swift */; }; 352A5BAA6E406AA5850653A4 /* PracticeSessionService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 842DB48F8570C39CDCFF2F57 /* PracticeSessionService.swift */; }; 35A0F6E7124D989312721F7D /* DashboardView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 18AC3C548BDB9EF8701BE64C /* DashboardView.swift */; }; + 35D6404C60C249D5995AD895 /* ConversationService.swift in Sources */ = {isa = PBXBuildFile; fileRef = E10603F454E54341AA4B9931 /* ConversationService.swift */; }; 36F92EBAEB0E5F2B010401EF /* StreakCalendarView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 30EF2362D9FFF9B07A45CE6D /* StreakCalendarView.swift */; }; 377C4AA000CE9A0D8CC43DA9 /* GrammarNote.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D389CA5B5C4E7A12CAEA5BC /* GrammarNote.swift */; }; 39D0666B293DC265CF87B9DD /* SentenceBuilderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 731614CACCB73B6FD592D34A /* SentenceBuilderView.swift */; }; + 3EC2A2F4B9C24B029DA49C40 /* VocabReviewView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D3698CE7ACF148318615293E /* VocabReviewView.swift */; }; 3F4F0C07BE61512CBFBBB203 /* HandwritingCanvas.swift in Sources */ = {isa = PBXBuildFile; fileRef = 80D974250C396589656B8443 /* HandwritingCanvas.swift */; }; 4005E258FDF03C8B3A0D53BD /* VocabFlashcardView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2931634BEB33B93429CE254F /* VocabFlashcardView.swift */; }; 46943ACFABF329DE1CBFC471 /* TensePill.swift in Sources */ = {isa = PBXBuildFile; fileRef = 102F0E136CDFF8CED710210F /* TensePill.swift */; }; + 4C2649215B81470195F38ED0 /* StoryLibraryView.swift 
in Sources */ = {isa = PBXBuildFile; fileRef = 950347251CC94D4A9DFF7CBC /* StoryLibraryView.swift */; }; 4C3484403FD96E37DA4BEA66 /* NewWordIntent.swift in Sources */ = {isa = PBXBuildFile; fileRef = 72CB5F95DF256DF7CD73269D /* NewWordIntent.swift */; }; + 4DCC5CC233DE4701A12FD7EB /* ListeningView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02B2179562E54E148C98219D /* ListeningView.swift */; }; 50E0095A23E119D1AB561232 /* VerbDetailView.swift in Sources */ = {isa = PBXBuildFile; fileRef = E1DBE662F89F02A0282F5BEE /* VerbDetailView.swift */; }; 519E68D2DF4C80AB96058C0D /* LyricsConfirmationView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3EA01795655C444795577A22 /* LyricsConfirmationView.swift */; }; 51D072AF30F4B12CD3E8F918 /* SRSEngine.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5C0E6EAFC0D24928BA956FA5 /* SRSEngine.swift */; }; + 53908E41767B438C8BD229CD /* ClozeView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A649B04B8B3C49419AD9219C /* ClozeView.swift */; }; 53A0AC57EAC44B676C997374 /* QuizType.swift in Sources */ = {isa = PBXBuildFile; fileRef = 626873572466403C0288090D /* QuizType.swift */; }; 5A3246026E68AB6483126D0B /* WeekProgressWidget.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1980E8E439EB76ED7330A90D /* WeekProgressWidget.swift */; }; 5EA915FFA906C5C2938FCADA /* ConjugaWidgetBundle.swift in Sources */ = {isa = PBXBuildFile; fileRef = E325FE0E484DE75009672D02 /* ConjugaWidgetBundle.swift */; }; 60E86BABE2735E2052B99DF3 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = BCCC95A95581458E068E0484 /* SettingsView.swift */; }; 615D3128ED6E84EF59BB5AA3 /* LyricsReaderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 58394296923991E56BAC2B02 /* LyricsReaderView.swift */; }; + 65ABC39F35804C619DAB3200 /* GrammarExercise.swift in Sources */ = {isa = PBXBuildFile; fileRef = 17E5252282F44ECD9BA70DB8 /* GrammarExercise.swift */; }; 6BB4B0A655E6CB6F82D81B5A /* WeekTestView.swift in Sources */ = 
{isa = PBXBuildFile; fileRef = 5E7EF4161C73AAC67B3A0004 /* WeekTestView.swift */; }; - 968D626462B0ADEC8D7D56AA /* CheckpointExamView.swift in Sources */ = {isa = PBXBuildFile; fileRef = EA1F177F7ABF5D2E4E5466CD /* CheckpointExamView.swift */; }; + 6CCC8D51F5524688A4BC5AF8 /* ChatView.swift in Sources */ = {isa = PBXBuildFile; fileRef = FA5FE6E149F54A6BA7D01D99 /* ChatView.swift */; }; 6D4A29280FDD99B8E18AF264 /* WidgetDataReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2889F2F81673AFF3A58A07A8 /* WidgetDataReader.swift */; }; 6ED2AC2CAA54688161D4B920 /* SyncStatusMonitor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 18CCD69C14D1B0CFBD03C92F /* SyncStatusMonitor.swift */; }; 728702D9AA7A8BDABBA62513 /* ReviewStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = CBCF6FCFA6B00151C2371E77 /* ReviewStore.swift */; }; 760628EFE1CF191CE2FC07DC /* GuideView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8C935ECDF8A5D8D6FA541E20 /* GuideView.swift */; }; 7A13757EA40E81E55640D0FC /* LyricsSearchView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 70960F0FD7509310B3F61C48 /* LyricsSearchView.swift */; }; + 7A1B2C3D4E5F60718293A4B5 /* textbook_data.json in Resources */ = {isa = PBXBuildFile; fileRef = 7A1B2C3D4E5F60718293A4C6 /* textbook_data.json */; }; + 7A1B2C3D4E5F60718293A4B6 /* textbook_vocab.json in Resources */ = {isa = PBXBuildFile; fileRef = 7A1B2C3D4E5F60718293A4C7 /* textbook_vocab.json */; }; + 7A1B2C3D4E5F60718293AA01 /* TextbookChapterListView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7A1B2C3D4E5F60718293AA11 /* TextbookChapterListView.swift */; }; + 7A1B2C3D4E5F60718293AA02 /* TextbookChapterView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7A1B2C3D4E5F60718293AA12 /* TextbookChapterView.swift */; }; + 7A1B2C3D4E5F60718293AA03 /* TextbookExerciseView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7A1B2C3D4E5F60718293AA13 /* TextbookExerciseView.swift */; }; + 7A1B2C3D4E5F60718293AA04 /* 
AnswerChecker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7A1B2C3D4E5F60718293AA14 /* AnswerChecker.swift */; }; 81FA7EBCF18F0AAE0BF385C3 /* VerbListView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A63061BBC8998DF33E3DCA2B /* VerbListView.swift */; }; 82F6079BE3F31AC3FB2D1013 /* MultipleChoiceView.swift in Sources */ = {isa = PBXBuildFile; fileRef = DA3A33983B2F2078C9EA1A3D /* MultipleChoiceView.swift */; }; 84CCBAE22A9E0DA27AE28723 /* DeckStudyView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 631DC0A942DD57C81DECE083 /* DeckStudyView.swift */; }; + 8510085D78E248D885181E80 /* FeatureReferenceView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 12E9DDEFD53C49E0A48EA655 /* FeatureReferenceView.swift */; }; + 8C1E4E7F36D64EFF8D092AC8 /* StoryGenerator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 327659ABFD524514B6D2D505 /* StoryGenerator.swift */; }; 8C43F09F52EA9B537EA27E43 /* CourseReviewStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = DAF7CA1E6F9979CB2C699FDC /* CourseReviewStore.swift */; }; + 8D7CA0F4496B44C28CD5EBD5 /* DictionaryService.swift in Sources */ = {isa = PBXBuildFile; fileRef = A04370CF6B4E4D38BE3EB0C7 /* DictionaryService.swift */; }; + 8E3D8E8254CF4213B9D9FAD3 /* StoryReaderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2A8B6081226847E0A0A174BC /* StoryReaderView.swift */; }; + 943728CD3E65FE6CCADB05EE /* StemChangeConjugationView.swift in Sources */ = {isa = PBXBuildFile; fileRef = CF3A181BF2399D34C23DA933 /* StemChangeConjugationView.swift */; }; 943A94A8C71919F3EFC0E8FA /* UserProgress.swift in Sources */ = {isa = PBXBuildFile; fileRef = E536AD1180FE10576EAC884A /* UserProgress.swift */; }; + 968D626462B0ADEC8D7D56AA /* CheckpointExamView.swift in Sources */ = {isa = PBXBuildFile; fileRef = EA1F177F7ABF5D2E4E5466CD /* CheckpointExamView.swift */; }; + 96A3E5FA8EC63123D97365E1 /* TextbookFlowUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = CEEA84E15880A9D56DE18F33 /* 
TextbookFlowUITests.swift */; }; 97EFCF6724CE59DC4F0274FD /* AchievementService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1C42EA0EBD4CB1E10A82BA25 /* AchievementService.swift */; }; 9D9FD3853C5C969C62AE9999 /* StartupCoordinator.swift in Sources */ = {isa = PBXBuildFile; fileRef = A4B95B276C054DBFE508C4D1 /* StartupCoordinator.swift */; }; A9959AE6C87B4AD21554E401 /* FullTableView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 711CB7539EF5887F6F7B8B82 /* FullTableView.swift */; }; AAC6F85A1C3B6C1186E1656A /* TenseEndingTable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 69D98E1564C6538056D81200 /* TenseEndingTable.swift */; }; B4603AA6EFB134794AA39BF4 /* LyricsLibraryView.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2B1F646394D7C03493F1BF /* LyricsLibraryView.swift */; }; + B73F6EED00304B718C6FEFFA /* GrammarExerciseView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1F71CA5CD67342F18319DB9A /* GrammarExerciseView.swift */; }; BB48230C3B26EA6E84D2D823 /* DailyProgressRing.swift in Sources */ = {isa = PBXBuildFile; fileRef = 180F9D59828C36B44A5E384F /* DailyProgressRing.swift */; }; BF0832865857EFDA1D1CDEAD /* SharedModels in Frameworks */ = {isa = PBXBuildFile; productRef = BCCBABD74CADDB118179D8E9 /* SharedModels */; }; C0BAEF49A6270D8F64CF13D6 /* PracticeViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = C359C051FB157EF447561405 /* PracticeViewModel.swift */; }; C1F84182F12EB5CFF32768B6 /* MainTabView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5983A534E4836F30B5281ACB /* MainTabView.swift */; }; C2B3D97F119EFCE97E3CB1CE /* ConjugaApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1EB4830F9289AACC82D753F8 /* ConjugaApp.swift */; }; C3851F960C1162239DC2F935 /* CourseQuizView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 143D06606AE10DCA30A140C2 /* CourseQuizView.swift */; }; + C8AF0931F7FD458C80B6EC0D /* ChatLibraryView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 
5667AA04211A449A9150BD28 /* ChatLibraryView.swift */; }; C8C3880535008764B7117049 /* DataLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = DADCA82DDD34DF36D59BB283 /* DataLoader.swift */; }; CAC69045B74249F121643E88 /* AnswerReviewView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 83A8C1A048627C8DEB83C12D /* AnswerReviewView.swift */; }; CF9E48ADF0501FB79F3DDB7B /* conjuga_data.json in Resources */ = {isa = PBXBuildFile; fileRef = 8C2D88FF9A3B0590B22C7837 /* conjuga_data.json */; }; @@ -68,32 +91,20 @@ D6B67523714E0B3618391956 /* CombinedWidget.swift in Sources */ = {isa = PBXBuildFile; fileRef = 43345D6C7EAA4017E3A45935 /* CombinedWidget.swift */; }; D7456B289D135CEB3A15122B /* TestResult.swift in Sources */ = {isa = PBXBuildFile; fileRef = DAFE27F29412021AEC57E728 /* TestResult.swift */; }; DB73836F751BB2751439E826 /* LyricsSearchService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 43B8AED76C14A05AF2339C27 /* LyricsSearchService.swift */; }; + DDF58F3899FC4B92BF6587D2 /* StudyTimerService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 978FB24DF8D7436CB5210ACE /* StudyTimerService.swift */; }; DF06034A4B2C11BA0C0A84CB /* ConjugaWidgetExtension.appex in Embed Foundation Extensions */ = {isa = PBXBuildFile; fileRef = 9708FF3CF33E4765DB225F93 /* ConjugaWidgetExtension.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; DF82C2579F9889DDB06362CC /* ReferenceStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 777C696A841803D5B775B678 /* ReferenceStore.swift */; }; E7BFEE9A90E1300EFF5B1F32 /* HandwritingRecognizer.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3695075616689E72DBB26D4C /* HandwritingRecognizer.swift */; }; E814A9CF1067313F74B509C6 /* StoreInspector.swift in Sources */ = {isa = PBXBuildFile; fileRef = E8E9833868EB73AF9EB3A611 /* StoreInspector.swift */; }; + E82C743EB1FDF6B67ED22EAD /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = A6153A5C7241C1AB0373AA17 /* 
Foundation.framework */; }; E99473B7DF9BCAE150E9D1E1 /* WidgetDataService.swift in Sources */ = {isa = PBXBuildFile; fileRef = D570252DA3DCDD9217C71863 /* WidgetDataService.swift */; }; + EA07DB964C8940F69C14DE2C /* PronunciationService.swift in Sources */ = {isa = PBXBuildFile; fileRef = D535EF6988A24B47B70209A2 /* PronunciationService.swift */; }; ED0401D05A7C2B4C55057A88 /* DailyProgressWidget.swift in Sources */ = {isa = PBXBuildFile; fileRef = 195DA9CDA703DDFAD1B3CD5A /* DailyProgressWidget.swift */; }; F0D0778207F144D6AC3D39C3 /* CourseView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 833516C5D57F164C8660A479 /* CourseView.swift */; }; F59655A8B8FCE6264315DD33 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = A014EEC3EE08E945FBBA5335 /* Assets.xcassets */; }; + F7E459C46F25A8A45D7E0DFB /* AllChaptersScreenshotTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A630C74D28CE1B280C9F296 /* AllChaptersScreenshotTests.swift */; }; F84706B47A2156B2138FB8D5 /* GrammarNotesView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3F1A6221A35699BD8065D064 /* GrammarNotesView.swift */; }; FC7873F97017532C215DAD34 /* ReviewCard.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0A8A63F750065CA4EF36B4D3 /* ReviewCard.swift */; }; - DDF58F3899FC4B92BF6587D2 /* StudyTimerService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 978FB24DF8D7436CB5210ACE /* StudyTimerService.swift */; }; - 8C1E4E7F36D64EFF8D092AC8 /* StoryGenerator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 327659ABFD524514B6D2D505 /* StoryGenerator.swift */; }; - 4C2649215B81470195F38ED0 /* StoryLibraryView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 950347251CC94D4A9DFF7CBC /* StoryLibraryView.swift */; }; - 8E3D8E8254CF4213B9D9FAD3 /* StoryReaderView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2A8B6081226847E0A0A174BC /* StoryReaderView.swift */; }; - 12D2C9311D5C4764B48B1754 /* StoryQuizView.swift in Sources */ = {isa = PBXBuildFile; 
fileRef = E292A183ABB24FFE9CB719C8 /* StoryQuizView.swift */; }; - 8D7CA0F4496B44C28CD5EBD5 /* DictionaryService.swift in Sources */ = {isa = PBXBuildFile; fileRef = A04370CF6B4E4D38BE3EB0C7 /* DictionaryService.swift */; }; - 3EC2A2F4B9C24B029DA49C40 /* VocabReviewView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D3698CE7ACF148318615293E /* VocabReviewView.swift */; }; - 53908E41767B438C8BD229CD /* ClozeView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A649B04B8B3C49419AD9219C /* ClozeView.swift */; }; - 65ABC39F35804C619DAB3200 /* GrammarExercise.swift in Sources */ = {isa = PBXBuildFile; fileRef = 17E5252282F44ECD9BA70DB8 /* GrammarExercise.swift */; }; - B73F6EED00304B718C6FEFFA /* GrammarExerciseView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1F71CA5CD67342F18319DB9A /* GrammarExerciseView.swift */; }; - EA07DB964C8940F69C14DE2C /* PronunciationService.swift in Sources */ = {isa = PBXBuildFile; fileRef = D535EF6988A24B47B70209A2 /* PronunciationService.swift */; }; - 4DCC5CC233DE4701A12FD7EB /* ListeningView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 02B2179562E54E148C98219D /* ListeningView.swift */; }; - 35D6404C60C249D5995AD895 /* ConversationService.swift in Sources */ = {isa = PBXBuildFile; fileRef = E10603F454E54341AA4B9931 /* ConversationService.swift */; }; - C8AF0931F7FD458C80B6EC0D /* ChatLibraryView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5667AA04211A449A9150BD28 /* ChatLibraryView.swift */; }; - 6CCC8D51F5524688A4BC5AF8 /* ChatView.swift in Sources */ = {isa = PBXBuildFile; fileRef = FA5FE6E149F54A6BA7D01D99 /* ChatView.swift */; }; - 8510085D78E248D885181E80 /* FeatureReferenceView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 12E9DDEFD53C49E0A48EA655 /* FeatureReferenceView.swift */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -104,6 +115,13 @@ remoteGlobalIDString = F73909B4044081DB8F6272AF; remoteInfo = ConjugaWidgetExtension; }; + 
6E1F966015DA38BD4E3CE8AF /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = AB7396D9C3E14B65B5238368 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 96127FACA68AE541F5C0F8BC; + remoteInfo = Conjuga; + }; /* End PBXContainerItemProxy section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -121,12 +139,15 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 02B2179562E54E148C98219D /* ListeningView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ListeningView.swift; sourceTree = ""; }; 0313D24F96E6A0039C34341F /* DailyLog.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DailyLog.swift; sourceTree = ""; }; 0A8A63F750065CA4EF36B4D3 /* ReviewCard.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReviewCard.swift; sourceTree = ""; }; 102F0E136CDFF8CED710210F /* TensePill.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TensePill.swift; sourceTree = ""; }; 10C16AA6022E4742898745CE /* TypingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TypingView.swift; sourceTree = ""; }; + 12E9DDEFD53C49E0A48EA655 /* FeatureReferenceView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeatureReferenceView.swift; sourceTree = ""; }; 143D06606AE10DCA30A140C2 /* CourseQuizView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CourseQuizView.swift; sourceTree = ""; }; 16C1F74196C3C5628953BE3F /* Conjuga.app */ = {isa = PBXFileReference; includeInIndex = 0; lastKnownFileType = wrapper.application; path = Conjuga.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 17E5252282F44ECD9BA70DB8 /* GrammarExercise.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GrammarExercise.swift; sourceTree = ""; }; 180F9D59828C36B44A5E384F /* DailyProgressRing.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = DailyProgressRing.swift; sourceTree = ""; }; 18AC3C548BDB9EF8701BE64C /* DashboardView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DashboardView.swift; sourceTree = ""; }; 18CCD69C14D1B0CFBD03C92F /* SyncStatusMonitor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SyncStatusMonitor.swift; sourceTree = ""; }; @@ -136,10 +157,14 @@ 1C4B5204F6B8647C816814F0 /* SyncToast.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SyncToast.swift; sourceTree = ""; }; 1EA0FA4F9149B9D8E197ADE9 /* PracticeView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PracticeView.swift; sourceTree = ""; }; 1EB4830F9289AACC82D753F8 /* ConjugaApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConjugaApp.swift; sourceTree = ""; }; + 1F71CA5CD67342F18319DB9A /* GrammarExerciseView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GrammarExerciseView.swift; sourceTree = ""; }; 1F842EB5E566C74658D918BB /* HandwritingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HandwritingView.swift; sourceTree = ""; }; + 27B2A75AAF79A9402AAF3F57 /* ConjugaUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = ConjugaUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 2889F2F81673AFF3A58A07A8 /* WidgetDataReader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WidgetDataReader.swift; sourceTree = ""; }; 2931634BEB33B93429CE254F /* VocabFlashcardView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VocabFlashcardView.swift; sourceTree = ""; }; + 2A8B6081226847E0A0A174BC /* StoryReaderView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryReaderView.swift; sourceTree = 
""; }; 30EF2362D9FFF9B07A45CE6D /* StreakCalendarView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StreakCalendarView.swift; sourceTree = ""; }; + 327659ABFD524514B6D2D505 /* StoryGenerator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryGenerator.swift; sourceTree = ""; }; 3695075616689E72DBB26D4C /* HandwritingRecognizer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HandwritingRecognizer.swift; sourceTree = ""; }; 3B16FF4C52457CD8CD703532 /* AdaptiveContainer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AdaptiveContainer.swift; sourceTree = ""; }; 3BC3247457109FC6BF00D85B /* TenseInfo.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TenseInfo.swift; sourceTree = ""; }; @@ -151,12 +176,12 @@ 43B8AED76C14A05AF2339C27 /* LyricsSearchService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LyricsSearchService.swift; sourceTree = ""; }; 49E3AD244327CBF24B7A2752 /* SpeechService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeechService.swift; sourceTree = ""; }; 4D389CA5B5C4E7A12CAEA5BC /* GrammarNote.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GrammarNote.swift; sourceTree = ""; }; + 5667AA04211A449A9150BD28 /* ChatLibraryView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatLibraryView.swift; sourceTree = ""; }; 58394296923991E56BAC2B02 /* LyricsReaderView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LyricsReaderView.swift; sourceTree = ""; }; 5983A534E4836F30B5281ACB /* MainTabView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MainTabView.swift; sourceTree = ""; }; 5BF946245110C92F087D81E8 /* PracticeHeaderView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; 
path = PracticeHeaderView.swift; sourceTree = ""; }; 5C0E6EAFC0D24928BA956FA5 /* SRSEngine.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SRSEngine.swift; sourceTree = ""; }; 5E7EF4161C73AAC67B3A0004 /* WeekTestView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WeekTestView.swift; sourceTree = ""; }; - EA1F177F7ABF5D2E4E5466CD /* CheckpointExamView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CheckpointExamView.swift; sourceTree = ""; }; 626873572466403C0288090D /* QuizType.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = QuizType.swift; sourceTree = ""; }; 631DC0A942DD57C81DECE083 /* DeckStudyView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeckStudyView.swift; sourceTree = ""; }; 69D98E1564C6538056D81200 /* TenseEndingTable.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TenseEndingTable.swift; sourceTree = ""; }; @@ -166,52 +191,57 @@ 72CB5F95DF256DF7CD73269D /* NewWordIntent.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = NewWordIntent.swift; sourceTree = ""; }; 731614CACCB73B6FD592D34A /* SentenceBuilderView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SentenceBuilderView.swift; sourceTree = ""; }; 777C696A841803D5B775B678 /* ReferenceStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReferenceStore.swift; sourceTree = ""; }; + 7A1B2C3D4E5F60718293A4C6 /* textbook_data.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = textbook_data.json; sourceTree = ""; }; + 7A1B2C3D4E5F60718293A4C7 /* textbook_vocab.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = textbook_vocab.json; sourceTree = ""; }; + 7A1B2C3D4E5F60718293AA11 /* TextbookChapterListView.swift */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.swift; path = TextbookChapterListView.swift; sourceTree = ""; }; + 7A1B2C3D4E5F60718293AA12 /* TextbookChapterView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TextbookChapterView.swift; sourceTree = ""; }; + 7A1B2C3D4E5F60718293AA13 /* TextbookExerciseView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TextbookExerciseView.swift; sourceTree = ""; }; + 7A1B2C3D4E5F60718293AA14 /* AnswerChecker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AnswerChecker.swift; sourceTree = ""; }; 7E6AF62A3A949630E067DC22 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = ""; }; 80D974250C396589656B8443 /* HandwritingCanvas.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HandwritingCanvas.swift; sourceTree = ""; }; 833516C5D57F164C8660A479 /* CourseView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CourseView.swift; sourceTree = ""; }; 83A8C1A048627C8DEB83C12D /* AnswerReviewView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AnswerReviewView.swift; sourceTree = ""; }; 842DB48F8570C39CDCFF2F57 /* PracticeSessionService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PracticeSessionService.swift; sourceTree = ""; }; + 8A630C74D28CE1B280C9F296 /* AllChaptersScreenshotTests.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = AllChaptersScreenshotTests.swift; sourceTree = ""; }; 8C2D88FF9A3B0590B22C7837 /* conjuga_data.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = conjuga_data.json; sourceTree = ""; }; 8C935ECDF8A5D8D6FA541E20 /* GuideView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GuideView.swift; sourceTree = ""; }; 8E9BCDBB9BC24F5C8117767E /* WordOfDayWidget.swift */ = 
{isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WordOfDayWidget.swift; sourceTree = ""; }; + 8F08E1DC6932D9EA1D380913 /* StemChangeToggleTests.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = StemChangeToggleTests.swift; sourceTree = ""; }; + 950347251CC94D4A9DFF7CBC /* StoryLibraryView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryLibraryView.swift; sourceTree = ""; }; 9708FF3CF33E4765DB225F93 /* ConjugaWidgetExtension.appex */ = {isa = PBXFileReference; includeInIndex = 0; lastKnownFileType = "wrapper.app-extension"; path = ConjugaWidgetExtension.appex; sourceTree = BUILT_PRODUCTS_DIR; }; + 978FB24DF8D7436CB5210ACE /* StudyTimerService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StudyTimerService.swift; sourceTree = ""; }; 9E1FB35614B709E6B1D1D017 /* Conjuga.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = Conjuga.entitlements; sourceTree = ""; }; A014EEC3EE08E945FBBA5335 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + A04370CF6B4E4D38BE3EB0C7 /* DictionaryService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictionaryService.swift; sourceTree = ""; }; A4B95B276C054DBFE508C4D1 /* StartupCoordinator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StartupCoordinator.swift; sourceTree = ""; }; + A6153A5C7241C1AB0373AA17 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS18.0.sdk/System/Library/Frameworks/Foundation.framework; sourceTree = DEVELOPER_DIR; }; A63061BBC8998DF33E3DCA2B /* VerbListView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = 
VerbListView.swift; sourceTree = ""; }; + A649B04B8B3C49419AD9219C /* ClozeView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ClozeView.swift; sourceTree = ""; }; AC34396050805693AA4AC582 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = ""; }; BC273716CD14A99EFF8206CA /* course_data.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = course_data.json; sourceTree = ""; }; BCCC95A95581458E068E0484 /* SettingsView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = ""; }; C359C051FB157EF447561405 /* PracticeViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PracticeViewModel.swift; sourceTree = ""; }; CBCF6FCFA6B00151C2371E77 /* ReviewStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReviewStore.swift; sourceTree = ""; }; + CEEA84E15880A9D56DE18F33 /* TextbookFlowUITests.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = TextbookFlowUITests.swift; sourceTree = ""; }; + CF3A181BF2399D34C23DA933 /* StemChangeConjugationView.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = StemChangeConjugationView.swift; sourceTree = ""; }; CF6D58AEE2F0DFE0F1829A73 /* SharedModels */ = {isa = PBXFileReference; lastKnownFileType = folder; name = SharedModels; path = SharedModels; sourceTree = SOURCE_ROOT; }; + D3698CE7ACF148318615293E /* VocabReviewView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VocabReviewView.swift; sourceTree = ""; }; + D535EF6988A24B47B70209A2 /* PronunciationService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PronunciationService.swift; sourceTree = ""; }; D570252DA3DCDD9217C71863 /* WidgetDataService.swift */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.swift; path = WidgetDataService.swift; sourceTree = ""; }; DA3A33983B2F2078C9EA1A3D /* MultipleChoiceView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultipleChoiceView.swift; sourceTree = ""; }; DADCA82DDD34DF36D59BB283 /* DataLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DataLoader.swift; sourceTree = ""; }; DAF7CA1E6F9979CB2C699FDC /* CourseReviewStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CourseReviewStore.swift; sourceTree = ""; }; DAFE27F29412021AEC57E728 /* TestResult.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TestResult.swift; sourceTree = ""; }; + E10603F454E54341AA4B9931 /* ConversationService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConversationService.swift; sourceTree = ""; }; E1DBE662F89F02A0282F5BEE /* VerbDetailView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VerbDetailView.swift; sourceTree = ""; }; + E292A183ABB24FFE9CB719C8 /* StoryQuizView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryQuizView.swift; sourceTree = ""; }; E325FE0E484DE75009672D02 /* ConjugaWidgetBundle.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConjugaWidgetBundle.swift; sourceTree = ""; }; E536AD1180FE10576EAC884A /* UserProgress.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UserProgress.swift; sourceTree = ""; }; E8E9833868EB73AF9EB3A611 /* StoreInspector.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoreInspector.swift; sourceTree = ""; }; E972AA745F44586EF0B1B0C8 /* OnboardingView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OnboardingView.swift; sourceTree = ""; }; - FC2B1F646394D7C03493F1BF /* LyricsLibraryView.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = LyricsLibraryView.swift; sourceTree = ""; }; - 978FB24DF8D7436CB5210ACE /* StudyTimerService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StudyTimerService.swift; sourceTree = ""; }; - 327659ABFD524514B6D2D505 /* StoryGenerator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryGenerator.swift; sourceTree = ""; }; - 950347251CC94D4A9DFF7CBC /* StoryLibraryView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryLibraryView.swift; sourceTree = ""; }; - 2A8B6081226847E0A0A174BC /* StoryReaderView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryReaderView.swift; sourceTree = ""; }; - E292A183ABB24FFE9CB719C8 /* StoryQuizView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StoryQuizView.swift; sourceTree = ""; }; - A04370CF6B4E4D38BE3EB0C7 /* DictionaryService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictionaryService.swift; sourceTree = ""; }; - D3698CE7ACF148318615293E /* VocabReviewView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VocabReviewView.swift; sourceTree = ""; }; - A649B04B8B3C49419AD9219C /* ClozeView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ClozeView.swift; sourceTree = ""; }; - 17E5252282F44ECD9BA70DB8 /* GrammarExercise.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GrammarExercise.swift; sourceTree = ""; }; - 1F71CA5CD67342F18319DB9A /* GrammarExerciseView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GrammarExerciseView.swift; sourceTree = ""; }; - D535EF6988A24B47B70209A2 /* PronunciationService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PronunciationService.swift; sourceTree = ""; }; - 
02B2179562E54E148C98219D /* ListeningView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ListeningView.swift; sourceTree = ""; }; - E10603F454E54341AA4B9931 /* ConversationService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConversationService.swift; sourceTree = ""; }; - 5667AA04211A449A9150BD28 /* ChatLibraryView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatLibraryView.swift; sourceTree = ""; }; + EA1F177F7ABF5D2E4E5466CD /* CheckpointExamView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CheckpointExamView.swift; sourceTree = ""; }; FA5FE6E149F54A6BA7D01D99 /* ChatView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ChatView.swift; sourceTree = ""; }; - 12E9DDEFD53C49E0A48EA655 /* FeatureReferenceView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeatureReferenceView.swift; sourceTree = ""; }; + FC2B1F646394D7C03493F1BF /* LyricsLibraryView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LyricsLibraryView.swift; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -231,6 +261,14 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + C5C1BB325D49EE6ED3AC3D5F /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + E82C743EB1FDF6B67ED22EAD /* Foundation.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ @@ -242,6 +280,8 @@ 9E1FB35614B709E6B1D1D017 /* Conjuga.entitlements */, 1EB4830F9289AACC82D753F8 /* ConjugaApp.swift */, BC273716CD14A99EFF8206CA /* course_data.json */, + 7A1B2C3D4E5F60718293A4C6 /* textbook_data.json */, + 7A1B2C3D4E5F60718293A4C7 /* textbook_vocab.json */, 7E6AF62A3A949630E067DC22 /* Info.plist */, 
353C5DE41FD410FA82E3AED7 /* Models */, 1994867BC8E985795A172854 /* Services */, @@ -276,6 +316,7 @@ 1C42EA0EBD4CB1E10A82BA25 /* AchievementService.swift */, DAF7CA1E6F9979CB2C699FDC /* CourseReviewStore.swift */, DADCA82DDD34DF36D59BB283 /* DataLoader.swift */, + 7A1B2C3D4E5F60718293AA14 /* AnswerChecker.swift */, 3695075616689E72DBB26D4C /* HandwritingRecognizer.swift */, 43B8AED76C14A05AF2339C27 /* LyricsSearchService.swift */, 842DB48F8570C39CDCFF2F57 /* PracticeSessionService.swift */, @@ -315,8 +356,8 @@ 3BC3247457109FC6BF00D85B /* TenseInfo.swift */, DAFE27F29412021AEC57E728 /* TestResult.swift */, E536AD1180FE10576EAC884A /* UserProgress.swift */, - 17E5252282F44ECD9BA70DB8 /* GrammarExercise.swift */, -); + 17E5252282F44ECD9BA70DB8 /* GrammarExercise.swift */, + ); path = Models; sourceTree = ""; }; @@ -375,7 +416,7 @@ DFD75E32A53845A693D98F48 /* Chat */, 02B2179562E54E148C98219D /* ListeningView.swift */, A649B04B8B3C49419AD9219C /* ClozeView.swift */, -); + ); path = Practice; sourceTree = ""; }; @@ -384,29 +425,10 @@ children = ( 3F1A6221A35699BD8065D064 /* GrammarNotesView.swift */, 8C935ECDF8A5D8D6FA541E20 /* GuideView.swift */, - 1F71CA5CD67342F18319DB9A /* GrammarExerciseView.swift */, -); + 1F71CA5CD67342F18319DB9A /* GrammarExerciseView.swift */, + ); path = Guide; sourceTree = ""; - }; - DFD75E32A53845A693D98F48 /* Chat */ = { - isa = PBXGroup; - children = ( - 5667AA04211A449A9150BD28 /* ChatLibraryView.swift */, - FA5FE6E149F54A6BA7D01D99 /* ChatView.swift */, - ); - path = Chat; - sourceTree = ""; - }; - 8A1DED0596E04DDE9536A9A9 /* Stories */ = { - isa = PBXGroup; - children = ( - 950347251CC94D4A9DFF7CBC /* StoryLibraryView.swift */, - 2A8B6081226847E0A0A174BC /* StoryReaderView.swift */, - E292A183ABB24FFE9CB719C8 /* StoryQuizView.swift */, - ); - path = Stories; - sourceTree = ""; }; 895E547BEFB5D0FBF676BE33 /* Lyrics */ = { isa = PBXGroup; @@ -419,6 +441,16 @@ path = Lyrics; sourceTree = ""; }; + 8A1DED0596E04DDE9536A9A9 /* Stories */ = { + 
isa = PBXGroup; + children = ( + 950347251CC94D4A9DFF7CBC /* StoryLibraryView.swift */, + 2A8B6081226847E0A0A174BC /* StoryReaderView.swift */, + E292A183ABB24FFE9CB719C8 /* StoryQuizView.swift */, + ); + path = Stories; + sourceTree = ""; + }; A591A3B6F1F13D23D68D7A9D = { isa = PBXGroup; children = ( @@ -426,6 +458,8 @@ 4B183AB0C56BC2EC302531E7 /* ConjugaWidget */, F7D740BB7D1E23949D4C1AE5 /* Packages */, F605D24E5EA11065FD18AF7E /* Products */, + B442229C0A26C1D531472C7D /* Frameworks */, + C77B065CF67D1F5128E10CC7 /* ConjugaUITests */, ); sourceTree = ""; }; @@ -445,6 +479,14 @@ path = Views; sourceTree = ""; }; + B442229C0A26C1D531472C7D /* Frameworks */ = { + isa = PBXGroup; + children = ( + E772BA9C3FF67FEA9A034B4B /* iOS */, + ); + name = Frameworks; + sourceTree = ""; + }; BA34B77A38B698101DBBE241 /* Dashboard */ = { isa = PBXGroup; children = ( @@ -460,9 +502,13 @@ 143D06606AE10DCA30A140C2 /* CourseQuizView.swift */, 833516C5D57F164C8660A479 /* CourseView.swift */, 631DC0A942DD57C81DECE083 /* DeckStudyView.swift */, + 7A1B2C3D4E5F60718293AA11 /* TextbookChapterListView.swift */, + 7A1B2C3D4E5F60718293AA12 /* TextbookChapterView.swift */, + 7A1B2C3D4E5F60718293AA13 /* TextbookExerciseView.swift */, 2931634BEB33B93429CE254F /* VocabFlashcardView.swift */, 5E7EF4161C73AAC67B3A0004 /* WeekTestView.swift */, EA1F177F7ABF5D2E4E5466CD /* CheckpointExamView.swift */, + CF3A181BF2399D34C23DA933 /* StemChangeConjugationView.swift */, ); path = Course; sourceTree = ""; @@ -474,11 +520,40 @@ path = Utilities; sourceTree = ""; }; + C77B065CF67D1F5128E10CC7 /* ConjugaUITests */ = { + isa = PBXGroup; + children = ( + CEEA84E15880A9D56DE18F33 /* TextbookFlowUITests.swift */, + 8A630C74D28CE1B280C9F296 /* AllChaptersScreenshotTests.swift */, + 8F08E1DC6932D9EA1D380913 /* StemChangeToggleTests.swift */, + ); + name = ConjugaUITests; + path = ConjugaUITests; + sourceTree = ""; + }; + DFD75E32A53845A693D98F48 /* Chat */ = { + isa = PBXGroup; + children = ( + 
5667AA04211A449A9150BD28 /* ChatLibraryView.swift */, + FA5FE6E149F54A6BA7D01D99 /* ChatView.swift */, + ); + path = Chat; + sourceTree = ""; + }; + E772BA9C3FF67FEA9A034B4B /* iOS */ = { + isa = PBXGroup; + children = ( + A6153A5C7241C1AB0373AA17 /* Foundation.framework */, + ); + name = iOS; + sourceTree = ""; + }; F605D24E5EA11065FD18AF7E /* Products */ = { isa = PBXGroup; children = ( 16C1F74196C3C5628953BE3F /* Conjuga.app */, 9708FF3CF33E4765DB225F93 /* ConjugaWidgetExtension.appex */, + 27B2A75AAF79A9402AAF3F57 /* ConjugaUITests.xctest */, ); name = Products; sourceTree = ""; @@ -516,6 +591,24 @@ productReference = 16C1F74196C3C5628953BE3F /* Conjuga.app */; productType = "com.apple.product-type.application"; }; + C6CC399BFD5A2574CB9956B4 /* ConjugaUITests */ = { + isa = PBXNativeTarget; + buildConfigurationList = F454EA7279A44C5E151F71BA /* Build configuration list for PBXNativeTarget "ConjugaUITests" */; + buildPhases = ( + 66589E8F78971725CA2066ED /* Sources */, + C5C1BB325D49EE6ED3AC3D5F /* Frameworks */, + 425DC31DA6EF2C4C7A873DAA /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 04C7E3C8079DE56024C2154E /* PBXTargetDependency */, + ); + name = ConjugaUITests; + productName = ConjugaUITests; + productReference = 27B2A75AAF79A9402AAF3F57 /* ConjugaUITests.xctest */; + productType = "com.apple.product-type.bundle.ui-testing"; + }; F73909B4044081DB8F6272AF /* ConjugaWidgetExtension */ = { isa = PBXNativeTarget; buildConfigurationList = EA7E12CF28EB750C2B8BB2F1 /* Build configuration list for PBXNativeTarget "ConjugaWidgetExtension" */; @@ -568,16 +661,25 @@ 548B46ED3C40F5F28A5ADCC6 /* XCLocalSwiftPackageReference "SharedModels" */, ); preferredProjectObjectVersion = 77; + productRefGroup = F605D24E5EA11065FD18AF7E /* Products */; projectDirPath = ""; projectRoot = ""; targets = ( 96127FACA68AE541F5C0F8BC /* Conjuga */, F73909B4044081DB8F6272AF /* ConjugaWidgetExtension */, + C6CC399BFD5A2574CB9956B4 /* ConjugaUITests */, ); }; /* End 
PBXProject section */ /* Begin PBXResourcesBuildPhase section */ + 425DC31DA6EF2C4C7A873DAA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; B74A8384221C70A670B902D8 /* Resources */ = { isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; @@ -585,6 +687,8 @@ F59655A8B8FCE6264315DD33 /* Assets.xcassets in Resources */, CF9E48ADF0501FB79F3DDB7B /* conjuga_data.json in Resources */, 2B5B2D63DC9C290F66890A4A /* course_data.json in Resources */, + 7A1B2C3D4E5F60718293A4B5 /* textbook_data.json in Resources */, + 7A1B2C3D4E5F60718293A4B6 /* textbook_vocab.json in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -602,6 +706,10 @@ C3851F960C1162239DC2F935 /* CourseQuizView.swift in Sources */, 8C43F09F52EA9B537EA27E43 /* CourseReviewStore.swift in Sources */, F0D0778207F144D6AC3D39C3 /* CourseView.swift in Sources */, + 7A1B2C3D4E5F60718293AA01 /* TextbookChapterListView.swift in Sources */, + 7A1B2C3D4E5F60718293AA02 /* TextbookChapterView.swift in Sources */, + 7A1B2C3D4E5F60718293AA03 /* TextbookExerciseView.swift in Sources */, + 7A1B2C3D4E5F60718293AA04 /* AnswerChecker.swift in Sources */, 1C2636790E70B6BC7FFCC904 /* DailyLog.swift in Sources */, BB48230C3B26EA6E84D2D823 /* DailyProgressRing.swift in Sources */, 35A0F6E7124D989312721F7D /* DashboardView.swift in Sources */, @@ -653,7 +761,7 @@ 6BB4B0A655E6CB6F82D81B5A /* WeekTestView.swift in Sources */, 968D626462B0ADEC8D7D56AA /* CheckpointExamView.swift in Sources */, E99473B7DF9BCAE150E9D1E1 /* WidgetDataService.swift in Sources */, - DDF58F3899FC4B92BF6587D2 /* StudyTimerService.swift in Sources */, + DDF58F3899FC4B92BF6587D2 /* StudyTimerService.swift in Sources */, 8C1E4E7F36D64EFF8D092AC8 /* StoryGenerator.swift in Sources */, 4C2649215B81470195F38ED0 /* StoryLibraryView.swift in Sources */, 8E3D8E8254CF4213B9D9FAD3 /* StoryReaderView.swift in Sources */, @@ -669,7 +777,8 @@ 
C8AF0931F7FD458C80B6EC0D /* ChatLibraryView.swift in Sources */, 6CCC8D51F5524688A4BC5AF8 /* ChatView.swift in Sources */, 8510085D78E248D885181E80 /* FeatureReferenceView.swift in Sources */, -); + 943728CD3E65FE6CCADB05EE /* StemChangeConjugationView.swift in Sources */, + ); runOnlyForDeploymentPostprocessing = 0; }; 217A29BCEDD9D44B6DD85AF6 /* Sources */ = { @@ -686,9 +795,25 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + 66589E8F78971725CA2066ED /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 96A3E5FA8EC63123D97365E1 /* TextbookFlowUITests.swift in Sources */, + F7E459C46F25A8A45D7E0DFB /* AllChaptersScreenshotTests.swift in Sources */, + 1B0B3B2C771AD72E25B3493C /* StemChangeToggleTests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXSourcesBuildPhase section */ /* Begin PBXTargetDependency section */ + 04C7E3C8079DE56024C2154E /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = Conjuga; + target = 96127FACA68AE541F5C0F8BC /* Conjuga */; + targetProxy = 6E1F966015DA38BD4E3CE8AF /* PBXContainerItemProxy */; + }; 0B370CF10B68E386093E5BB2 /* PBXTargetDependency */ = { isa = PBXTargetDependency; target = F73909B4044081DB8F6272AF /* ConjugaWidgetExtension */; @@ -837,6 +962,24 @@ }; name = Release; }; + A923186E44A25A8086B27A34 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_OBJC_WEAK = NO; + CODE_SIGN_IDENTITY = "iPhone Developer"; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = V3PF3M6B6U; + GENERATE_INFOPLIST_FILE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 17.0; + PRODUCT_BUNDLE_IDENTIFIER = com.conjuga.app.uitests; + SDKROOT = iphoneos; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_TARGET_NAME = Conjuga; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; B9223DC55BB69E9AB81B59AE /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ -902,6 +1045,23 @@ }; name = Debug; }; + DB8C0F513F77A50F2EF2D561 
/* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_OBJC_WEAK = NO; + CODE_SIGN_IDENTITY = "iPhone Developer"; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = V3PF3M6B6U; + GENERATE_INFOPLIST_FILE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 17.0; + PRODUCT_BUNDLE_IDENTIFIER = com.conjuga.app.uitests; + SDKROOT = iphoneos; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_TARGET_NAME = Conjuga; + }; + name = Debug; + }; /* End XCBuildConfiguration section */ /* Begin XCConfigurationList section */ @@ -932,6 +1092,15 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Debug; }; + F454EA7279A44C5E151F71BA /* Build configuration list for PBXNativeTarget "ConjugaUITests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + A923186E44A25A8086B27A34 /* Release */, + DB8C0F513F77A50F2EF2D561 /* Debug */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; /* End XCConfigurationList section */ /* Begin XCLocalSwiftPackageReference section */ diff --git a/Conjuga/Conjuga.xcodeproj/xcshareddata/xcschemes/Conjuga.xcscheme b/Conjuga/Conjuga.xcodeproj/xcshareddata/xcschemes/Conjuga.xcscheme index d7181a7..4822c84 100644 --- a/Conjuga/Conjuga.xcodeproj/xcshareddata/xcschemes/Conjuga.xcscheme +++ b/Conjuga/Conjuga.xcodeproj/xcshareddata/xcschemes/Conjuga.xcscheme @@ -53,6 +53,16 @@ + + + + diff --git a/Conjuga/Conjuga/ConjugaApp.swift b/Conjuga/Conjuga/ConjugaApp.swift index 92b8613..f94bda4 100644 --- a/Conjuga/Conjuga/ConjugaApp.swift +++ b/Conjuga/Conjuga/ConjugaApp.swift @@ -69,12 +69,14 @@ struct ConjugaApp: App { schema: Schema([ ReviewCard.self, CourseReviewCard.self, UserProgress.self, TestResult.self, DailyLog.self, SavedSong.self, Story.self, Conversation.self, + TextbookExerciseAttempt.self, ]), cloudKitDatabase: .private("iCloud.com.conjuga.app") ) cloudContainer = try ModelContainer( for: ReviewCard.self, CourseReviewCard.self, UserProgress.self, TestResult.self, 
DailyLog.self, SavedSong.self, Story.self, Conversation.self, + TextbookExerciseAttempt.self, configurations: cloudConfig ) @@ -209,6 +211,7 @@ struct ConjugaApp: App { schema: Schema([ Verb.self, VerbForm.self, IrregularSpan.self, TenseGuide.self, CourseDeck.self, VocabCard.self, + TextbookChapter.self, ]), url: url, cloudKitDatabase: .none @@ -216,6 +219,7 @@ struct ConjugaApp: App { return try ModelContainer( for: Verb.self, VerbForm.self, IrregularSpan.self, TenseGuide.self, CourseDeck.self, VocabCard.self, + TextbookChapter.self, configurations: localConfig ) } diff --git a/Conjuga/Conjuga/Services/AnswerChecker.swift b/Conjuga/Conjuga/Services/AnswerChecker.swift new file mode 100644 index 0000000..0b3f688 --- /dev/null +++ b/Conjuga/Conjuga/Services/AnswerChecker.swift @@ -0,0 +1,10 @@ +import Foundation +import SharedModels + +/// Thin app-side wrapper around the SharedModels `AnswerGrader`. All logic +/// lives in SharedModels so it can be unit tested. +enum AnswerChecker { + static func grade(userText: String, canonical: String, alternates: [String] = []) -> TextbookGrade { + AnswerGrader.grade(userText: userText, canonical: canonical, alternates: alternates) + } +} diff --git a/Conjuga/Conjuga/Services/DataLoader.swift b/Conjuga/Conjuga/Services/DataLoader.swift index a033cb7..dbb8ae3 100644 --- a/Conjuga/Conjuga/Services/DataLoader.swift +++ b/Conjuga/Conjuga/Services/DataLoader.swift @@ -6,6 +6,9 @@ actor DataLoader { static let courseDataVersion = 7 static let courseDataKey = "courseDataVersion" + static let textbookDataVersion = 8 + static let textbookDataKey = "textbookDataVersion" + /// Quick check: does the DB need seeding or course data refresh? 
static func needsSeeding(container: ModelContainer) async -> Bool { let context = ModelContext(container) @@ -15,6 +18,9 @@ actor DataLoader { let storedVersion = UserDefaults.standard.integer(forKey: courseDataKey) if storedVersion < courseDataVersion { return true } + let textbookVersion = UserDefaults.standard.integer(forKey: textbookDataKey) + if textbookVersion < textbookDataVersion { return true } + return false } @@ -133,6 +139,35 @@ actor DataLoader { // Seed course data (uses the same mainContext so @Query sees it) seedCourseData(context: context) + + // Seed textbook data + seedTextbookData(context: context) + UserDefaults.standard.set(textbookDataVersion, forKey: textbookDataKey) + } + + /// Re-seed textbook data if the version has changed. + static func refreshTextbookDataIfNeeded(container: ModelContainer) async { + let shared = UserDefaults.standard + if shared.integer(forKey: textbookDataKey) >= textbookDataVersion { return } + + print("Textbook data version outdated — re-seeding...") + let context = ModelContext(container) + + // Only wipe textbook chapters and our textbook-scoped CourseDecks + // (not the LanGo decks, which live in the same tables). + try? context.delete(model: TextbookChapter.self) + let textbookCourseName = "Complete Spanish Step-by-Step" + let deckDescriptor = FetchDescriptor( + predicate: #Predicate { $0.courseName == textbookCourseName } + ) + if let decks = try? context.fetch(deckDescriptor) { + for deck in decks { context.delete(deck) } + } + try? context.save() + + seedTextbookData(context: context) + shared.set(textbookDataVersion, forKey: textbookDataKey) + print("Textbook data re-seeded to version \(textbookDataVersion)") } /// Re-seed course data if the version has changed (e.g. examples were added). @@ -170,6 +205,10 @@ actor DataLoader { // Re-seed course data seedCourseData(context: context) + // Textbook's vocab decks/cards share the same CourseDeck/VocabCard + // entities, so they were just wiped above. 
Reseed them. + seedTextbookVocabDecks(context: context, courseName: "Complete Spanish Step-by-Step") + shared.set(courseDataVersion, forKey: courseDataKey) print("Course data re-seeded to version \(courseDataVersion)") } @@ -336,4 +375,143 @@ actor DataLoader { context.insert(reviewCard) return reviewCard } + + // MARK: - Textbook seeding + + private static func seedTextbookData(context: ModelContext) { + let url = Bundle.main.url(forResource: "textbook_data", withExtension: "json") + ?? Bundle.main.bundleURL.appendingPathComponent("textbook_data.json") + guard let data = try? Data(contentsOf: url) else { + print("[DataLoader] textbook_data.json not bundled — skipping textbook seed") + return + } + guard let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else { + print("[DataLoader] ERROR: Could not parse textbook_data.json") + return + } + let courseName = (json["courseName"] as? String) ?? "Textbook" + guard let chapters = json["chapters"] as? [[String: Any]] else { + print("[DataLoader] ERROR: textbook_data.json missing chapters") + return + } + + var inserted = 0 + for ch in chapters { + guard let id = ch["id"] as? String, + let number = ch["number"] as? Int, + let title = ch["title"] as? String, + let blocksRaw = ch["blocks"] as? [[String: Any]] else { continue } + + let part = (ch["part"] as? Int) ?? 0 + + // Normalize each block to canonical keys expected by TextbookBlock decoder. + var normalized: [[String: Any]] = [] + var exerciseCount = 0 + var vocabTableCount = 0 + for (i, b) in blocksRaw.enumerated() { + var out: [String: Any] = [:] + out["index"] = i + let kind = (b["kind"] as? String) ?? 
"" + out["kind"] = kind + switch kind { + case "heading": + if let level = b["level"] { out["level"] = level } + if let text = b["text"] { out["text"] = text } + case "paragraph": + if let text = b["text"] { out["text"] = text } + case "key_vocab_header": + break + case "vocab_table": + vocabTableCount += 1 + if let src = b["sourceImage"] { out["sourceImage"] = src } + if let lines = b["ocrLines"] { out["ocrLines"] = lines } + if let conf = b["ocrConfidence"] { out["ocrConfidence"] = conf } + case "exercise": + exerciseCount += 1 + if let exId = b["id"] { out["exerciseId"] = exId } + if let inst = b["instruction"] { out["instruction"] = inst } + if let extra = b["extra"] { out["extra"] = extra } + if let prompts = b["prompts"] { out["prompts"] = prompts } + if let items = b["answerItems"] { out["answerItems"] = items } + if let freeform = b["freeform"] { out["freeform"] = freeform } + default: + break + } + normalized.append(out) + } + + let bodyJSON: Data + do { + bodyJSON = try JSONSerialization.data(withJSONObject: normalized, options: []) + } catch { + print("[DataLoader] failed to encode chapter \(number) blocks: \(error)") + continue + } + + let chapter = TextbookChapter( + id: id, + number: number, + title: title, + part: part, + courseName: courseName, + bodyJSON: bodyJSON, + exerciseCount: exerciseCount, + vocabTableCount: vocabTableCount + ) + context.insert(chapter) + inserted += 1 + } + + try? context.save() + + // Seed textbook-derived vocabulary flashcards as CourseDecks so the + // existing Course UI can surface them alongside LanGo decks. + seedTextbookVocabDecks(context: context, courseName: courseName) + + print("Textbook seeding complete: \(inserted) chapters") + } + + private static func seedTextbookVocabDecks(context: ModelContext, courseName: String) { + let url = Bundle.main.url(forResource: "textbook_vocab", withExtension: "json") + ?? Bundle.main.bundleURL.appendingPathComponent("textbook_vocab.json") + guard let data = try? 
Data(contentsOf: url), + let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any], + let chaptersArr = json["chapters"] as? [[String: Any]] + else { return } + + let courseSlug = courseName.lowercased() + .replacingOccurrences(of: " ", with: "-") + + var deckCount = 0 + var cardCount = 0 + for chData in chaptersArr { + guard let chNum = chData["chapter"] as? Int, + let cards = chData["cards"] as? [[String: Any]], + !cards.isEmpty else { continue } + + let deckId = "textbook_\(courseSlug)_ch\(chNum)" + let title = "Chapter \(chNum) vocabulary" + let deck = CourseDeck( + id: deckId, + weekNumber: chNum, + title: title, + cardCount: cards.count, + courseName: courseName, + isReversed: false + ) + context.insert(deck) + deckCount += 1 + + for c in cards { + guard let front = c["front"] as? String, + let back = c["back"] as? String else { continue } + let card = VocabCard(front: front, back: back, deckId: deckId) + card.deck = deck + context.insert(card) + cardCount += 1 + } + } + try? context.save() + print("Textbook vocab seeding complete: \(deckCount) decks, \(cardCount) cards") + } } diff --git a/Conjuga/Conjuga/Services/StartupCoordinator.swift b/Conjuga/Conjuga/Services/StartupCoordinator.swift index 83831d2..d0fe2d0 100644 --- a/Conjuga/Conjuga/Services/StartupCoordinator.swift +++ b/Conjuga/Conjuga/Services/StartupCoordinator.swift @@ -9,6 +9,7 @@ enum StartupCoordinator { static func bootstrap(localContainer: ModelContainer) async { await DataLoader.seedIfNeeded(container: localContainer) await DataLoader.refreshCourseDataIfNeeded(container: localContainer) + await DataLoader.refreshTextbookDataIfNeeded(container: localContainer) } /// Recurring maintenance: legacy migrations, identity repair, cloud dedup. 
diff --git a/Conjuga/Conjuga/Views/Course/CourseView.swift b/Conjuga/Conjuga/Views/Course/CourseView.swift index 92325c8..64f08a9 100644 --- a/Conjuga/Conjuga/Views/Course/CourseView.swift +++ b/Conjuga/Conjuga/Views/Course/CourseView.swift @@ -5,9 +5,14 @@ import SwiftData struct CourseView: View { @Environment(\.cloudModelContextProvider) private var cloudModelContextProvider @Query(sort: \CourseDeck.weekNumber) private var decks: [CourseDeck] + @Query(sort: \TextbookChapter.number) private var textbookChapters: [TextbookChapter] @AppStorage("selectedCourse") private var selectedCourse: String? @State private var testResults: [TestResult] = [] + private var textbookCourses: [String] { + Array(Set(textbookChapters.map(\.courseName))).sorted() + } + private var cloudModelContext: ModelContext { cloudModelContextProvider() } private var courseNames: [String] { @@ -62,6 +67,32 @@ struct CourseView: View { description: Text("Course data is loading...") ) } else { + // Textbook entry (shown above course picker when available) + if !textbookCourses.isEmpty { + Section { + ForEach(textbookCourses, id: \.self) { name in + NavigationLink(value: TextbookDestination(courseName: name)) { + HStack(spacing: 12) { + Image(systemName: "book.fill") + .font(.title3) + .foregroundStyle(.indigo) + .frame(width: 32) + VStack(alignment: .leading, spacing: 2) { + Text(name) + .font(.subheadline.weight(.semibold)) + Text("Read chapters, do exercises") + .font(.caption) + .foregroundStyle(.secondary) + } + Spacer() + } + } + } + } header: { + Text("Textbook") + } + } + // Course picker if courseNames.count > 1 { Section { @@ -155,6 +186,24 @@ struct CourseView: View { .navigationDestination(for: CheckpointDestination.self) { dest in CheckpointExamView(courseName: dest.courseName, throughWeek: dest.throughWeek) } + .navigationDestination(for: TextbookDestination.self) { dest in + TextbookChapterListView(courseName: dest.courseName) + } + .navigationDestination(for: TextbookChapter.self) { 
chapter in + TextbookChapterView(chapter: chapter) + } + .navigationDestination(for: TextbookExerciseDestination.self) { dest in + textbookExerciseView(for: dest) + } + } + } + + @ViewBuilder + private func textbookExerciseView(for dest: TextbookExerciseDestination) -> some View { + if let chapter = textbookChapters.first(where: { $0.id == dest.chapterId }) { + TextbookExerciseView(chapter: chapter, blockIndex: dest.blockIndex) + } else { + ContentUnavailableView("Exercise unavailable", systemImage: "questionmark.circle") } } @@ -175,6 +224,10 @@ struct CheckpointDestination: Hashable { let throughWeek: Int } +struct TextbookDestination: Hashable { + let courseName: String +} + // MARK: - Deck Row private struct DeckRowView: View { diff --git a/Conjuga/Conjuga/Views/Course/DeckStudyView.swift b/Conjuga/Conjuga/Views/Course/DeckStudyView.swift index 49d9b3f..24e3399 100644 --- a/Conjuga/Conjuga/Views/Course/DeckStudyView.swift +++ b/Conjuga/Conjuga/Views/Course/DeckStudyView.swift @@ -8,6 +8,11 @@ struct DeckStudyView: View { @State private var isStudying = false @State private var speechService = SpeechService() @State private var deckCards: [VocabCard] = [] + @State private var expandedConjugations: Set = [] + + private var isStemChangingDeck: Bool { + deck.title.localizedCaseInsensitiveContains("stem changing") + } var body: some View { cardListView @@ -19,7 +24,8 @@ struct DeckStudyView: View { VocabFlashcardView( cards: deckCards.shuffled(), speechService: speechService, - onDone: { isStudying = false } + onDone: { isStudying = false }, + deckTitle: deck.title ) .toolbar { ToolbarItem(placement: .cancellationAction) { @@ -30,6 +36,24 @@ struct DeckStudyView: View { } } + /// Reversed stem-change decks have `front` as English, so prefer the + /// Spanish side when the card is stored that way. Strip parenthetical + /// notes and the reflexive `-se` ending for verb-table lookup. 
+ private func inferInfinitive(card: VocabCard) -> String { + let raw: String + if deck.isReversed { + raw = card.back + } else { + raw = card.front + } + var t = raw.trimmingCharacters(in: .whitespacesAndNewlines) + if let paren = t.firstIndex(of: "(") { + t = String(t[.. 4 { t = String(t.dropLast(2)) } + return t + } + private func loadCards() { let deckId = deck.id let descriptor = FetchDescriptor( @@ -107,6 +131,36 @@ struct DeckStudyView: View { .multilineTextAlignment(.trailing) } + // Stem-change conjugation toggle + if isStemChangingDeck { + let verb = inferInfinitive(card: card) + let isOpen = expandedConjugations.contains(verb) + Button { + withAnimation(.smooth) { + if isOpen { + expandedConjugations.remove(verb) + } else { + expandedConjugations.insert(verb) + } + } + } label: { + Label( + isOpen ? "Hide conjugation" : "Show conjugation", + systemImage: isOpen ? "chevron.up" : "chevron.down" + ) + .font(.caption.weight(.medium)) + } + .buttonStyle(.borderless) + .tint(.blue) + .padding(.leading, 42) + + if isOpen { + StemChangeConjugationView(infinitive: verb) + .padding(.leading, 42) + .transition(.opacity.combined(with: .move(edge: .top))) + } + } + // Example sentences if !card.examplesES.isEmpty { VStack(alignment: .leading, spacing: 6) { diff --git a/Conjuga/Conjuga/Views/Course/StemChangeConjugationView.swift b/Conjuga/Conjuga/Views/Course/StemChangeConjugationView.swift new file mode 100644 index 0000000..800c2cc --- /dev/null +++ b/Conjuga/Conjuga/Views/Course/StemChangeConjugationView.swift @@ -0,0 +1,97 @@ +import SwiftUI +import SharedModels +import SwiftData + +/// Shows the present-tense conjugation of a verb (identified by infinitive), +/// with any irregular/stem-change spans highlighted. Designed to drop into +/// stem-changing verb flashcards so learners can see the conjugation in-place. 
+struct StemChangeConjugationView: View { + let infinitive: String + + @Environment(\.modelContext) private var modelContext + @State private var rows: [ConjugationRow] = [] + + private static let personLabels = ["yo", "tú", "él/ella/Ud.", "nosotros", "vosotros", "ellos/ellas/Uds."] + private static let tenseId = "ind_presente" + + var body: some View { + VStack(alignment: .leading, spacing: 8) { + HStack { + Text("Present tense") + .font(.subheadline.weight(.semibold)) + .foregroundStyle(.secondary) + Spacer() + } + if rows.isEmpty { + Text("Conjugation not available") + .font(.caption) + .foregroundStyle(.secondary) + .padding(.vertical, 4) + } else { + VStack(spacing: 6) { + ForEach(rows) { row in + HStack(alignment: .firstTextBaseline) { + Text(row.person) + .font(.callout) + .foregroundStyle(.secondary) + .frame(width: 130, alignment: .leading) + IrregularHighlightText( + form: row.form, + spans: row.spans, + font: .callout.monospaced(), + showLabels: false + ) + Spacer() + } + } + } + } + } + .padding(12) + .frame(maxWidth: .infinity, alignment: .leading) + .background(Color.blue.opacity(0.08), in: RoundedRectangle(cornerRadius: 10)) + .onAppear(perform: loadForms) + } + + private func loadForms() { + // Find the verb by infinitive (lowercase exact match). + let normalized = infinitive.lowercased().trimmingCharacters(in: .whitespaces) + let verbDescriptor = FetchDescriptor( + predicate: #Predicate { $0.infinitive == normalized } + ) + guard let verb = (try? modelContext.fetch(verbDescriptor))?.first else { + rows = [] + return + } + + let verbId = verb.id + let tenseId = Self.tenseId + let formDescriptor = FetchDescriptor( + predicate: #Predicate { $0.verbId == verbId && $0.tenseId == tenseId }, + sortBy: [SortDescriptor(\VerbForm.personIndex)] + ) + let forms = (try? modelContext.fetch(formDescriptor)) ?? [] + + rows = forms.map { f in + ConjugationRow( + id: f.personIndex, + person: Self.personLabels[safe: f.personIndex] ?? 
"", + form: f.form, + spans: f.spans ?? [] + ) + } + } +} + +private struct ConjugationRow: Identifiable { + let id: Int + let person: String + let form: String + let spans: [IrregularSpan] +} + +private extension Array { + subscript(safe index: Int) -> Element? { + indices.contains(index) ? self[index] : nil + } +} diff --git a/Conjuga/Conjuga/Views/Course/TextbookChapterListView.swift b/Conjuga/Conjuga/Views/Course/TextbookChapterListView.swift new file mode 100644 index 0000000..15bc9f1 --- /dev/null +++ b/Conjuga/Conjuga/Views/Course/TextbookChapterListView.swift @@ -0,0 +1,121 @@ +import SwiftUI +import SharedModels +import SwiftData + +struct TextbookChapterListView: View { + let courseName: String + + @Environment(\.cloudModelContextProvider) private var cloudModelContextProvider + @Query(sort: \TextbookChapter.number) private var allChapters: [TextbookChapter] + + private var cloudModelContext: ModelContext { cloudModelContextProvider() } + @State private var attempts: [TextbookExerciseAttempt] = [] + + private var chapters: [TextbookChapter] { + allChapters.filter { $0.courseName == courseName } + } + + private var byPart: [(part: Int, chapters: [TextbookChapter])] { + let grouped = Dictionary(grouping: chapters, by: \.part) + return grouped.keys.sorted().map { p in + (p, grouped[p]!.sorted { $0.number < $1.number }) + } + } + + private func progressFor(_ chapter: TextbookChapter) -> (correct: Int, total: Int) { + let chNum = chapter.number + let chAttempts = attempts.filter { + $0.courseName == courseName && $0.chapterNumber == chNum + } + let total = chAttempts.reduce(0) { $0 + $1.totalCount } + let correct = chAttempts.reduce(0) { $0 + $1.correctCount + $1.closeCount } + return (correct, total) + } + + var body: some View { + List { + if chapters.isEmpty { + ContentUnavailableView( + "Textbook loading", + systemImage: "book.closed", + description: Text("Textbook content is being prepared…") + ) + } else { + ForEach(byPart, id: \.part) { part, 
partChapters in + Section { + ForEach(partChapters, id: \.id) { chapter in + NavigationLink(value: chapter) { + chapterRow(chapter) + } + .accessibilityIdentifier("textbook-chapter-row-\(chapter.number)") + } + } header: { + if part > 0 { + Text("Part \(part)") + } else { + Text("Chapters") + } + } + } + } + } + .navigationTitle("Textbook") + .onAppear(perform: loadAttempts) + } + + @ViewBuilder + private func chapterRow(_ chapter: TextbookChapter) -> some View { + let p = progressFor(chapter) + HStack(alignment: .center, spacing: 12) { + ZStack { + Circle() + .stroke(Color.secondary.opacity(0.2), lineWidth: 3) + .frame(width: 36, height: 36) + if p.total > 0 { + Circle() + .trim(from: 0, to: CGFloat(p.correct) / CGFloat(p.total)) + .stroke(.orange, style: StrokeStyle(lineWidth: 3, lineCap: .round)) + .frame(width: 36, height: 36) + .rotationEffect(.degrees(-90)) + } + Text("\(chapter.number)") + .font(.footnote.weight(.bold)) + } + + VStack(alignment: .leading, spacing: 2) { + Text(chapter.title) + .font(.headline) + HStack(spacing: 10) { + if chapter.exerciseCount > 0 { + Label("\(chapter.exerciseCount)", systemImage: "pencil.and.list.clipboard") + .font(.caption) + .foregroundStyle(.secondary) + } + if chapter.vocabTableCount > 0 { + Label("\(chapter.vocabTableCount)", systemImage: "list.bullet.rectangle") + .font(.caption) + .foregroundStyle(.secondary) + } + if p.total > 0 { + Text("\(p.correct)/\(p.total)") + .font(.caption.monospacedDigit()) + .foregroundStyle(.secondary) + } + } + } + Spacer() + } + .padding(.vertical, 4) + } + + private func loadAttempts() { + attempts = (try? cloudModelContext.fetch(FetchDescriptor())) ?? 
[] + } +} + +#Preview { + NavigationStack { + TextbookChapterListView(courseName: "Complete Spanish Step-by-Step") + } + .modelContainer(for: [TextbookChapter.self], inMemory: true) +} diff --git a/Conjuga/Conjuga/Views/Course/TextbookChapterView.swift b/Conjuga/Conjuga/Views/Course/TextbookChapterView.swift new file mode 100644 index 0000000..f2e8f7c --- /dev/null +++ b/Conjuga/Conjuga/Views/Course/TextbookChapterView.swift @@ -0,0 +1,185 @@ +import SwiftUI +import SharedModels +import SwiftData + +struct TextbookChapterView: View { + let chapter: TextbookChapter + + @State private var expandedVocab: Set = [] + + private var blocks: [TextbookBlock] { chapter.blocks() } + + var body: some View { + ScrollView { + VStack(alignment: .leading, spacing: 12) { + headerView + Divider() + ForEach(blocks) { block in + blockView(block) + } + } + .padding(.horizontal) + .padding(.vertical, 12) + } + .navigationTitle(chapter.title) + .navigationBarTitleDisplayMode(.inline) + } + + private var headerView: some View { + VStack(alignment: .leading, spacing: 4) { + if chapter.part > 0 { + Text("Part \(chapter.part)") + .font(.subheadline) + .foregroundStyle(.secondary) + } + Text("Chapter \(chapter.number)") + .font(.subheadline) + .foregroundStyle(.secondary) + Text(chapter.title) + .font(.largeTitle.bold()) + } + } + + @ViewBuilder + private func blockView(_ block: TextbookBlock) -> some View { + switch block.kind { + case .heading: + headingView(block) + case .paragraph: + paragraphView(block) + case .keyVocabHeader: + HStack(spacing: 6) { + Image(systemName: "star.fill").foregroundStyle(.orange) + Text("Key Vocabulary") + .font(.headline) + .foregroundStyle(.orange) + } + .padding(.top, 8) + case .vocabTable: + vocabTableView(block) + case .exercise: + exerciseLinkView(block) + } + } + + private func headingView(_ block: TextbookBlock) -> some View { + let level = block.level ?? 
3 + let font: Font + switch level { + case 2: font = .title.bold() + case 3: font = .title2.bold() + case 4: font = .title3.weight(.semibold) + default: font = .headline + } + return Text(stripInlineEmphasis(block.text ?? "")) + .font(font) + .padding(.top, 10) + } + + private func paragraphView(_ block: TextbookBlock) -> some View { + Text(attributedFromMarkdownish(block.text ?? "")) + .font(.body) + .fixedSize(horizontal: false, vertical: true) + } + + private func vocabTableView(_ block: TextbookBlock) -> some View { + let expanded = expandedVocab.contains(block.index) + let lines = block.ocrLines ?? [] + return VStack(alignment: .leading, spacing: 4) { + Button { + if expanded { expandedVocab.remove(block.index) } else { expandedVocab.insert(block.index) } + } label: { + HStack { + Image(systemName: expanded ? "chevron.down" : "chevron.right") + .font(.caption) + Text("Vocabulary (\(lines.count) items)") + .font(.subheadline.weight(.medium)) + .foregroundStyle(.primary) + Spacer() + } + .contentShape(Rectangle()) + } + .buttonStyle(.plain) + + if expanded { + VStack(alignment: .leading, spacing: 2) { + ForEach(Array(lines.enumerated()), id: \.offset) { _, line in + Text(line) + .font(.callout.monospaced()) + .foregroundStyle(.secondary) + } + } + .padding(.leading, 14) + } + } + .padding(10) + .frame(maxWidth: .infinity, alignment: .leading) + .background(Color.orange.opacity(0.08), in: RoundedRectangle(cornerRadius: 10)) + } + + private func exerciseLinkView(_ block: TextbookBlock) -> some View { + NavigationLink(value: TextbookExerciseDestination( + chapterId: chapter.id, + chapterNumber: chapter.number, + blockIndex: block.index + )) { + HStack(spacing: 10) { + Image(systemName: "pencil.and.list.clipboard") + .foregroundStyle(.orange) + .font(.title3) + VStack(alignment: .leading, spacing: 2) { + Text("Exercise \(block.exerciseId ?? 
"")") + .font(.headline) + if let inst = block.instruction, !inst.isEmpty { + Text(stripInlineEmphasis(inst)) + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(2) + } + } + Spacer() + Image(systemName: "chevron.right") + .foregroundStyle(.secondary) + .font(.caption) + } + .padding(12) + .background(Color.orange.opacity(0.1), in: RoundedRectangle(cornerRadius: 10)) + } + .buttonStyle(.plain) + } + + // Strip our ad-hoc ** / * markers from parsed text + private func stripInlineEmphasis(_ s: String) -> String { + s.replacingOccurrences(of: "**", with: "") + .replacingOccurrences(of: "*", with: "") + } + + private func attributedFromMarkdownish(_ s: String) -> AttributedString { + // Parser emits `**bold**` and `*italic*`. Try to render via AttributedString markdown. + if let parsed = try? AttributedString(markdown: s, options: .init(allowsExtendedAttributes: true)) { + return parsed + } + return AttributedString(stripInlineEmphasis(s)) + } +} + +struct TextbookExerciseDestination: Hashable { + let chapterId: String + let chapterNumber: Int + let blockIndex: Int +} + +#Preview { + NavigationStack { + TextbookChapterView(chapter: TextbookChapter( + id: "ch1", + number: 1, + title: "Sample", + part: 1, + courseName: "Preview", + bodyJSON: Data(), + exerciseCount: 0, + vocabTableCount: 0 + )) + } +} diff --git a/Conjuga/Conjuga/Views/Course/TextbookExerciseView.swift b/Conjuga/Conjuga/Views/Course/TextbookExerciseView.swift new file mode 100644 index 0000000..ee1f39e --- /dev/null +++ b/Conjuga/Conjuga/Views/Course/TextbookExerciseView.swift @@ -0,0 +1,360 @@ +import SwiftUI +import SharedModels +import SwiftData +import PencilKit + +/// Interactive fill-in-the-blank view for one textbook exercise. +/// Supports keyboard typing OR Apple Pencil handwriting input per prompt. 
+struct TextbookExerciseView: View { + let chapter: TextbookChapter + let blockIndex: Int + + @Environment(\.cloudModelContextProvider) private var cloudModelContextProvider + @State private var answers: [Int: String] = [:] + @State private var drawings: [Int: PKDrawing] = [:] + @State private var grades: [Int: TextbookGrade] = [:] + @State private var inputMode: InputMode = .keyboard + @State private var activePencilPromptNumber: Int? + @State private var isRecognizing = false + @State private var isChecked = false + @State private var recognizedTextForActive: String = "" + + private var cloudModelContext: ModelContext { cloudModelContextProvider() } + + enum InputMode: String { + case keyboard + case pencil + } + + private var block: TextbookBlock? { + chapter.blocks().first { $0.index == blockIndex } + } + + private var answerByNumber: [Int: TextbookAnswerItem] { + guard let items = block?.answerItems else { return [:] } + var out: [Int: TextbookAnswerItem] = [:] + for it in items { + out[it.number] = it + } + return out + } + + var body: some View { + ScrollView { + VStack(alignment: .leading, spacing: 16) { + if let b = block { + headerView(b) + inputModePicker + exerciseBody(b) + checkButton(b) + } else { + ContentUnavailableView( + "Exercise not found", + systemImage: "questionmark.circle" + ) + } + } + .padding() + } + .navigationTitle("Exercise \(block?.exerciseId ?? "")") + .navigationBarTitleDisplayMode(.inline) + .onAppear(perform: loadPreviousAttempt) + } + + private func headerView(_ b: TextbookBlock) -> some View { + VStack(alignment: .leading, spacing: 8) { + Text("Chapter \(chapter.number): \(chapter.title)") + .font(.caption) + .foregroundStyle(.secondary) + Text("Exercise \(b.exerciseId ?? 
"")") + .font(.title2.bold()) + if let inst = b.instruction, !inst.isEmpty { + Text(stripInlineEmphasis(inst)) + .font(.callout) + .foregroundStyle(.secondary) + .fixedSize(horizontal: false, vertical: true) + } + if let extra = b.extra, !extra.isEmpty { + ForEach(Array(extra.enumerated()), id: \.offset) { _, e in + Text(stripInlineEmphasis(e)) + .font(.callout) + .fixedSize(horizontal: false, vertical: true) + .padding(8) + .frame(maxWidth: .infinity, alignment: .leading) + .background(Color.secondary.opacity(0.1), in: RoundedRectangle(cornerRadius: 8)) + } + } + } + } + + private var inputModePicker: some View { + Picker("Input mode", selection: $inputMode) { + Label("Keyboard", systemImage: "keyboard").tag(InputMode.keyboard) + Label("Pencil", systemImage: "pencil.tip").tag(InputMode.pencil) + } + .pickerStyle(.segmented) + } + + private func exerciseBody(_ b: TextbookBlock) -> some View { + VStack(alignment: .leading, spacing: 14) { + if b.freeform == true { + VStack(alignment: .leading, spacing: 6) { + Label("Freeform exercise", systemImage: "text.bubble") + .font(.subheadline.weight(.semibold)) + .foregroundStyle(.orange) + Text("Answers will vary. Use this space to write your own responses; they won't be auto-checked.") + .font(.caption) + .foregroundStyle(.secondary) + } + .padding() + .background(Color.orange.opacity(0.1), in: RoundedRectangle(cornerRadius: 10)) + } + let rawPrompts = b.prompts ?? [] + let prompts = rawPrompts.isEmpty ? 
synthesizedPrompts(b) : rawPrompts + if prompts.isEmpty && b.extra?.isEmpty == false { + Text("Fill in the blanks above; answers will be graded when you tap Check.") + .font(.caption) + .foregroundStyle(.secondary) + } else { + ForEach(Array(prompts.enumerated()), id: \.offset) { i, prompt in + promptRow(index: i, prompt: prompt, expected: answerByNumber[i + 1]) + } + } + } + } + + /// When the source exercise prompts were embedded in a bitmap (common in + /// this textbook), we have no text for each question — only the answer + /// key. Synthesize numbered placeholders so the user still gets one input + /// field per answer. + private func synthesizedPrompts(_ b: TextbookBlock) -> [String] { + guard let items = b.answerItems, !items.isEmpty else { return [] } + return items.map { "\($0.number)." } + } + + private func promptRow(index: Int, prompt: String, expected: TextbookAnswerItem?) -> some View { + let number = index + 1 + let grade = grades[number] + return VStack(alignment: .leading, spacing: 8) { + HStack(alignment: .top, spacing: 8) { + if let grade { + Image(systemName: iconFor(grade)) + .foregroundStyle(colorFor(grade)) + .font(.title3) + .padding(.top, 2) + } + Text(stripInlineEmphasis(prompt)) + .font(.body) + .fixedSize(horizontal: false, vertical: true) + } + + switch inputMode { + case .keyboard: + TextField("Your answer", text: binding(for: number)) + .textFieldStyle(.roundedBorder) + .textInputAutocapitalization(.never) + .disableAutocorrection(true) + .font(.body) + .disabled(isChecked) + case .pencil: + pencilRow(number: number) + } + + if isChecked, let grade, grade != .correct, let expected { + HStack(spacing: 6) { + Text("Answer:") + .font(.caption.weight(.semibold)) + Text(expected.answer) + .font(.caption) + if !expected.alternates.isEmpty { + Text("(also: \(expected.alternates.joined(separator: ", ")))") + .font(.caption2) + .foregroundStyle(.secondary) + } + } + .foregroundStyle(colorFor(grade)) + } + } + .padding(10) + 
.background(backgroundFor(grade), in: RoundedRectangle(cornerRadius: 8)) + } + + private func pencilRow(number: Int) -> some View { + VStack(alignment: .leading, spacing: 6) { + HandwritingCanvas( + drawing: bindingDrawing(for: number), + onDrawingChanged: { recognizePencil(for: number) } + ) + .frame(height: 100) + .background(.fill.quinary, in: RoundedRectangle(cornerRadius: 10)) + .overlay(RoundedRectangle(cornerRadius: 10).stroke(.separator, lineWidth: 1)) + + HStack { + if let typed = answers[number], !typed.isEmpty { + Text("Recognized: \(typed)") + .font(.caption) + .foregroundStyle(.secondary) + } + Spacer() + Button("Clear") { + drawings[number] = PKDrawing() + answers[number] = "" + } + .font(.caption) + .tint(.secondary) + } + } + } + + private func checkButton(_ b: TextbookBlock) -> some View { + let hasAnyAnswer = answers.values.contains { !$0.isEmpty } + let disabled = b.freeform == true || (!isChecked && !hasAnyAnswer) + return Button { + if isChecked { + resetExercise() + } else { + checkAnswers(b) + } + } label: { + Text(isChecked ? "Try again" : "Check answers") + .font(.headline) + .frame(maxWidth: .infinity) + .padding(.vertical, 10) + } + .buttonStyle(.borderedProminent) + .tint(.orange) + .disabled(disabled) + } + + // MARK: - Actions + + private func checkAnswers(_ b: TextbookBlock) { + guard let prompts = b.prompts else { return } + var newGrades: [Int: TextbookGrade] = [:] + var states: [TextbookPromptState] = [] + for (i, _) in prompts.enumerated() { + let number = i + 1 + let user = answers[number] ?? "" + let expected = answerByNumber[number] + let canonical = expected?.answer ?? "" + let alts = expected?.alternates ?? 
[] + let grade: TextbookGrade + if canonical.isEmpty { + grade = .wrong + } else { + grade = AnswerChecker.grade(userText: user, canonical: canonical, alternates: alts) + } + newGrades[number] = grade + states.append(TextbookPromptState(number: number, userText: user, grade: grade)) + } + grades = newGrades + isChecked = true + saveAttempt(states: states, exerciseId: b.exerciseId ?? "") + } + + private func resetExercise() { + answers.removeAll() + drawings.removeAll() + grades.removeAll() + isChecked = false + } + + private func recognizePencil(for number: Int) { + guard let drawing = drawings[number], !drawing.strokes.isEmpty else { return } + isRecognizing = true + Task { + let result = await HandwritingRecognizer.recognize(drawing: drawing) + await MainActor.run { + answers[number] = result.text + isRecognizing = false + } + } + } + + private func saveAttempt(states: [TextbookPromptState], exerciseId: String) { + let attemptId = TextbookExerciseAttempt.attemptId( + courseName: chapter.courseName, + exerciseId: exerciseId + ) + let descriptor = FetchDescriptor( + predicate: #Predicate { $0.id == attemptId } + ) + let context = cloudModelContext + let existing = (try? context.fetch(descriptor))?.first + let attempt = existing ?? TextbookExerciseAttempt( + id: attemptId, + courseName: chapter.courseName, + chapterNumber: chapter.number, + exerciseId: exerciseId + ) + if existing == nil { context.insert(attempt) } + attempt.lastAttemptAt = Date() + attempt.setPromptStates(states) + try? context.save() + } + + private func loadPreviousAttempt() { + guard let b = block else { return } + let attemptId = TextbookExerciseAttempt.attemptId( + courseName: chapter.courseName, + exerciseId: b.exerciseId ?? "" + ) + let descriptor = FetchDescriptor( + predicate: #Predicate { $0.id == attemptId } + ) + guard let attempt = (try? 
cloudModelContext.fetch(descriptor))?.first else { return } + for s in attempt.promptStates() { + answers[s.number] = s.userText + grades[s.number] = s.grade + } + isChecked = !grades.isEmpty + } + + // MARK: - Bindings + + private func binding(for number: Int) -> Binding { + Binding( + get: { answers[number] ?? "" }, + set: { answers[number] = $0 } + ) + } + + private func bindingDrawing(for number: Int) -> Binding { + Binding( + get: { drawings[number] ?? PKDrawing() }, + set: { drawings[number] = $0 } + ) + } + + // MARK: - UI helpers + + private func iconFor(_ grade: TextbookGrade) -> String { + switch grade { + case .correct: return "checkmark.circle.fill" + case .close: return "circle.lefthalf.filled" + case .wrong: return "xmark.circle.fill" + } + } + + private func colorFor(_ grade: TextbookGrade) -> Color { + switch grade { + case .correct: return .green + case .close: return .orange + case .wrong: return .red + } + } + + private func backgroundFor(_ grade: TextbookGrade?) -> Color { + guard let grade else { return Color.secondary.opacity(0.05) } + switch grade { + case .correct: return .green.opacity(0.12) + case .close: return .orange.opacity(0.12) + case .wrong: return .red.opacity(0.12) + } + } + + private func stripInlineEmphasis(_ s: String) -> String { + s.replacingOccurrences(of: "**", with: "") + .replacingOccurrences(of: "*", with: "") + } +} diff --git a/Conjuga/Conjuga/Views/Course/VocabFlashcardView.swift b/Conjuga/Conjuga/Views/Course/VocabFlashcardView.swift index d7065b8..d86e74c 100644 --- a/Conjuga/Conjuga/Views/Course/VocabFlashcardView.swift +++ b/Conjuga/Conjuga/Views/Course/VocabFlashcardView.swift @@ -6,11 +6,19 @@ struct VocabFlashcardView: View { let cards: [VocabCard] let speechService: SpeechService let onDone: () -> Void + /// Optional deck context — when present and the title indicates a stem- + /// changing deck, each card gets an inline conjugation toggle. + var deckTitle: String? 
= nil @Environment(\.cloudModelContextProvider) private var cloudModelContextProvider @State private var currentIndex = 0 @State private var isRevealed = false @State private var sessionCorrect = 0 + @State private var showConjugation = false + + private var isStemChangingDeck: Bool { + (deckTitle ?? "").localizedCaseInsensitiveContains("stem changing") + } private var cloudModelContext: ModelContext { cloudModelContextProvider() } @@ -61,6 +69,25 @@ struct VocabFlashcardView: View { .padding(12) } .glassEffect(in: .circle) + + if isStemChangingDeck { + Button { + withAnimation(.smooth) { showConjugation.toggle() } + } label: { + Label( + showConjugation ? "Hide conjugation" : "Show conjugation", + systemImage: showConjugation ? "chevron.up" : "chevron.down" + ) + .font(.subheadline.weight(.medium)) + } + .buttonStyle(.bordered) + .tint(.blue) + + if showConjugation { + StemChangeConjugationView(infinitive: stripToInfinitive(card.front)) + .transition(.opacity.combined(with: .move(edge: .top))) + } + } } .transition(.blurReplace) } else { @@ -111,6 +138,7 @@ struct VocabFlashcardView: View { guard currentIndex > 0 else { return } withAnimation(.smooth) { isRevealed = false + showConjugation = false currentIndex -= 1 } } label: { @@ -125,6 +153,7 @@ struct VocabFlashcardView: View { Button { withAnimation(.smooth) { isRevealed = false + showConjugation = false currentIndex += 1 } } label: { @@ -189,9 +218,25 @@ struct VocabFlashcardView: View { // Next card withAnimation(.smooth) { isRevealed = false + showConjugation = false currentIndex += 1 } } + + /// Card fronts may be plain infinitives ("cerrar") or, in reversed decks, + /// stored as English. Strip any reflexive-se suffix or parenthetical notes + /// to improve the verb lookup hit rate. + private func stripToInfinitive(_ s: String) -> String { + var t = s.trimmingCharacters(in: .whitespacesAndNewlines) + if let paren = t.firstIndex(of: "(") { + t = String(t[.. 
4 { + // "acostarse" → "acostar" for verb lookup + t = String(t.dropLast(2)) + } + return t + } } #Preview { diff --git a/Conjuga/ConjugaUITests/AllChaptersScreenshotTests.swift b/Conjuga/ConjugaUITests/AllChaptersScreenshotTests.swift new file mode 100644 index 0000000..6a088c6 --- /dev/null +++ b/Conjuga/ConjugaUITests/AllChaptersScreenshotTests.swift @@ -0,0 +1,95 @@ +import XCTest + +/// Screenshot every chapter of the textbook — one top + one bottom frame each — +/// so you can visually audit parsing / rendering issues across all 30 chapters. +final class AllChaptersScreenshotTests: XCTestCase { + + override func setUpWithError() throws { + continueAfterFailure = true + } + + func testScreenshotEveryChapter() throws { + let app = XCUIApplication() + app.launchArguments += ["-onboardingComplete", "YES"] + app.launch() + + let courseTab = app.tabBars.buttons["Course"] + XCTAssertTrue(courseTab.waitForExistence(timeout: 5)) + courseTab.tap() + + let textbookRow = app.buttons.containing(NSPredicate( + format: "label CONTAINS[c] 'Complete Spanish'" + )).firstMatch + XCTAssertTrue(textbookRow.waitForExistence(timeout: 5)) + textbookRow.tap() + + // NOTE: SwiftUI List preserves scroll position across navigation pushes, + // so visiting chapters in-order means the next one is already visible + // after we return from the previous one. No need to reset. 
+ attach(app, name: "00-chapter-list-top") + + for chapter in 1...30 { + guard let row = findChapterRow(app: app, chapter: chapter) else { + XCTFail("Chapter \(chapter) row not reachable") + continue + } + row.tap() + + // Chapter body — wait until the chapter's title appears as a nav bar label + _ = app.navigationBars.firstMatch.waitForExistence(timeout: 3) + + attach(app, name: String(format: "ch%02d-top", chapter)) + // One big scroll to sample the bottom of the chapter + dragFullScreen(app, direction: .up) + dragFullScreen(app, direction: .up) + attach(app, name: String(format: "ch%02d-bottom", chapter)) + + tapNavBack(app) + // Small settle wait + _ = app.navigationBars.firstMatch.waitForExistence(timeout: 2) + } + } + + // MARK: - Helpers + + private enum DragDirection { case up, down } + + private func dragFullScreen(_ app: XCUIApplication, direction: DragDirection) { + let top = app.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.12)) + let bot = app.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.88)) + switch direction { + case .up: bot.press(forDuration: 0.1, thenDragTo: top) + case .down: top.press(forDuration: 0.1, thenDragTo: bot) + } + } + + private func findChapterRow(app: XCUIApplication, chapter: Int) -> XCUIElement? { + // Chapter row accessibility label: ", , ..." (SwiftUI composes + // label from inner Texts). Match by starting number. + let predicate = NSPredicate(format: "label BEGINSWITH %@", "\(chapter),") + let row = app.buttons.matching(predicate).firstMatch + + if row.exists && row.isHittable { return row } + + // Scroll down up to 8 times searching for the row — chapters visited + // in order, so usually 0–2 swipes suffice. + for _ in 0..<8 { + if row.exists && row.isHittable { return row } + dragFullScreen(app, direction: .up) + } + return row.exists ? 
row : nil + } + + private func tapNavBack(_ app: XCUIApplication) { + let back = app.navigationBars.buttons.firstMatch + if back.exists && back.isHittable { back.tap() } + } + + private func attach(_ app: XCUIApplication, name: String) { + let screenshot = app.screenshot() + let attachment = XCTAttachment(screenshot: screenshot) + attachment.name = name + attachment.lifetime = .keepAlways + add(attachment) + } +} diff --git a/Conjuga/ConjugaUITests/StemChangeToggleTests.swift b/Conjuga/ConjugaUITests/StemChangeToggleTests.swift new file mode 100644 index 0000000..5b1bbfa --- /dev/null +++ b/Conjuga/ConjugaUITests/StemChangeToggleTests.swift @@ -0,0 +1,66 @@ +import XCTest + +final class StemChangeToggleTests: XCTestCase { + + override func setUpWithError() throws { + continueAfterFailure = false + } + + func testStemChangeConjugationToggle() throws { + let app = XCUIApplication() + app.launchArguments += ["-onboardingComplete", "YES"] + app.launch() + + // Course → LanGo Beginner I → Week 4 → E-IE stem-changing verbs + app.tabBars.buttons["Course"].tap() + + // Locate the E-IE deck row. Deck titles appear as static text / button. + // Scroll until visible, then tap. 
+ let deckPredicate = NSPredicate(format: "label CONTAINS[c] 'E-IE stem changing verbs' AND NOT label CONTAINS[c] 'REVÉS'") + let deckRow = app.buttons.matching(deckPredicate).firstMatch + + let listRef = app.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.85)) + let topRef = app.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.10)) + for _ in 0..<12 { + if deckRow.exists && deckRow.isHittable { break } + listRef.press(forDuration: 0.1, thenDragTo: topRef) + } + XCTAssertTrue(deckRow.waitForExistence(timeout: 3), "E-IE deck row missing") + deckRow.tap() + + attach(app, name: "01-deck-top") + + // Tap "Show conjugation" on the first card + let showBtn = app.buttons.matching(NSPredicate(format: "label BEGINSWITH 'Show conjugation'")).firstMatch + XCTAssertTrue(showBtn.waitForExistence(timeout: 3), "Show conjugation button missing") + showBtn.tap() + + // Wait for the conjugation rows + animation to settle. + let yoLabel = app.staticTexts["yo"].firstMatch + XCTAssertTrue(yoLabel.waitForExistence(timeout: 3), "yo row not rendered") + // Give the transition time to complete before snapshotting. + Thread.sleep(forTimeInterval: 0.6) + attach(app, name: "02-conjugation-open") + + // Also confirm all expected person labels are rendered. 
+ for person in ["yo", "tú", "nosotros"] { + XCTAssertTrue( + app.staticTexts[person].firstMatch.exists, + "missing conjugation row for \(person)" + ) + } + + // Tap again to hide + let hideBtn = app.buttons.matching(NSPredicate(format: "label BEGINSWITH 'Hide conjugation'")).firstMatch + XCTAssertTrue(hideBtn.waitForExistence(timeout: 2)) + hideBtn.tap() + } + + private func attach(_ app: XCUIApplication, name: String) { + let s = app.screenshot() + let a = XCTAttachment(screenshot: s) + a.name = name + a.lifetime = .keepAlways + add(a) + } +} diff --git a/Conjuga/ConjugaUITests/TextbookFlowUITests.swift b/Conjuga/ConjugaUITests/TextbookFlowUITests.swift new file mode 100644 index 0000000..d1a1778 --- /dev/null +++ b/Conjuga/ConjugaUITests/TextbookFlowUITests.swift @@ -0,0 +1,80 @@ +import XCTest + +final class TextbookFlowUITests: XCTestCase { + + override func setUpWithError() throws { + continueAfterFailure = false + } + + func testTextbookFlow() throws { + let app = XCUIApplication() + // Skip onboarding via defaults (already set by run script, but harmless to override) + app.launchArguments += ["-onboardingComplete", "YES"] + app.launch() + + // Dashboard should be default tab. Switch to Course. 
+ let courseTab = app.tabBars.buttons["Course"] + XCTAssertTrue(courseTab.waitForExistence(timeout: 5), "Course tab missing") + courseTab.tap() + + // Attach a screenshot of the Course list + attach(app, name: "01-course-list") + + // Tap the Textbook entry + let textbookRow = app.buttons.containing(NSPredicate( + format: "label CONTAINS[c] 'Complete Spanish'" + )).firstMatch + XCTAssertTrue(textbookRow.waitForExistence(timeout: 5), "Textbook row missing in Course") + textbookRow.tap() + + attach(app, name: "02-textbook-chapter-list") + + // Tap chapter 1 — should navigate to reader + let chapterOneRow = app.buttons.containing(NSPredicate( + format: "label CONTAINS[c] 'Nouns, Articles'" + )).firstMatch + XCTAssertTrue(chapterOneRow.waitForExistence(timeout: 5), "Chapter 1 row missing") + chapterOneRow.tap() + + attach(app, name: "03-chapter-body") + + // Find the first exercise link ("Exercise 1.1") + let exerciseRow = app.buttons.containing(NSPredicate( + format: "label CONTAINS[c] 'Exercise 1.1'" + )).firstMatch + XCTAssertTrue(exerciseRow.waitForExistence(timeout: 5), "Exercise 1.1 link missing") + exerciseRow.tap() + + attach(app, name: "04-exercise-view") + + // Check presence of input fields: at least a few numbered prompts + // Text fields use SwiftUI placeholder "Your answer" + let firstField = app.textFields["Your answer"].firstMatch + XCTAssertTrue(firstField.waitForExistence(timeout: 5), "No input fields rendered for exercise") + firstField.tap() + firstField.typeText("el") + + attach(app, name: "05-exercise-typed-el") + + // Tap Check answers + let checkButton = app.buttons["Check answers"] + XCTAssertTrue(checkButton.waitForExistence(timeout: 3), "Check answers button missing") + checkButton.tap() + + attach(app, name: "06-exercise-graded") + + // The first answer to Exercise 1.1 is "el" — we should see the first prompt + // graded correct. 
Iterating too deeply is fragile; just take a screenshot + // and check for presence of either a checkmark-like label or "Try again". + let tryAgain = app.buttons["Try again"] + XCTAssertTrue(tryAgain.waitForExistence(timeout: 3), "Grading did not complete") + } + + private func attach(_ app: XCUIApplication, name: String) { + let screenshot = app.screenshot() + let attachment = XCTAttachment(screenshot: screenshot) + attachment.name = name + attachment.lifetime = .keepAlways + add(attachment) + } +} diff --git a/Conjuga/Scripts/textbook/build_book.py b/Conjuga/Scripts/textbook/build_book.py new file mode 100644 index 0000000..01def96 --- /dev/null +++ b/Conjuga/Scripts/textbook/build_book.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python3 +"""Merge chapters.json + answers.json + ocr.json → book.json (single source). + +Also emits vocab_cards.json: flashcards derived from vocab_image blocks where +OCR text parses as a clean two-column (Spanish ↔ English) table. +""" + +import json +import re +import sys +from pathlib import Path + +HERE = Path(__file__).resolve().parent +CHAPTERS_JSON = HERE / "chapters.json" +ANSWERS_JSON = HERE / "answers.json" +OCR_JSON = HERE / "ocr.json" +OUT_BOOK = HERE / "book.json" +OUT_VOCAB = HERE / "vocab_cards.json" + +COURSE_NAME = "Complete Spanish Step-by-Step" + +# Heuristic: parseable "Spanish | English" vocab rows. +# OCR usually produces "word — translation" or "word translation" separated +# by 2+ spaces. We detect rows that contain both Spanish and English words. 
# ---------------------------------------------------------------------------
# Language-detection tables for OCR'd vocab lines.
# ---------------------------------------------------------------------------
SPANISH_ACCENT_RE = re.compile(r"[áéíóúñüÁÉÍÓÚÑÜ¿¡]")
SPANISH_ARTICLES = {"el", "la", "los", "las", "un", "una", "unos", "unas"}
ENGLISH_STARTERS = {"the", "a", "an", "to", "my", "his", "her", "our", "their", "your", "some"}
# English-only function words that would never appear as Spanish.
# NOTE: stored lowercase — classify_line()/looks_english() compare against a
# lowercased token, so the previous uppercase entry "I" could never match.
ENGLISH_ONLY_WORDS = {"the", "he", "she", "it", "we", "they", "i", "is", "are", "was", "were",
                      "been", "have", "has", "had", "will", "would", "should", "could"}
# Column/alternate separators: runs of 2+ spaces/tabs, or a spaced dash variant.
SEP_RE = re.compile(r"[ \t]{2,}|\s[—–−-]\s")


def classify_line(line: str) -> str:
    """Return 'es', 'en', or 'unknown' for the dominant language of a vocab line.

    Heuristics, in priority order: any accented/Spanish punctuation character
    means Spanish; a leading Spanish article means Spanish; a leading English
    determiner/pronoun/auxiliary means English; otherwise unknown.
    """
    line = line.strip()
    if not line:
        return "unknown"
    # Accent = definitely Spanish.
    if SPANISH_ACCENT_RE.search(line):
        return "es"
    first = line.split()[0].lower().strip(",.;:")
    if first in SPANISH_ARTICLES:
        return "es"
    # Leading English starter or English-only function word → English.
    if first in ENGLISH_STARTERS or first in ENGLISH_ONLY_WORDS:
        return "en"
    return "unknown"


def looks_english(word: str) -> bool:
    """Best-effort check that a string reads as English (used by try_split_row)."""
    w = word.lower().strip()
    if not w:
        return False
    if SPANISH_ACCENT_RE.search(w):
        return False
    if w in SPANISH_ARTICLES:
        return False
    if w in ENGLISH_STARTERS or w in ENGLISH_ONLY_WORDS:
        return True
    # Fallback: plain lowercase Latin text with common punctuation.
    return bool(re.match(r"^[a-z][a-z\s'/()\-,.]*$", w))


def try_split_row(line: str) -> "tuple[str, str] | None":
    """Split a line into (spanish, english) if it looks like a vocab entry,
    e.g. "la casa — the house"; return None when it does not parse."""
    line = line.strip()
    if not line or len(line) < 3:
        return None
    # Try explicit separators first (2+ spaces or a spaced dash).
    parts = SEP_RE.split(line)
    parts = [p.strip() for p in parts if p.strip()]
    if len(parts) == 2:
        spanish, english = parts
        if looks_english(english) and not looks_english(spanish.split()[0]):
            return (spanish, english)
    return None


def load(p: Path) -> dict:
    """Read a UTF-8 JSON file into a dict."""
    return json.loads(p.read_text(encoding="utf-8"))


def build_vocab_cards_for_block(block: dict, ocr_entry: dict, chapter: dict, context_title: str, idx: int) -> list:
    """Given a vocab_image block + its OCR lines, derive flashcards.

    Vision OCR reads top-to-bottom, left-to-right; a two-column vocab table
    produces Spanish lines first, then English lines. We split the list in
    half when one side is predominantly Spanish and the other English.
    Per-line '—' separators are also supported as a fallback.

    `idx` is the block's position within the chapter; currently unused but
    retained for call-site compatibility.
    """
    cards = []
    if not ocr_entry:
        return cards
    lines = [ln.strip() for ln in ocr_entry.get("lines", []) if ln.strip()]
    if not lines:
        return cards

    def card(front: str, back: str) -> dict:
        # One flashcard, tagged with enough provenance to trace it back.
        return {
            "front": front,
            "back": back,
            "chapter": chapter["number"],
            "chapterTitle": chapter["title"],
            "section": context_title,
            "sourceImage": block["src"],
        }

    # Attempt 1: explicit inline separator (e.g. "la casa — the house").
    # Only trusted when EVERY line parses that way.
    inline = []
    all_inline = True
    for line in lines:
        pair = try_split_row(line)
        if pair:
            inline.append(pair)
        else:
            all_inline = False
            break
    if all_inline and inline:
        for es, en in inline:
            cards.append(card(es, en))
        return cards

    # Attempt 2: block-alternating layout.
    # Vision OCR reads columns top-to-bottom, so a 2-col table rendered across
    # 2 visual columns produces runs like: [ES...ES][EN...EN][ES...ES][EN...EN]
    # We classify each line, smooth "unknown" using neighbors, then pair
    # same-sized consecutive ES/EN blocks.
    classes = [classify_line(ln) for ln in lines]

    # Pass 1: fill unknowns using nearest non-unknown neighbor (forward).
    last_known = "unknown"
    forward = []
    for c in classes:
        if c != "unknown":
            last_known = c
        forward.append(last_known)
    # Pass 2: backfill leading unknowns (backward).
    last_known = "unknown"
    backward = [""] * len(classes)
    for i in range(len(classes) - 1, -1, -1):
        if classes[i] != "unknown":
            last_known = classes[i]
        backward[i] = last_known
    # Merge: prefer forward unless still unknown.
    resolved = []
    for f, b in zip(forward, backward):
        if f != "unknown":
            resolved.append(f)
        elif b != "unknown":
            resolved.append(b)
        else:
            resolved.append("unknown")

    # Group consecutive same-language lines into runs.
    blocks: list = []
    cur_lang: "str | None" = None
    cur_block: list = []
    for line, lang in zip(lines, resolved):
        if lang != cur_lang:
            if cur_block and cur_lang is not None:
                blocks.append((cur_lang, cur_block))
            cur_block = [line]
            cur_lang = lang
        else:
            cur_block.append(line)
    if cur_block and cur_lang is not None:
        blocks.append((cur_lang, cur_block))

    # Walk runs pairing ES then EN of equal length.
    i = 0
    while i < len(blocks) - 1:
        lang_a, lines_a = blocks[i]
        lang_b, lines_b = blocks[i + 1]
        if lang_a == "es" and lang_b == "en" and len(lines_a) == len(lines_b):
            for es, en in zip(lines_a, lines_b):
                cards.append(card(es, en))
            i += 2
            continue
        # If reversed order (some pages have EN column on left), try that too.
        if lang_a == "en" and lang_b == "es" and len(lines_a) == len(lines_b):
            for es, en in zip(lines_b, lines_a):
                cards.append(card(es, en))
            i += 2
            continue
        i += 1

    return cards


def clean_instruction(text: str) -> str:
    """Strip leading/trailing emphasis markers from a parsed instruction."""
    # Our XHTML parser emitted * and ** for emphasis; flatten them.
    t = re.sub(r"\*+", "", text)
    return t.strip()


def merge() -> None:
    """Merge chapters.json + answers.json + ocr.json into book.json and emit
    vocab_cards.json (flashcards auto-derived from vocab_image OCR).

    Reads the module-level CHAPTERS_JSON/ANSWERS_JSON/OCR_JSON paths and
    writes OUT_BOOK/OUT_VOCAB; prints a summary plus validation counts.
    """
    chapters_data = load(CHAPTERS_JSON)
    answers_data = load(ANSWERS_JSON)
    try:
        ocr_data = load(OCR_JSON)
    except FileNotFoundError:
        # OCR is optional — proceed, but every image will be flagged missing.
        print("ocr.json not found — proceeding with empty OCR data")
        ocr_data = {}

    answers = answers_data["answers"]
    chapters = chapters_data["chapters"]
    parts = chapters_data.get("part_memberships", {})

    book_chapters = []
    all_vocab_cards = []
    missing_ocr = set()
    current_section_title = ""

    for ch in chapters:
        out_blocks = []
        current_section_title = ch["title"]

        for bi, block in enumerate(ch["blocks"]):
            k = block["kind"]

            if k == "heading":
                # Track the nearest heading so derived cards carry a section label.
                current_section_title = block["text"]
                out_blocks.append(block)
                continue

            if k == "paragraph" or k == "key_vocab_header":
                out_blocks.append(block)
                continue

            if k == "vocab_image":
                ocr_entry = ocr_data.get(block["src"])
                if ocr_entry is None:
                    missing_ocr.add(block["src"])
                derived = build_vocab_cards_for_block(
                    block, ocr_entry, ch, current_section_title, bi
                )
                all_vocab_cards.extend(derived)
                out_blocks.append({
                    "kind": "vocab_table",
                    "sourceImage": block["src"],
                    "ocrLines": ocr_entry.get("lines", []) if ocr_entry else [],
                    "ocrConfidence": ocr_entry.get("confidence", 0.0) if ocr_entry else 0.0,
                    "cardCount": len(derived),
                })
                continue

            if k == "exercise":
                ans = answers.get(block["id"])
                image_ocr_lines = []
                for src in block.get("image_refs", []):
                    entry = ocr_data.get(src)
                    if entry is None:
                        missing_ocr.add(src)
                        continue
                    image_ocr_lines.extend(entry.get("lines", []))

                # Build the final prompt list. If we have text prompts from
                # XHTML, prefer them. Otherwise, attempt to use OCR lines,
                # keeping only numbered ones ("1. ..." or "1) ...").
                prompts = [p for p in block.get("prompts", []) if p.strip()]
                extras = [x for x in block.get("extra", []) if x.strip()]
                if not prompts and image_ocr_lines:
                    for line in image_ocr_lines:
                        m = re.match(r"^(\d+)[.)]\s*(.+)", line.strip())
                        if m:
                            prompts.append(f"{m.group(1)}. {m.group(2)}")

                # Flatten the answer key's subparts into one item list,
                # carrying the subpart label (A/B/C) onto each item.
                sub = ans["subparts"] if ans else []
                answer_items = []
                for sp in sub:
                    for it in sp["items"]:
                        answer_items.append({
                            "label": sp["label"],
                            "number": it["number"],
                            "answer": it["answer"],
                            "alternates": it["alternates"],
                        })

                out_blocks.append({
                    "kind": "exercise",
                    "id": block["id"],
                    "ansAnchor": block.get("ans_anchor", ""),
                    "instruction": clean_instruction(block.get("instruction", "")),
                    "extra": extras,
                    "prompts": prompts,
                    "ocrLines": image_ocr_lines,
                    "freeform": ans["freeform"] if ans else False,
                    "answerItems": answer_items,
                    "answerRaw": ans["raw"] if ans else "",
                    "answerSubparts": sub,
                })
                continue

            # Unknown block kinds pass through unchanged.
            out_blocks.append(block)

        book_chapters.append({
            "id": ch["id"],
            "number": ch["number"],
            "title": ch["title"],
            "part": ch.get("part"),
            "blocks": out_blocks,
        })

    book = {
        "courseName": COURSE_NAME,
        "totalChapters": len(book_chapters),
        "totalExercises": sum(
            1 for ch in book_chapters for b in ch["blocks"] if b["kind"] == "exercise"
        ),
        "totalVocabTables": sum(
            1 for ch in book_chapters for b in ch["blocks"] if b["kind"] == "vocab_table"
        ),
        "totalVocabCards": len(all_vocab_cards),
        "parts": parts,
        "chapters": book_chapters,
    }
    # encoding="utf-8": write_text otherwise uses the locale encoding, which
    # can fail since ensure_ascii=False keeps accented Spanish characters.
    OUT_BOOK.write_text(json.dumps(book, ensure_ascii=False), encoding="utf-8")

    # Vocab cards as a separate file (grouped per chapter so they can be seeded
    # as CourseDecks in the existing schema).
    vocab_by_chapter: dict = {}
    for card in all_vocab_cards:
        vocab_by_chapter.setdefault(card["chapter"], []).append(card)
    OUT_VOCAB.write_text(json.dumps({
        "courseName": COURSE_NAME,
        "chapters": [
            {
                "chapter": ch_num,
                "cards": cards,
            }
            for ch_num, cards in sorted(vocab_by_chapter.items())
        ],
    }, ensure_ascii=False, indent=2), encoding="utf-8")

    # Summary
    print(f"Wrote {OUT_BOOK}")
    print(f"Wrote {OUT_VOCAB}")
    print(f"Chapters: {book['totalChapters']}")
    print(f"Exercises: {book['totalExercises']}")
    print(f"Vocab tables: {book['totalVocabTables']}")
    print(f"Vocab cards (auto): {book['totalVocabCards']}")
    if missing_ocr:
        print(f"Missing OCR for {len(missing_ocr)} images (first 5): {sorted(list(missing_ocr))[:5]}")

    # Validation
    total_exercises = book["totalExercises"]
    exercises_with_prompts = sum(
        1 for ch in book_chapters for b in ch["blocks"]
        if b["kind"] == "exercise" and (b["prompts"] or b["extra"])
    )
    exercises_with_answers = sum(
        1 for ch in book_chapters for b in ch["blocks"]
        if b["kind"] == "exercise" and b["answerItems"]
    )
    exercises_freeform = sum(
        1 for ch in book_chapters for b in ch["blocks"]
        if b["kind"] == "exercise" and b["freeform"]
    )
    print(f"Exercises with prompts: {exercises_with_prompts}/{total_exercises}")
    print(f"Exercises with answers: {exercises_with_answers}/{total_exercises}")
    print(f"Freeform exercises: {exercises_freeform}")


if __name__ == "__main__":
    merge()
+""" + +import html +import json +from pathlib import Path + +HERE = Path(__file__).resolve().parent +BOOK = HERE / "book.json" +OCR = HERE / "ocr.json" +OUT_HTML = HERE / "review.html" +EPUB_IMAGES = Path(HERE).parents[2] / "epub_extract" / "OEBPS" +IMAGE_REL = EPUB_IMAGES.relative_to(HERE.parent) if False else EPUB_IMAGES + + +def load(p: Path) -> dict: + return json.loads(p.read_text(encoding="utf-8")) + + +def esc(s: str) -> str: + return html.escape(s or "") + + +def img_tag(src: str) -> str: + full = (EPUB_IMAGES / src).resolve() + return f'<img src="file://{full}" alt="{esc(src)}" class="src"/>' + + +def render() -> None: + book = load(BOOK) + ocr = load(OCR) if OCR.exists() else {} + + out: list = [] + out.append("""<!DOCTYPE html> +<html><head><meta charset='utf-8'><title>Book review +""") + out.append(f"

{esc(book['courseName'])} — review

") + out.append(f"

{book['totalChapters']} chapters · {book['totalExercises']} exercises · {book['totalVocabTables']} vocab tables · {book['totalVocabCards']} auto-derived cards

") + + for ch in book["chapters"]: + part = ch.get("part") + part_str = f" (Part {part})" if part else "" + out.append(f"

Chapter {ch['number']}: {esc(ch['title'])}{esc(part_str)}

") + + for b in ch["blocks"]: + kind = b["kind"] + if kind == "heading": + level = b["level"] + out.append(f"{esc(b['text'])}") + elif kind == "paragraph": + out.append(f"

{esc(b['text'])}

") + elif kind == "key_vocab_header": + out.append(f"

★ Key Vocabulary

") + elif kind == "vocab_table": + src = b["sourceImage"] + conf = b["ocrConfidence"] + conf_class = "lowconf" if conf < 0.85 else "" + out.append(f"
") + out.append(f"
vocab {esc(src)} · confidence {conf:.2f} · {b['cardCount']} card(s)") + out.append(img_tag(src)) + out.append("
") + for line in b.get("ocrLines", []): + out.append(f"
{esc(line)}
") + out.append("
") + # Show derived pairs (if any). We don't have them inline in book.json, + # but we can recompute from ocrLines using the same function. + out.append("
") + elif kind == "exercise": + out.append(f"
") + out.append(f"Exercise {esc(b['id'])}{esc(b['instruction'])}") + if b.get("extra"): + for e in b["extra"]: + out.append(f"
{esc(e)}
") + if b.get("ocrLines"): + out.append(f"
OCR lines from image") + for line in b["ocrLines"]: + out.append(f"
{esc(line)}
") + out.append("
") + if b.get("prompts"): + out.append("
Parsed prompts:
") + for p in b["prompts"]: + out.append(f"
• {esc(p)}
") + if b.get("answerItems"): + out.append("
Answer key:
") + for a in b["answerItems"]: + label_str = f"{a['label']}. " if a.get("label") else "" + alts = ", ".join(a["alternates"]) + alt_str = f" (also: {esc(alts)})" if alts else "" + out.append(f"
{esc(label_str)}{a['number']}. {esc(a['answer'])}{alt_str}
") + if b.get("freeform"): + out.append("
(Freeform — answers will vary)
") + for img_src in b.get("image_refs", []): + out.append(img_tag(img_src)) + out.append("
") + + out.append("") + OUT_HTML.write_text("\n".join(out), encoding="utf-8") + print(f"Wrote {OUT_HTML}") + + +if __name__ == "__main__": + render() diff --git a/Conjuga/Scripts/textbook/extract_answers.py b/Conjuga/Scripts/textbook/extract_answers.py new file mode 100644 index 0000000..4367841 --- /dev/null +++ b/Conjuga/Scripts/textbook/extract_answers.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +"""Parse ans.xhtml into structured answers.json. + +Output schema: +{ + "answers": { + "1.1": { + "id": "1.1", + "anchor": "ch1ans1", + "chapter": 1, + "subparts": [ + {"label": null, "items": [ + {"number": 1, "answer": "el", "alternates": []}, + {"number": 2, "answer": "el", "alternates": []}, + ... + ]} + ], + "freeform": false, # true if "Answers will vary" + "raw": "..." # raw text for fallback + }, + "2.4": { # multi-part exercise + "subparts": [ + {"label": "A", "items": [...]}, + {"label": "B", "items": [...]}, + {"label": "C", "items": [...]} + ] + } + } +} +""" + +import json +import re +from pathlib import Path +from bs4 import BeautifulSoup, NavigableString + +ROOT = Path(__file__).resolve().parents[3] / "epub_extract" / "OEBPS" +OUT = Path(__file__).resolve().parent / "answers.json" + +ANSWER_CLASSES = {"answerq", "answerq1", "answerq2", "answerqa"} +EXERCISE_ID_RE = re.compile(r"^([0-9]+)\.([0-9]+)$") +SUBPART_LABEL_RE = re.compile(r"^([A-Z])\b") +NUMBERED_ITEM_RE = re.compile(r"(?:^|\s)(\d+)\.\s+") +FREEFORM_PATTERNS = [ + re.compile(r"answers? will vary", re.IGNORECASE), + re.compile(r"answer will vary", re.IGNORECASE), +] +OR_TOKEN = "{{OR}}" + + +def render_with_or(p) -> str: + """Convert

def render_with_or(p) -> str:
    """Flatten an answer <p> element to plain text, replacing the book's
    small 'OR' marker spans with the OR_TOKEN sentinel so alternate answers
    survive the flattening."""
    soup = BeautifulSoup(str(p), "lxml")
    # Replace "OR" marker spans with the sentinel.
    for span in soup.find_all("span"):
        cls = span.get("class") or []
        if "small" in cls and span.get_text(strip=True).upper() == "OR":
            span.replace_with(f" {OR_TOKEN} ")
    # Drop pagebreak spans.
    for span in soup.find_all("span", attrs={"epub:type": "pagebreak"}):
        span.decompose()
    # Drop emphasis but keep text.
    for tag in soup.find_all(["em", "i", "strong", "b"]):
        tag.unwrap()
    text = soup.get_text(separator=" ", strip=False)
    text = re.sub(r"\s+", " ", text).strip()
    return text


def split_numbered_items(text: str, item_re: "re.Pattern | None" = None,
                         or_token: "str | None" = None) -> "list[dict]":
    """Given '1. el 2. la 3. el ...' return [{'number':1,'answer':'el'}, ...].

    item_re/or_token default to values mirroring the module-level
    NUMBERED_ITEM_RE / OR_TOKEN constants; they are injectable for testing
    and reuse. Alternates are split on the OR sentinel.
    """
    if item_re is None:
        item_re = re.compile(r"(?:^|\s)(\d+)\.\s+")  # mirrors NUMBERED_ITEM_RE
    if or_token is None:
        or_token = "{{OR}}"  # mirrors OR_TOKEN
    # Find positions of "N." tokens; each item's body runs to the next token.
    matches = list(item_re.finditer(text))
    items = []
    for i, m in enumerate(matches):
        num = int(m.group(1))
        start = m.end()
        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
        body = text[start:end].strip().rstrip(".,;")
        # Split alternates on the OR token.
        parts = [p.strip() for p in body.split(or_token) if p.strip()]
        if not parts:
            continue
        items.append({
            "number": num,
            "answer": parts[0],
            "alternates": parts[1:],
        })
    return items


def parse_subpart_label(text: str) -> "tuple[str | None, str]":
    """Try to peel a leading subpart label (A, B, C) from the text.
    Returns (label_or_None, remaining_text)."""
    # Pattern at start: a single capital letter followed by whitespace and a digit.
    m = re.match(r"^([A-Z])\s+(?=\d)", text)
    if m:
        return m.group(1), text[m.end():]
    return None, text


def parse_answer_paragraph(p, exercise_id: str) -> "list[dict]":
    """Convert one answer <p> element into a list of subpart dicts.

    For p.answerq, the text typically starts with the exercise id, then items.
    For p.answerqa, the text starts with a subpart label letter. Entries whose
    body matches a FREEFORM_PATTERNS pattern get freeform=True and no items.
    """
    raw = render_with_or(p)
    # Strip the leading exercise id if present.
    raw = re.sub(rf"^{re.escape(exercise_id)}\s*", "", raw)

    label, body = parse_subpart_label(raw)

    # Detect "Answers will vary"-style entries.
    freeform = any(pat.search(body) for pat in FREEFORM_PATTERNS)
    if freeform:
        return [{"label": label, "items": [], "freeform": True, "raw": body}]

    items = split_numbered_items(body)
    return [{"label": label, "items": items, "freeform": False, "raw": body}]


def main() -> None:
    """Parse ans.xhtml into answers.json (see module docstring for schema)."""
    src = ROOT / "ans.xhtml"
    soup = BeautifulSoup(src.read_text(encoding="utf-8"), "lxml")
    body = soup.find("body")

    answers: dict = {}
    current_chapter = None
    current_exercise_id: "str | None" = None

    for el in body.find_all(["h3", "p"]):
        classes = set(el.get("class") or [])

        # Chapter boundary heading ("Chapter N").
        if el.name == "h3" and "h3b" in classes:
            text = el.get_text(strip=True)
            m = re.search(r"Chapter\s+(\d+)", text)
            if m:
                current_chapter = int(m.group(1))
                current_exercise_id = None
            continue

        if el.name != "p" or not (classes & ANSWER_CLASSES):
            continue

        # Find the exercise-id anchor (only present on p.answerq, not on continuation).
        a = el.find("a", href=True)
        ex_link = None
        if a:
            link_text = a.get_text(strip=True)
            if EXERCISE_ID_RE.match(link_text):
                ex_link = link_text

        if ex_link:
            current_exercise_id = ex_link
            href = a.get("href", "")
            anchor_m = re.search(r"#(ch\d+ans\d+)", href + " " + (a.get("id") or ""))
            anchor = anchor_m.group(1) if anchor_m else (a.get("id") or "")
            # Use the anchor's `id` attr if it's the entry id (e.g. "ch1ans1").
            entry_id = a.get("id") or anchor

            answers[ex_link] = {
                "id": ex_link,
                "anchor": entry_id,
                "chapter": current_chapter,
                "subparts": [],
                "freeform": False,
                "raw": "",
            }
            new_subparts = parse_answer_paragraph(el, ex_link)
            answers[ex_link]["subparts"].extend(new_subparts)
            answers[ex_link]["raw"] = render_with_or(el)
            answers[ex_link]["freeform"] = any(sp["freeform"] for sp in new_subparts)
        else:
            # Continuation paragraph (e.g. subpart B/C) for the current exercise.
            if current_exercise_id and current_exercise_id in answers:
                more = parse_answer_paragraph(el, current_exercise_id)
                answers[current_exercise_id]["subparts"].extend(more)
                if any(sp["freeform"] for sp in more):
                    answers[current_exercise_id]["freeform"] = True

    out = {"answers": answers}
    # encoding="utf-8": ensure_ascii=False keeps accented output; don't rely
    # on the locale default encoding.
    OUT.write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8")

    total = len(answers)
    freeform = sum(1 for v in answers.values() if v["freeform"])
    multipart = sum(1 for v in answers.values() if len(v["subparts"]) > 1)
    total_items = sum(
        len(sp["items"]) for v in answers.values() for sp in v["subparts"]
    )
    with_alternates = sum(
        1 for v in answers.values()
        for sp in v["subparts"] for it in sp["items"]
        if it["alternates"]
    )
    print(f"Exercises with answers: {total}")
    print(f"  freeform: {freeform}")
    print(f"  multi-part (A/B/C): {multipart}")
    print(f"  total numbered items: {total_items}")
    print(f"  items with alternates:{with_alternates}")
    print(f"Wrote {OUT}")


if __name__ == "__main__":
    main()
+ +Output schema: +{ + "chapters": [ + { + "id": "ch1", + "number": 1, + "title": "Nouns, Articles, and Adjectives", + "part": 1, # part 1/2/3 or null + "blocks": [ # ordered content + {"kind": "heading", "level": 3, "text": "..."}, + {"kind": "paragraph", "text": "...", "hasItalic": false}, + {"kind": "key_vocab_header", "title": "Los colores (The colors)"}, + {"kind": "vocab_image", "src": "f0010-03.jpg"}, + { + "kind": "exercise", + "id": "1.1", + "ans_anchor": "ch1ans1", + "instruction": "Write the appropriate...", + "image_refs": ["f0005-02.jpg"] + }, + {"kind": "image", "src": "...", "alt": "..."} + ] + } + ] +} +""" + +import json +import re +from pathlib import Path +from bs4 import BeautifulSoup + +ROOT = Path(__file__).resolve().parents[3] / "epub_extract" / "OEBPS" +OUT = Path(__file__).resolve().parent / "chapters.json" + +# Common icon images embedded in headings — ignore when collecting content images +ICON_IMAGES = {"Common01.jpg", "Common02.jpg", "Common03.jpg", "Common04.jpg", "Common05.jpg"} + +EXERCISE_ID_RE = re.compile(r"Exercise\s+([0-9]+\.[0-9]+)") +ANS_REF_RE = re.compile(r"ch(\d+)ans(\d+)") + + +def clean_text(el) -> str: + """Extract text preserving inline emphasis markers.""" + if el is None: + return "" + # Replace / with markdown-ish *...*, / with **...** + html = str(el) + soup = BeautifulSoup(html, "lxml") + # First: flatten nested emphasis so we don't emit overlapping markers. + # For X, drop the inner em (the bold wrapping + # already carries the emphasis visually). Same for .... + for tag in soup.find_all(["strong", "b"]): + for inner in tag.find_all(["em", "i"]): + inner.unwrap() + for tag in soup.find_all(["em", "i"]): + for inner in tag.find_all(["strong", "b"]): + inner.unwrap() + # Drop ALL inline emphasis. The source has nested/sibling em/strong + # patterns that CommonMark can't reliably parse, causing markers to leak + # into the UI. Plain text renders cleanly everywhere. 
+ for tag in soup.find_all(["em", "i", "strong", "b"]): + tag.unwrap() + # Drop pagebreak spans + for tag in soup.find_all("span", attrs={"epub:type": "pagebreak"}): + tag.decompose() + # Replace
with newline + for br in soup.find_all("br"): + br.replace_with("\n") + # Use a separator so adjacent inline tags don't concatenate without spaces + # (e.g. "Ir and" would otherwise become "Irand"). + text = soup.get_text(separator=" ", strip=False) + # Collapse runs of whitespace first. + text = re.sub(r"\s+", " ", text).strip() + # Strip any stray asterisks that sneak through (e.g. author's literal *). + text = text.replace("*", "") + # De-space punctuation + text = re.sub(r"\s+([,.;:!?])", r"\1", text) + # Tighten brackets that picked up separator-spaces: "( foo )" -> "(foo)" + text = re.sub(r"([(\[])\s+", r"\1", text) + text = re.sub(r"\s+([)\]])", r"\1", text) + # Collapse any double-spaces + text = re.sub(r" +", " ", text).strip() + return text + + +def is_exercise_header(h) -> bool: + """Heading with an Exercise N.N link. + Chapters 1-16 use h3.h3k; chapters 17+ use h4.h4.""" + if h.name not in ("h3", "h4"): + return False + a = h.find("a", href=True) + if a and "ans.xhtml" in a["href"]: + return True + return False + + +def is_key_vocab_header(h) -> bool: + """Heading with 'Key Vocabulary' text (no anchor link to answers).""" + if h.name not in ("h3", "h4"): + return False + text = h.get_text(strip=True) + if "Key Vocabulary" in text and not h.find("a", href=lambda v: v and "ans.xhtml" in v): + return True + return False + + +def extract_image_srcs(parent) -> list: + """Return list of image src attributes, skipping icon images.""" + srcs = [] + for img in parent.find_all("img"): + src = img.get("src", "") + if not src or Path(src).name in ICON_IMAGES: + continue + srcs.append(src) + return srcs + + +def parse_chapter(path: Path) -> "dict | None": + """Parse one chapter file into structured blocks.""" + html = path.read_text(encoding="utf-8") + soup = BeautifulSoup(html, "lxml") + body = soup.find("body") + if body is None: + return None + + # Chapter number + title + number = None + title = "" + h2s = body.find_all("h2") + for h2 in h2s: + classes = 
h2.get("class") or [] + # Use a separator so consecutive inline tags don't concatenate + # (e.g. "Ir and the Future" → "Ir and the Future") + text_with_sep = re.sub(r"\s+", " ", h2.get_text(" ", strip=True)) + # Strip spaces that were inserted before punctuation + text_with_sep = re.sub(r"\s+([,.;:!?])", r"\1", text_with_sep).strip() + if "h2c" in classes and text_with_sep.isdigit(): + number = int(text_with_sep) + # Chapters 1–16 use h2c1; chapters 17+ use h2-c + elif ("h2c1" in classes or "h2-c" in classes) and not title: + title = text_with_sep + if number is None: + # Try id on chapter header (ch1 → 1) + for h2 in h2s: + id_ = h2.get("id", "") + m = re.match(r"ch(\d+)", id_) + if m: + number = int(m.group(1)) + break + + chapter_id = path.stem # ch1, ch2, ... + + # Walk section content in document order + section = body.find("section") or body + blocks: list = [] + pending_instruction = None # holds italic paragraph following an exercise header + + for el in section.descendants: + if el.name is None: + continue + + classes = el.get("class") or [] + + # Skip nested tags already captured via parent processing + # We operate only on direct h2/h3/h4/h5/p elements + if el.name not in ("h2", "h3", "h4", "h5", "p"): + continue + + # Exercise header detection (h3 in ch1-16, h4 in ch17+) + if is_exercise_header(el): + a = el.find("a", href=True) + href = a["href"] if a else "" + m = EXERCISE_ID_RE.search(el.get_text()) + ex_id = m.group(1) if m else "" + anchor_m = ANS_REF_RE.search(href) + ans_anchor = anchor_m.group(0) if anchor_m else "" + blocks.append({ + "kind": "exercise", + "id": ex_id, + "ans_anchor": ans_anchor, + "instruction": "", + "image_refs": [], + "prompts": [] + }) + pending_instruction = blocks[-1] + continue + + # Key Vocabulary header + if is_key_vocab_header(el): + blocks.append({"kind": "key_vocab_header", "title": "Key Vocabulary"}) + pending_instruction = None + continue + + # Other headings + if el.name in ("h2", "h3", "h4", "h5"): + if el.name 
== "h2": + # Skip the chapter-number/chapter-title h2s we already captured + continue + txt = clean_text(el) + if txt: + blocks.append({ + "kind": "heading", + "level": int(el.name[1]), + "text": txt, + }) + pending_instruction = None + continue + + # Paragraphs + if el.name == "p": + imgs = extract_image_srcs(el) + text = clean_text(el) + p_classes = set(classes) + + # Skip pure blank-line class ("nump" = underscore lines under number prompts) + if p_classes & {"nump", "numpa"} and not text: + continue + + # Exercise prompt:

1. Prompt text

+ # Also number1, number2 (continuation numbering), numbera, numbert + if pending_instruction is not None and p_classes & {"number", "number1", "number2", "numbera", "numbert"}: + if text: + pending_instruction["prompts"].append(text) + continue + + # Image container for a pending exercise + if pending_instruction is not None and imgs and not text: + pending_instruction["image_refs"].extend(imgs) + continue + + # Instruction line right after the exercise header + if pending_instruction is not None and text and not imgs and not pending_instruction["instruction"]: + pending_instruction["instruction"] = text + continue + + # While in pending-exercise state, extra text paragraphs are word + # banks / context ("from the following list:" etc) — keep pending alive. + if pending_instruction is not None and text and not imgs: + pending_instruction.setdefault("extra", []).append(text) + continue + + # Paragraphs that contain an image belong to vocab/key-vocab callouts + if imgs and not text: + for src in imgs: + blocks.append({"kind": "vocab_image", "src": src}) + continue + + # Mixed paragraph: image with caption + if imgs and text: + for src in imgs: + blocks.append({"kind": "vocab_image", "src": src}) + blocks.append({"kind": "paragraph", "text": text}) + continue + + # Plain paragraph — outside any exercise + if text: + blocks.append({"kind": "paragraph", "text": text}) + + return { + "id": chapter_id, + "number": number, + "title": title, + "blocks": blocks, + } + + +def assign_parts(chapters: list, part_files: "dict[int, list[int]]") -> None: + """Annotate chapters with part number based on TOC membership.""" + for part_num, chapter_nums in part_files.items(): + for ch in chapters: + if ch["number"] in chapter_nums: + ch["part"] = part_num + for ch in chapters: + ch.setdefault("part", None) + + +def read_part_memberships() -> "dict[int, list[int]]": + """Derive part→chapter grouping from the OPF spine order.""" + opf = next(ROOT.glob("*.opf"), None) + if opf is None: + 
return {} + soup = BeautifulSoup(opf.read_text(encoding="utf-8"), "xml") + memberships: dict = {} + current_part: "int | None" = None + for item in soup.find_all("item"): + href = item.get("href", "") + m_part = re.match(r"part(\d+)\.xhtml", href) + m_ch = re.match(r"ch(\d+)\.xhtml", href) + if m_part: + current_part = int(m_part.group(1)) + memberships.setdefault(current_part, []) + elif m_ch and current_part is not None: + memberships[current_part].append(int(m_ch.group(1))) + # Manifest order tends to match spine order for this book; verify via spine just in case + spine = soup.find("spine") + if spine is not None: + order = [] + for ref in spine.find_all("itemref"): + idref = ref.get("idref") + item = soup.find("item", attrs={"id": idref}) + if item is not None: + order.append(item.get("href", "")) + # Rebuild from spine order + memberships = {} + current_part = None + for href in order: + m_part = re.match(r"part(\d+)\.xhtml", href) + m_ch = re.match(r"ch(\d+)\.xhtml", href) + if m_part: + current_part = int(m_part.group(1)) + memberships.setdefault(current_part, []) + elif m_ch and current_part is not None: + memberships[current_part].append(int(m_ch.group(1))) + return memberships + + +def main() -> None: + chapter_files = sorted( + ROOT.glob("ch*.xhtml"), + key=lambda p: int(re.match(r"ch(\d+)", p.stem).group(1)) + ) + chapters = [] + for path in chapter_files: + ch = parse_chapter(path) + if ch: + chapters.append(ch) + + part_memberships = read_part_memberships() + assign_parts(chapters, part_memberships) + + out = { + "chapters": chapters, + "part_memberships": part_memberships, + } + OUT.write_text(json.dumps(out, ensure_ascii=False, indent=2)) + + # Summary + ex_total = sum(1 for ch in chapters for b in ch["blocks"] if b["kind"] == "exercise") + ex_with_prompts = sum( + 1 for ch in chapters for b in ch["blocks"] + if b["kind"] == "exercise" and b["prompts"] + ) + ex_with_images = sum( + 1 for ch in chapters for b in ch["blocks"] + if b["kind"] == 
"exercise" and b["image_refs"] + ) + ex_empty = sum( + 1 for ch in chapters for b in ch["blocks"] + if b["kind"] == "exercise" and not b["prompts"] and not b["image_refs"] + ) + para_total = sum(1 for ch in chapters for b in ch["blocks"] if b["kind"] == "paragraph") + vocab_img_total = sum(1 for ch in chapters for b in ch["blocks"] if b["kind"] == "vocab_image") + print(f"Chapters: {len(chapters)}") + print(f"Exercises total: {ex_total}") + print(f" with text prompts: {ex_with_prompts}") + print(f" with image prompts: {ex_with_images}") + print(f" empty: {ex_empty}") + print(f"Paragraphs: {para_total}") + print(f"Vocab images: {vocab_img_total}") + print(f"Parts: {part_memberships}") + print(f"Wrote {OUT}") + + +if __name__ == "__main__": + main() diff --git a/Conjuga/Scripts/textbook/extract_pdf_text.py b/Conjuga/Scripts/textbook/extract_pdf_text.py new file mode 100644 index 0000000..c0c74c6 --- /dev/null +++ b/Conjuga/Scripts/textbook/extract_pdf_text.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +"""Extract clean text from the PDF source and map each PDF page to the +book's printed page number. + +Output: pdf_text.json +{ + "pdfPageCount": 806, + "bookPages": { + "3": { "text": "...", "pdfIndex": 29 }, + "4": { ... }, + ... + }, + "unmapped": [list of pdfIndex values with no detectable book page number] +} +""" + +import json +import re +from pathlib import Path +import pypdf + +HERE = Path(__file__).resolve().parent +PDF = next( + Path(__file__).resolve().parents[3].glob("Complete Spanish Step-By-Step*.pdf"), + None, +) +OUT = HERE / "pdf_text.json" + +ROMAN_RE = re.compile(r"^[ivxlcdmIVXLCDM]+$") +# Match a page number on its own line at top/bottom of the page. +# The book uses Arabic numerals for main chapters (e.g., "3") and Roman for front matter. 
+PAGE_NUM_LINE_RE = re.compile(r"^\s*(\d{1,4})\s*$", re.MULTILINE) + + +def detect_book_page(text: str) -> "int | None": + """Find the printed page number from standalone page-number lines at the + top or bottom of a page.""" + lines = [l.strip() for l in text.splitlines() if l.strip()] + # Check first 2 lines and last 2 lines + for candidate in lines[:2] + lines[-2:]: + m = re.match(r"^(\d{1,4})$", candidate) + if m: + return int(m.group(1)) + return None + + +def main() -> None: + if PDF is None: + print("No PDF found in project root") + return + + print(f"Reading {PDF.name}") + reader = pypdf.PdfReader(str(PDF)) + pages = reader.pages + print(f"PDF has {len(pages)} pages") + + by_book_page: dict = {} + unmapped: list = [] + last_seen: "int | None" = None + missed_count = 0 + + for i, page in enumerate(pages): + text = page.extract_text() or "" + book_page = detect_book_page(text) + + if book_page is None: + # Carry forward sequence: if we saw page N last, assume N+1. + if last_seen is not None: + book_page = last_seen + 1 + missed_count += 1 + else: + unmapped.append(i) + continue + last_seen = book_page + # Strip the detected page number from text to clean the output + cleaned = re.sub(r"(?m)^\s*\d{1,4}\s*$", "", text).strip() + by_book_page[str(book_page)] = { + "text": cleaned, + "pdfIndex": i, + } + + out = { + "pdfPageCount": len(pages), + "bookPages": by_book_page, + "unmapped": unmapped, + "inferredPages": missed_count, + } + OUT.write_text(json.dumps(out, ensure_ascii=False)) + print(f"Mapped {len(by_book_page)} book pages; {missed_count} inferred; {len(unmapped)} unmapped") + print(f"Wrote {OUT}") + + +if __name__ == "__main__": + main() diff --git a/Conjuga/Scripts/textbook/fix_vocab.py b/Conjuga/Scripts/textbook/fix_vocab.py new file mode 100644 index 0000000..50c7dd0 --- /dev/null +++ b/Conjuga/Scripts/textbook/fix_vocab.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +"""Apply high-confidence auto-fixes from vocab_validation.json to vocab_cards.json. 
+ +Auto-fix rules (conservative): + 1. If a flagged word has exactly one suggestion AND that suggestion differs by + <= 2 characters AND has the same starting letter (high-confidence character swap). + 2. If a card is detected as reversed (Spanish on EN side, English on ES side), + swap front/back. + +Cards that aren't auto-fixable end up in manual_review.json. +""" + +import json +import re +import unicodedata +from pathlib import Path + +HERE = Path(__file__).resolve().parent +VOCAB = HERE / "vocab_cards.json" +VALIDATION = HERE / "vocab_validation.json" +OUT_VOCAB = HERE / "vocab_cards.json" +OUT_REVIEW = HERE / "manual_review.json" +OUT_QUARANTINE = HERE / "quarantined_cards.json" + + +def _strip_accents(s: str) -> str: + return "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn") + + +def _levenshtein(a: str, b: str) -> int: + if a == b: return 0 + if not a: return len(b) + if not b: return len(a) + prev = list(range(len(b) + 1)) + for i, ca in enumerate(a, 1): + curr = [i] + for j, cb in enumerate(b, 1): + cost = 0 if ca == cb else 1 + curr.append(min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost)) + prev = curr + return prev[-1] + + +SPANISH_ACCENT_RE = re.compile(r"[áéíóúñüÁÉÍÓÚÑÜ¿¡]") +SPANISH_ARTICLES = {"el", "la", "los", "las", "un", "una", "unos", "unas"} +ENGLISH_STARTERS = {"the", "a", "an", "to", "my", "his", "her", "our", "their"} + + +def language_score(s: str) -> "tuple[int, int]": + """Return (es_score, en_score) for a string.""" + es = 0 + en = 0 + if SPANISH_ACCENT_RE.search(s): + es += 3 + words = s.lower().split() + if not words: + return (es, en) + first = words[0].strip(",.;:") + if first in SPANISH_ARTICLES: + es += 2 + if first in ENGLISH_STARTERS: + en += 2 + # Spanish-likely endings on later words + for w in words: + w = w.strip(",.;:") + if not w: continue + if w.endswith(("ción", "sión", "dad", "tud")): + es += 1 + if w.endswith(("ing", "tion", "ness", "ment", "able", "ly")): + en += 1 + return 
(es, en) + + +def is_reversed(front: str, back: str) -> bool: + """True when front looks like English and back looks like Spanish (i.e. swapped).""" + fes, fen = language_score(front) + bes, ben = language_score(back) + # Front English-leaning AND back Spanish-leaning + return fen > fes and bes > ben + + +def best_replacement(word: str, suggestions: list) -> "str | None": + """Pick the one safe correction, or None to leave it alone.""" + if not suggestions: + return None + # Prefer suggestions that share the same first letter + same_initial = [s for s in suggestions if s and word and s[0].lower() == word[0].lower()] + candidates = same_initial or suggestions + # Single best: short edit distance + best = None + best_d = 99 + for s in candidates: + d = _levenshtein(word.lower(), s.lower()) + # Don't apply if the "fix" changes too much + if d == 0: + continue + if d > 2: + continue + if d < best_d: + best = s + best_d = d + return best + + +def side_language_match(text: str, expected_side: str) -> bool: + """Return True when `text` looks like the expected language (es/en). + Guards against applying Spanish spell-fix to English words on a mis-paired card. + """ + es, en = language_score(text) + if expected_side == "es": + return es > en # require clear Spanish signal + if expected_side == "en": + return en >= es # allow equal when text has no strong signal (common for English) + return False + + +def apply_word_fixes(text: str, bad_words: list, expected_side: str) -> "tuple[str, list]": + """Apply word-level corrections inside a string. 
Skips fixes entirely when + the side's actual language doesn't match the dictionary used, to avoid + corrupting mis-paired cards.""" + if not side_language_match(text, expected_side): + return (text, []) + + new_text = text + applied = [] + for bw in bad_words: + word = bw["word"] + sugg = bw["suggestions"] + replacement = best_replacement(word, sugg) + if replacement is None: + continue + # Match standalone word including the (possibly-omitted) trailing period: + # `Uds` in the text should be replaced with `Uds.` even when adjacent to `.`. + escaped = re.escape(word) + # Allow an optional existing period that we'd otherwise duplicate. + pattern = re.compile(rf"(? None: + vocab_data = json.loads(VOCAB.read_text(encoding="utf-8")) + val_data = json.loads(VALIDATION.read_text(encoding="utf-8")) + + # Index validation by (chapter, front, back, sourceImage) for lookup + val_index: dict = {} + for f in val_data["flags"]: + key = (f["chapter"], f["front"], f["back"], f["sourceImage"]) + val_index[key] = f + + # Walk the cards in place + auto_fixed_word = 0 + auto_swapped = 0 + quarantined = 0 + manual_review_cards = [] + quarantined_cards = [] + + for ch in vocab_data["chapters"]: + kept_cards = [] + for card in ch["cards"]: + key = (ch["chapter"], card["front"], card["back"], card.get("sourceImage", "")) + flag = val_index.get(key) + + # 1) Reversal swap (apply even when not flagged) + if is_reversed(card["front"], card["back"]): + card["front"], card["back"] = card["back"], card["front"] + auto_swapped += 1 + # Re-key for any further validation lookup (no-op here) + + if flag is None: + kept_cards.append(card) + continue + + # Quarantine obvious mis-pairs: both sides same language OR language mismatch + fes, fen = language_score(card["front"]) + bes, ben = language_score(card["back"]) + front_lang = "es" if fes > fen else ("en" if fen > fes else "unknown") + back_lang = "es" if bes > ben else ("en" if ben > bes else "unknown") + # A good card has front=es, back=en. 
Anything else when the card is + # flagged is almost always a column-pairing error. + if front_lang != "es" or back_lang != "en": + quarantined_cards.append({ + "chapter": ch["chapter"], + "front": card["front"], + "back": card["back"], + "sourceImage": card.get("sourceImage", ""), + "reason": f"language-mismatch front={front_lang} back={back_lang}", + }) + quarantined += 1 + continue + + # 2) Word-level fixes (language-aware) + new_front, applied_front = apply_word_fixes(card["front"], flag["badFront"], "es") + new_back, applied_back = apply_word_fixes(card["back"], flag["badBack"], "en") + card["front"] = new_front + card["back"] = new_back + auto_fixed_word += len(applied_front) + len(applied_back) + + # If after auto-fix there are STILL flagged words with no + # confident replacement, flag for manual review. + unresolved_front = [ + bw for bw in flag["badFront"] + if not any(a["from"] == bw["word"] for a in applied_front) + and best_replacement(bw["word"], bw["suggestions"]) is None + ] + unresolved_back = [ + bw for bw in flag["badBack"] + if not any(a["from"] == bw["word"] for a in applied_back) + and best_replacement(bw["word"], bw["suggestions"]) is None + ] + if unresolved_front or unresolved_back: + manual_review_cards.append({ + "chapter": ch["chapter"], + "front": card["front"], + "back": card["back"], + "sourceImage": card.get("sourceImage", ""), + "unresolvedFront": unresolved_front, + "unresolvedBack": unresolved_back, + }) + kept_cards.append(card) + + ch["cards"] = kept_cards + + OUT_VOCAB.write_text(json.dumps(vocab_data, ensure_ascii=False, indent=2)) + OUT_REVIEW.write_text(json.dumps({ + "totalManualReview": len(manual_review_cards), + "cards": manual_review_cards, + }, ensure_ascii=False, indent=2)) + + OUT_QUARANTINE.write_text(json.dumps({ + "totalQuarantined": len(quarantined_cards), + "cards": quarantined_cards, + }, ensure_ascii=False, indent=2)) + + total_cards = sum(len(c["cards"]) for c in vocab_data["chapters"]) + print(f"Active cards 
(after quarantine): {total_cards}") + print(f"Auto-swapped (reversed): {auto_swapped}") + print(f"Auto-fixed words: {auto_fixed_word}") + print(f"Quarantined (mis-paired): {quarantined}") + print(f"Cards needing manual review: {len(manual_review_cards)}") + print(f"Wrote {OUT_VOCAB}") + print(f"Wrote {OUT_REVIEW}") + print(f"Wrote {OUT_QUARANTINE}") + + +if __name__ == "__main__": + main() diff --git a/Conjuga/Scripts/textbook/integrate_repaired.py b/Conjuga/Scripts/textbook/integrate_repaired.py new file mode 100644 index 0000000..05602d2 --- /dev/null +++ b/Conjuga/Scripts/textbook/integrate_repaired.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +"""Merge repaired_cards.json into vocab_cards.json. + +Rules: + 1. New pairs are added to their chapter's deck if they don't duplicate an existing pair. + 2. Duplicate detection uses normalize(front)+normalize(back). + 3. Pairs whose back side starts with a Spanish-article or front side starts + with an English article are dropped (pairer got orientation wrong). + 4. Emits integrate_report.json with counts. 
+""" + +import json +import re +import unicodedata +from pathlib import Path + +HERE = Path(__file__).resolve().parent +VOCAB = HERE / "vocab_cards.json" +REPAIRED = HERE / "repaired_cards.json" +QUARANTINED = HERE / "quarantined_cards.json" +OUT = HERE / "vocab_cards.json" +REPORT = HERE / "integrate_report.json" + + +def _strip_accents(s: str) -> str: + return "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn") + + +def norm(s: str) -> str: + return _strip_accents(s.lower()).strip() + + +SPANISH_ACCENT_RE = re.compile(r"[áéíóúñüÁÉÍÓÚÑÜ¿¡]") +SPANISH_ARTICLES = {"el", "la", "los", "las", "un", "una", "unos", "unas"} +ENGLISH_STARTERS = {"the", "a", "an", "to", "my", "his", "her", "our", "their"} + + +def looks_swapped(front: str, back: str) -> bool: + """True if front looks English and back looks Spanish (pair should be swapped).""" + fl = front.lower().split() + bl = back.lower().split() + if not fl or not bl: + return False + f_first = fl[0].strip(",.;:") + b_first = bl[0].strip(",.;:") + front_is_en = f_first in ENGLISH_STARTERS + back_is_es = ( + SPANISH_ACCENT_RE.search(back) is not None + or b_first in SPANISH_ARTICLES + ) + return front_is_en and back_is_es + + +def looks_good(pair: dict) -> bool: + """Basic sanity filter on a repaired pair before it enters the deck.""" + es = pair["es"].strip() + en = pair["en"].strip() + if not es or not en: return False + if len(es) < 2 or len(en) < 2: return False + # Drop if both sides obviously same language (neither has clear orientation) + es_has_accent = SPANISH_ACCENT_RE.search(es) is not None + en_has_accent = SPANISH_ACCENT_RE.search(en) is not None + if en_has_accent and not es_has_accent: + # The "en" side has accents — likely swapped + return False + return True + + +def main() -> None: + vocab = json.loads(VOCAB.read_text(encoding="utf-8")) + repaired = json.loads(REPAIRED.read_text(encoding="utf-8")) + quarantined = json.loads(QUARANTINED.read_text(encoding="utf-8")) + + # 
Map image → chapter (from the quarantine list — all images here belong to the + # chapter they were quarantined from). + image_chapter: dict = {} + for c in quarantined["cards"]: + image_chapter[c["sourceImage"]] = c["chapter"] + + # Build existing key set + existing_keys = set() + chapter_map: dict = {c["chapter"]: c for c in vocab["chapters"]} + for c in vocab["chapters"]: + for card in c["cards"]: + existing_keys.add((c["chapter"], norm(card["front"]), norm(card["back"]))) + + added_per_image: dict = {} + dropped_swapped = 0 + dropped_sanity = 0 + dropped_dup = 0 + + for image_name, data in repaired["byImage"].items(): + ch_num = image_chapter.get(image_name) + if ch_num is None: + # Image not in quarantine list (shouldn't happen, but bail) + continue + deck = chapter_map.setdefault(ch_num, {"chapter": ch_num, "cards": []}) + added = 0 + for p in data.get("pairs", []): + es = p["es"].strip() + en = p["en"].strip() + if looks_swapped(es, en): + es, en = en, es + pair = {"es": es, "en": en} + if not looks_good(pair): + dropped_sanity += 1 + continue + key = (ch_num, norm(pair["es"]), norm(pair["en"])) + if key in existing_keys: + dropped_dup += 1 + continue + existing_keys.add(key) + card = { + "front": pair["es"], + "back": pair["en"], + "chapter": ch_num, + "chapterTitle": "", + "section": "", + "sourceImage": image_name, + } + deck["cards"].append(card) + added += 1 + if added: + added_per_image[image_name] = added + + # If any new chapter was created, ensure ordered insertion + vocab["chapters"] = sorted(chapter_map.values(), key=lambda c: c["chapter"]) + OUT.write_text(json.dumps(vocab, ensure_ascii=False, indent=2)) + + total_added = sum(added_per_image.values()) + report = { + "totalRepairedInput": repaired["totalPairs"], + "added": total_added, + "dropped_duplicate": dropped_dup, + "dropped_sanity": dropped_sanity, + "addedPerImage": added_per_image, + } + REPORT.write_text(json.dumps(report, ensure_ascii=False, indent=2)) + print(f"Repaired pairs in: 
{repaired['totalPairs']}") + print(f"Added to deck: {total_added}") + print(f"Dropped as duplicate: {dropped_dup}") + print(f"Dropped as swapped/bad: {dropped_sanity}") + print(f"Wrote {OUT}") + + +if __name__ == "__main__": + main() diff --git a/Conjuga/Scripts/textbook/merge_pdf_into_book.py b/Conjuga/Scripts/textbook/merge_pdf_into_book.py new file mode 100644 index 0000000..326f2c6 --- /dev/null +++ b/Conjuga/Scripts/textbook/merge_pdf_into_book.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python3 +"""Second-pass extractor: use PDF OCR (from ocr_pdf.swift) as a supplementary +source of clean text, then re-build book.json with PDF-derived content where it +improves on the EPUB's image-based extraction. + +Inputs: + chapters.json — EPUB structural extraction (narrative text + exercise prompts + image refs) + answers.json — EPUB answer key + ocr.json — EPUB image OCR (first pass) + pdf_ocr.json — PDF page-level OCR (this pass, higher DPI + cleaner) + +Outputs: + book.json — merged book used by the app + vocab_cards.json — derived vocabulary flashcards +""" + +import json +import re +import sys +from pathlib import Path + +HERE = Path(__file__).resolve().parent +sys.path.insert(0, str(HERE)) +from build_book import ( # reuse the helpers defined in build_book.py + COURSE_NAME, + build_vocab_cards_for_block, + clean_instruction, + classify_line, + load, +) + +CHAPTERS_JSON = HERE / "chapters.json" +ANSWERS_JSON = HERE / "answers.json" +OCR_JSON = HERE / "ocr.json" +PDF_OCR_JSON = HERE / "pdf_ocr.json" +OUT_BOOK = HERE / "book.json" +OUT_VOCAB = HERE / "vocab_cards.json" + +IMAGE_NAME_RE = re.compile(r"^f(\d{4})-(\d{2})\.jpg$") + + +def extract_book_page(image_src: str) -> "int | None": + m = IMAGE_NAME_RE.match(image_src) + return int(m.group(1)) if m else None + + +def build_pdf_page_index(pdf_ocr: dict) -> "dict[int, dict]": + """Map bookPage → {lines, confidence, pdfIndex}. + + Strategy: use chapter-start alignments as anchors. 
For each chapter N, + anchor[N] = (pdf_idx_where_chapter_starts, book_page_where_chapter_starts). + Between anchors we interpolate page-by-page (pages run sequentially within + a chapter in this textbook's layout). + """ + pages: "dict[int, dict]" = {} + sorted_keys = sorted(pdf_ocr.keys(), key=lambda k: int(k)) + + # --- Detect chapter starts in the PDF OCR --- + pdf_ch_start: "dict[int, int]" = {} + for k in sorted_keys: + entry = pdf_ocr[k] + lines = entry.get("lines", []) + if len(lines) < 2: + continue + first = lines[0].strip() + second = lines[1].strip() + if first.isdigit() and 1 <= int(first) <= 30 and len(second) > 5 and second[0:1].isupper(): + ch = int(first) + if ch not in pdf_ch_start: + pdf_ch_start[ch] = int(k) + + # --- Load EPUB's authoritative book-page starts --- + import re as _re + from bs4 import BeautifulSoup as _BS + epub_root = HERE.parents[2] / "epub_extract" / "OEBPS" + book_ch_start: "dict[int, int]" = {} + for ch in sorted(pdf_ch_start.keys()): + p = epub_root / f"ch{ch}.xhtml" + if not p.exists(): + continue + soup = _BS(p.read_text(encoding="utf-8"), "lxml") + for span in soup.find_all(True): + id_ = span.get("id", "") or "" + m = _re.match(r"page_(\d+)$", id_) + if m: + book_ch_start[ch] = int(m.group(1)) + break + + # Build per-chapter (pdf_anchor, book_anchor, next_pdf_anchor) intervals + anchors = [] # list of (ch, pdf_start, book_start) + for ch in sorted(pdf_ch_start.keys()): + if ch in book_ch_start: + anchors.append((ch, pdf_ch_start[ch], book_ch_start[ch])) + + for i, (ch, pdf_s, book_s) in enumerate(anchors): + next_pdf = anchors[i + 1][1] if i + 1 < len(anchors) else pdf_s + 50 + # Interpolate book page for each pdf index in [pdf_s, next_pdf) + for pdf_idx in range(pdf_s, next_pdf): + book_page = book_s + (pdf_idx - pdf_s) + entry = pdf_ocr.get(str(pdf_idx)) + if entry is None: + continue + if book_page in pages: + continue + pages[book_page] = { + "lines": entry["lines"], + "confidence": entry.get("confidence", 0), + 
"pdfIndex": pdf_idx, + } + return pages + + +def merge_ocr(epub_lines: list, pdf_lines: list) -> list: + """EPUB per-image OCR is our primary (targeted, no prose bleed). PDF + page-level OCR is only used when EPUB is missing. Per-line accent repair + is handled separately via `repair_accents_from_pdf`. + """ + if epub_lines: + return epub_lines + return pdf_lines + + +import unicodedata as _u + +def _strip_accents(s: str) -> str: + return "".join(c for c in _u.normalize("NFD", s) if _u.category(c) != "Mn") + + +def _levenshtein(a: str, b: str) -> int: + if a == b: return 0 + if not a: return len(b) + if not b: return len(a) + prev = list(range(len(b) + 1)) + for i, ca in enumerate(a, 1): + curr = [i] + for j, cb in enumerate(b, 1): + cost = 0 if ca == cb else 1 + curr.append(min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost)) + prev = curr + return prev[-1] + + +def repair_accents_from_pdf(epub_lines: list, pdf_page_lines: list) -> "tuple[list, int]": + """For each EPUB OCR line, find a near-match in the PDF page OCR and + prefer the PDF version. Repairs include: + 1. exact accent/case differences (e.g. 'iglesia' vs 'Iglesia') + 2. single-character OCR errors (e.g. 'the hrother' -> 'the brother') + 3. 
two-character OCR errors when the target is long enough + """ + if not epub_lines or not pdf_page_lines: + return (epub_lines, 0) + # Pre-normalize PDF lines for matching + pdf_cleaned = [p.strip() for p in pdf_page_lines if p.strip()] + pdf_by_stripped: dict = {} + for p in pdf_cleaned: + key = _strip_accents(p.lower()) + pdf_by_stripped.setdefault(key, p) + + out: list = [] + repairs = 0 + for e in epub_lines: + e_stripped = e.strip() + e_key = _strip_accents(e_stripped.lower()) + # Pass 1: exact accent-only difference + if e_key and e_key in pdf_by_stripped and pdf_by_stripped[e_key] != e_stripped: + out.append(pdf_by_stripped[e_key]) + repairs += 1 + continue + # Pass 2: fuzzy — find best PDF line within edit distance 1 or 2 + if len(e_key) >= 4: + max_distance = 1 if len(e_key) < 10 else 2 + best_match = None + best_d = max_distance + 1 + for p in pdf_cleaned: + p_key = _strip_accents(p.lower()) + # Only match lines of similar length + if abs(len(p_key) - len(e_key)) > max_distance: + continue + d = _levenshtein(e_key, p_key) + if d < best_d: + best_d = d + best_match = p + if d == 0: + break + if best_match and best_match != e_stripped and best_d <= max_distance: + out.append(best_match) + repairs += 1 + continue + out.append(e) + return (out, repairs) + + +def vocab_lines_from_pdf_page( + pdf_page_entry: dict, + epub_narrative_lines: set +) -> list: + """Extract likely vocab-table lines from a PDF page's OCR by filtering out + narrative-looking lines (long sentences) and already-known EPUB content.""" + lines = pdf_page_entry.get("lines", []) + out: list = [] + for raw in lines: + line = raw.strip() + if not line: + continue + # Skip lines that look like body prose (too long) + if len(line) > 80: + continue + # Skip narrative we already captured in the EPUB + if line in epub_narrative_lines: + continue + # Skip page-number-only lines + if re.fullmatch(r"\d{1,4}", line): + continue + # Skip standalone chapter headers (e.g. 
"Nouns, Articles, and Adjectives") + out.append(line) + return out + + +def main() -> None: + chapters_data = load(CHAPTERS_JSON) + answers = load(ANSWERS_JSON)["answers"] + epub_ocr = load(OCR_JSON) + pdf_ocr_raw = load(PDF_OCR_JSON) if PDF_OCR_JSON.exists() else {} + pdf_pages = build_pdf_page_index(pdf_ocr_raw) if pdf_ocr_raw else {} + print(f"Mapped {len(pdf_pages)} PDF pages to book page numbers") + + # Build a global set of EPUB narrative lines (for subtraction when pulling vocab) + narrative_set = set() + for ch in chapters_data["chapters"]: + for b in ch["blocks"]: + if b["kind"] == "paragraph" and b.get("text"): + narrative_set.add(b["text"].strip()) + + book_chapters = [] + all_vocab_cards = [] + pdf_hits = 0 + pdf_misses = 0 + merged_pages = 0 + + for ch in chapters_data["chapters"]: + out_blocks = [] + current_section_title = ch["title"] + + for bi, block in enumerate(ch["blocks"]): + k = block["kind"] + + if k == "heading": + current_section_title = block["text"] + out_blocks.append(block) + continue + + if k == "paragraph": + out_blocks.append(block) + continue + + if k == "key_vocab_header": + out_blocks.append(block) + continue + + if k == "vocab_image": + src = block["src"] + epub_entry = epub_ocr.get(src) + epub_lines = epub_entry.get("lines", []) if epub_entry else [] + epub_conf = epub_entry.get("confidence", 0.0) if epub_entry else 0.0 + + book_page = extract_book_page(src) + pdf_entry = pdf_pages.get(book_page) if book_page else None + pdf_lines = pdf_entry["lines"] if pdf_entry else [] + + # Primary: EPUB per-image OCR. Supplementary: PDF page OCR + # used only for accent/diacritic repair where keys match. 
+ if pdf_lines: + pdf_hits += 1 + else: + pdf_misses += 1 + repaired_lines, repairs = repair_accents_from_pdf(epub_lines, pdf_lines) + merged_lines = repaired_lines if repaired_lines else pdf_lines + merged_conf = max(epub_conf, pdf_entry.get("confidence", 0) if pdf_entry else 0.0) + if repairs > 0: + merged_pages += 1 + + derived = build_vocab_cards_for_block( + {"src": src}, + {"lines": merged_lines, "confidence": merged_conf}, + ch, current_section_title, bi + ) + all_vocab_cards.extend(derived) + out_blocks.append({ + "kind": "vocab_table", + "sourceImage": src, + "ocrLines": merged_lines, + "ocrConfidence": merged_conf, + "cardCount": len(derived), + "source": "pdf-repaired" if repairs > 0 else ("epub" if epub_lines else "pdf"), + "bookPage": book_page, + "repairs": repairs, + }) + continue + + if k == "exercise": + ans = answers.get(block["id"]) + # EPUB image OCR (if any image refs) + image_ocr_lines: list = [] + for src in block.get("image_refs", []): + ee = epub_ocr.get(src) + if ee: + image_ocr_lines.extend(ee.get("lines", [])) + # Add PDF-page OCR for that page if available + bp = extract_book_page(src) + if bp and pdf_pages.get(bp): + # Only add lines not already present from EPUB OCR + pdf_lines = pdf_pages[bp]["lines"] + for line in pdf_lines: + line = line.strip() + if not line or line in image_ocr_lines: + continue + if line in narrative_set: + continue + image_ocr_lines.append(line) + + prompts = [p for p in block.get("prompts", []) if p.strip()] + extras = [e for e in block.get("extra", []) if e.strip()] + if not prompts and image_ocr_lines: + # Extract numbered lines from OCR + for line in image_ocr_lines: + m = re.match(r"^(\d+)[.)]\s*(.+)", line.strip()) + if m: + prompts.append(f"{m.group(1)}. 
{m.group(2)}") + + sub = ans["subparts"] if ans else [] + answer_items = [] + for sp in sub: + for it in sp["items"]: + answer_items.append({ + "label": sp["label"], + "number": it["number"], + "answer": it["answer"], + "alternates": it["alternates"], + }) + + out_blocks.append({ + "kind": "exercise", + "id": block["id"], + "ansAnchor": block.get("ans_anchor", ""), + "instruction": clean_instruction(block.get("instruction", "")), + "extra": extras, + "prompts": prompts, + "ocrLines": image_ocr_lines, + "freeform": ans["freeform"] if ans else False, + "answerItems": answer_items, + "answerRaw": ans["raw"] if ans else "", + "answerSubparts": sub, + }) + continue + + out_blocks.append(block) + + book_chapters.append({ + "id": ch["id"], + "number": ch["number"], + "title": ch["title"], + "part": ch.get("part"), + "blocks": out_blocks, + }) + + book = { + "courseName": COURSE_NAME, + "totalChapters": len(book_chapters), + "totalExercises": sum(1 for ch in book_chapters for b in ch["blocks"] if b["kind"] == "exercise"), + "totalVocabTables": sum(1 for ch in book_chapters for b in ch["blocks"] if b["kind"] == "vocab_table"), + "totalVocabCards": len(all_vocab_cards), + "parts": chapters_data.get("part_memberships", {}), + "chapters": book_chapters, + "sources": { + "epub_images_ocr": bool(epub_ocr), + "pdf_pages_ocr": bool(pdf_ocr_raw), + "pdf_pages_mapped": len(pdf_pages), + }, + } + OUT_BOOK.write_text(json.dumps(book, ensure_ascii=False)) + + vocab_by_chapter: dict = {} + for card in all_vocab_cards: + vocab_by_chapter.setdefault(card["chapter"], []).append(card) + OUT_VOCAB.write_text(json.dumps({ + "courseName": COURSE_NAME, + "chapters": [ + {"chapter": n, "cards": cs} + for n, cs in sorted(vocab_by_chapter.items()) + ], + }, ensure_ascii=False, indent=2)) + + print(f"Wrote {OUT_BOOK}") + print(f"Wrote {OUT_VOCAB}") + print(f"Chapters: {book['totalChapters']}") + print(f"Exercises: {book['totalExercises']}") + print(f"Vocab tables: {book['totalVocabTables']}") + 
print(f"Vocab cards (derived): {book['totalVocabCards']}") + print(f"PDF hits vs misses: {pdf_hits} / {pdf_misses}") + + +if __name__ == "__main__": + main() diff --git a/Conjuga/Scripts/textbook/ocr_images.swift b/Conjuga/Scripts/textbook/ocr_images.swift new file mode 100644 index 0000000..65b75ed --- /dev/null +++ b/Conjuga/Scripts/textbook/ocr_images.swift @@ -0,0 +1,110 @@ +#!/usr/bin/env swift +// OCR every JPG in the given input directory using the macOS Vision framework. +// Output: JSON map of { "": { "lines": [...], "confidence": Double } } +// +// Usage: swift ocr_images.swift +// Example: swift ocr_images.swift ../../../epub_extract/OEBPS ocr.json + +import Foundation +import Vision +import AppKit + +guard CommandLine.arguments.count >= 3 else { + print("Usage: swift ocr_images.swift ") + exit(1) +} + +let inputDir = URL(fileURLWithPath: CommandLine.arguments[1]) +let outputURL = URL(fileURLWithPath: CommandLine.arguments[2]) + +// Skip images that are icons/inline markers — not real content +let skipSubstrings = ["Common", "cover", "title"] + +let fileManager = FileManager.default +guard let enumerator = fileManager.enumerator(at: inputDir, includingPropertiesForKeys: nil) else { + print("Could not enumerate \(inputDir.path)") + exit(1) +} + +var jpgs: [URL] = [] +for case let url as URL in enumerator { + let name = url.lastPathComponent + guard name.hasSuffix(".jpg") || name.hasSuffix(".jpeg") || name.hasSuffix(".png") else { continue } + if skipSubstrings.contains(where: { name.contains($0) }) { continue } + jpgs.append(url) +} +jpgs.sort { $0.lastPathComponent < $1.lastPathComponent } +print("Found \(jpgs.count) images to OCR") + +struct OCRResult: Encodable { + var lines: [String] + var confidence: Double +} + +var results: [String: OCRResult] = [:] +let total = jpgs.count +var processed = 0 +let startTime = Date() + +for url in jpgs { + processed += 1 + let name = url.lastPathComponent + + guard let nsImage = NSImage(contentsOf: url), + let 
tiffData = nsImage.tiffRepresentation, + let bitmap = NSBitmapImageRep(data: tiffData), + let cgImage = bitmap.cgImage else { + print("\(processed)/\(total) \(name) — could not load") + continue + } + + let handler = VNImageRequestHandler(cgImage: cgImage, options: [:]) + let request = VNRecognizeTextRequest() + request.recognitionLevel = .accurate + request.recognitionLanguages = ["es-ES", "es", "en-US"] + request.usesLanguageCorrection = true + // For the 2020 book, automaticallyDetectsLanguage helps with mixed content + if #available(macOS 13.0, *) { + request.automaticallyDetectsLanguage = true + } + + do { + try handler.perform([request]) + let observations = request.results ?? [] + var lines: [String] = [] + var totalConfidence: Float = 0 + var count = 0 + for obs in observations { + if let top = obs.topCandidates(1).first { + let s = top.string.trimmingCharacters(in: .whitespaces) + if !s.isEmpty { + lines.append(s) + totalConfidence += top.confidence + count += 1 + } + } + } + let avg = count > 0 ? 
Double(totalConfidence) / Double(count) : 0.0 + results[name] = OCRResult(lines: lines, confidence: avg) + } catch { + print("\(processed)/\(total) \(name) — error: \(error)") + } + + if processed % 50 == 0 || processed == total { + let elapsed = Date().timeIntervalSince(startTime) + let rate = Double(processed) / max(elapsed, 0.001) + let remaining = Double(total - processed) / max(rate, 0.001) + print(String(format: "%d/%d %.1f img/s eta %.0fs", processed, total, rate, remaining)) + } +} + +let encoder = JSONEncoder() +encoder.outputFormatting = [.prettyPrinted, .sortedKeys] +do { + let data = try encoder.encode(results) + try data.write(to: outputURL) + print("Wrote \(results.count) OCR entries to \(outputURL.path)") +} catch { + print("Error writing output: \(error)") + exit(1) +} diff --git a/Conjuga/Scripts/textbook/ocr_pdf.swift b/Conjuga/Scripts/textbook/ocr_pdf.swift new file mode 100644 index 0000000..d85f5cf --- /dev/null +++ b/Conjuga/Scripts/textbook/ocr_pdf.swift @@ -0,0 +1,133 @@ +#!/usr/bin/env swift +// Rasterize each page of a PDF at high DPI and OCR it with Vision. +// Output: { "": { "lines": [...], "confidence": Double, "bookPage": Int? } } +// +// Usage: swift ocr_pdf.swift [dpi] +// Example: swift ocr_pdf.swift "book.pdf" pdf_ocr.json 240 + +import Foundation +import Vision +import AppKit +import Quartz + +guard CommandLine.arguments.count >= 3 else { + print("Usage: swift ocr_pdf.swift [dpi]") + exit(1) +} + +let pdfURL = URL(fileURLWithPath: CommandLine.arguments[1]) +let outputURL = URL(fileURLWithPath: CommandLine.arguments[2]) +let dpi: CGFloat = CommandLine.arguments.count >= 4 ? CGFloat(Double(CommandLine.arguments[3]) ?? 240.0) : 240.0 + +guard let pdfDoc = PDFDocument(url: pdfURL) else { + print("Could not open PDF at \(pdfURL.path)") + exit(1) +} + +let pageCount = pdfDoc.pageCount +print("PDF has \(pageCount) pages. 
Rendering at \(dpi) DPI.") + +struct PageResult: Encodable { + var lines: [String] + var confidence: Double + var bookPage: Int? +} + +var results: [String: PageResult] = [:] +let startTime = Date() + +// Render at scale = dpi / 72 (72 is default PDF DPI) +let scale: CGFloat = dpi / 72.0 + +for i in 0.. 0 ? Double(totalConfidence) / Double(count) : 0.0 + + // Try to detect book page number: a short numeric line in the first + // 3 or last 3 entries (typical page-number placement). + var bookPage: Int? = nil + let candidates = Array(lines.prefix(3)) + Array(lines.suffix(3)) + for c in candidates { + let trimmed = c.trimmingCharacters(in: .whitespaces) + if let n = Int(trimmed), n >= 1 && n <= 1000 { + bookPage = n + break + } + } + + results[String(i)] = PageResult(lines: lines, confidence: avg, bookPage: bookPage) + } catch { + print("\(i): \(error)") + } + + if (i + 1) % 25 == 0 || (i + 1) == pageCount { + let elapsed = Date().timeIntervalSince(startTime) + let rate = Double(i + 1) / max(elapsed, 0.001) + let remaining = Double(pageCount - (i + 1)) / max(rate, 0.001) + print(String(format: "%d/%d %.1f pg/s eta %.0fs", i + 1, pageCount, rate, remaining)) + } +} + +let encoder = JSONEncoder() +encoder.outputFormatting = [.sortedKeys] +do { + let data = try encoder.encode(results) + try data.write(to: outputURL) + print("Wrote \(results.count) pages to \(outputURL.path)") +} catch { + print("Error writing output: \(error)") + exit(1) +} diff --git a/Conjuga/Scripts/textbook/repair_quarantined.swift b/Conjuga/Scripts/textbook/repair_quarantined.swift new file mode 100644 index 0000000..0d8b488 --- /dev/null +++ b/Conjuga/Scripts/textbook/repair_quarantined.swift @@ -0,0 +1,177 @@ +#!/usr/bin/env swift +// Re-OCR the images referenced in quarantined_cards.json using Vision with +// bounding-box info, then pair lines by column position (left = Spanish, +// right = English) instead of by document read order. 
+// +// Output: repaired_cards.json — {"byImage": {"f0142-02.jpg": [{"es":..., "en":...}, ...]}} + +import Foundation +import Vision +import AppKit + +guard CommandLine.arguments.count >= 4 else { + print("Usage: swift repair_quarantined.swift ") + exit(1) +} + +let quarantinedURL = URL(fileURLWithPath: CommandLine.arguments[1]) +let imageDir = URL(fileURLWithPath: CommandLine.arguments[2]) +let outputURL = URL(fileURLWithPath: CommandLine.arguments[3]) + +guard let data = try? Data(contentsOf: quarantinedURL), + let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any], + let cards = json["cards"] as? [[String: Any]] else { + print("Could not load \(quarantinedURL.path)") + exit(1) +} + +var uniqueImages = Set() +for card in cards { + if let src = card["sourceImage"] as? String { uniqueImages.insert(src) } +} +print("Unique images to re-OCR: \(uniqueImages.count)") + +struct RecognizedLine { + let text: String + let cx: CGFloat // center X (normalized 0..1) + let cy: CGFloat // center Y (normalized 0..1 from top) + let confidence: Float +} + +struct Pair: Encodable { + var es: String + var en: String + var confidence: Double +} + +struct ImageResult: Encodable { + var pairs: [Pair] + var lineCount: Int + var strategy: String +} + +func classify(_ s: String) -> String { + // "es" if has accents or starts with ES article; "en" if starts with EN article; else "?" + let lower = s.lowercased() + let accentChars: Set = ["á", "é", "í", "ó", "ú", "ñ", "ü", "¿", "¡"] + if lower.contains(where: { accentChars.contains($0) }) { return "es" } + let first = lower.split(separator: " ").first.map(String.init) ?? "" + let esArticles: Set = ["el", "la", "los", "las", "un", "una", "unos", "unas"] + let enStarters: Set = ["the", "a", "an", "to", "my", "his", "her", "our", "their"] + if esArticles.contains(first) { return "es" } + if enStarters.contains(first) { return "en" } + return "?" 
+} + +func recognizeLines(cgImage: CGImage) -> [RecognizedLine] { + let handler = VNImageRequestHandler(cgImage: cgImage, options: [:]) + let request = VNRecognizeTextRequest() + request.recognitionLevel = .accurate + request.recognitionLanguages = ["es-ES", "es", "en-US"] + request.usesLanguageCorrection = true + if #available(macOS 13.0, *) { + request.automaticallyDetectsLanguage = true + } + do { try handler.perform([request]) } catch { return [] } + var out: [RecognizedLine] = [] + for obs in request.results ?? [] { + guard let top = obs.topCandidates(1).first else { continue } + let s = top.string.trimmingCharacters(in: .whitespaces) + if s.isEmpty { continue } + // Vision's boundingBox is normalized with origin at lower-left + let bb = obs.boundingBox + let cx = bb.origin.x + bb.width / 2 + let cyTop = 1.0 - (bb.origin.y + bb.height / 2) // flip to top-origin + out.append(RecognizedLine(text: s, cx: cx, cy: cyTop, confidence: top.confidence)) + } + return out +} + +/// Pair lines by column position: left column = Spanish, right column = English. +/// Groups lines into rows by Y proximity, then within each row pairs left-right. +func pairByPosition(_ lines: [RecognizedLine]) -> ([Pair], String) { + guard !lines.isEmpty else { return ([], "empty") } + + // Cluster by Y into rows. 
Use adaptive row height: median line gap * 0.6 + let sortedByY = lines.sorted { $0.cy < $1.cy } + var rows: [[RecognizedLine]] = [] + var current: [RecognizedLine] = [] + let rowTol: CGFloat = 0.015 // 1.5% of page height + for l in sortedByY { + if let last = current.last, abs(l.cy - last.cy) > rowTol { + rows.append(current) + current = [l] + } else { + current.append(l) + } + } + if !current.isEmpty { rows.append(current) } + + var pairs: [Pair] = [] + var strategy = "row-pair" + for row in rows { + guard row.count >= 2 else { continue } + // Sort row by X, split at midpoint; left = Spanish, right = English + let sortedX = row.sorted { $0.cx < $1.cx } + // Find gap: pick the biggest x-gap in the row to split + var maxGap: CGFloat = 0 + var splitIdx = 1 + for i in 1.. maxGap { + maxGap = gap + splitIdx = i + } + } + let leftLines = Array(sortedX[0.. \(pairs.count) pairs via \(strategy)") +} + +struct Output: Encodable { + var byImage: [String: ImageResult] + var totalPairs: Int +} +let output = Output( + byImage: results, + totalPairs: results.values.reduce(0) { $0 + $1.pairs.count } +) + +let enc = JSONEncoder() +enc.outputFormatting = [.prettyPrinted, .sortedKeys] +try enc.encode(output).write(to: outputURL) +print("Wrote \(output.totalPairs) repaired pairs to \(outputURL.path)") diff --git a/Conjuga/Scripts/textbook/run_pipeline.sh b/Conjuga/Scripts/textbook/run_pipeline.sh new file mode 100755 index 0000000..51690c9 --- /dev/null +++ b/Conjuga/Scripts/textbook/run_pipeline.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# End-to-end textbook extraction pipeline. +# +# Requires: Python 3 + lxml/beautifulsoup4/pypdf installed. +# macOS for Vision + NSSpellChecker (Swift). +# +# Inputs: EPUB extracted to epub_extract/OEBPS/ and the PDF at project root. +# Outputs: book.json, vocab_cards.json, manual_review.json, quarantined_cards.json + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)"
cd "$ROOT"

# --- Phase 1: raw extraction ---------------------------------------------

echo "=== Phase 1a: parse XHTML chapters ==="
python3 "$SCRIPT_DIR/extract_chapters.py"

echo "=== Phase 1b: parse answer key ==="
python3 "$SCRIPT_DIR/extract_answers.py"

# OCR is slow, so both OCR phases are cached on disk: delete ocr.json /
# pdf_ocr.json to force a re-run.
if [ ! -f "$SCRIPT_DIR/ocr.json" ]; then
  echo "=== Phase 1c: OCR EPUB images (first-time only) ==="
  swift "$SCRIPT_DIR/ocr_images.swift" "$ROOT/epub_extract/OEBPS" "$SCRIPT_DIR/ocr.json"
else
  echo "=== Phase 1c: EPUB OCR already cached ==="
fi

# NOTE(review): `ls … | head -1` takes the first matching PDF only; fine for
# a single book, but extra matches are silently ignored.
PDF_FILE="$(ls "$ROOT"/Complete\ Spanish\ Step-By-Step*.pdf 2>/dev/null | head -1 || true)"
if [ -n "$PDF_FILE" ] && [ ! -f "$SCRIPT_DIR/pdf_ocr.json" ]; then
  echo "=== Phase 1d: OCR PDF pages (first-time only) ==="
  swift "$SCRIPT_DIR/ocr_pdf.swift" "$PDF_FILE" "$SCRIPT_DIR/pdf_ocr.json" 240
fi

echo "=== Phase 1e: merge into book.json ==="
python3 "$SCRIPT_DIR/merge_pdf_into_book.py"

echo "=== Phase 2: spell-check validation ==="
swift "$SCRIPT_DIR/validate_vocab.swift" "$SCRIPT_DIR/vocab_cards.json" "$SCRIPT_DIR/vocab_validation.json"

echo "=== Phase 3: auto-fix + quarantine pass 1 ==="
python3 "$SCRIPT_DIR/fix_vocab.py"

# A second validate+fix round confirms the fixer converged (no new flags).
echo "=== Phase 3: auto-fix + quarantine pass 2 (convergence) ==="
swift "$SCRIPT_DIR/validate_vocab.swift" "$SCRIPT_DIR/vocab_cards.json" "$SCRIPT_DIR/vocab_validation.json"
python3 "$SCRIPT_DIR/fix_vocab.py"

echo ""
echo "=== Copy to app bundle ==="
cp "$SCRIPT_DIR/book.json" "$ROOT/Conjuga/Conjuga/textbook_data.json"
cp "$SCRIPT_DIR/vocab_cards.json" "$ROOT/Conjuga/Conjuga/textbook_vocab.json"
ls -lh "$ROOT/Conjuga/Conjuga/textbook_"*.json
echo ""
echo "Done. Bump textbookDataVersion in DataLoader.swift to trigger re-seed."
diff --git a/Conjuga/Scripts/textbook/validate_vocab.swift b/Conjuga/Scripts/textbook/validate_vocab.swift new file mode 100644 index 0000000..7ede53d --- /dev/null +++ b/Conjuga/Scripts/textbook/validate_vocab.swift @@ -0,0 +1,156 @@
#!/usr/bin/env swift
// Validate every Spanish/English word in vocab_cards.json using NSSpellChecker.
// For each flagged word, produce up to 3 candidate corrections.
//
// Usage: swift validate_vocab.swift <vocab_cards.json> <vocab_validation.json>

import Foundation
import AppKit

guard CommandLine.arguments.count >= 3 else {
    print("Usage: swift validate_vocab.swift ")
    exit(1)
}

let inputURL = URL(fileURLWithPath: CommandLine.arguments[1])
let outputURL = URL(fileURLWithPath: CommandLine.arguments[2])

// Input shape: { "chapters": [ { "chapter": Int, "cards": [ {front, back, sourceImage} ] } ] }
guard let data = try? Data(contentsOf: inputURL),
      let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
      let chapters = json["chapters"] as? [[String: Any]] else {
    print("Could not load \(inputURL.path)")
    exit(1)
}

// NSSpellChecker requires AppKit; this script is macOS-only by design.
let checker = NSSpellChecker.shared

// Tokenize — only letter runs (Unicode aware for Spanish accents).
// Digits and punctuation act as separators and never reach the spell checker.
func tokens(_ s: String) -> [String] {
    let letters = CharacterSet.letters
    return s.unicodeScalars
        .split { !letters.contains($0) }
        .map { String(String.UnicodeScalarView($0)) }
        .filter { !$0.isEmpty }
}

// Minimal stopword sets — high-frequency function words that the system
// dictionaries occasionally flag; skipping them cuts false positives.
let stopES: Set<String> = [
    "el", "la", "los", "las", "un", "una", "unos", "unas", "del", "al", "de",
    "a", "en", "y", "o", "que", "no", "se", "con", "por", "para", "lo", "le",
    "su", "mi", "tu", "yo", "te", "me", "es", "son", "está", "están",
]
let stopEN: Set<String> = [
    "the", "a", "an", "to", "of", "in", "and", "or", "is", "are", "was", "were",
    "be", "been", "my", "his", "her", "our", "their", "your",
]

// Spell-check one word in the given language.
// Returns nil if the word is acceptable (short, stopword, contains a digit,
// or passes the checker), else up to 3 candidate corrections.
func checkWord(_ w: String, lang: String, stop: Set<String>) -> [String]? {
    // Return nil if word is OK, else list of candidate corrections.
    if w.count < 2 { return nil }
    if stop.contains(w.lowercased()) { return nil }
    if w.rangeOfCharacter(from: .decimalDigits) != nil { return nil }

    let range = checker.checkSpelling(
        of: w,
        startingAt: 0,
        language: lang,
        wrap: false,
        inSpellDocumentWithTag: 0,
        wordCount: nil
    )
    // NSSpellChecker returns {NSNotFound, 0} when no misspelling is found;
    // any real range means the word was flagged.
    if range.location == NSNotFound || range.length == 0 { return nil }

    // NSRange wants UTF-16 length, hence the NSString bridge.
    let guesses = checker.guesses(
        forWordRange: NSRange(location: 0, length: (w as NSString).length),
        in: w,
        language: lang,
        inSpellDocumentWithTag: 0
    ) ?? []
    return Array(guesses.prefix(3))
}

// One flagged card: the card text plus the words that failed on each side.
struct Flag: Encodable {
    var chapter: Int
    var front: String
    var back: String
    var badFront: [BadWord]
    var badBack: [BadWord]
    var sourceImage: String
}
struct BadWord: Encodable {
    var word: String
    var suggestions: [String]
    var side: String // "es" or "en"
}

var flags: [Flag] = []
var totalCards = 0
var totalBadES = 0
var totalBadEN = 0

// Walk every card: front is checked as Spanish, back as English.
for ch in chapters {
    guard let chNum = ch["chapter"] as? Int,
          let cards = ch["cards"] as? [[String: Any]] else { continue }
    for card in cards {
        totalCards += 1
        let front = (card["front"] as? String) ?? ""
        let back = (card["back"] as? String) ?? ""
        let img = (card["sourceImage"] as? String) ?? ""

        var badFront: [BadWord] = []
        for w in tokens(front) {
            if let sugg = checkWord(w, lang: "es", stop: stopES) {
                badFront.append(BadWord(word: w, suggestions: sugg, side: "es"))
                totalBadES += 1
            }
        }
        var badBack: [BadWord] = []
        for w in tokens(back) {
            if let sugg = checkWord(w, lang: "en", stop: stopEN) {
                badBack.append(BadWord(word: w, suggestions: sugg, side: "en"))
                totalBadEN += 1
            }
        }
        // A card is flagged when either side has at least one bad word.
        if !badFront.isEmpty || !badBack.isEmpty {
            flags.append(Flag(
                chapter: chNum,
                front: front,
                back: back,
                badFront: badFront,
                badBack: badBack,
                sourceImage: img
            ))
        }
    }
}

// Summary report written as JSON for the downstream fix_vocab.py pass.
struct Report: Encodable {
    var totalCards: Int
    var flaggedCards: Int
    var flaggedSpanishWords: Int
    var flaggedEnglishWords: Int
    var flags: [Flag]
}
let report = Report(
    totalCards: totalCards,
    flaggedCards: flags.count,
    flaggedSpanishWords: totalBadES,
    flaggedEnglishWords: totalBadEN,
    flags: flags
)

let encoder = JSONEncoder()
encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
do {
    let data =
try encoder.encode(report)
    try data.write(to: outputURL)
    print("Cards: \(totalCards)")
    // Guard the divide-by-zero for empty input (the old interpolation printed
    // "nan%" when totalCards == 0) and round to one decimal place instead of
    // dumping an unrounded Double into the log line.
    let flaggedPct = totalCards > 0 ? Double(flags.count) / Double(totalCards) * 100.0 : 0.0
    print(String(format: "Flagged cards: %d (%.1f%%)", flags.count, flaggedPct))
    print("Flagged ES words: \(totalBadES)")
    print("Flagged EN words: \(totalBadEN)")
    print("Wrote \(outputURL.path)")
} catch {
    print("Error writing output: \(error)")
    exit(1)
}
diff --git a/Conjuga/SharedModels/Sources/SharedModels/AnswerGrader.swift b/Conjuga/SharedModels/Sources/SharedModels/AnswerGrader.swift new file mode 100644 index 0000000..294acfb --- /dev/null +++ b/Conjuga/SharedModels/Sources/SharedModels/AnswerGrader.swift @@ -0,0 +1,68 @@
import Foundation

/// On-device deterministic answer grader with partial-credit support.
/// No network calls, no API keys. Handles accent stripping and single-char typos.
public enum AnswerGrader {

    /// Evaluate `userText` against the canonical answer (plus alternates).
    ///
    /// - Parameters:
    ///   - userText: Raw user input; normalized before comparison.
    ///   - canonical: The expected answer.
    ///   - alternates: Additional accepted answers.
    /// - Returns: `.correct` for an exact/normalized match, `.close` for an
    ///   accent-stripped match or Levenshtein distance 1, `.wrong` otherwise.
    public static func grade(userText: String, canonical: String, alternates: [String] = []) -> TextbookGrade {
        let normalizedUser = normalize(userText)
        if normalizedUser.isEmpty { return .wrong }

        // Normalize every candidate exactly once; the original re-normalized
        // each candidate in all three comparison tiers.
        let normalizedCandidates = ([canonical] + alternates).map(normalize)

        if normalizedCandidates.contains(normalizedUser) { return .correct }

        // Tier 2: same word modulo diacritics ("esta" vs "está").
        let strippedUser = stripAccents(normalizedUser)
        if normalizedCandidates.contains(where: { stripAccents($0) == strippedUser }) {
            return .close
        }
        // Tier 3: one edit away (typo tolerance).
        if normalizedCandidates.contains(where: { levenshtein(normalizedUser, $0) <= 1 }) {
            return .close
        }
        return .wrong
    }

    /// Lowercase, collapse internal whitespace, and strip leading/trailing
    /// punctuation and whitespace.
    public static func normalize(_ s: String) -> String {
        let lowered = s.lowercased(with: Locale(identifier: "es"))
        let collapsed = lowered.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
        let punct = CharacterSet(charactersIn: ".,;:!?¿¡\"'()[]{}—–-")
        // Trim punctuation and whitespace as one set: the old two-pass trim
        // (whitespace first, punctuation second) left a trailing space behind
        // for inputs like "el libro ." and downgraded them to .close.
        return collapsed.trimmingCharacters(in: punct.union(.whitespacesAndNewlines))
    }

    /// Remove combining diacritics (á→a, ñ→n, ü→u).
    public static func stripAccents(_ s: String) -> String {
        s.folding(options: .diacriticInsensitive, locale: Locale(identifier: "en"))
    }

    /// Standard Levenshtein edit distance (two-row dynamic programming,
    /// O(len(a)·len(b)) time, O(len(b)) space).
    public static func levenshtein(_ a: String, _ b: String) -> Int {
        if a == b { return 0 }
        if a.isEmpty { return b.count }
        if b.isEmpty { return a.count }
        let aa = Array(a)
        let bb = Array(b)
        var prev = Array(0...bb.count)
        var curr = Array(repeating: 0, count: bb.count + 1)
        for i in 1...aa.count {
            curr[0] = i
            for j in 1...bb.count {
                let cost = aa[i - 1] == bb[j - 1] ? 0 : 1
                curr[j] = min(
                    prev[j] + 1,        // deletion
                    curr[j - 1] + 1,    // insertion
                    prev[j - 1] + cost  // substitution
                )
            }
            swap(&prev, &curr)
        }
        // After the final swap, `prev` holds the last computed row.
        return prev[bb.count]
    }
}
diff --git a/Conjuga/SharedModels/Sources/SharedModels/TextbookChapter.swift b/Conjuga/SharedModels/Sources/SharedModels/TextbookChapter.swift new file mode 100644 index 0000000..b5e9607 --- /dev/null +++ b/Conjuga/SharedModels/Sources/SharedModels/TextbookChapter.swift @@ -0,0 +1,86 @@
import Foundation
import SwiftData

/// One chapter of the textbook. Ordered content blocks are stored as JSON in `bodyJSON`
/// (encoded [TextbookBlock]) since SwiftData @Model doesn't support heterogeneous arrays.
@Model
public final class TextbookChapter {
    @Attribute(.unique) public var id: String = ""
    public var number: Int = 0
    public var title: String = ""
    public var part: Int = 0 // 0 = no part assignment
    public var courseName: String = ""
    // Encoded [TextbookBlock]; decode via blocks().
    public var bodyJSON: Data = Data()
    // Denormalized counts so list views can show totals without decoding bodyJSON.
    public var exerciseCount: Int = 0
    public var vocabTableCount: Int = 0

    public init(
        id: String,
        number: Int,
        title: String,
        part: Int,
        courseName: String,
        bodyJSON: Data,
        exerciseCount: Int,
        vocabTableCount: Int
    ) {
        self.id = id
        self.number = number
        self.title = title
        self.part = part
        self.courseName = courseName
        self.bodyJSON = bodyJSON
        self.exerciseCount = exerciseCount
        self.vocabTableCount = vocabTableCount
    }

    /// Decode the ordered content blocks; empty array if bodyJSON is unreadable.
    public func blocks() -> [TextbookBlock] {
        (try? JSONDecoder().decode([TextbookBlock].self, from: bodyJSON)) ?? []
    }
}

/// One content block within a chapter. Polymorphic via `kind`: only the
/// optional fields relevant to that kind are populated.
public struct TextbookBlock: Codable, Identifiable, Sendable {
    public enum Kind: String, Codable, Sendable {
        case heading
        case paragraph
        case keyVocabHeader = "key_vocab_header"
        case vocabTable = "vocab_table"
        case exercise
    }

    // Stable within a chapter: kind + position.
    public var id: String { "\(kind.rawValue):\(index)" }
    public var index: Int
    public var kind: Kind

    // heading
    public var level: Int?
    // heading / paragraph
    public var text: String?

    // vocab_table
    public var sourceImage: String?
    public var ocrLines: [String]?
    public var ocrConfidence: Double?
    public var cards: [TextbookVocabPair]?

    // exercise
    public var exerciseId: String?
    public var instruction: String?
    public var extra: [String]?
    public var prompts: [String]?
    public var answerItems: [TextbookAnswerItem]?
    public var freeform: Bool?
}

/// A single front(Spanish)/back(English) flashcard pair from a vocab table.
public struct TextbookVocabPair: Codable, Sendable {
    public var front: String
    public var back: String
}

/// One numbered answer within an exercise, with accepted alternates.
public struct TextbookAnswerItem: Codable, Sendable {
    public var label: String? // A/B/C subpart label or nil
    public var number: Int
    public var answer: String
    public var alternates: [String]
}
diff --git a/Conjuga/SharedModels/Sources/SharedModels/TextbookExerciseAttempt.swift b/Conjuga/SharedModels/Sources/SharedModels/TextbookExerciseAttempt.swift new file mode 100644 index 0000000..d9caab7 --- /dev/null +++ b/Conjuga/SharedModels/Sources/SharedModels/TextbookExerciseAttempt.swift @@ -0,0 +1,83 @@
import Foundation
import SwiftData

/// Per-prompt grading state recorded after the user submits an exercise.
public enum TextbookGrade: Int, Codable, Sendable {
    case wrong = 0
    case close = 1
    case correct = 2
}

/// User's attempt for one exercise. Stored in the cloud container so progress
/// syncs across devices.
@Model
public final class TextbookExerciseAttempt {
    /// Deterministic id: "<courseName>|<exerciseId>". CloudKit-synced models can't
    /// use @Attribute(.unique); code that writes attempts must fetch-or-create.
    public var id: String = ""
    public var courseName: String = ""
    public var chapterNumber: Int = 0
    public var exerciseId: String = ""

    /// JSON-encoded per-prompt state array.
    /// Each entry: { "number": Int, "userText": String, "grade": Int }
    public var stateJSON: Data = Data()

    public var lastAttemptAt: Date = Date()
    // Denormalized tallies kept in sync by setPromptStates(_:) so progress
    // views never need to decode stateJSON.
    public var correctCount: Int = 0
    public var closeCount: Int = 0
    public var wrongCount: Int = 0
    public var totalCount: Int = 0

    public init(
        id: String,
        courseName: String,
        chapterNumber: Int,
        exerciseId: String,
        stateJSON: Data = Data(),
        lastAttemptAt: Date = Date(),
        correctCount: Int = 0,
        closeCount: Int = 0,
        wrongCount: Int = 0,
        totalCount: Int = 0
    ) {
        self.id = id
        self.courseName = courseName
        self.chapterNumber = chapterNumber
        self.exerciseId = exerciseId
        self.stateJSON = stateJSON
        self.lastAttemptAt = lastAttemptAt
        self.correctCount = correctCount
        self.closeCount = closeCount
        self.wrongCount = wrongCount
        self.totalCount = totalCount
    }

    /// Decode the stored per-prompt states; empty array if never attempted
    /// (or if stateJSON is unreadable).
    public func promptStates() -> [TextbookPromptState] {
        (try? JSONDecoder().decode([TextbookPromptState].self, from: stateJSON)) ?? []
    }

    /// Persist new per-prompt states and refresh the denormalized counters.
    /// Encoding these Codable structs cannot realistically fail; the `?? Data()`
    /// fallback is belt-and-braces.
    public func setPromptStates(_ states: [TextbookPromptState]) {
        stateJSON = (try? JSONEncoder().encode(states)) ?? Data()
        correctCount = states.filter { $0.grade == .correct }.count
        closeCount = states.filter { $0.grade == .close }.count
        wrongCount = states.filter { $0.grade == .wrong }.count
        totalCount = states.count
    }

    /// Builds the deterministic id used for fetch-or-create lookups.
    public static func attemptId(courseName: String, exerciseId: String) -> String {
        "\(courseName)|\(exerciseId)"
    }
}

/// Grading state for one numbered prompt within an exercise.
public struct TextbookPromptState: Codable, Sendable {
    public var number: Int
    public var userText: String
    public var grade: TextbookGrade

    public init(number: Int, userText: String, grade: TextbookGrade) {
        self.number = number
        self.userText = userText
        self.grade = grade
    }
}
diff --git a/Conjuga/SharedModels/Tests/SharedModelsTests/AnswerGraderTests.swift b/Conjuga/SharedModels/Tests/SharedModelsTests/AnswerGraderTests.swift new file mode 100644 index 0000000..2b0fea5 --- /dev/null +++ b/Conjuga/SharedModels/Tests/SharedModelsTests/AnswerGraderTests.swift @@ -0,0 +1,80 @@
import Testing
@testable import SharedModels

@Suite("AnswerGrader")
struct AnswerGraderTests {

    @Test("exact match is correct")
    func exact() {
        #expect(AnswerGrader.grade(userText: "tengo", canonical: "tengo") == .correct)
        #expect(AnswerGrader.grade(userText: "Tengo", canonical: "tengo") == .correct)
        #expect(AnswerGrader.grade(userText: " tengo ", canonical: "tengo") == .correct)
    }

    @Test("missing accent is close")
    func missingAccent() {
        #expect(AnswerGrader.grade(userText: "esta", canonical: "está") == .close)
        #expect(AnswerGrader.grade(userText: "nino", canonical: "niño") == .close)
        #expect(AnswerGrader.grade(userText: "asi", canonical: "así") == .close)
    }

    @Test("single-char typo is close")
    func singleCharTypo() {
        // deletion
        #expect(AnswerGrader.grade(userText: "tngo", canonical: "tengo") == .close)
        // insertion
        #expect(AnswerGrader.grade(userText: "tengoo", canonical: "tengo") == .close)
        // substitution
        #expect(AnswerGrader.grade(userText: "tengu", canonical: "tengo") == .close)
    }

    @Test("two-char typo is wrong")
    func twoCharTypo() {
        // Two edits exceed the distance-1 tolerance.
        #expect(AnswerGrader.grade(userText: "tngu", canonical: "tengo") == .wrong)
    }

    @Test("empty is wrong")
    func empty() {
        #expect(AnswerGrader.grade(userText: "", canonical: "tengo") == .wrong)
        #expect(AnswerGrader.grade(userText: " ", canonical: "tengo") == .wrong)
    }

    @Test("alternates accepted")
    func alternates() {
        #expect(AnswerGrader.grade(userText: "flaca", canonical: "delgada", alternates: ["flaca"]) == .correct)
        #expect(AnswerGrader.grade(userText: "flacca", canonical: "delgada", alternates: ["flaca"]) == .close)
    }

    @Test("punctuation stripped")
    func punctuation() {
        #expect(AnswerGrader.grade(userText: "el libro.", canonical: "el libro") == .correct)
        #expect(AnswerGrader.grade(userText: "¿dónde?", canonical: "dónde") == .correct)
    }

    @Test("very different text is wrong")
    func wrong() {
        #expect(AnswerGrader.grade(userText: "hola", canonical: "tengo") == .wrong)
        #expect(AnswerGrader.grade(userText: "casa", canonical: "perro") == .wrong)
    }

    @Test("normalize produces expected output")
    func normalize() {
        #expect(AnswerGrader.normalize("  Hola  ") == "hola")
        #expect(AnswerGrader.normalize("ABC!") == "abc")
    }

    @Test("stripAccents handles common Spanish diacritics")
    func stripAccents() {
        #expect(AnswerGrader.stripAccents("niño") == "nino")
        #expect(AnswerGrader.stripAccents("está") == "esta")
        #expect(AnswerGrader.stripAccents("güero") == "guero")
    }

    @Test("levenshtein computes edit distance")
    func levenshtein() {
        #expect(AnswerGrader.levenshtein("kitten", "sitting") == 3)
        #expect(AnswerGrader.levenshtein("flaw", "lawn") == 2)
        #expect(AnswerGrader.levenshtein("abc", "abc") == 0)
        #expect(AnswerGrader.levenshtein("", "abc") == 3)
    }
}