// backfill-completion-columns is a one-time migration script that retroactively // sets the completed_from_column field on existing TaskCompletion records. // // Strategy: // - One-time tasks: compare completed_at vs due_date to determine if overdue/due_soon/upcoming // - Recurring tasks: work backwards from next_due_date using frequency interval to estimate // the due date that was active at completion time // - Tasks with no due date: default to "completed_tasks" (no way to determine) // // Usage: // go run ./cmd/backfill-completion-columns --dry-run # Preview changes // go run ./cmd/backfill-completion-columns # Apply changes // // Safe to run multiple times — only updates rows still set to the default "completed_tasks". package main import ( "flag" "os" "sort" "time" "github.com/rs/zerolog" "github.com/rs/zerolog/log" "gorm.io/driver/postgres" "gorm.io/gorm" "gorm.io/gorm/logger" "github.com/treytartt/honeydue-api/internal/config" ) const daysThreshold = 30 // "due soon" window in days, matches app default func main() { dryRun := flag.Bool("dry-run", false, "Preview changes without writing to database") flag.Parse() log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339}) cfg, err := config.Load() if err != nil { log.Fatal().Err(err).Msg("Failed to load config") } dsn := cfg.Database.DSN() db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{ Logger: logger.Default.LogMode(logger.Silent), }) if err != nil { log.Fatal().Err(err).Msg("Failed to connect to database") } log.Info().Bool("dry_run", *dryRun).Msg("Starting backfill of completed_from_column") // Query all completions that still have the default value type completionRow struct { ID uint TaskID uint CompletedAt time.Time } var completions []completionRow err = db.Table("task_taskcompletion"). Select("id, task_id, completed_at"). Where("completed_from_column = ? OR completed_from_column IS NULL OR completed_from_column = ''", "completed_tasks"). Order("task_id, completed_at ASC"). Scan(&completions).Error if err != nil { log.Fatal().Err(err).Msg("Failed to query completions") } log.Info().Int("total_completions", len(completions)).Msg("Found completions to backfill") if len(completions) == 0 { log.Info().Msg("Nothing to backfill") return } // Load all tasks referenced by these completions taskIDs := make(map[uint]bool) for _, c := range completions { taskIDs[c.TaskID] = true } taskIDList := make([]uint, 0, len(taskIDs)) for id := range taskIDs { taskIDList = append(taskIDList, id) } type taskRow struct { ID uint DueDate *time.Time NextDueDate *time.Time FrequencyID *uint CustomIntervalDays *int IsCancelled bool IsArchived bool } var tasks []taskRow err = db.Table("task_task"). Select("id, due_date, next_due_date, frequency_id, custom_interval_days, is_cancelled, is_archived"). Where("id IN ?", taskIDList). Scan(&tasks).Error if err != nil { log.Fatal().Err(err).Msg("Failed to query tasks") } taskMap := make(map[uint]*taskRow, len(tasks)) for i := range tasks { taskMap[tasks[i].ID] = &tasks[i] } // Load frequency intervals type freqRow struct { ID uint Name string Days *int } var frequencies []freqRow err = db.Table("task_taskfrequency").Select("id, name, days").Scan(&frequencies).Error if err != nil { log.Fatal().Err(err).Msg("Failed to query frequencies") } freqMap := make(map[uint]*freqRow, len(frequencies)) for i := range frequencies { freqMap[frequencies[i].ID] = &frequencies[i] } // Group completions by task for recurring task logic taskCompletions := make(map[uint][]completionRow) for _, c := range completions { taskCompletions[c.TaskID] = append(taskCompletions[c.TaskID], c) } // Sort each group by completed_at ascending for taskID := range taskCompletions { sort.Slice(taskCompletions[taskID], func(i, j int) bool { return taskCompletions[taskID][i].CompletedAt.Before(taskCompletions[taskID][j].CompletedAt) }) } // Determine the column for each completion type updateEntry struct { completionID uint column string } var updates []updateEntry stats := map[string]int{ "overdue_tasks": 0, "due_soon_tasks": 0, "upcoming_tasks": 0, "in_progress_tasks": 0, "completed_tasks": 0, "no_due_date": 0, } for taskID, comps := range taskCompletions { task, ok := taskMap[taskID] if !ok { continue } // Get interval days for this task var intervalDays int isRecurring := false if task.FrequencyID != nil { if freq, ok := freqMap[*task.FrequencyID]; ok { if freq.Name == "Custom" && task.CustomIntervalDays != nil { intervalDays = *task.CustomIntervalDays } else if freq.Days != nil { intervalDays = *freq.Days } } } isRecurring = intervalDays > 0 if !isRecurring { // One-time task: compare each completion against due_date effectiveDate := task.DueDate if effectiveDate == nil { // No due date — can't determine, keep default for _, c := range comps { stats["no_due_date"]++ updates = append(updates, updateEntry{c.ID, "completed_tasks"}) } continue } for _, c := range comps { column := classifyCompletion(c.CompletedAt, *effectiveDate, daysThreshold) stats[column]++ updates = append(updates, updateEntry{c.ID, column}) } } else { // Recurring task: reconstruct due dates by walking backwards from current state // // Logic: For each completion (oldest first), we estimate what the due date was. // - The original DueDate is the first due date // - After each completion, the next due date = completedAt + intervalDays // // So: effectiveDate starts at DueDate, then advances per completion. effectiveDate := task.DueDate if effectiveDate == nil && task.NextDueDate != nil { // If no original DueDate but NextDueDate exists, work backwards // from the most recent completion est := task.NextDueDate.AddDate(0, 0, -intervalDays*len(comps)) effectiveDate = &est } if effectiveDate == nil { for _, c := range comps { stats["no_due_date"]++ updates = append(updates, updateEntry{c.ID, "completed_tasks"}) } continue } currentDueDate := *effectiveDate for _, c := range comps { column := classifyCompletion(c.CompletedAt, currentDueDate, daysThreshold) stats[column]++ updates = append(updates, updateEntry{c.ID, column}) // Advance due date for next completion currentDueDate = c.CompletedAt.AddDate(0, 0, intervalDays) } } } // Log stats log.Info(). Int("overdue", stats["overdue_tasks"]). Int("due_soon", stats["due_soon_tasks"]). Int("upcoming", stats["upcoming_tasks"]). Int("no_due_date", stats["no_due_date"]). Int("total_updates", len(updates)). Msg("Classification results") if *dryRun { log.Info().Msg("Dry run complete — no changes written") return } // Apply updates in batches batchSize := 500 updated := 0 for i := 0; i < len(updates); i += batchSize { end := i + batchSize if end > len(updates) { end = len(updates) } batch := updates[i:end] err := db.Transaction(func(tx *gorm.DB) error { for _, u := range batch { if err := tx.Table("task_taskcompletion"). Where("id = ?", u.completionID). Update("completed_from_column", u.column).Error; err != nil { return err } } return nil }) if err != nil { log.Fatal().Err(err).Int("batch_start", i).Msg("Failed to update batch") } updated += len(batch) log.Info().Int("updated", updated).Int("total", len(updates)).Msg("Progress") } log.Info().Int("total_updated", updated).Msg("Backfill complete") } // classifyCompletion determines what kanban column a task was in when completed. func classifyCompletion(completedAt time.Time, dueDate time.Time, threshold int) string { // Normalize to date-only comparison (matches app behavior) completedDay := time.Date(completedAt.Year(), completedAt.Month(), completedAt.Day(), 0, 0, 0, 0, time.UTC) dueDay := time.Date(dueDate.Year(), dueDate.Month(), dueDate.Day(), 0, 0, 0, 0, time.UTC) if completedDay.After(dueDay) { return "overdue_tasks" } daysBefore := dueDay.Sub(completedDay).Hours() / 24 if daysBefore <= float64(threshold) { return "due_soon_tasks" } return "upcoming_tasks" }