Files
Trey t 6803f6ec18 Add honeycomb completion heatmap and data migration framework
- Add completion_summary endpoint data to residence detail response
- Track completed_from_column on task completions (overdue/due_soon/upcoming)
- Add GetCompletionSummary repo method with monthly aggregation
- Add one-time data migration framework (data_migrations table + registry)
- Add backfill migration to classify historical completions
- Add standalone backfill script for manual/dry-run usage

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 00:05:10 -05:00

289 lines
8.3 KiB
Go

// backfill-completion-columns is a one-time migration script that retroactively
// sets the completed_from_column field on existing TaskCompletion records.
//
// Strategy:
// - One-time tasks: compare completed_at vs due_date to determine if overdue/due_soon/upcoming
// - Recurring tasks: work backwards from next_due_date using frequency interval to estimate
// the due date that was active at completion time
// - Tasks with no due date: default to "completed_tasks" (no way to determine)
//
// Usage:
// go run ./cmd/backfill-completion-columns --dry-run # Preview changes
// go run ./cmd/backfill-completion-columns # Apply changes
//
// Safe to run multiple times — only updates rows still set to the default "completed_tasks".
package main
import (
"flag"
"os"
"sort"
"time"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"gorm.io/driver/postgres"
"gorm.io/gorm"
"gorm.io/gorm/logger"
"github.com/treytartt/honeydue-api/internal/config"
)
// daysThreshold is the "due soon" window in days, matching the app default.
const daysThreshold = 30

// main loads every TaskCompletion still carrying the default column value,
// reconstructs the kanban column each task occupied at completion time, and
// (unless --dry-run is set) writes the classification back in batches.
func main() {
	dryRun := flag.Bool("dry-run", false, "Preview changes without writing to database")
	flag.Parse()

	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339})

	cfg, err := config.Load()
	if err != nil {
		log.Fatal().Err(err).Msg("Failed to load config")
	}

	db, err := gorm.Open(postgres.Open(cfg.Database.DSN()), &gorm.Config{
		Logger: logger.Default.LogMode(logger.Silent),
	})
	if err != nil {
		log.Fatal().Err(err).Msg("Failed to connect to database")
	}

	log.Info().Bool("dry_run", *dryRun).Msg("Starting backfill of completed_from_column")

	// Only rows still at the default value (or never set) are candidates;
	// this predicate is what makes the script safe to run repeatedly.
	type completionRow struct {
		ID          uint
		TaskID      uint
		CompletedAt time.Time
	}
	var completions []completionRow
	err = db.Table("task_taskcompletion").
		Select("id, task_id, completed_at").
		Where("completed_from_column = ? OR completed_from_column IS NULL OR completed_from_column = ''", "completed_tasks").
		Order("task_id, completed_at ASC").
		Scan(&completions).Error
	if err != nil {
		log.Fatal().Err(err).Msg("Failed to query completions")
	}
	log.Info().Int("total_completions", len(completions)).Msg("Found completions to backfill")
	if len(completions) == 0 {
		log.Info().Msg("Nothing to backfill")
		return
	}

	// Collect the distinct task IDs referenced by the completions.
	taskIDs := make(map[uint]bool)
	for _, c := range completions {
		taskIDs[c.TaskID] = true
	}
	taskIDList := make([]uint, 0, len(taskIDs))
	for id := range taskIDs {
		taskIDList = append(taskIDList, id)
	}

	// Load scheduling data for those tasks. The previous version also
	// fetched is_cancelled/is_archived but never read them, so they are
	// no longer selected.
	type taskRow struct {
		ID                 uint
		DueDate            *time.Time
		NextDueDate        *time.Time
		FrequencyID        *uint
		CustomIntervalDays *int
	}
	var tasks []taskRow
	err = db.Table("task_task").
		Select("id, due_date, next_due_date, frequency_id, custom_interval_days").
		Where("id IN ?", taskIDList).
		Scan(&tasks).Error
	if err != nil {
		log.Fatal().Err(err).Msg("Failed to query tasks")
	}
	taskMap := make(map[uint]*taskRow, len(tasks))
	for i := range tasks {
		taskMap[tasks[i].ID] = &tasks[i]
	}

	// Load frequency definitions so a per-task interval can be resolved.
	type freqRow struct {
		ID   uint
		Name string
		Days *int
	}
	var frequencies []freqRow
	err = db.Table("task_taskfrequency").Select("id, name, days").Scan(&frequencies).Error
	if err != nil {
		log.Fatal().Err(err).Msg("Failed to query frequencies")
	}
	freqMap := make(map[uint]*freqRow, len(frequencies))
	for i := range frequencies {
		freqMap[frequencies[i].ID] = &frequencies[i]
	}

	// Group completions by task, sorted oldest-first, so recurring tasks
	// can advance an estimated due date through their completion history.
	taskCompletions := make(map[uint][]completionRow)
	for _, c := range completions {
		taskCompletions[c.TaskID] = append(taskCompletions[c.TaskID], c)
	}
	for taskID := range taskCompletions {
		sort.Slice(taskCompletions[taskID], func(i, j int) bool {
			return taskCompletions[taskID][i].CompletedAt.Before(taskCompletions[taskID][j].CompletedAt)
		})
	}

	// Determine the column for each completion.
	type updateEntry struct {
		completionID uint
		column       string
	}
	var updates []updateEntry
	// Classification counts for the summary log. Absent keys read as zero,
	// so no pre-seeding is needed (the old code seeded two keys that were
	// never incremented or logged).
	stats := make(map[string]int)

	for taskID, comps := range taskCompletions {
		task, ok := taskMap[taskID]
		if !ok {
			// Completion references a task that no longer exists; skip.
			continue
		}

		// Resolve the recurrence interval: a "Custom" frequency uses the
		// task's own interval, otherwise the frequency's day count applies.
		var intervalDays int
		if task.FrequencyID != nil {
			if freq, ok := freqMap[*task.FrequencyID]; ok {
				if freq.Name == "Custom" && task.CustomIntervalDays != nil {
					intervalDays = *task.CustomIntervalDays
				} else if freq.Days != nil {
					intervalDays = *freq.Days
				}
			}
		}
		isRecurring := intervalDays > 0

		if !isRecurring {
			// One-time task: classify each completion against the single
			// due date. With no due date there is nothing to compare
			// against, so normalize the row to the default value.
			effectiveDate := task.DueDate
			if effectiveDate == nil {
				for _, c := range comps {
					stats["no_due_date"]++
					updates = append(updates, updateEntry{c.ID, "completed_tasks"})
				}
				continue
			}
			for _, c := range comps {
				column := classifyCompletion(c.CompletedAt, *effectiveDate, daysThreshold)
				stats[column]++
				updates = append(updates, updateEntry{c.ID, column})
			}
			continue
		}

		// Recurring task: reconstruct the due date active at each
		// completion. The original DueDate covers the first completion;
		// after each completion the next due date becomes
		// completedAt + intervalDays.
		//
		// NOTE(review): the chain is built only from the completions
		// selected here (those still at the default value), so any
		// previously-classified completions are excluded from the
		// estimate — this is a best-effort heuristic, not an exact
		// reconstruction.
		effectiveDate := task.DueDate
		if effectiveDate == nil && task.NextDueDate != nil {
			// No original DueDate: estimate the first due date by walking
			// backwards from the current NextDueDate, one interval per
			// completion.
			est := task.NextDueDate.AddDate(0, 0, -intervalDays*len(comps))
			effectiveDate = &est
		}
		if effectiveDate == nil {
			for _, c := range comps {
				stats["no_due_date"]++
				updates = append(updates, updateEntry{c.ID, "completed_tasks"})
			}
			continue
		}
		currentDueDate := *effectiveDate
		for _, c := range comps {
			column := classifyCompletion(c.CompletedAt, currentDueDate, daysThreshold)
			stats[column]++
			updates = append(updates, updateEntry{c.ID, column})
			// The next completion is measured against a due date one
			// interval after this one was completed.
			currentDueDate = c.CompletedAt.AddDate(0, 0, intervalDays)
		}
	}

	log.Info().
		Int("overdue", stats["overdue_tasks"]).
		Int("due_soon", stats["due_soon_tasks"]).
		Int("upcoming", stats["upcoming_tasks"]).
		Int("no_due_date", stats["no_due_date"]).
		Int("total_updates", len(updates)).
		Msg("Classification results")

	if *dryRun {
		log.Info().Msg("Dry run complete — no changes written")
		return
	}

	// Apply updates in transactional batches so a mid-run failure leaves
	// each batch either fully applied or untouched.
	const batchSize = 500
	updated := 0
	for i := 0; i < len(updates); i += batchSize {
		end := i + batchSize
		if end > len(updates) {
			end = len(updates)
		}
		batch := updates[i:end]
		err := db.Transaction(func(tx *gorm.DB) error {
			for _, u := range batch {
				if err := tx.Table("task_taskcompletion").
					Where("id = ?", u.completionID).
					Update("completed_from_column", u.column).Error; err != nil {
					return err
				}
			}
			return nil
		})
		if err != nil {
			log.Fatal().Err(err).Int("batch_start", i).Msg("Failed to update batch")
		}
		updated += len(batch)
		log.Info().Int("updated", updated).Int("total", len(updates)).Msg("Progress")
	}
	log.Info().Int("total_updated", updated).Msg("Backfill complete")
}
// classifyCompletion determines what kanban column a task was in when completed.
func classifyCompletion(completedAt time.Time, dueDate time.Time, threshold int) string {
// Normalize to date-only comparison (matches app behavior)
completedDay := time.Date(completedAt.Year(), completedAt.Month(), completedAt.Day(), 0, 0, 0, 0, time.UTC)
dueDay := time.Date(dueDate.Year(), dueDate.Month(), dueDate.Day(), 0, 0, 0, 0, time.UTC)
if completedDay.After(dueDay) {
return "overdue_tasks"
}
daysBefore := dueDay.Sub(completedDay).Hours() / 24
if daysBefore <= float64(threshold) {
return "due_soon_tasks"
}
return "upcoming_tasks"
}