29c9014a33
Replaces the multipart-via-API path for image uploads with a three-step
direct-to-storage flow:
1. Client POSTs /api/uploads/presign with content_length + content_type;
server validates size (10 MB cap), mime allow-list per category, rate
limit (50/hour/user via Redis sliding window), and concurrent unclaimed
cap (10 in-flight per user). On success it persists a pending_uploads
row, signs an S3 POST policy with content-length-range bound to the
claimed length ±256 bytes, and returns the URL+fields.
2. Client POSTs the bytes directly to B2 using the signed policy. B2
enforces size, content-type, and key match before accepting.
3. Client passes upload_ids[] to /api/task-completions/ or /api/documents/.
Service HEADs each B2 object, verifies size matches expected_bytes
within slack, marks pending_uploads claimed_at, and creates the
associated TaskCompletionImage / DocumentImage rows.
Bytes never traverse our API server. The 1 MB Echo BodyLimit middleware
that was rejecting all task-completion image uploads becomes irrelevant
for this path. Existing multipart endpoints stay functional alongside it,
so the new path can be soak-tested before the legacy path is removed.
Cleanup:
- cmd/worker registers a new hourly cron (TypeUploadCleanup, "30 * * * *")
that reaps pending_uploads where claimed_at IS NULL AND expires_at < NOW().
Reaps both the B2 object and the row.
- B2 bucket lifecycle rule on `uploads/` prefix (7 days hide → 1 day delete)
documented in deploy-k3s/manifests/b2-lifecycle.md as a backstop.
Schema:
- migrations/000002_pending_uploads.sql adds the table + partial index for
cleanup + nullable pending_upload_id FKs on task_taskcompletionimage and
task_documentimage.
Policy (single tier, no free/pro split):
- 10 MB cap per upload
- 50 presigns/hour/user
- 10 concurrent unclaimed uploads/user
- allow-list: jpeg/png/heic/heif/webp for image categories;
+ pdf for document_file
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
337 lines
12 KiB
Go
337 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/hibiken/asynq"
|
|
"github.com/redis/go-redis/v9"
|
|
"github.com/rs/zerolog/log"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
"go.opentelemetry.io/otel/codes"
|
|
"go.opentelemetry.io/otel/trace"
|
|
|
|
"github.com/treytartt/honeydue-api/internal/config"
|
|
"github.com/treytartt/honeydue-api/internal/database"
|
|
"github.com/treytartt/honeydue-api/internal/monitoring"
|
|
"github.com/treytartt/honeydue-api/internal/prom"
|
|
"github.com/treytartt/honeydue-api/internal/push"
|
|
"github.com/treytartt/honeydue-api/internal/repositories"
|
|
"github.com/treytartt/honeydue-api/internal/services"
|
|
"github.com/treytartt/honeydue-api/internal/tracing"
|
|
"github.com/treytartt/honeydue-api/internal/worker/jobs"
|
|
"github.com/treytartt/honeydue-api/pkg/utils"
|
|
)
|
|
|
|
// workerHealthAddr is the listen address of the worker's health-check HTTP
// server (serves /health for container healthchecks).
const workerHealthAddr = ":6060"
|
|
|
|
func main() {
|
|
// Initialize logger
|
|
utils.InitLogger(true)
|
|
|
|
// Load configuration
|
|
cfg, err := config.Load()
|
|
if err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to load configuration")
|
|
}
|
|
|
|
// Check worker kill switch
|
|
if !cfg.Features.WorkerEnabled {
|
|
log.Warn().Msg("Worker disabled by FEATURE_WORKER_ENABLED=false — exiting")
|
|
os.Exit(0)
|
|
}
|
|
|
|
// Initialize OpenTelemetry tracing for the worker process. Same OTLP
|
|
// destination as the api; service.name distinguishes them in Jaeger.
|
|
tracingShutdown, err := tracing.Init(context.Background(), tracing.Config{
|
|
ServiceName: "honeydue-worker",
|
|
Environment: workerDeploymentEnv(cfg.Server.Debug),
|
|
EndpointURL: os.Getenv("OBS_TRACES_URL"),
|
|
BearerToken: os.Getenv("OBS_INGEST_TOKEN"),
|
|
SampleRatio: tracing.SampleRatioFromEnv(),
|
|
})
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("worker tracing init failed — continuing without traces")
|
|
}
|
|
defer func() {
|
|
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer cancel()
|
|
if err := tracingShutdown(shutdownCtx); err != nil {
|
|
log.Warn().Err(err).Msg("worker tracing shutdown error")
|
|
}
|
|
}()
|
|
asynqTracer := tracing.Tracer("honeydue/worker/asynq")
|
|
|
|
// Initialize database
|
|
db, err := database.Connect(&cfg.Database, cfg.Server.Debug)
|
|
if err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to connect to database")
|
|
}
|
|
log.Info().Msg("Connected to database")
|
|
|
|
// Get underlying *sql.DB for cleanup
|
|
sqlDB, _ := db.DB()
|
|
defer sqlDB.Close()
|
|
|
|
// Initialize push client (APNs + FCM)
|
|
var pushClient *push.Client
|
|
pushClient, err = push.NewClient(&cfg.Push, cfg.Features.PushEnabled)
|
|
if err != nil {
|
|
log.Warn().Err(err).Msg("Failed to initialize push client - push notifications disabled")
|
|
} else {
|
|
log.Info().
|
|
Bool("ios_enabled", pushClient.IsIOSEnabled()).
|
|
Bool("android_enabled", pushClient.IsAndroidEnabled()).
|
|
Msg("Push notification client initialized")
|
|
}
|
|
|
|
// Initialize email service (optional)
|
|
var emailService *services.EmailService
|
|
if cfg.Email.Host != "" {
|
|
emailService = services.NewEmailService(&cfg.Email, cfg.Features.EmailEnabled)
|
|
log.Info().Str("host", cfg.Email.Host).Msg("Email service initialized")
|
|
}
|
|
|
|
// Initialize notification service for actionable push notifications
|
|
notificationRepo := repositories.NewNotificationRepository(db)
|
|
notificationService := services.NewNotificationService(notificationRepo, pushClient)
|
|
log.Info().Msg("Notification service initialized")
|
|
|
|
// Parse Redis URL for Asynq
|
|
redisOpt, err := asynq.ParseRedisURI(cfg.Redis.URL)
|
|
if err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to parse Redis URL")
|
|
}
|
|
|
|
// Initialize monitoring service (if Redis is available)
|
|
var monitoringService *monitoring.Service
|
|
redisClientOpt, ok := redisOpt.(asynq.RedisClientOpt)
|
|
if ok {
|
|
redisClient := redis.NewClient(&redis.Options{
|
|
Addr: redisClientOpt.Addr,
|
|
Password: redisClientOpt.Password,
|
|
DB: redisClientOpt.DB,
|
|
})
|
|
|
|
// Verify Redis connection
|
|
if err := redisClient.Ping(context.Background()).Err(); err != nil {
|
|
log.Warn().Err(err).Msg("Failed to connect to Redis for monitoring - monitoring disabled")
|
|
} else {
|
|
monitoringService = monitoring.NewService(monitoring.Config{
|
|
Process: "worker",
|
|
RedisClient: redisClient,
|
|
DB: db, // Pass database for enable_monitoring setting sync
|
|
})
|
|
|
|
// Reinitialize logger with monitoring writer
|
|
utils.InitLoggerWithWriter(cfg.Server.Debug, monitoringService.LogWriter())
|
|
|
|
// Create Asynq inspector for queue statistics
|
|
inspector := asynq.NewInspector(redisOpt)
|
|
monitoringService.SetAsynqInspector(inspector)
|
|
|
|
// Start stats collection
|
|
monitoringService.Start()
|
|
defer monitoringService.Stop()
|
|
|
|
log.Info().
|
|
Bool("log_capture_enabled", monitoringService.IsEnabled()).
|
|
Msg("Monitoring service initialized")
|
|
}
|
|
}
|
|
|
|
// Create Asynq server
|
|
srv := asynq.NewServer(
|
|
redisOpt,
|
|
asynq.Config{
|
|
Concurrency: 10,
|
|
Queues: map[string]int{
|
|
"critical": 6,
|
|
"default": 3,
|
|
"low": 1,
|
|
},
|
|
ErrorHandler: asynq.ErrorHandlerFunc(func(ctx context.Context, task *asynq.Task, err error) {
|
|
log.Error().
|
|
Err(err).
|
|
Str("type", task.Type()).
|
|
Bytes("payload", task.Payload()).
|
|
Msg("Task processing failed")
|
|
}),
|
|
},
|
|
)
|
|
|
|
// Create job handler
|
|
jobHandler := jobs.NewHandler(db, pushClient, emailService, notificationService, cfg)
|
|
|
|
// Wire upload service for the pending_uploads cleanup cron. Storage may
|
|
// be local-disk (no S3 backend), in which case the upload service stays
|
|
// nil and the cleanup handler no-ops. Cache is optional — the cleanup
|
|
// path doesn't rate-limit and works fine with a nil cache.
|
|
if storageService, sErr := services.NewStorageService(&cfg.Storage); sErr == nil {
|
|
if s3 := storageService.S3Backend(); s3 != nil {
|
|
pendingUploadRepo := repositories.NewPendingUploadRepository(db)
|
|
uploadService := services.NewUploadService(pendingUploadRepo, s3, &cfg.Storage, nil)
|
|
jobHandler.SetUploadService(uploadService)
|
|
}
|
|
} else {
|
|
log.Warn().Err(sErr).Msg("Failed to initialize storage service for upload cleanup; cleanup cron will no-op")
|
|
}
|
|
|
|
// Create Asynq mux and register handlers
|
|
mux := asynq.NewServeMux()
|
|
|
|
// Tracing + metrics middleware: every job runs inside a span and emits
|
|
// asynq_job_duration_seconds{task_type,result}.
|
|
mux.Use(asynqTracingMiddleware(asynqTracer))
|
|
|
|
mux.HandleFunc(jobs.TypeSmartReminder, jobHandler.HandleSmartReminder)
|
|
mux.HandleFunc(jobs.TypeDailyDigest, jobHandler.HandleDailyDigest)
|
|
mux.HandleFunc(jobs.TypeSendEmail, jobHandler.HandleSendEmail)
|
|
mux.HandleFunc(jobs.TypeSendPush, jobHandler.HandleSendPush)
|
|
mux.HandleFunc(jobs.TypeOnboardingEmails, jobHandler.HandleOnboardingEmails)
|
|
mux.HandleFunc(jobs.TypeReminderLogCleanup, jobHandler.HandleReminderLogCleanup)
|
|
mux.HandleFunc(jobs.TypeUploadCleanup, jobHandler.HandleUploadCleanup)
|
|
|
|
// Register email job handlers (welcome, verification, password reset, password changed)
|
|
if emailService != nil {
|
|
emailJobHandler := jobs.NewEmailJobHandler(emailService)
|
|
emailJobHandler.RegisterHandlers(mux)
|
|
}
|
|
|
|
// Start scheduler for periodic tasks
|
|
scheduler := asynq.NewScheduler(redisOpt, nil)
|
|
|
|
// Schedule smart reminder notifications (runs every hour to support per-user custom times)
|
|
// Replaces old task reminder and overdue reminder with frequency-aware system
|
|
// Uses TaskReminderLog to prevent duplicate notifications
|
|
if _, err := scheduler.Register("0 * * * *", asynq.NewTask(jobs.TypeSmartReminder, nil)); err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to register smart reminder job")
|
|
}
|
|
log.Info().Str("cron", "0 * * * *").Int("default_hour", cfg.Worker.TaskReminderHour).Msg("Registered smart reminder job (runs hourly for per-user times)")
|
|
|
|
// Schedule daily digest (runs every hour to support per-user custom times)
|
|
// The job handler filters users based on their preferred notification hour
|
|
if _, err := scheduler.Register("0 * * * *", asynq.NewTask(jobs.TypeDailyDigest, nil)); err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to register daily digest job")
|
|
}
|
|
log.Info().Str("cron", "0 * * * *").Int("default_hour", cfg.Worker.DailyNotifHour).Msg("Registered daily digest job (runs hourly for per-user times)")
|
|
|
|
// Schedule onboarding emails (runs daily at 10:00 AM UTC)
|
|
// Sends emails to users who haven't created residences or tasks after registration
|
|
if _, err := scheduler.Register("0 10 * * *", asynq.NewTask(jobs.TypeOnboardingEmails, nil)); err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to register onboarding emails job")
|
|
}
|
|
log.Info().Str("cron", "0 10 * * *").Msg("Registered onboarding emails job (runs daily at 10:00 AM UTC)")
|
|
|
|
// Schedule reminder log cleanup (runs daily at 3:00 AM UTC)
|
|
// Removes reminder logs older than 90 days to prevent table bloat
|
|
if _, err := scheduler.Register("0 3 * * *", asynq.NewTask(jobs.TypeReminderLogCleanup, nil)); err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to register reminder log cleanup job")
|
|
}
|
|
log.Info().Str("cron", "0 3 * * *").Msg("Registered reminder log cleanup job (runs daily at 3:00 AM UTC)")
|
|
|
|
// Schedule pending_uploads cleanup (hourly at :30 to avoid colliding with
|
|
// the top-of-hour reminder + digest crons). Reaps unclaimed expired
|
|
// upload sessions; the B2 bucket lifecycle (7 days on uploads/ prefix)
|
|
// is the backstop if this worker is offline for an extended period.
|
|
if _, err := scheduler.Register("30 * * * *", asynq.NewTask(jobs.TypeUploadCleanup, nil)); err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to register upload cleanup job")
|
|
}
|
|
log.Info().Str("cron", "30 * * * *").Msg("Registered pending_uploads cleanup job (runs hourly)")
|
|
|
|
// Handle graceful shutdown
|
|
quit := make(chan os.Signal, 1)
|
|
signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
|
|
|
|
// Health server (for container healthchecks; not externally published)
|
|
healthMux := http.NewServeMux()
|
|
healthMux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte(`{"status":"ok"}`))
|
|
})
|
|
healthSrv := &http.Server{
|
|
Addr: workerHealthAddr,
|
|
Handler: healthMux,
|
|
ReadHeaderTimeout: 5 * time.Second,
|
|
}
|
|
go func() {
|
|
log.Info().Str("addr", workerHealthAddr).Msg("Health server listening")
|
|
if err := healthSrv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
|
log.Warn().Err(err).Msg("Health server terminated")
|
|
}
|
|
}()
|
|
|
|
// Start scheduler in goroutine
|
|
go func() {
|
|
if err := scheduler.Run(); err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to start scheduler")
|
|
}
|
|
}()
|
|
|
|
// Start worker server in goroutine
|
|
go func() {
|
|
log.Info().Msg("Starting worker server...")
|
|
if err := srv.Run(mux); err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to start worker server")
|
|
}
|
|
}()
|
|
|
|
<-quit
|
|
log.Info().Msg("Shutting down worker...")
|
|
|
|
// Graceful shutdown
|
|
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
defer shutdownCancel()
|
|
_ = healthSrv.Shutdown(shutdownCtx)
|
|
srv.Shutdown()
|
|
scheduler.Shutdown()
|
|
|
|
log.Info().Msg("Worker stopped")
|
|
}
|
|
|
|
// asynqTracingMiddleware returns an asynq.MiddlewareFunc that opens a span
|
|
// per task execution and records asynq_job_duration_seconds. Span attrs
|
|
// include task type, queue, retry count, and the result outcome.
|
|
func asynqTracingMiddleware(tracer trace.Tracer) asynq.MiddlewareFunc {
|
|
return func(next asynq.Handler) asynq.Handler {
|
|
return asynq.HandlerFunc(func(ctx context.Context, t *asynq.Task) error {
|
|
ctx, span := tracer.Start(ctx, "asynq.handle:"+t.Type(),
|
|
trace.WithAttributes(
|
|
attribute.String("asynq.task_type", t.Type()),
|
|
attribute.Int("asynq.payload_bytes", len(t.Payload())),
|
|
),
|
|
)
|
|
defer span.End()
|
|
|
|
start := time.Now()
|
|
err := next.ProcessTask(ctx, t)
|
|
dur := time.Since(start)
|
|
result := "ok"
|
|
if err != nil {
|
|
result = "error"
|
|
span.SetStatus(codes.Error, err.Error())
|
|
span.RecordError(err)
|
|
}
|
|
span.SetAttributes(attribute.String("asynq.result", result))
|
|
prom.ObserveAsynqJob(t.Type(), result, dur)
|
|
return err
|
|
})
|
|
}
|
|
}
|
|
|
|
// workerDeploymentEnv mirrors deploymentEnvironment in cmd/api/main.go.
//
// It returns the value of the DEPLOYMENT_ENVIRONMENT variable when that is
// non-empty; otherwise it falls back to "dev" when debug is true and "prod"
// when it is false.
func workerDeploymentEnv(debug bool) string {
	override := os.Getenv("DEPLOYMENT_ENVIRONMENT")
	switch {
	case override != "":
		return override
	case debug:
		return "dev"
	default:
		return "prod"
	}
}
|