feat(uploads): direct-to-B2 presigned uploads with content-length-range policy
Backend CI / Test (push) Has been cancelled
Backend CI / Contract Tests (push) Has been cancelled
Backend CI / Build (push) Has been cancelled
Backend CI / Lint (push) Has been cancelled
Backend CI / Secret Scanning (push) Has been cancelled

Replaces the multipart-via-API path for image uploads with a three-step
direct-to-storage flow:

  1. Client POSTs /api/uploads/presign with content_length + content_type;
     server validates size (10 MB cap), mime allow-list per category, rate
     limit (50/hour/user via Redis sliding window), and concurrent unclaimed
     cap (10 in-flight per user). On success it persists a pending_uploads
     row, signs an S3 POST policy with content-length-range bound to the
     claimed length ±256 bytes, and returns the URL+fields.
  2. Client POSTs the bytes directly to B2 using the signed policy. B2
     enforces size, content-type, and key match before accepting.
  3. Client passes upload_ids[] to /api/task-completions/ or /api/documents/.
     Service HEADs each B2 object, verifies size matches expected_bytes
     within slack, marks pending_uploads claimed_at, and creates the
     associated TaskCompletionImage / DocumentImage rows.

Bytes never traverse our API server. The 1 MB Echo BodyLimit middleware
that was rejecting all task-completion image uploads becomes irrelevant
for this path. Existing multipart endpoints stay functional alongside it so
the new path can be soak-tested before the legacy path is removed.

Cleanup:
  - cmd/worker registers a new hourly cron (TypeUploadCleanup, "30 * * * *")
    that reaps pending_uploads where claimed_at IS NULL AND expires_at < NOW().
    Reaps both the B2 object and the row.
  - B2 bucket lifecycle rule on `uploads/` prefix (7 days hide → 1 day delete)
    documented in deploy-k3s/manifests/b2-lifecycle.md as a backstop.

Schema:
  - migrations/000002_pending_uploads.sql adds the table + partial index for
    cleanup + nullable pending_upload_id FKs on task_taskcompletionimage and
    task_documentimage.

Policy (single tier, no free/pro split):
  - 10 MB cap per upload
  - 50 presigns/hour/user
  - 10 concurrent unclaimed uploads/user
  - allow-list: jpeg/png/heic/heif/webp for image categories;
    + pdf for document_file

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-05-01 14:36:42 -07:00
parent 9bee436e86
commit 29c9014a33
20 changed files with 1032 additions and 9 deletions
+70 -4
View File
@@ -22,9 +22,11 @@ import (
// DocumentService handles document business logic
type DocumentService struct {
documentRepo *repositories.DocumentRepository
residenceRepo *repositories.ResidenceRepository
cache *CacheService
documentRepo *repositories.DocumentRepository
residenceRepo *repositories.ResidenceRepository
storageService *StorageService
uploadService *UploadService
cache *CacheService
}
// NewDocumentService creates a new document service
@@ -40,6 +42,19 @@ func (s *DocumentService) SetCacheService(cache *CacheService) {
s.cache = cache
}
// SetStorageService injects the storage service used by this DocumentService.
// CreateDocument hands it to urlForUploadKey when converting the B2 keys of
// claimed presigned uploads into document file and image URLs.
func (s *DocumentService) SetStorageService(ss *StorageService) {
	s.storageService = ss
}
// SetUploadService injects the presigned-upload service. CreateDocument calls
// its VerifyAndClaim with the request's upload IDs; while no upload service is
// set, any upload IDs on a create request are skipped rather than rejected.
func (s *DocumentService) SetUploadService(us *UploadService) {
	s.uploadService = us
}
// GetDocument gets a document by ID with access check
func (s *DocumentService) GetDocument(ctx context.Context, documentID, userID uint) (*responses.DocumentResponse, error) {
document, err := s.documentRepo.WithContext(ctx).FindByID(documentID)
@@ -154,11 +169,42 @@ func (s *DocumentService) CreateDocument(ctx context.Context, req *requests.Crea
IsActive: true,
}
// Claim presigned uploads BEFORE the document insert. If the client
// passed a category=document_file row, lift it onto the document's
// FileURL/MimeType/FileSize fields (only those still unset; FileName is
// not populated here) rather than creating an image row for it. Image
// categories produce DocumentImage rows below.
var claimedUploads []models.PendingUpload
if len(req.UploadIDs) > 0 && s.uploadService != nil {
var claimErr error
claimedUploads, claimErr = s.uploadService.VerifyAndClaim(ctx, userID, req.UploadIDs)
if claimErr != nil {
return nil, claimErr
}
// Lift the (single) document_file upload, if present, onto the
// document fields. Multiple document_file claims aren't meaningful;
// take the first and ignore extras to keep the surface narrow.
for _, pu := range claimedUploads {
if pu.Category == models.UploadCategoryDocumentFile {
if document.FileURL == "" {
document.FileURL = urlForUploadKey(s.storageService, pu.B2Key)
}
if document.MimeType == "" {
document.MimeType = pu.ContentType
}
if document.FileSize == nil && pu.ActualBytes != nil {
b := *pu.ActualBytes
document.FileSize = &b
}
break
}
}
}
if err := s.documentRepo.WithContext(ctx).Create(document); err != nil {
return nil, apperrors.Internal(err)
}
// Create images if provided
// Legacy multipart path — already-uploaded URLs.
for _, imageURL := range req.ImageURLs {
if imageURL != "" {
img := &models.DocumentImage{
@@ -172,6 +218,26 @@ func (s *DocumentService) CreateDocument(ctx context.Context, req *requests.Crea
}
}
// New presigned path — claimed image uploads become DocumentImage rows.
// The document_file row (if any) was already lifted onto the document above.
for i := range claimedUploads {
pu := claimedUploads[i]
if pu.Category == models.UploadCategoryDocumentFile {
continue
}
img := &models.DocumentImage{
DocumentID: document.ID,
ImageURL: urlForUploadKey(s.storageService, pu.B2Key),
PendingUploadID: &pu.ID,
}
if err := s.documentRepo.WithContext(ctx).CreateDocumentImage(img); err != nil {
// Don't fail the whole document for an image insert failure;
// matches the legacy ImageURLs behavior. The orphaned upload
// row is benign (still claimed, just unreferenced).
continue
}
}
// Reload with relations
document, err = s.documentRepo.WithContext(ctx).FindByID(document.ID)
if err != nil {