Files
honeyDueAPI/internal/services/storage_backend_s3.go
T
Trey t 29c9014a33
Backend CI / Test (push) Has been cancelled
Backend CI / Contract Tests (push) Has been cancelled
Backend CI / Build (push) Has been cancelled
Backend CI / Lint (push) Has been cancelled
Backend CI / Secret Scanning (push) Has been cancelled
feat(uploads): direct-to-B2 presigned uploads with content-length-range policy
Replaces the multipart-via-API path for image uploads with a three-step
direct-to-storage flow:

  1. Client POSTs /api/uploads/presign with content_length + content_type;
     server validates size (10 MB cap), mime allow-list per category, rate
     limit (50/hour/user via Redis sliding window), and concurrent unclaimed
     cap (10 in-flight per user). On success it persists a pending_uploads
     row, signs an S3 POST policy with content-length-range bound to the
     claimed length ±256 bytes, and returns the URL+fields.
  2. Client POSTs the bytes directly to B2 using the signed policy. B2
     enforces size, content-type, and key match before accepting.
  3. Client passes upload_ids[] to /api/task-completions/ or /api/documents/.
     Service HEADs each B2 object, verifies size matches expected_bytes
     within slack, marks pending_uploads claimed_at, and creates the
     associated TaskCompletionImage / DocumentImage rows.

Bytes never traverse our API server. The 1 MB Echo BodyLimit middleware
that was rejecting all task-completion image uploads becomes irrelevant
for this path. Existing multipart endpoints stay functional alongside it,
so the new path can be soak-tested before the legacy path is removed.

Cleanup:
  - cmd/worker registers a new hourly cron (TypeUploadCleanup, "30 * * * *")
    that reaps pending_uploads where claimed_at IS NULL AND expires_at < NOW().
    Reaps both the B2 object and the row.
  - B2 bucket lifecycle rule on `uploads/` prefix (7 days hide → 1 day delete)
    documented in deploy-k3s/manifests/b2-lifecycle.md as a backstop.

Schema:
  - migrations/000002_pending_uploads.sql adds the table + partial index for
    cleanup + nullable pending_upload_id FKs on task_taskcompletionimage and
    task_documentimage.

Policy (single tier, no free/pro split):
  - 10 MB cap per upload
  - 50 presigns/hour/user
  - 10 concurrent unclaimed uploads/user
  - allow-list: jpeg/png/heic/heif/webp for image categories;
    + pdf for document_file

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 14:36:42 -07:00

183 lines
5.6 KiB
Go

package services
import (
"bytes"
"context"
"fmt"
"io"
"net/url"
"time"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
"github.com/rs/zerolog/log"
)
// S3Backend is a storage backend that keeps files in an S3-compatible object
// store (Backblaze B2, MinIO, AWS S3).
type S3Backend struct {
	// client is the minio client every object operation goes through.
	client *minio.Client
	// bucket is the name of the bucket all keys are read from and written to.
	bucket string
}
// NewS3Backend builds an S3Backend for the given endpoint and static
// credentials, and confirms that bucket exists before returning it.
//
// An empty region falls back to "us-east-1". The bucket existence check runs
// under a 10-second timeout; a failed check and a missing bucket are both
// reported as errors. On success the endpoint, bucket and SSL setting are
// logged.
func NewS3Backend(endpoint, keyID, appKey, bucket string, useSSL bool, region string) (*S3Backend, error) {
	if region == "" {
		region = "us-east-1"
	}

	opts := &minio.Options{
		Creds:  credentials.NewStaticV4(keyID, appKey, ""),
		Secure: useSSL,
		Region: region,
	}
	mc, err := minio.New(endpoint, opts)
	if err != nil {
		return nil, fmt.Errorf("failed to create S3 client: %w", err)
	}

	// Fail fast if the bucket is missing or cannot be reached.
	checkCtx, stop := context.WithTimeout(context.Background(), 10*time.Second)
	defer stop()

	found, err := mc.BucketExists(checkCtx, bucket)
	switch {
	case err != nil:
		return nil, fmt.Errorf("failed to check bucket %q: %w", bucket, err)
	case !found:
		return nil, fmt.Errorf("bucket %q does not exist", bucket)
	}

	log.Info().
		Str("endpoint", endpoint).
		Str("bucket", bucket).
		Bool("ssl", useSSL).
		Msg("S3 storage backend initialized")

	backend := &S3Backend{client: mc, bucket: bucket}
	return backend, nil
}
// Write uploads data to the bucket under key. The whole request is bounded by
// a 60-second timeout, and any upload failure is returned wrapped.
func (b *S3Backend) Write(key string, data []byte) error {
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	body := bytes.NewReader(data)
	size := int64(len(data))
	if _, err := b.client.PutObject(ctx, b.bucket, key, body, size, minio.PutObjectOptions{}); err != nil {
		return fmt.Errorf("failed to upload to S3: %w", err)
	}
	return nil
}
// Read downloads the object stored under key and returns its full contents in
// memory. Opening and reading the object share a single 60-second timeout.
func (b *S3Backend) Read(key string) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	object, err := b.client.GetObject(ctx, b.bucket, key, minio.GetObjectOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to get S3 object: %w", err)
	}
	defer object.Close()

	contents, readErr := io.ReadAll(object)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read S3 object: %w", readErr)
	}
	return contents, nil
}
// Delete removes the object stored under key from the bucket. The request is
// bounded by a 30-second timeout, and any failure is returned wrapped.
func (b *S3Backend) Delete(key string) error {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	if err := b.client.RemoveObject(ctx, b.bucket, key, minio.RemoveObjectOptions{}); err != nil {
		return fmt.Errorf("failed to delete S3 object: %w", err)
	}
	return nil
}
// ReadStream opens the object stored under key and returns it as a stream.
// The caller owns the returned reader and must Close it; no timeout is applied
// because the stream lives for as long as the caller keeps reading.
//
// minio-go's GetObject defers the actual request until the first Read or Stat,
// so on its own it would hand back a reader even for a key that does not
// exist. Stat is called here so that a missing or inaccessible object is
// reported as an error by ReadStream itself rather than by the caller's first
// Read. This costs one extra metadata request per stream.
func (b *S3Backend) ReadStream(key string) (io.ReadCloser, error) {
	ctx := context.Background() // caller controls lifetime by closing the reader
	obj, err := b.client.GetObject(ctx, b.bucket, key, minio.GetObjectOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to get S3 object stream: %w", err)
	}
	if _, err := obj.Stat(); err != nil {
		// Best-effort cleanup; the Stat error is the one worth reporting.
		_ = obj.Close()
		return nil, fmt.Errorf("failed to get S3 object stream: %w", err)
	}
	return obj, nil
}
// PresignedPostResult carries everything a client needs to upload directly to
// S3-compatible storage with a multipart/form-data POST: the target URL and
// the signed form fields. The client sends every entry of Fields as a form
// part and appends the file part last. Fields is a map and therefore has no
// inherent ordering of its own.
type PresignedPostResult struct {
	// URL is the POST target, e.g.
	// https://s3.us-east-005.backblazeb2.com/honeyDueProd
	URL string
	// Fields holds the form parts to send: policy, x-amz-*, key, Content-Type,
	// etc.
	Fields map[string]string
}
// PresignedPost generates a POST policy that constrains uploads at the protocol
// level: only the named key, only the named content-type, only sizes within
// the requested range. S3 (and B2's S3-compatible endpoint) reject anything
// that doesn't satisfy every condition before accepting the body.
//
// minBytes/maxBytes are inclusive. ttl is how long the policy stays valid,
// counted from the moment of the call; it must be positive, because a zero or
// negative ttl would sign a policy that is already expired. The returned
// URL + Fields can be sent straight to the client.
//
// An error is returned when ttl is not positive, when any policy condition is
// rejected by the minio policy builder, or when signing fails.
func (b *S3Backend) PresignedPost(ctx context.Context, key, contentType string, minBytes, maxBytes int64, ttl time.Duration) (*PresignedPostResult, error) {
	// Reject this up front: the policy builder accepts an expiry in the past,
	// and the client would only find out when the storage side refuses the
	// upload.
	if ttl <= 0 {
		return nil, fmt.Errorf("set expires: ttl must be positive, got %s", ttl)
	}

	policy := minio.NewPostPolicy()
	if err := policy.SetBucket(b.bucket); err != nil {
		return nil, fmt.Errorf("set bucket: %w", err)
	}
	if err := policy.SetKey(key); err != nil {
		return nil, fmt.Errorf("set key: %w", err)
	}
	if err := policy.SetContentType(contentType); err != nil {
		return nil, fmt.Errorf("set content-type: %w", err)
	}
	if err := policy.SetContentLengthRange(minBytes, maxBytes); err != nil {
		return nil, fmt.Errorf("set content-length-range: %w", err)
	}
	if err := policy.SetExpires(time.Now().UTC().Add(ttl)); err != nil {
		return nil, fmt.Errorf("set expires: %w", err)
	}

	u, fields, err := b.client.PresignedPostPolicy(ctx, policy)
	if err != nil {
		return nil, fmt.Errorf("presign post policy: %w", err)
	}
	return &PresignedPostResult{
		// The form fields carry the policy and signature; drop any query
		// string so they are not duplicated in the URL.
		URL:    stripQuery(u),
		Fields: fields,
	}, nil
}
// ObjectInfo is the object metadata reported by Stat, obtained without
// fetching the object body. The attach path uses it to verify that an uploaded
// object's size and content-type match what the client claimed when requesting
// the presign.
type ObjectInfo struct {
	// Size is the stored object's size as reported by the storage backend.
	Size int64
	// ContentType is the content type recorded for the stored object.
	ContentType string
	// ETag is the entity tag the storage backend reports for the object.
	ETag string
}
// Stat looks up the metadata of the object stored under key without
// downloading its body, and returns its size, content type and ETag. ctx
// bounds the request; any lookup failure is returned wrapped.
func (b *S3Backend) Stat(ctx context.Context, key string) (*ObjectInfo, error) {
	meta, err := b.client.StatObject(ctx, b.bucket, key, minio.StatObjectOptions{})
	if err != nil {
		return nil, fmt.Errorf("stat S3 object: %w", err)
	}

	result := ObjectInfo{
		Size:        meta.Size,
		ContentType: meta.ContentType,
		ETag:        meta.ETag,
	}
	return &result, nil
}
// stripQuery returns the URL with its query string removed. minio-go encodes
// the policy/signature into both the form fields and the query; the form
// fields are the source of truth for POST policy uploads, and many clients
// (including Apple's NSURLSession) will reject the request if the same
// signature appears in both places.
//
// A nil URL yields the empty string. The input URL is never modified: the
// function works on a copy.
func stripQuery(u *url.URL) string {
	if u == nil {
		return ""
	}
	clone := *u // shallow copy so the caller's URL is left untouched
	clone.RawQuery = ""
	// A URL parsed from "…?" has ForceQuery set and would still render a
	// dangling "?" even with an empty RawQuery.
	clone.ForceQuery = false
	return clone.String()
}