cb1dc383b4
Two small Go CLIs for production ops that previously required ad-hoc
psql or kubectl gymnastics. Both load DB credentials from prod.env-style
env vars and read POSTGRES_PASSWORD from deploy/secrets/postgres_password.txt
by default, so the workflow is `set -a && source deploy/prod.env && set +a`
followed by go run.
cmd/admin-reset/main.go:
  --list                  print all admin_users rows
  --verify --email X      bcrypt-check a password against the stored hash
                          using the same case-insensitive lookup the live
                          /api/admin/auth/login endpoint uses
  --new-email Y           rename an admin's email (with unique-index check)
  default (--email X)     prompt for a new password twice (no echo, min 12
                          chars), bcrypt at DefaultCost, update the row

cmd/notif-diag/main.go:
  default                 print pending/sent counts, breakdown by type and
                          age, the 5 most recent pending rows with their
                          error_message, and registered APNs/FCM device
                          counts
  --mark-failed-as-sent   cosmetic cleanup — UPDATE pending rows that have
                          a recorded error to sent=true,
                          sent_at=COALESCE(updated_at, NOW())
  --yes                   skip the interactive confirmation prompt
Both bypass internal/config.Load() entirely so they don't need
SECRET_KEY or other unrelated env vars to run. .gitignore excludes the
build artifacts at /admin-reset and /notif-diag.
go.mod adds golang.org/x/term v0.41.0 (promoted from indirect to direct)
for no-echo password input in admin-reset.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
334 lines
9.7 KiB
Go
334 lines
9.7 KiB
Go
// notif-diag is a CLI for inspecting and (optionally) cleaning up stuck
// notification rows. Default mode is read-only — it runs SELECTs and prints a
// summary. With --mark-failed-as-sent, it marks pending rows that already have
// a recorded error as sent (cosmetic — no retry, no resend).
//
// Usage:
//
//	set -a && source deploy/prod.env && set +a
//	go run ./cmd/notif-diag                              # diagnose
//	go run ./cmd/notif-diag --mark-failed-as-sent --yes  # clean up errored backlog
|
|
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/rs/zerolog"
|
|
"github.com/rs/zerolog/log"
|
|
"gorm.io/driver/postgres"
|
|
"gorm.io/gorm"
|
|
"gorm.io/gorm/logger"
|
|
)
|
|
|
|
func main() {
|
|
passwordFile := stringFlag("password-file", "deploy/secrets/postgres_password.txt",
|
|
"Path to file containing POSTGRES_PASSWORD (used if env var is empty)")
|
|
markFailed := boolFlag("mark-failed-as-sent",
|
|
"Mark every pending row with a non-empty error_message as sent. Cosmetic only — does not retry the push.")
|
|
yes := boolFlag("yes", "Skip the interactive confirmation prompt for destructive actions.")
|
|
|
|
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339})
|
|
|
|
dsn, host, err := buildDSN(*passwordFile)
|
|
if err != nil {
|
|
log.Fatal().Err(err).Msg("failed to build database DSN")
|
|
}
|
|
|
|
db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
|
|
Logger: logger.Default.LogMode(logger.Silent),
|
|
})
|
|
if err != nil {
|
|
log.Fatal().Err(err).Msg("failed to connect to database")
|
|
}
|
|
|
|
fmt.Printf("DB host: %s\n", host)
|
|
fmt.Println(strings.Repeat("=", 80))
|
|
|
|
overallTotals(db)
|
|
pendingByType(db)
|
|
recentPending(db)
|
|
deviceCounts(db)
|
|
|
|
if *markFailed {
|
|
markFailedAsSent(db, *yes)
|
|
}
|
|
}
|
|
|
|
// markFailedAsSent updates pending rows whose error_message is non-empty,
|
|
// flipping them to sent=true with sent_at=updated_at. This is purely cosmetic:
|
|
// it removes them from the "pending" count so dashboards and the diag tool
|
|
// don't keep flagging an old, unfixable backlog. It does NOT re-send anything.
|
|
func markFailedAsSent(db *gorm.DB, skipPrompt bool) {
|
|
var candidate int64
|
|
if err := db.Raw(`
|
|
SELECT COUNT(*) FROM notifications_notification
|
|
WHERE sent = false AND error_message IS NOT NULL AND error_message <> ''
|
|
`).Scan(&candidate).Error; err != nil {
|
|
log.Fatal().Err(err).Msg("failed to count cleanup candidates")
|
|
}
|
|
|
|
fmt.Printf("\n# Cleanup candidate count: %d\n", candidate)
|
|
if candidate == 0 {
|
|
fmt.Println(" (nothing to clean up)")
|
|
return
|
|
}
|
|
fmt.Println(" These rows have a recorded send error and will never be retried.")
|
|
fmt.Println(" Marking them sent=true is cosmetic — it just prevents them from")
|
|
fmt.Println(" showing up as pending in admin dashboards going forward.")
|
|
|
|
if !skipPrompt {
|
|
fmt.Printf("\nProceed? Type 'yes' to update %d rows: ", candidate)
|
|
s, err := bufio.NewReader(os.Stdin).ReadString('\n')
|
|
if err != nil {
|
|
log.Fatal().Err(err).Msg("failed to read confirmation")
|
|
}
|
|
if strings.TrimSpace(s) != "yes" {
|
|
fmt.Println("Aborted.")
|
|
return
|
|
}
|
|
}
|
|
|
|
res := db.Exec(`
|
|
UPDATE notifications_notification
|
|
SET sent = true, sent_at = COALESCE(updated_at, NOW())
|
|
WHERE sent = false AND error_message IS NOT NULL AND error_message <> ''
|
|
`)
|
|
if res.Error != nil {
|
|
log.Fatal().Err(res.Error).Msg("failed to update rows")
|
|
}
|
|
fmt.Printf("OK — updated %d rows.\n", res.RowsAffected)
|
|
}
|
|
|
|
// overallTotals shows the high-level sent/pending/read split.
|
|
func overallTotals(db *gorm.DB) {
|
|
type row struct {
|
|
Total int64
|
|
Sent int64
|
|
Pending int64
|
|
Read int64
|
|
Errored int64
|
|
}
|
|
var r row
|
|
db.Raw(`
|
|
SELECT
|
|
COUNT(*) AS total,
|
|
COUNT(*) FILTER (WHERE sent = true) AS sent,
|
|
COUNT(*) FILTER (WHERE sent = false) AS pending,
|
|
COUNT(*) FILTER (WHERE read = true) AS read,
|
|
COUNT(*) FILTER (WHERE error_message IS NOT NULL AND error_message <> '') AS errored
|
|
FROM notifications_notification
|
|
`).Scan(&r)
|
|
|
|
fmt.Println("\n# Overall notification counts")
|
|
fmt.Printf(" total: %d\n", r.Total)
|
|
fmt.Printf(" sent: %d\n", r.Sent)
|
|
fmt.Printf(" pending: %d\n", r.Pending)
|
|
fmt.Printf(" read: %d\n", r.Read)
|
|
fmt.Printf(" errored: %d (rows with non-empty error_message)\n", r.Errored)
|
|
}
|
|
|
|
// pendingByType breaks the pending rows down by type and age.
|
|
func pendingByType(db *gorm.DB) {
|
|
type row struct {
|
|
NotificationType string
|
|
PendingCount int64
|
|
Oldest *time.Time
|
|
Newest *time.Time
|
|
WithErrors int64
|
|
Last24h int64
|
|
Last7d int64
|
|
}
|
|
var rows []row
|
|
db.Raw(`
|
|
SELECT
|
|
notification_type,
|
|
COUNT(*) AS pending_count,
|
|
MIN(created_at) AS oldest,
|
|
MAX(created_at) AS newest,
|
|
COUNT(*) FILTER (WHERE error_message IS NOT NULL AND error_message <> '') AS with_errors,
|
|
COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '24 hours') AS last_24h,
|
|
COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '7 days') AS last_7d
|
|
FROM notifications_notification
|
|
WHERE sent = false
|
|
GROUP BY notification_type
|
|
ORDER BY MAX(created_at) DESC NULLS LAST
|
|
`).Scan(&rows)
|
|
|
|
fmt.Println("\n# Pending rows by type")
|
|
if len(rows) == 0 {
|
|
fmt.Println(" (no pending notifications)")
|
|
return
|
|
}
|
|
fmt.Printf(" %-22s %7s %7s %7s %7s %-19s %-19s\n",
|
|
"TYPE", "PENDING", "ERRORED", "LAST24H", "LAST7D", "OLDEST", "NEWEST")
|
|
for _, r := range rows {
|
|
fmt.Printf(" %-22s %7d %7d %7d %7d %-19s %-19s\n",
|
|
r.NotificationType, r.PendingCount, r.WithErrors, r.Last24h, r.Last7d,
|
|
fmtTime(r.Oldest), fmtTime(r.Newest))
|
|
}
|
|
}
|
|
|
|
// recentPending shows the 5 most recent pending rows with full detail.
|
|
func recentPending(db *gorm.DB) {
|
|
type row struct {
|
|
ID uint
|
|
UserID uint
|
|
NotificationType string
|
|
Title string
|
|
Body string
|
|
ErrorMessage string
|
|
CreatedAt time.Time
|
|
}
|
|
var rows []row
|
|
db.Raw(`
|
|
SELECT id, user_id, notification_type, title, body, COALESCE(error_message, '') AS error_message, created_at
|
|
FROM notifications_notification
|
|
WHERE sent = false
|
|
ORDER BY created_at DESC
|
|
LIMIT 5
|
|
`).Scan(&rows)
|
|
|
|
fmt.Println("\n# 5 most recent pending notifications")
|
|
if len(rows) == 0 {
|
|
fmt.Println(" (none)")
|
|
return
|
|
}
|
|
for _, r := range rows {
|
|
errPart := ""
|
|
if r.ErrorMessage != "" {
|
|
errPart = fmt.Sprintf("\n error: %s", r.ErrorMessage)
|
|
}
|
|
fmt.Printf(" [%d] user=%d %s %s%s\n title: %s\n body: %s\n",
|
|
r.ID, r.UserID, r.CreatedAt.Format("2006-01-02 15:04:05"), r.NotificationType, errPart,
|
|
truncate(r.Title, 100), truncate(r.Body, 100))
|
|
}
|
|
}
|
|
|
|
// deviceCounts shows how many push devices are registered (active vs inactive).
|
|
func deviceCounts(db *gorm.DB) {
|
|
type row struct {
|
|
Total int64
|
|
Active int64
|
|
WithUser int64
|
|
DistinctUsers int64
|
|
}
|
|
|
|
fmt.Println("\n# Registered push devices")
|
|
for _, t := range []struct {
|
|
label string
|
|
table string
|
|
}{
|
|
{"APNs (iOS)", "push_notifications_apnsdevice"},
|
|
{"GCM (Android)", "push_notifications_gcmdevice"},
|
|
} {
|
|
var r row
|
|
err := db.Raw(fmt.Sprintf(`
|
|
SELECT
|
|
COUNT(*) AS total,
|
|
COUNT(*) FILTER (WHERE active = true) AS active,
|
|
COUNT(*) FILTER (WHERE user_id IS NOT NULL) AS with_user,
|
|
COUNT(DISTINCT user_id) AS distinct_users
|
|
FROM %s
|
|
`, t.table)).Scan(&r).Error
|
|
if err != nil {
|
|
fmt.Printf(" %-15s ERROR: %v\n", t.label, err)
|
|
continue
|
|
}
|
|
fmt.Printf(" %-15s total=%-5d active=%-5d with_user=%-5d distinct_users=%d\n",
|
|
t.label, r.Total, r.Active, r.WithUser, r.DistinctUsers)
|
|
}
|
|
}
|
|
|
|
// buildDSN assembles a keyword/value Postgres connection string from the
// environment and returns it together with the bare host name (for display).
//
// Inputs:
//   - DB_HOST, POSTGRES_USER, POSTGRES_DB: required.
//   - DB_PORT: optional, defaults to 5432; must parse as an integer.
//   - DB_SSLMODE: optional, defaults to "require".
//   - POSTGRES_PASSWORD: required; when the env var is empty and passwordFile
//     is non-empty, the password is read from that file with trailing CR/LF
//     characters stripped.
//
// Values containing whitespace, single quotes or backslashes are quoted and
// escaped so that, for example, a password with a space does not break the
// connection string. Values without such characters are emitted unchanged.
//
// On any error both returned strings are empty.
func buildDSN(passwordFile string) (dsn, host string, err error) {
	host = os.Getenv("DB_HOST")
	user := os.Getenv("POSTGRES_USER")
	dbname := os.Getenv("POSTGRES_DB")
	sslmode := os.Getenv("DB_SSLMODE")
	if sslmode == "" {
		sslmode = "require"
	}

	port := 5432
	if s := os.Getenv("DB_PORT"); s != "" {
		p, perr := strconv.Atoi(s)
		if perr != nil {
			return "", "", fmt.Errorf("invalid DB_PORT %q: %w", s, perr)
		}
		port = p
	}

	password := os.Getenv("POSTGRES_PASSWORD")
	if password == "" && passwordFile != "" {
		b, rerr := os.ReadFile(passwordFile)
		if rerr != nil {
			return "", "", fmt.Errorf("POSTGRES_PASSWORD not set and could not read %s: %w", passwordFile, rerr)
		}
		// Secret files commonly end with a newline that is not part of the secret.
		password = strings.TrimRight(string(b), "\r\n")
	}

	// Collect every missing setting so the user can fix them all in one go.
	var missing []string
	if host == "" {
		missing = append(missing, "DB_HOST")
	}
	if user == "" {
		missing = append(missing, "POSTGRES_USER")
	}
	if dbname == "" {
		missing = append(missing, "POSTGRES_DB")
	}
	if password == "" {
		missing = append(missing, "POSTGRES_PASSWORD")
	}
	if len(missing) > 0 {
		return "", "", fmt.Errorf("missing required env vars: %s", strings.Join(missing, ", "))
	}

	dsn = fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=%s",
		quoteDSNValue(host), port, quoteDSNValue(user), quoteDSNValue(password),
		quoteDSNValue(dbname), quoteDSNValue(sslmode))
	return dsn, host, nil
}

// quoteDSNValue returns v in a form that is safe to place after "key=" in a
// keyword/value connection string. Non-empty values free of whitespace, single
// quotes and backslashes are returned as-is; anything else is wrapped in
// single quotes with backslashes and single quotes backslash-escaped.
func quoteDSNValue(v string) string {
	if v != "" && !strings.ContainsAny(v, " \t\r\n\v\f'\\") {
		return v
	}
	return "'" + strings.NewReplacer(`\`, `\\`, `'`, `\'`).Replace(v) + "'"
}
|
|
|
|
// stringFlag is a minimal replacement for flag.String that scans os.Args
// directly. It returns a pointer to the value of the last argument of the form
// --name=value, or to def when no such argument is present. Only the
// "--name=value" spelling is recognised; "--name value" is ignored. The usage
// text is accepted for call-site documentation only and is not used.
func stringFlag(name, def, _usage string) *string {
	value := def
	marker := "--" + name + "="
	args := os.Args[1:]
	// Walk backwards so the first hit is the last occurrence on the command line.
	for i := len(args) - 1; i >= 0; i-- {
		if strings.HasPrefix(args[i], marker) {
			value = args[i][len(marker):]
			break
		}
	}
	return &value
}
|
|
|
|
// boolFlag returns a pointer to true when the exact argument --name appears
// anywhere in os.Args[1:], and a pointer to false otherwise. No value form
// (such as --name=true) is recognised. The usage text is accepted for
// call-site documentation only and is not used.
func boolFlag(name, _usage string) *bool {
	found := false
	for _, arg := range os.Args[1:] {
		if arg == "--"+name {
			found = true
			break
		}
	}
	return &found
}
|
|
|
|
// fmtTime renders t as "YYYY-MM-DD HH:MM:SS", or "-" when t is nil (for
// example when a nullable timestamp column had no value).
func fmtTime(t *time.Time) string {
	const layout = "2006-01-02 15:04:05"
	if t != nil {
		return t.Format(layout)
	}
	return "-"
}
|
|
|
|
// truncate shortens s to at most n bytes and appends "…" when anything was
// cut off; strings of n bytes or fewer are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if byte n is a
// continuation byte, the cut point moves back to the start of that rune, so
// the result stays valid UTF-8 for valid input. A negative n is treated as 0.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}
	cut := n
	// UTF-8 continuation bytes have the bit pattern 10xxxxxx. cut < len(s)
	// holds here, so indexing s[cut] is always in range.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "…"
}
|