tools: add admin-reset and notif-diag operational CLIs
Two small Go CLIs for production ops tasks that previously required ad-hoc
psql or kubectl gymnastics. Both load DB credentials from prod.env-style
env vars and, when POSTGRES_PASSWORD is empty in the environment, fall back
to reading it from deploy/secrets/postgres_password.txt by default. The
workflow is therefore `set -a && source deploy/prod.env && set +a`
followed by `go run`.
cmd/admin-reset/main.go:
  --list                print all admin_users rows
  --verify --email X    bcrypt-check a password against the stored hash,
                        using the same case-insensitive lookup the live
                        /api/admin/auth/login endpoint uses
  --new-email Y         rename an admin's email (with unique-index check)
  default (--email X)   prompt for a new password twice (no echo, min 12
                        chars), bcrypt at DefaultCost, update the row
cmd/notif-diag/main.go:
  default                 print pending/sent counts, a breakdown by type and
                          age, the 5 most recent pending rows with their
                          error_message, and registered APNs/FCM device
                          counts
  --mark-failed-as-sent   cosmetic cleanup — UPDATE pending rows that have
                          a recorded error to sent=true,
                          sent_at=COALESCE(updated_at, NOW())
  --yes                   skip the interactive confirmation prompt
Both bypass internal/config.Load() entirely so they don't need
SECRET_KEY or other unrelated env vars to run. .gitignore excludes the
build artifacts at /admin-reset and /notif-diag.
go.mod adds golang.org/x/term v0.41.0 (promoted from indirect to direct)
for no-echo password input in admin-reset.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,333 @@
|
||||
// notif-diag is a CLI for inspecting and (optionally) cleaning up stuck
|
||||
// notification rows. Default mode is read-only — runs SELECTs and prints a
|
||||
// summary. With --mark-failed-as-sent, marks pending rows that already have a
|
||||
// recorded error as sent (cosmetic — no retry, no resend).
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// set -a && source deploy/prod.env && set +a
|
||||
// go run ./cmd/notif-diag # diagnose
|
||||
// go run ./cmd/notif-diag --mark-failed-as-sent --yes # clean up errored backlog
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gorm.io/driver/postgres"
|
||||
"gorm.io/gorm"
|
||||
"gorm.io/gorm/logger"
|
||||
)
|
||||
|
||||
func main() {
|
||||
passwordFile := stringFlag("password-file", "deploy/secrets/postgres_password.txt",
|
||||
"Path to file containing POSTGRES_PASSWORD (used if env var is empty)")
|
||||
markFailed := boolFlag("mark-failed-as-sent",
|
||||
"Mark every pending row with a non-empty error_message as sent. Cosmetic only — does not retry the push.")
|
||||
yes := boolFlag("yes", "Skip the interactive confirmation prompt for destructive actions.")
|
||||
|
||||
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339})
|
||||
|
||||
dsn, host, err := buildDSN(*passwordFile)
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msg("failed to build database DSN")
|
||||
}
|
||||
|
||||
db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
|
||||
Logger: logger.Default.LogMode(logger.Silent),
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msg("failed to connect to database")
|
||||
}
|
||||
|
||||
fmt.Printf("DB host: %s\n", host)
|
||||
fmt.Println(strings.Repeat("=", 80))
|
||||
|
||||
overallTotals(db)
|
||||
pendingByType(db)
|
||||
recentPending(db)
|
||||
deviceCounts(db)
|
||||
|
||||
if *markFailed {
|
||||
markFailedAsSent(db, *yes)
|
||||
}
|
||||
}
|
||||
|
||||
// markFailedAsSent updates pending rows whose error_message is non-empty,
|
||||
// flipping them to sent=true with sent_at=updated_at. This is purely cosmetic:
|
||||
// it removes them from the "pending" count so dashboards and the diag tool
|
||||
// don't keep flagging an old, unfixable backlog. It does NOT re-send anything.
|
||||
func markFailedAsSent(db *gorm.DB, skipPrompt bool) {
|
||||
var candidate int64
|
||||
if err := db.Raw(`
|
||||
SELECT COUNT(*) FROM notifications_notification
|
||||
WHERE sent = false AND error_message IS NOT NULL AND error_message <> ''
|
||||
`).Scan(&candidate).Error; err != nil {
|
||||
log.Fatal().Err(err).Msg("failed to count cleanup candidates")
|
||||
}
|
||||
|
||||
fmt.Printf("\n# Cleanup candidate count: %d\n", candidate)
|
||||
if candidate == 0 {
|
||||
fmt.Println(" (nothing to clean up)")
|
||||
return
|
||||
}
|
||||
fmt.Println(" These rows have a recorded send error and will never be retried.")
|
||||
fmt.Println(" Marking them sent=true is cosmetic — it just prevents them from")
|
||||
fmt.Println(" showing up as pending in admin dashboards going forward.")
|
||||
|
||||
if !skipPrompt {
|
||||
fmt.Printf("\nProceed? Type 'yes' to update %d rows: ", candidate)
|
||||
s, err := bufio.NewReader(os.Stdin).ReadString('\n')
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msg("failed to read confirmation")
|
||||
}
|
||||
if strings.TrimSpace(s) != "yes" {
|
||||
fmt.Println("Aborted.")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
res := db.Exec(`
|
||||
UPDATE notifications_notification
|
||||
SET sent = true, sent_at = COALESCE(updated_at, NOW())
|
||||
WHERE sent = false AND error_message IS NOT NULL AND error_message <> ''
|
||||
`)
|
||||
if res.Error != nil {
|
||||
log.Fatal().Err(res.Error).Msg("failed to update rows")
|
||||
}
|
||||
fmt.Printf("OK — updated %d rows.\n", res.RowsAffected)
|
||||
}
|
||||
|
||||
// overallTotals shows the high-level sent/pending/read split.
|
||||
func overallTotals(db *gorm.DB) {
|
||||
type row struct {
|
||||
Total int64
|
||||
Sent int64
|
||||
Pending int64
|
||||
Read int64
|
||||
Errored int64
|
||||
}
|
||||
var r row
|
||||
db.Raw(`
|
||||
SELECT
|
||||
COUNT(*) AS total,
|
||||
COUNT(*) FILTER (WHERE sent = true) AS sent,
|
||||
COUNT(*) FILTER (WHERE sent = false) AS pending,
|
||||
COUNT(*) FILTER (WHERE read = true) AS read,
|
||||
COUNT(*) FILTER (WHERE error_message IS NOT NULL AND error_message <> '') AS errored
|
||||
FROM notifications_notification
|
||||
`).Scan(&r)
|
||||
|
||||
fmt.Println("\n# Overall notification counts")
|
||||
fmt.Printf(" total: %d\n", r.Total)
|
||||
fmt.Printf(" sent: %d\n", r.Sent)
|
||||
fmt.Printf(" pending: %d\n", r.Pending)
|
||||
fmt.Printf(" read: %d\n", r.Read)
|
||||
fmt.Printf(" errored: %d (rows with non-empty error_message)\n", r.Errored)
|
||||
}
|
||||
|
||||
// pendingByType breaks the pending rows down by type and age.
|
||||
func pendingByType(db *gorm.DB) {
|
||||
type row struct {
|
||||
NotificationType string
|
||||
PendingCount int64
|
||||
Oldest *time.Time
|
||||
Newest *time.Time
|
||||
WithErrors int64
|
||||
Last24h int64
|
||||
Last7d int64
|
||||
}
|
||||
var rows []row
|
||||
db.Raw(`
|
||||
SELECT
|
||||
notification_type,
|
||||
COUNT(*) AS pending_count,
|
||||
MIN(created_at) AS oldest,
|
||||
MAX(created_at) AS newest,
|
||||
COUNT(*) FILTER (WHERE error_message IS NOT NULL AND error_message <> '') AS with_errors,
|
||||
COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '24 hours') AS last_24h,
|
||||
COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '7 days') AS last_7d
|
||||
FROM notifications_notification
|
||||
WHERE sent = false
|
||||
GROUP BY notification_type
|
||||
ORDER BY MAX(created_at) DESC NULLS LAST
|
||||
`).Scan(&rows)
|
||||
|
||||
fmt.Println("\n# Pending rows by type")
|
||||
if len(rows) == 0 {
|
||||
fmt.Println(" (no pending notifications)")
|
||||
return
|
||||
}
|
||||
fmt.Printf(" %-22s %7s %7s %7s %7s %-19s %-19s\n",
|
||||
"TYPE", "PENDING", "ERRORED", "LAST24H", "LAST7D", "OLDEST", "NEWEST")
|
||||
for _, r := range rows {
|
||||
fmt.Printf(" %-22s %7d %7d %7d %7d %-19s %-19s\n",
|
||||
r.NotificationType, r.PendingCount, r.WithErrors, r.Last24h, r.Last7d,
|
||||
fmtTime(r.Oldest), fmtTime(r.Newest))
|
||||
}
|
||||
}
|
||||
|
||||
// recentPending shows the 5 most recent pending rows with full detail.
|
||||
func recentPending(db *gorm.DB) {
|
||||
type row struct {
|
||||
ID uint
|
||||
UserID uint
|
||||
NotificationType string
|
||||
Title string
|
||||
Body string
|
||||
ErrorMessage string
|
||||
CreatedAt time.Time
|
||||
}
|
||||
var rows []row
|
||||
db.Raw(`
|
||||
SELECT id, user_id, notification_type, title, body, COALESCE(error_message, '') AS error_message, created_at
|
||||
FROM notifications_notification
|
||||
WHERE sent = false
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 5
|
||||
`).Scan(&rows)
|
||||
|
||||
fmt.Println("\n# 5 most recent pending notifications")
|
||||
if len(rows) == 0 {
|
||||
fmt.Println(" (none)")
|
||||
return
|
||||
}
|
||||
for _, r := range rows {
|
||||
errPart := ""
|
||||
if r.ErrorMessage != "" {
|
||||
errPart = fmt.Sprintf("\n error: %s", r.ErrorMessage)
|
||||
}
|
||||
fmt.Printf(" [%d] user=%d %s %s%s\n title: %s\n body: %s\n",
|
||||
r.ID, r.UserID, r.CreatedAt.Format("2006-01-02 15:04:05"), r.NotificationType, errPart,
|
||||
truncate(r.Title, 100), truncate(r.Body, 100))
|
||||
}
|
||||
}
|
||||
|
||||
// deviceCounts shows how many push devices are registered (active vs inactive).
|
||||
func deviceCounts(db *gorm.DB) {
|
||||
type row struct {
|
||||
Total int64
|
||||
Active int64
|
||||
WithUser int64
|
||||
DistinctUsers int64
|
||||
}
|
||||
|
||||
fmt.Println("\n# Registered push devices")
|
||||
for _, t := range []struct {
|
||||
label string
|
||||
table string
|
||||
}{
|
||||
{"APNs (iOS)", "push_notifications_apnsdevice"},
|
||||
{"GCM (Android)", "push_notifications_gcmdevice"},
|
||||
} {
|
||||
var r row
|
||||
err := db.Raw(fmt.Sprintf(`
|
||||
SELECT
|
||||
COUNT(*) AS total,
|
||||
COUNT(*) FILTER (WHERE active = true) AS active,
|
||||
COUNT(*) FILTER (WHERE user_id IS NOT NULL) AS with_user,
|
||||
COUNT(DISTINCT user_id) AS distinct_users
|
||||
FROM %s
|
||||
`, t.table)).Scan(&r).Error
|
||||
if err != nil {
|
||||
fmt.Printf(" %-15s ERROR: %v\n", t.label, err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf(" %-15s total=%-5d active=%-5d with_user=%-5d distinct_users=%d\n",
|
||||
t.label, r.Total, r.Active, r.WithUser, r.DistinctUsers)
|
||||
}
|
||||
}
|
||||
|
||||
// buildDSN assembles a keyword/value Postgres connection string from the
// environment.
//
// Environment variables read:
//   - DB_HOST, POSTGRES_USER, POSTGRES_DB: required.
//   - DB_PORT: optional; must parse as an integer, defaults to 5432.
//   - DB_SSLMODE: optional; defaults to "require".
//   - POSTGRES_PASSWORD: required. When it is empty and passwordFile is
//     non-empty, the password is read from that file instead, with trailing
//     CR/LF characters stripped.
//
// It returns the DSN and the host (returned separately so callers can display
// it without exposing the password). An error is returned for an unparsable
// DB_PORT, an unreadable password file, or any missing required value; the
// missing-value error lists every missing variable at once.
//
// Values that are empty or contain whitespace, a single quote or a backslash
// are single-quoted and escaped, so that, for example, a password containing
// a space is not split into a second, bogus keyword.
func buildDSN(passwordFile string) (dsn, host string, err error) {
	host = os.Getenv("DB_HOST")
	user := os.Getenv("POSTGRES_USER")
	dbname := os.Getenv("POSTGRES_DB")
	sslmode := os.Getenv("DB_SSLMODE")
	if sslmode == "" {
		sslmode = "require"
	}

	port := 5432
	if s := os.Getenv("DB_PORT"); s != "" {
		p, perr := strconv.Atoi(s)
		if perr != nil {
			return "", "", fmt.Errorf("invalid DB_PORT %q: %w", s, perr)
		}
		port = p
	}

	password := os.Getenv("POSTGRES_PASSWORD")
	if password == "" && passwordFile != "" {
		b, rerr := os.ReadFile(passwordFile)
		if rerr != nil {
			return "", "", fmt.Errorf("POSTGRES_PASSWORD not set and could not read %s: %w", passwordFile, rerr)
		}
		// Secret files usually end with a newline that is not part of the password.
		password = strings.TrimRight(string(b), "\r\n")
	}

	var missing []string
	if host == "" {
		missing = append(missing, "DB_HOST")
	}
	if user == "" {
		missing = append(missing, "POSTGRES_USER")
	}
	if dbname == "" {
		missing = append(missing, "POSTGRES_DB")
	}
	if password == "" {
		missing = append(missing, "POSTGRES_PASSWORD")
	}
	if len(missing) > 0 {
		return "", "", fmt.Errorf("missing required env vars: %s", strings.Join(missing, ", "))
	}

	dsn = fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s sslmode=%s",
		quoteDSNValue(host), port, quoteDSNValue(user), quoteDSNValue(password),
		quoteDSNValue(dbname), quoteDSNValue(sslmode))
	return dsn, host, nil
}

// quoteDSNValue renders v for use as a value in a keyword/value connection
// string. Plain values are returned unchanged; a value that is empty or
// contains whitespace, a single quote or a backslash is wrapped in single
// quotes with embedded backslashes and single quotes backslash-escaped.
func quoteDSNValue(v string) string {
	if v != "" && !strings.ContainsAny(v, " \t\r\n'\\") {
		return v
	}
	return "'" + strings.NewReplacer(`\`, `\\`, `'`, `\'`).Replace(v) + "'"
}
|
||||
|
||||
// stringFlag is a minimal stand-in for flag.String that scans os.Args
// directly. It returns a pointer to the value given for --name, or to def
// when the flag is absent.
//
// Two spellings are recognised:
//
//	--name=value
//	--name value   (the next argument is taken unless it starts with "--")
//
// When the flag appears more than once, the last occurrence wins. _usage is
// accepted so call sites can document the flag; it is never printed.
func stringFlag(name, def, _usage string) *string {
	v := def
	long := "--" + name
	withEq := long + "="

	// Guard the slice: os.Args[1:] panics when os.Args is empty.
	var args []string
	if len(os.Args) > 1 {
		args = os.Args[1:]
	}

	for i := 0; i < len(args); i++ {
		a := args[i]
		switch {
		case strings.HasPrefix(a, withEq):
			v = strings.TrimPrefix(a, withEq)
		case a == long && i+1 < len(args) && !strings.HasPrefix(args[i+1], "--"):
			// Space-separated form: consume the following argument as the value.
			i++
			v = args[i]
		}
	}
	return &v
}
|
||||
|
||||
// boolFlag reports, via a pointer to a fresh bool, whether the exact argument
// --name occurs anywhere after the program name in os.Args. Only the bare
// form matches; "--name=true" does not. _usage is accepted so call sites can
// document the flag; it is never printed.
func boolFlag(name, _usage string) *bool {
	present := false
	args := os.Args[1:]
	for i := 0; i < len(args) && !present; i++ {
		present = args[i] == "--"+name
	}
	return &present
}
|
||||
|
||||
// fmtTime renders t as "YYYY-MM-DD HH:MM:SS", or "-" when t is nil.
func fmtTime(t *time.Time) string {
	const layout = "2006-01-02 15:04:05"
	if t != nil {
		return t.Format(layout)
	}
	return "-"
}
|
||||
|
||||
// truncate shortens s to at most n bytes and appends "…" when anything was cut
// off. Strings of n bytes or fewer are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: it backs off to the
// nearest rune boundary at or before byte n, so the result may be slightly
// shorter than n bytes (plus the ellipsis). A negative n is treated as 0.
func truncate(s string, n int) string {
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed n.
	cut := 0
	for i := range s {
		if i > n {
			break
		}
		cut = i
	}
	return s[:cut] + "…"
}
|
||||
Reference in New Issue
Block a user