From 6c27a0cd1d74b3463e51fd15753d792b55d1a733 Mon Sep 17 00:00:00 2001
From: Paul Buetow
Date: Thu, 16 Apr 2026 10:27:37 +0300
Subject: goprecords: add Prometheus scraping and stale-host alert rule

- service.yaml: add 'metrics' port (8080) so kubernetes SD auto-discovers
  the /metrics endpoint alongside the existing http port (80)
- prometheus/manifests/goprecords-alerts.yaml: GoprecordsHostNotReporting
  fires (warning) when a non-excluded host last reported >5 months ago

Co-Authored-By: Claude Sonnet 4.6
---
Review notes: the annotation block scalars now use strip chomping (>-), the
"expected" sentence of the description is reworded, and explanatory YAML
comments were added. Hunk line counts and the diffstat are updated to match;
the blob ids on the index lines predate these edits.

 f3s/goprecords/helm-chart/templates/service.yaml |  6 +++++
 f3s/prometheus/manifests/goprecords-alerts.yaml  | 38 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 f3s/prometheus/manifests/goprecords-alerts.yaml

diff --git a/f3s/goprecords/helm-chart/templates/service.yaml b/f3s/goprecords/helm-chart/templates/service.yaml
index 9266d46..4459775 100644
--- a/f3s/goprecords/helm-chart/templates/service.yaml
+++ b/f3s/goprecords/helm-chart/templates/service.yaml
@@ -11,5 +11,11 @@ spec:
       port: 80
       protocol: TCP
       targetPort: 8080
+    # Separately named port for Prometheus scraping; it forwards to the same
+    # targetPort (8080) as the port-80 entry above.
+    - name: metrics
+      port: 8080
+      protocol: TCP
+      targetPort: 8080
   selector:
     app: goprecords
diff --git a/f3s/prometheus/manifests/goprecords-alerts.yaml b/f3s/prometheus/manifests/goprecords-alerts.yaml
new file mode 100644
index 0000000..a04698a
--- /dev/null
+++ b/f3s/prometheus/manifests/goprecords-alerts.yaml
@@ -0,0 +1,38 @@
+# PrometheusRule defining the alerts for the goprecords service.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: goprecords-alerts
+  namespace: monitoring
+  labels:
+    # NOTE(review): presumably the label the Prometheus rule selector matches
+    # on; confirm against the Prometheus release configuration.
+    release: prometheus
+spec:
+  groups:
+    - name: goprecords
+      interval: 5m
+      rules:
+        # Fires when a host with excluded="false" last updated its records
+        # more than 5 * 30 * 24 * 3600 s (150 days) ago, sustained for 1h.
+        - alert: GoprecordsHostNotReporting
+          expr: |
+            (time() - goprecords_host_records_last_update_timestamp_seconds{excluded="false"})
+            > (5 * 30 * 24 * 3600)
+          for: 1h
+          labels:
+            severity: warning
+            component: goprecords
+          annotations:
+            summary: "Host {{ $labels.host }} has not reported uptimed stats for >5 months"
+            # Folded scalars use strip chomping (>-) so the rendered
+            # annotation text does not end with a stray newline.
+            description: >-
+              Host {{ $labels.host }} last updated its records
+              {{ $value | humanizeDuration }} ago.
+              A report is expected at least once every 5 months.
+            action: >-
+              Check whether uptimed is running and the upload script is scheduled on {{ $labels.host }}.
+              To silence permanently: kubectl exec -n services deployment/goprecords --
+              goprecords exclude -db=/data/stats/goprecords-auth.db
+              -reason="decommissioned" {{ $labels.host }}
-- 
cgit v1.2.3