summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Paul Buetow <paul@buetow.org> 2026-04-16 10:27:37 +0300
committer: Paul Buetow <paul@buetow.org> 2026-04-16 10:27:37 +0300
commit: 6c27a0cd1d74b3463e51fd15753d792b55d1a733 (patch)
tree: d2d5697d3d3e3996c175a238ef902d9e1fb5b35d
parent: 426dfec90546f8dd9f5965d9df5567f625246515 (diff)
goprecords: add Prometheus scraping and stale-host alert rule
- service.yaml: add 'metrics' port (8080) so kubernetes SD auto-discovers
  the /metrics endpoint alongside the existing http port (80)
- prometheus/manifests/goprecords-alerts.yaml: GoprecordsHostNotReporting
  fires (warning) when a non-excluded host last reported >5 months ago

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
-rw-r--r-- f3s/goprecords/helm-chart/templates/service.yaml | 4
-rw-r--r-- f3s/prometheus/manifests/goprecords-alerts.yaml | 31
2 files changed, 35 insertions, 0 deletions
diff --git a/f3s/goprecords/helm-chart/templates/service.yaml b/f3s/goprecords/helm-chart/templates/service.yaml
index 9266d46..4459775 100644
--- a/f3s/goprecords/helm-chart/templates/service.yaml
+++ b/f3s/goprecords/helm-chart/templates/service.yaml
@@ -11,5 +11,9 @@ spec:
port: 80
protocol: TCP
targetPort: 8080
+ - name: metrics
+ port: 8080
+ protocol: TCP
+ targetPort: 8080
selector:
app: goprecords
diff --git a/f3s/prometheus/manifests/goprecords-alerts.yaml b/f3s/prometheus/manifests/goprecords-alerts.yaml
new file mode 100644
index 0000000..a04698a
--- /dev/null
+++ b/f3s/prometheus/manifests/goprecords-alerts.yaml
@@ -0,0 +1,31 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: goprecords-alerts
+  namespace: monitoring
+  labels:
+    release: prometheus  # NOTE(review): presumably matched by the operator's rule selector — confirm
+spec:
+  groups:
+    - name: goprecords
+      interval: 5m  # evaluation interval for the rules in this group
+      rules:
+        - alert: GoprecordsHostNotReporting
+          expr: |  # age of a non-excluded host's last update, in seconds, exceeds 5 x 30 days
+            (time() - goprecords_host_records_last_update_timestamp_seconds{excluded="false"})
+            > (5 * 30 * 24 * 3600)
+          for: 1h  # the condition must hold for one hour before the alert fires
+          labels:
+            severity: warning
+            component: goprecords
+          annotations:
+            summary: "Host {{ $labels.host }} has not reported uptimed stats for >5 months"
+            description: >
+              Host {{ $labels.host }} last updated its records
+              {{ $value | humanizeDuration }} ago.
+              An update is expected at least once every 5 months.
+            action: >
+              Check whether uptimed is running and the upload script is scheduled on {{ $labels.host }}.
+              To silence permanently: kubectl exec -n services deployment/goprecords --
+              goprecords exclude -db=/data/stats/goprecords-auth.db
+              -reason="decommissioned" {{ $labels.host }}