diff options
| author | Paul Buetow <paul@buetow.org> | 2026-04-16 10:27:37 +0300 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-04-16 10:27:37 +0300 |
| commit | 6c27a0cd1d74b3463e51fd15753d792b55d1a733 (patch) | |
| tree | d2d5697d3d3e3996c175a238ef902d9e1fb5b35d | |
| parent | 426dfec90546f8dd9f5965d9df5567f625246515 (diff) | |
goprecords: add Prometheus scraping and stale-host alert rule
- service.yaml: add 'metrics' port (8080) so kubernetes SD auto-discovers
the /metrics endpoint alongside the existing http port (80)
- prometheus/manifests/goprecords-alerts.yaml: GoprecordsHostNotReporting
fires (warning) when a non-excluded host last reported >5 months ago
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | f3s/goprecords/helm-chart/templates/service.yaml | 4 |
| -rw-r--r-- | f3s/prometheus/manifests/goprecords-alerts.yaml | 31 |
2 files changed, 35 insertions, 0 deletions
```diff
diff --git a/f3s/goprecords/helm-chart/templates/service.yaml b/f3s/goprecords/helm-chart/templates/service.yaml
index 9266d46..4459775 100644
--- a/f3s/goprecords/helm-chart/templates/service.yaml
+++ b/f3s/goprecords/helm-chart/templates/service.yaml
@@ -11,5 +11,9 @@ spec:
       port: 80
       protocol: TCP
       targetPort: 8080
+    - name: metrics
+      port: 8080
+      protocol: TCP
+      targetPort: 8080
   selector:
     app: goprecords
diff --git a/f3s/prometheus/manifests/goprecords-alerts.yaml b/f3s/prometheus/manifests/goprecords-alerts.yaml
new file mode 100644
index 0000000..a04698a
--- /dev/null
+++ b/f3s/prometheus/manifests/goprecords-alerts.yaml
@@ -0,0 +1,31 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: goprecords-alerts
+  namespace: monitoring
+  labels:
+    release: prometheus
+spec:
+  groups:
+    - name: goprecords
+      interval: 5m
+      rules:
+        - alert: GoprecordsHostNotReporting
+          expr: |
+            (time() - goprecords_host_records_last_update_timestamp_seconds{excluded="false"})
+              > (5 * 30 * 24 * 3600)
+          for: 1h
+          labels:
+            severity: warning
+            component: goprecords
+          annotations:
+            summary: "Host {{ $labels.host }} has not reported uptimed stats for >5 months"
+            description: >
+              Host {{ $labels.host }} last updated its records
+              {{ $value | humanizeDuration }} ago.
+              Expected at most every 5 months.
+            action: >
+              Check whether uptimed is running and the upload script is scheduled on {{ $labels.host }}.
+              To silence permanently: kubectl exec -n services deployment/goprecords --
+              goprecords exclude -db=/data/stats/goprecords-auth.db
+              -reason="decommissioned" {{ $labels.host }}
```
