diff options
| -rw-r--r-- | f3s/goprecords/helm-chart/templates/service.yaml | 4 | ||||
| -rw-r--r-- | f3s/prometheus/manifests/goprecords-alerts.yaml | 31 |
2 files changed, 35 insertions, 0 deletions
diff --git a/f3s/goprecords/helm-chart/templates/service.yaml b/f3s/goprecords/helm-chart/templates/service.yaml
index 9266d46..4459775 100644
--- a/f3s/goprecords/helm-chart/templates/service.yaml
+++ b/f3s/goprecords/helm-chart/templates/service.yaml
@@ -11,5 +11,9 @@ spec:
       port: 80
       protocol: TCP
       targetPort: 8080
+    - name: metrics
+      port: 8080
+      protocol: TCP
+      targetPort: 8080
   selector:
     app: goprecords
diff --git a/f3s/prometheus/manifests/goprecords-alerts.yaml b/f3s/prometheus/manifests/goprecords-alerts.yaml
new file mode 100644
index 0000000..a04698a
--- /dev/null
+++ b/f3s/prometheus/manifests/goprecords-alerts.yaml
@@ -0,0 +1,31 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: goprecords-alerts
+  namespace: monitoring
+  labels:
+    release: prometheus
+spec:
+  groups:
+    - name: goprecords
+      interval: 5m
+      rules:
+        - alert: GoprecordsHostNotReporting
+          expr: |
+            (time() - goprecords_host_records_last_update_timestamp_seconds{excluded="false"})
+            > (5 * 30 * 24 * 3600)
+          for: 1h
+          labels:
+            severity: warning
+            component: goprecords
+          annotations:
+            summary: "Host {{ $labels.host }} has not reported uptimed stats for >5 months"
+            description: >
+              Host {{ $labels.host }} last updated its records
+              {{ $value | humanizeDuration }} ago.
+              Expected at most every 5 months.
+            action: >
+              Check whether uptimed is running and the upload script is scheduled on {{ $labels.host }}.
+              To silence permanently: kubectl exec -n services deployment/goprecords --
+              goprecords exclude -db=/data/stats/goprecords-auth.db
+              -reason="decommissioned" {{ $labels.host }}
