summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2026-01-21 23:40:14 +0200
committerPaul Buetow <paul@buetow.org>2026-01-21 23:40:14 +0200
commit4e3459bee4ecc8ceef33b0a876e584b494ad4e4a (patch)
tree3cd76c41bef90f8ccc9d5fda8b8031b0f9281c03
parent64c4f058e6f9975effb38e952531458054f512c3 (diff)
add OnlyIfNotExists alert suppression feature
Adds the ability to suppress alerts during maintenance windows by checking for the existence of a file. When the file exists and is recent (within the configured max age), matching alerts are excluded from email reports.

Features:
- Global PrometheusOnlyIfNotExists config for Prometheus alerts
- Per-check OnlyIfNotExists config for individual checks
- Configurable max age (default 86400s) for suppression file
- New "Suppressed alerts" section in email and HTML reports
- Suppressed checks excluded from counts and unhandled sections

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
-rw-r--r--README.md55
-rw-r--r--internal/check.go16
-rw-r--r--internal/config.go12
-rw-r--r--internal/html.go71
-rw-r--r--internal/html_test.go5
-rw-r--r--internal/run.go2
-rw-r--r--internal/state.go83
-rw-r--r--internal/suppress.go59
-rw-r--r--internal/suppress_test.go163
9 files changed, 425 insertions, 41 deletions
diff --git a/README.md b/README.md
index 00045da..4b9d39f 100644
--- a/README.md
+++ b/README.md
@@ -197,6 +197,61 @@ Gogios includes special handling for the Prometheus Watchdog alert, which is typ
This ensures you are immediately notified if Alertmanager stops working, preventing a situation where alerts might not be delivered properly.
+### Alert Suppression (OnlyIfNotExists)
+
+Gogios supports suppressing alerts based on the presence of a file. This is useful for planned maintenance windows where you don't want to receive alerts for expected downtime (e.g., shutting down a Kubernetes cluster overnight).
+
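+When a suppression file exists and is recent (within the configured max age), alerts matching the suppression rule are left out of the regular sections of the email report - they won't appear in the status changed, unhandled alerts, or stale alerts sections, or in the subject line counts. Instead, they are listed in a dedicated "Suppressed alerts" section so you can still see what is being muted. The HTML status page keeps showing the full state in its main sections and adds the same "Suppressed alerts" section.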
+
+#### Prometheus Alert Suppression
+
+To suppress all Prometheus alerts during maintenance:
+
+```json
+{
+ "PrometheusHosts": ["localhost:9090"],
+ "PrometheusOnlyIfNotExists": "/tmp/k8s_maintenance",
+ "PrometheusOnlyIfNotExistsMaxS": 86400
+}
+```
+
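+* `PrometheusOnlyIfNotExists`: Path to the suppression file. When this file exists and is recent, all Prometheus alerts are suppressed from email notifications. Prometheus alerts are recognized by their check name starting with `Prometheus`, so a regular check whose name begins with `Prometheus` is governed by this setting as well.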
+* `PrometheusOnlyIfNotExistsMaxS`: Maximum age in seconds for the suppression file (default: 86400 = 24 hours). If the file is older than this, alerts are sent normally.
+
+#### Per-Check Alert Suppression
+
+Individual checks can also be suppressed:
+
+```json
+{
+ "Checks": {
+ "Check HTTPS myserver.example.com": {
+ "Plugin": "/usr/local/libexec/nagios/check_http",
+ "Args": ["-H", "myserver.example.com", "-S"],
+ "OnlyIfNotExists": "/tmp/myserver_maintenance",
+ "OnlyIfNotExistsMaxS": 3600
+ }
+ }
+}
+```
+
+* `OnlyIfNotExists`: Path to the suppression file for this specific check.
+* `OnlyIfNotExistsMaxS`: Maximum age in seconds for the suppression file. If set to 0 or omitted, the global `PrometheusOnlyIfNotExistsMaxS` value is used instead (86400 seconds = 24 hours unless configured otherwise).
+
+#### Usage Example
+
+Before starting maintenance:
+```bash
+touch /tmp/k8s_maintenance
+```
+
+After maintenance is complete:
+```bash
+rm /tmp/k8s_maintenance
+```
+
+The file's modification time is checked against the max age, so even if you forget to remove the file, alerts will resume after the configured period.
+
## Running Gogios
Now it is time to give it a first run. On OpenBSD, do:
diff --git a/internal/check.go b/internal/check.go
index a171c29..aabb3ec 100644
--- a/internal/check.go
+++ b/internal/check.go
@@ -9,13 +9,15 @@ import (
)
type check struct {
- Plugin string
- Args []string
- DependsOn []string `json:"DependsOn,omitempty"`
- Retries int `json:"Retries,omitempty"`
- RetryInterval int `json:"RetryInterval,omitempty"`
- RunInterval int `json:"RunInterval,omitempty"`
- RandomSpread int `json:"RandomSpread,omitempty"`
+ Plugin string
+ Args []string
+ DependsOn []string `json:"DependsOn,omitempty"`
+ Retries int `json:"Retries,omitempty"`
+ RetryInterval int `json:"RetryInterval,omitempty"`
+ RunInterval int `json:"RunInterval,omitempty"`
+ RandomSpread int `json:"RandomSpread,omitempty"`
+ OnlyIfNotExists string `json:"OnlyIfNotExists,omitempty"` // Suppress alerts if this file exists and is recent
+ OnlyIfNotExistsMaxS int `json:"OnlyIfNotExistsMaxS,omitempty"` // Max age in seconds for suppression file (uses global default if 0)
}
type namedCheck struct {
diff --git a/internal/config.go b/internal/config.go
index 5d172d6..c3a0d5f 100644
--- a/internal/config.go
+++ b/internal/config.go
@@ -21,9 +21,11 @@ type config struct {
CheckConcurrency int
StaleThreshold int `json:"StaleThreshold,omitempty"`
Federated []string `json:"Federated,omitempty"` // TODO: Document this option
- PrometheusHosts []string `json:"PrometheusHosts,omitempty"`
- PrometheusTimeoutS int `json:"PrometheusTimeoutS,omitempty"`
- Checks map[string]check
+ PrometheusHosts []string `json:"PrometheusHosts,omitempty"`
+ PrometheusTimeoutS int `json:"PrometheusTimeoutS,omitempty"`
+ PrometheusOnlyIfNotExists string `json:"PrometheusOnlyIfNotExists,omitempty"` // Suppress Prometheus alerts if this file exists and is recent
+ PrometheusOnlyIfNotExistsMaxS int `json:"PrometheusOnlyIfNotExistsMaxS,omitempty"` // Max age in seconds for suppression file (default 86400)
+ Checks map[string]check
}
func newConfig(configFile string) (config, error) {
@@ -67,6 +69,10 @@ func newConfig(configFile string) (config, error) {
conf.PrometheusTimeoutS = 2 // Default to 2 seconds
}
+ if conf.PrometheusOnlyIfNotExistsMaxS == 0 {
+ conf.PrometheusOnlyIfNotExistsMaxS = 86400 // Default to 24 hours
+ }
+
if !conf.HTMLDisable && conf.HTMLStatusFile == "" {
conf.HTMLStatusFile = "/var/www/htdocs/buetow.org/self/gogios/index.html"
log.Println("Set HTMLStatusFile to " + conf.HTMLStatusFile)
diff --git a/internal/html.go b/internal/html.go
index 1ccb894..facb575 100644
--- a/internal/html.go
+++ b/internal/html.go
@@ -40,7 +40,7 @@ func persistHTMLReport(state state, subject string, conf config) error {
}
defer f.Close()
- htmlContent := state.htmlReport(subject)
+ htmlContent := state.htmlReport(subject, conf)
if _, err = f.WriteString(htmlContent); err != nil {
log.Println("debug: error writing HTML:", err)
return fmt.Errorf("failed to write HTML: %w", err)
@@ -58,23 +58,30 @@ func persistHTMLReport(state state, subject string, conf config) error {
// htmlReport generates the complete HTML status page.
// Mirrors state.report() pattern from state.go:133-163.
-func (s state) htmlReport(subject string) string {
+// Note: HTML report shows full state without suppression in main sections for visibility,
+// but includes a dedicated "Suppressed alerts" section showing which checks are muted.
+func (s state) htmlReport(subject string, conf config) string {
var sb strings.Builder
+ // Use empty config for main sections so no checks are suppressed.
+ // The HTML status page shows full state for visibility.
+ emptyConf := config{}
+
// Calculate counts for header summary (without generating HTML yet)
- numCriticals := s.countBy(func(cs checkState) bool {
+ numCriticals := s.countBy(emptyConf, func(cs checkState) bool {
return cs.Status == nagiosCritical
})
- numWarnings := s.countBy(func(cs checkState) bool {
+ numWarnings := s.countBy(emptyConf, func(cs checkState) bool {
return cs.Status == nagiosWarning
})
- numUnknown := s.countBy(func(cs checkState) bool {
+ numUnknown := s.countBy(emptyConf, func(cs checkState) bool {
return cs.Status == nagiosUnknown
})
- numOK := s.countBy(func(cs checkState) bool {
+ numOK := s.countBy(emptyConf, func(cs checkState) bool {
return cs.Status == nagiosOk
})
- numStale := s.countStale()
+ numStale := s.countStale(emptyConf)
+ numSuppressed := s.countSuppressed(conf)
// Write HTML header with summary
sb.WriteString(htmlHeader(subject, numCriticals, numWarnings, numUnknown, numStale, numOK))
@@ -109,6 +116,16 @@ func (s state) htmlReport(subject string) string {
}
sb.WriteString(`</div>` + "\n\n")
+ // Suppressed alerts section
+ sb.WriteString(`<div class="section">` + "\n")
+ sb.WriteString(`<h2>Suppressed alerts</h2>` + "\n")
+ if numSuppressed == 0 {
+ sb.WriteString(`<p>There are no suppressed alerts...</p>` + "\n")
+ } else {
+ s.htmlReportSuppressed(&sb, conf)
+ }
+ sb.WriteString(`</div>` + "\n\n")
+
// OK checks section
sb.WriteString(`<div class="section">` + "\n")
sb.WriteString(`<h2>OK checks</h2>` + "\n")
@@ -181,6 +198,42 @@ func (s state) htmlReportStaleAlerts(sb *strings.Builder) int {
})
}
+// htmlReportSuppressed generates HTML for suppressed checks.
+// Shows which checks are currently muted via OnlyIfNotExists for visibility.
+func (s state) htmlReportSuppressed(sb *strings.Builder, conf config) (count int) {
+ for name, cs := range s.checks {
+ if !isCheckSuppressed(name, conf) {
+ continue
+ }
+ count++
+
+ sb.WriteString(`<div class="check-item">` + "\n")
+ sb.WriteString(htmlStatusBadge(nagiosCode(cs.Status)))
+ sb.WriteString(": ")
+ sb.WriteString(html.EscapeString(name))
+ sb.WriteString(": ")
+ sb.WriteString(html.EscapeString(cs.Output))
+ if cs.federated() {
+ sb.WriteString(" [federated from ")
+ sb.WriteString(html.EscapeString(cs.FederatedFrom))
+ sb.WriteString("]")
+ }
+ sb.WriteString(` <span class="UNKNOWN">[SUPPRESSED]</span>`)
+ sb.WriteString("\n</div>\n")
+ }
+ return
+}
+
+// countSuppressed counts the number of suppressed checks.
+func (s state) countSuppressed(conf config) (count int) {
+ for name := range s.checks {
+ if isCheckSuppressed(name, conf) {
+ count++
+ }
+ }
+ return
+}
+
// htmlReportBy is the generic HTML generator for check items.
// Mirrors state.reportBy() from state.go:222-262 but outputs HTML.
func (s state) htmlReportBy(sb *strings.Builder, showStatusChange, isStaleReport bool,
@@ -231,8 +284,8 @@ func (s state) htmlReportBy(sb *strings.Builder, showStatusChange, isStaleReport
// countStale counts the number of stale checks (excluding OK status).
// Helper function for generating summary counts.
-func (s state) countStale() int {
- return s.countBy(func(cs checkState) bool {
+func (s state) countStale(conf config) int {
+ return s.countBy(conf, func(cs checkState) bool {
return cs.Epoch < s.staleEpoch && cs.Status != nagiosOk
})
}
diff --git a/internal/html_test.go b/internal/html_test.go
index d482eda..b77c937 100644
--- a/internal/html_test.go
+++ b/internal/html_test.go
@@ -219,7 +219,7 @@ func TestHtmlReport(t *testing.T) {
}
subject := "GOGIOS Report [C:1 W:1 U:0 S:1 OK:2]"
- result := s.htmlReport(subject)
+ result := s.htmlReport(subject, config{})
// Check that all major sections are present
expectedSections := []string{
@@ -228,6 +228,7 @@ func TestHtmlReport(t *testing.T) {
"Alerts with status changed",
"Unhandled alerts",
"Stale alerts",
+ "Suppressed alerts",
"Generated by Gogios",
"</html>",
}
@@ -399,7 +400,7 @@ func TestW3CCompliance(t *testing.T) {
}
subject := "GOGIOS Report [C:1 W:0 U:0 S:0 OK:0]"
- html := s.htmlReport(subject)
+ html := s.htmlReport(subject, config{})
// W3C HTML5 Required Elements
requiredElements := map[string]string{
diff --git a/internal/run.go b/internal/run.go
index 348bdd9..9d1b21c 100644
--- a/internal/run.go
+++ b/internal/run.go
@@ -30,7 +30,7 @@ func Run(ctx context.Context, configFile string, renotify, force bool) {
notifyError(conf, err)
}
- subject, body, doNotify := state.report(renotify, force, conf.StatusPageURL)
+ subject, body, doNotify := state.report(renotify, force, conf.StatusPageURL, conf)
if doNotify {
if err := notify(conf, subject, body); err != nil {
log.Println("error:", err)
diff --git a/internal/state.go b/internal/state.go
index cb2c665..4df7757 100644
--- a/internal/state.go
+++ b/internal/state.go
@@ -132,30 +132,37 @@ func (s state) persist() error {
// report generates the notification email content.
// statusPageURL is included as a link to the HTML status page.
-func (s state) report(renotify, force bool, statusPageURL string) (string, string, bool) {
+// conf is used to determine which checks should be suppressed from the report.
+func (s state) report(renotify, force bool, statusPageURL string, conf config) (string, string, bool) {
var sb strings.Builder
sb.WriteString("This is the recent Gogios report!\n\n")
sb.WriteString("# Alerts with status changed:\n\n")
- changed := s.reportChanged(&sb)
+ changed := s.reportChanged(&sb, conf)
if !changed {
sb.WriteString("There were no status changes...\n\n")
}
sb.WriteString("# Unhandled alerts:\n\n")
- numCriticals, numWarnings, numUnknown, numOK := s.reportUnhandled(&sb)
+ numCriticals, numWarnings, numUnknown, numOK := s.reportUnhandled(&sb, conf)
hasUnhandled := (numCriticals + numWarnings + numUnknown) > 0
if !hasUnhandled {
sb.WriteString("There are no unhandled alerts...\n\n")
}
sb.WriteString("# Stale alerts:\n\n")
- numStale := s.reportStaleAlerts(&sb)
+ numStale := s.reportStaleAlerts(&sb, conf)
if numStale == 0 {
sb.WriteString("There are no stale alerts...\n\n")
}
+ sb.WriteString("# Suppressed alerts:\n\n")
+ numSuppressed := s.reportSuppressed(&sb, conf)
+ if numSuppressed == 0 {
+ sb.WriteString("There are no suppressed alerts...\n\n")
+ }
+
sb.WriteString("# Status page:\n\n")
sb.WriteString(statusPageURL)
sb.WriteString("\n\n")
@@ -169,26 +176,26 @@ func (s state) report(renotify, force bool, statusPageURL string) (string, strin
return subject, sb.String(), doNotify
}
-func (s state) reportChanged(sb *strings.Builder) (changed bool) {
- if 0 < s.reportBy(sb, true, false, func(cs checkState) bool {
+func (s state) reportChanged(sb *strings.Builder, conf config) (changed bool) {
+ if 0 < s.reportBy(sb, true, false, conf, func(cs checkState) bool {
return cs.Status == nagiosCritical && cs.changed()
}) {
changed = true
}
- if 0 < s.reportBy(sb, true, false, func(cs checkState) bool {
+ if 0 < s.reportBy(sb, true, false, conf, func(cs checkState) bool {
return cs.Status == nagiosWarning && cs.changed()
}) {
changed = true
}
- if 0 < s.reportBy(sb, true, false, func(cs checkState) bool {
+ if 0 < s.reportBy(sb, true, false, conf, func(cs checkState) bool {
return cs.Status == nagiosUnknown && cs.changed()
}) {
changed = true
}
- if 0 < s.reportBy(sb, true, false, func(cs checkState) bool {
+ if 0 < s.reportBy(sb, true, false, conf, func(cs checkState) bool {
return cs.Status == nagiosOk && cs.changed()
}) {
changed = true
@@ -197,37 +204,68 @@ func (s state) reportChanged(sb *strings.Builder) (changed bool) {
return
}
-func (s state) reportUnhandled(sb *strings.Builder) (numCriticals, numWarnings,
+func (s state) reportUnhandled(sb *strings.Builder, conf config) (numCriticals, numWarnings,
numUnknown, numOK int,
) {
- numCriticals = s.reportBy(sb, false, false, func(cs checkState) bool {
+ numCriticals = s.reportBy(sb, false, false, conf, func(cs checkState) bool {
return cs.Status == nagiosCritical
})
- numWarnings = s.reportBy(sb, false, false, func(cs checkState) bool {
+ numWarnings = s.reportBy(sb, false, false, conf, func(cs checkState) bool {
return cs.Status == nagiosWarning
})
- numUnknown = s.reportBy(sb, false, false, func(cs checkState) bool {
+ numUnknown = s.reportBy(sb, false, false, conf, func(cs checkState) bool {
return cs.Status == nagiosUnknown
})
- numOK = s.countBy(func(cs checkState) bool {
+ numOK = s.countBy(conf, func(cs checkState) bool {
return cs.Status == nagiosOk
})
return
}
-func (s state) reportStaleAlerts(sb *strings.Builder) int {
+func (s state) reportStaleAlerts(sb *strings.Builder, conf config) int {
// Only report stale alerts that are not OK, since stale OK alerts aren't concerning
- return s.reportBy(sb, false, true, func(cs checkState) bool {
+ return s.reportBy(sb, false, true, conf, func(cs checkState) bool {
return cs.Epoch < s.staleEpoch && cs.Status != nagiosOk
})
}
+// reportSuppressed lists all checks that are currently suppressed via OnlyIfNotExists.
+// This provides visibility into which alerts are being muted during maintenance windows.
+func (s state) reportSuppressed(sb *strings.Builder, conf config) (count int) {
+ for name, cs := range s.checks {
+ if !isCheckSuppressed(name, conf) {
+ continue
+ }
+ count++
+
+ sb.WriteString(nagiosCode(cs.Status).Str())
+ sb.WriteString(": ")
+ sb.WriteString(name)
+ sb.WriteString(": ")
+ sb.WriteString(cs.Output)
+ if cs.federated() {
+ sb.WriteString(" [federated from ")
+ sb.WriteString(cs.FederatedFrom)
+ sb.WriteString("]")
+ }
+ sb.WriteString(" [SUPPRESSED]")
+ sb.WriteString("\n")
+ }
+
+ if count > 0 {
+ sb.WriteString("\n")
+ }
+ return
+}
+
+// reportBy iterates over checks matching the filter and writes them to sb.
+// Checks that are suppressed via OnlyIfNotExists are excluded from the report.
func (s state) reportBy(sb *strings.Builder, showStatusChange, isStaleReport bool,
- filter func(cs checkState) bool,
+ conf config, filter func(cs checkState) bool,
) (count int) {
for name, cs := range s.checks {
if !filter(cs) {
@@ -236,6 +274,9 @@ func (s state) reportBy(sb *strings.Builder, showStatusChange, isStaleReport boo
if !isStaleReport && cs.Epoch < s.staleEpoch {
continue // skip stale checks in non-stale report
}
+ if isCheckSuppressed(name, conf) {
+ continue // skip suppressed checks
+ }
count++
if showStatusChange && cs.changed() {
@@ -268,8 +309,12 @@ func (s state) reportBy(sb *strings.Builder, showStatusChange, isStaleReport boo
return
}
-func (s state) countBy(filter func(cs checkState) bool) (count int) {
- for _, cs := range s.checks {
+// countBy counts checks matching the filter, excluding suppressed checks.
+func (s state) countBy(conf config, filter func(cs checkState) bool) (count int) {
+ for name, cs := range s.checks {
+ if isCheckSuppressed(name, conf) {
+ continue // skip suppressed checks
+ }
if filter(cs) {
count++
}
diff --git a/internal/suppress.go b/internal/suppress.go
new file mode 100644
index 0000000..a270656
--- /dev/null
+++ b/internal/suppress.go
@@ -0,0 +1,59 @@
+package internal
+
+import (
+ "log"
+ "os"
+ "strings"
+ "time"
+)
+
+// isSuppressed checks if alerts should be suppressed based on a file's existence and age.
+// Returns true if the file exists AND its modification time is no older than maxAgeS seconds (inclusive).
+// Returns false if filePath is empty, the file cannot be stat'ed (missing or any other error), or the file is too old.
+func isSuppressed(filePath string, maxAgeS int) bool {
+ if filePath == "" {
+ return false
+ }
+ info, err := os.Stat(filePath)
+ if err != nil {
+ return false // file doesn't exist or other error
+ }
+ age := time.Since(info.ModTime())
+ return age <= time.Duration(maxAgeS)*time.Second
+}
+
+// isCheckSuppressed determines if a check should be suppressed from email reports.
+// For Prometheus checks (name starts with "Prometheus"): uses PrometheusOnlyIfNotExists config.
+// For regular checks: uses per-check OnlyIfNotExists config if set.
+func isCheckSuppressed(name string, conf config) bool {
+ // Check if this is a Prometheus alert (name starts with "Prometheus")
+ if strings.HasPrefix(name, "Prometheus") {
+ if isSuppressed(conf.PrometheusOnlyIfNotExists, conf.PrometheusOnlyIfNotExistsMaxS) {
+ log.Printf("Suppressing %s: file %s exists and is recent", name, conf.PrometheusOnlyIfNotExists)
+ return true
+ }
+ return false
+ }
+
+ // For regular checks, look up the check config
+ chk, ok := conf.Checks[name]
+ if !ok {
+ return false // check not found in config (e.g., federated)
+ }
+
+ if chk.OnlyIfNotExists == "" {
+ return false
+ }
+
+ // Use per-check max age if set, otherwise use global Prometheus default
+ maxAgeS := chk.OnlyIfNotExistsMaxS
+ if maxAgeS == 0 {
+ maxAgeS = conf.PrometheusOnlyIfNotExistsMaxS
+ }
+
+ if isSuppressed(chk.OnlyIfNotExists, maxAgeS) {
+ log.Printf("Suppressing %s: file %s exists and is recent", name, chk.OnlyIfNotExists)
+ return true
+ }
+ return false
+}
diff --git a/internal/suppress_test.go b/internal/suppress_test.go
new file mode 100644
index 0000000..2a399d7
--- /dev/null
+++ b/internal/suppress_test.go
@@ -0,0 +1,163 @@
+package internal
+
+import (
+ "os"
+ "testing"
+ "time"
+)
+
+func TestIsSuppressed_EmptyPath(t *testing.T) {
+ // Empty file path should not suppress
+ if isSuppressed("", 86400) {
+ t.Error("Expected empty path to not suppress")
+ }
+}
+
+func TestIsSuppressed_NonExistentFile(t *testing.T) {
+ // Non-existent file should not suppress
+ if isSuppressed("/nonexistent/path/to/file", 86400) {
+ t.Error("Expected non-existent file to not suppress")
+ }
+}
+
+func TestIsSuppressed_RecentFile(t *testing.T) {
+ // Create a temporary file
+ tmpFile, err := os.CreateTemp("", "suppress_test")
+ if err != nil {
+ t.Fatalf("Failed to create temp file: %v", err)
+ }
+ defer os.Remove(tmpFile.Name())
+ tmpFile.Close()
+
+ // Recent file should suppress
+ if !isSuppressed(tmpFile.Name(), 86400) {
+ t.Error("Expected recent file to suppress")
+ }
+}
+
+func TestIsSuppressed_OldFile(t *testing.T) {
+ // Create a temporary file
+ tmpFile, err := os.CreateTemp("", "suppress_test")
+ if err != nil {
+ t.Fatalf("Failed to create temp file: %v", err)
+ }
+ defer os.Remove(tmpFile.Name())
+ tmpFile.Close()
+
+ // Set the file's modification time to 2 hours ago
+ oldTime := time.Now().Add(-2 * time.Hour)
+ if err := os.Chtimes(tmpFile.Name(), oldTime, oldTime); err != nil {
+ t.Fatalf("Failed to change file time: %v", err)
+ }
+
+ // File older than maxAgeS (1 hour = 3600s) should not suppress
+ if isSuppressed(tmpFile.Name(), 3600) {
+ t.Error("Expected old file to not suppress")
+ }
+
+ // File within maxAgeS (3 hours = 10800s) should suppress
+ if !isSuppressed(tmpFile.Name(), 10800) {
+ t.Error("Expected file within max age to suppress")
+ }
+}
+
+func TestIsCheckSuppressed_PrometheusCheck(t *testing.T) {
+ // Create a temporary file for Prometheus suppression
+ tmpFile, err := os.CreateTemp("", "prometheus_suppress_test")
+ if err != nil {
+ t.Fatalf("Failed to create temp file: %v", err)
+ }
+ defer os.Remove(tmpFile.Name())
+ tmpFile.Close()
+
+ conf := config{
+ PrometheusOnlyIfNotExists: tmpFile.Name(),
+ PrometheusOnlyIfNotExistsMaxS: 86400,
+ Checks: make(map[string]check),
+ }
+
+ // Prometheus check should be suppressed when file exists
+ if !isCheckSuppressed("Prometheus: TestAlert", conf) {
+ t.Error("Expected Prometheus check to be suppressed")
+ }
+
+ // Non-Prometheus check should not be affected by Prometheus suppression
+ conf.Checks["Regular Check"] = check{}
+ if isCheckSuppressed("Regular Check", conf) {
+ t.Error("Expected regular check to not be suppressed by Prometheus config")
+ }
+}
+
+func TestIsCheckSuppressed_RegularCheck(t *testing.T) {
+ // Create a temporary file for check suppression
+ tmpFile, err := os.CreateTemp("", "check_suppress_test")
+ if err != nil {
+ t.Fatalf("Failed to create temp file: %v", err)
+ }
+ defer os.Remove(tmpFile.Name())
+ tmpFile.Close()
+
+ conf := config{
+ PrometheusOnlyIfNotExistsMaxS: 86400,
+ Checks: map[string]check{
+ "Suppressed Check": {
+ OnlyIfNotExists: tmpFile.Name(),
+ OnlyIfNotExistsMaxS: 86400,
+ },
+ "Normal Check": {},
+ },
+ }
+
+ // Check with suppression file should be suppressed
+ if !isCheckSuppressed("Suppressed Check", conf) {
+ t.Error("Expected check with suppression file to be suppressed")
+ }
+
+ // Check without suppression file should not be suppressed
+ if isCheckSuppressed("Normal Check", conf) {
+ t.Error("Expected check without suppression file to not be suppressed")
+ }
+
+ // Unknown check (not in config) should not be suppressed
+ if isCheckSuppressed("Unknown Check", conf) {
+ t.Error("Expected unknown check to not be suppressed")
+ }
+}
+
+func TestIsCheckSuppressed_UsesGlobalDefaultMaxAge(t *testing.T) {
+ // Create a temporary file
+ tmpFile, err := os.CreateTemp("", "suppress_test")
+ if err != nil {
+ t.Fatalf("Failed to create temp file: %v", err)
+ }
+ defer os.Remove(tmpFile.Name())
+ tmpFile.Close()
+
+ // Set the file's modification time to 2 hours ago
+ oldTime := time.Now().Add(-2 * time.Hour)
+ if err := os.Chtimes(tmpFile.Name(), oldTime, oldTime); err != nil {
+ t.Fatalf("Failed to change file time: %v", err)
+ }
+
+ // Config with short global max age (1 hour)
+ conf := config{
+ PrometheusOnlyIfNotExistsMaxS: 3600,
+ Checks: map[string]check{
+ "Test Check": {
+ OnlyIfNotExists: tmpFile.Name(),
+ OnlyIfNotExistsMaxS: 0, // Use global default
+ },
+ },
+ }
+
+ // Should NOT be suppressed because file is older than global default (1 hour)
+ if isCheckSuppressed("Test Check", conf) {
+ t.Error("Expected check to not be suppressed when file is older than global max age")
+ }
+
+ // Config with longer global max age (3 hours)
+ conf.PrometheusOnlyIfNotExistsMaxS = 10800
+ if !isCheckSuppressed("Test Check", conf) {
+ t.Error("Expected check to be suppressed when file is within global max age")
+ }
+}