summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2025-10-27 23:36:49 +0200
committerPaul Buetow <paul@buetow.org>2025-10-27 23:36:49 +0200
commit81d1550df55318beff8e9f762952a33daaa7c0cf (patch)
tree897e3c044c8e3bf5c9d71d98345fde9a645e8c7a
parent6352e8c33c1c22af382093d406d477d1530950db (diff)
feat: Add randomSpread and RunInterval to checks
This commit introduces two new optional parameters to the check configuration: - `randomSpread`: This parameter allows specifying a random sleep time up to N seconds before a check is executed. This is useful to avoid all checks running at the same time. - `RunInterval`: This parameter defines the minimum interval in seconds between two executions of a check. This is useful if gogios is run more frequently than a specific check should be. The `README.md` has been updated to document these new features. fix: Fix deadlock when skipping checks This commit also fixes a deadlock that occurred when a check was skipped due to the `RunInterval` setting. The `inputWg.Done()` was not being called, causing the main goroutine to wait forever. build: Replace Taskfile with Magefile The `Taskfile.yml` has been replaced with a `Magefile.go` to manage the build process. This provides more flexibility and is more idiomatic for Go projects.
-rw-r--r--.gitignore3
-rw-r--r--Magefile.go105
-rw-r--r--README.md14
-rw-r--r--internal/check.go4
-rw-r--r--internal/runchecks.go34
-rw-r--r--internal/state.go20
-rw-r--r--internal/state_test.go24
7 files changed, 188 insertions, 16 deletions
diff --git a/.gitignore b/.gitignore
index 849ddff..0cd806a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-dist/
+gogios
+dist/ \ No newline at end of file
diff --git a/Magefile.go b/Magefile.go
new file mode 100644
index 0000000..dc7d90b
--- /dev/null
+++ b/Magefile.go
@@ -0,0 +1,105 @@
+//go:build mage
+// +build mage
+
+package main
+
+import (
+ "fmt"
+ "os"
+ "os/exec"
+
+ "github.com/magefile/mage/mg"
+)
+
+// Build compiles cmd/gogios/main.go into a binary named "gogios" (path
+// relative to the working directory). The toolchain's output is forwarded to
+// this process's stdout and stderr, and the error from "go build" is returned.
+func Build() error {
+	fmt.Println("Building...")
+	args := []string{"build", "-o", "gogios", "cmd/gogios/main.go"}
+	build := exec.Command("go", args...)
+	build.Stdout, build.Stderr = os.Stdout, os.Stderr
+	return build.Run()
+}
+
+// Dev builds the gogios binary with the race detector enabled.
+//
+// Vet and Lint are declared as mage dependencies and are requested before the
+// build starts. The build output is forwarded to stdout/stderr and the error
+// from "go build -race" is returned.
+func Dev() error {
+	mg.Deps(Vet, Lint)
+	fmt.Println("Building with race detector...")
+	args := []string{"build", "-race", "-o", "gogios", "cmd/gogios/main.go"}
+	raceBuild := exec.Command("go", args...)
+	raceBuild.Stdout, raceBuild.Stderr = os.Stdout, os.Stderr
+	return raceBuild.Run()
+}
+
+// Vet runs "go vet ./..." and forwards its output to stdout/stderr.
+// It returns the error reported by the vet process, if any.
+func Vet() error {
+	fmt.Println("Vetting...")
+	vet := exec.Command("go", "vet", "./...")
+	vet.Stdout, vet.Stderr = os.Stdout, os.Stderr
+	return vet.Run()
+}
+
+// Lint runs "golangci-lint run" and forwards its output to stdout/stderr.
+// The golangci-lint executable is looked up by name, so it has to be
+// installed already (see LintInstall).
+func Lint() error {
+	fmt.Println("Linting...")
+	linter := exec.Command("golangci-lint", "run")
+	linter.Stdout, linter.Stderr = os.Stdout, os.Stderr
+	return linter.Run()
+}
+
+// LintInstall installs golangci-lint.
+func LintInstall() error {
+ fmt.Println("Installing golangci-lint...")
+ cmd := exec.Command("go", "install", "github.com/golangci/golangci-lint/cmd/golangci-lint@latest")
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ return cmd.Run()
+}
+
+// Test runs all unit tests.
+//
+// The Go test cache is cleaned first so that every test is actually
+// re-executed; if cleaning fails, its error is returned and the tests are not
+// run. Output of both commands is forwarded to stdout/stderr.
+func Test() error {
+	steps := []struct {
+		banner string
+		args   []string
+	}{
+		{"Cleaning test cache...", []string{"clean", "-testcache"}},
+		{"Running tests...", []string{"test", "./..."}},
+	}
+
+	for _, step := range steps {
+		fmt.Println(step.banner)
+		c := exec.Command("go", step.args...)
+		c.Stdout, c.Stderr = os.Stdout, os.Stderr
+		if err := c.Run(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Openbsd builds the gogios binary for OpenBSD and then deploys it.
+//
+// The two steps are run with mg.SerialDeps rather than mg.Deps: mg.Deps
+// starts its dependencies concurrently, which would allow DeployOpenbsd to
+// copy a missing or stale binary while BuildOpenbsd is still compiling.
+func Openbsd() error {
+	mg.SerialDeps(BuildOpenbsd, DeployOpenbsd)
+	return nil
+}
+
+// BuildOpenbsd cross-compiles the gogios binary for openbsd/amd64.
+//
+// GOOS and GOARCH are set only in the environment of the "go build" child
+// process. Changing this process's environment with os.Setenv would leak the
+// cross-compilation settings into every target that runs afterwards in the
+// same mage invocation (for example a subsequent Build or Test).
+func BuildOpenbsd() error {
+	fmt.Println("Building for OpenBSD...")
+	cmd := exec.Command("go", "build", "-o", "gogios", "cmd/gogios/main.go")
+	// For duplicate keys exec.Cmd uses the last entry, so these values
+	// override any GOOS/GOARCH inherited from the parent environment.
+	cmd.Env = append(os.Environ(), "GOOS=openbsd", "GOARCH=amd64")
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	return cmd.Run()
+}
+
+// DeployOpenbsd copies the gogios binary for OpenBSD.
+func DeployOpenbsd() error {
+ fmt.Println("Copying binary...")
+ cpCmd := exec.Command("cp", "gogios", "/home/paul/git/conf/frontends/usr/local/bin/gogios")
+ cpCmd.Stdout = os.Stdout
+ cpCmd.Stderr = os.Stderr
+ return cpCmd.Run()
+}
diff --git a/README.md b/README.md
index ae596e3..3c2e334 100644
--- a/README.md
+++ b/README.md
@@ -117,13 +117,15 @@ To configure Gogios, create a JSON configuration file (e.g., `/etc/gogios.json`)
"Plugin": "/usr/local/libexec/nagios/check_ping",
"Args": [ "-H", "www.foo.zone", "-4", "-w", "50,10%", "-c", "100,15%" ],
"Retries": 3,
- "RetryInterval": 10
+ "RetryInterval": 10,
+ "RandomSpread": 60
},
"Check ICMP6 www.foo.zone": {
"Plugin": "/usr/local/libexec/nagios/check_ping",
- "Args": [ "-H", "www.foo.zone", "-6", "-w", "50,10%", "-c", "100,15%" ]
+ "Args": [ "-H", "www.foo.zone", "-6", "-w", "50,10%", "-c", "100,15%" ],
"Retries": 3,
- "RetryInterval": 10
+ "RetryInterval": 10,
+ "RunInterval": 300
},
"www.foo.zone HTTP IPv4": {
"Plugin": "/usr/local/libexec/nagios/check_http",
@@ -157,6 +159,10 @@ If you want to execute checks only when another check succeeded (status OK), use
`Retries` and `RetryInterval` are optional check configuration parameters. In case of failure, Gogios will retry `Retries` times each `RetryInterval` seconds.
+`RandomSpread` is an optional check configuration parameter. When set to N for a check, Gogios sleeps for a random duration of up to N seconds before executing that check. This is useful to avoid all checks running at the same time.
+
+`RunInterval` is an optional check configuration parameter. It defines the minimum interval in seconds between two executions of a check. This is useful if you run gogios more frequently than you want to run a specific check.
+
For remote checks, use the `check_nrpe` plugin. You also need to have the NRPE server set up correctly on the target host (out of scope for this document).
The `state.json` file mentioned above keeps track of the monitoring state and check results between Gogios runs, enabling Gogios only to send email notifications when there are changes in the check status.
@@ -202,4 +208,4 @@ My primary goal was to have a single email address for notifications and a simpl
This led me to create Gogios, a lightweight monitoring tool tailored to my specific needs. I chose the Go programming language for this project as it allowed me to refresh my Go programming skills and provided a robust platform for developing a fast and efficient monitoring tool.
-Gogios eliminates unnecessary features and focuses on simplicity, providing a no-frills monitoring solution for small-scale self-hosted servers and virtual machines. The result is a tool that is easy to configure, set up, and maintain, ensuring that monitoring your resources is as hassle-free as possible.
+Gogios eliminates unnecessary features and focuses on simplicity, providing a no-frills monitoring solution for small-scale self-hosted servers and virtual machines. The result is a tool that is easy to configure, set up, and maintain, ensuring that monitoring your resources is as hassle-free as possible. \ No newline at end of file
diff --git a/internal/check.go b/internal/check.go
index 70f0044..3f2e4cc 100644
--- a/internal/check.go
+++ b/internal/check.go
@@ -14,6 +14,8 @@ type check struct {
DependsOn []string `json:"DependsOn,omitempty"`
Retries int `json:"Retries,omitempty"`
RetryInterval int `json:"RetryInterval,omitempty"`
+ RunInterval int `json:"RunInterval,omitempty"`
+ RandomSpread int `json:"RandomSpread,omitempty"`
}
type namedCheck struct {
@@ -65,4 +67,4 @@ func (c namedCheck) run(ctx context.Context) checkResult {
func (c namedCheck) skip(output string) checkResult {
return c.check.skip(c.name, output)
-}
+} \ No newline at end of file
diff --git a/internal/runchecks.go b/internal/runchecks.go
index 788e77d..fb7a9c4 100644
--- a/internal/runchecks.go
+++ b/internal/runchecks.go
@@ -3,6 +3,7 @@ package internal
import (
"context"
"log"
+ "math/rand"
"sync"
"time"
)
@@ -36,6 +37,25 @@ func runChecks(ctx context.Context, state state, conf config) state {
inputWg.Add(len(conf.Checks))
for check := range inputCh {
+ if age := state.age(check.name); check.RunInterval > int(age.Seconds()) {
+ lastCheckState, ok := state.checks[check.name]
+ if ok {
+ log.Printf("Skipping %s: interval not yet reached (%v (%v) <= %v)", check.name,
+ int(age.Seconds()), age, check.RunInterval)
+ outputCh <- checkResult{
+ name: check.name,
+ output: lastCheckState.output,
+ epoch: lastCheckState.Epoch,
+ status: lastCheckState.Status,
+ federated: lastCheckState.federated,
+ }
+ inputWg.Done()
+ continue
+ }
+ log.Println("Something went wrong... expected check state for", check,
+ "bug got nothing! Proceeding anyway")
+ }
+
go func(check namedCheck) {
outputCh <- runCheck(ctx, limitCh, deps, check, conf, check.Retries)
inputWg.Done()
@@ -52,14 +72,20 @@ func runChecks(ctx context.Context, state state, conf config) state {
return state
}
-func runCheck(ctx context.Context, limitCh chan struct{},
- deps dependency, check namedCheck, conf config, retries int) checkResult {
-
+func runCheck(ctx context.Context, limitCh chan struct{}, deps dependency,
+ check namedCheck, conf config, retries int,
+) checkResult {
if err := deps.wait(ctx, check.DependsOn); err != nil {
deps.notOk(check.name)
return check.skip(err.Error())
}
+ if check.RandomSpread > 0 {
+ d := time.Duration(rand.Intn(check.RandomSpread)) * time.Second
+ log.Printf("Sleeping %v before running %s", d, check.name)
+ time.Sleep(d)
+ }
+
limitCh <- struct{}{}
checkCtx, cancel := context.WithTimeout(ctx,
@@ -84,4 +110,4 @@ func runCheck(ctx context.Context, limitCh chan struct{},
<-limitCh
return checkResult
-}
+} \ No newline at end of file
diff --git a/internal/state.go b/internal/state.go
index 8de7f15..dceb108 100644
--- a/internal/state.go
+++ b/internal/state.go
@@ -83,6 +83,14 @@ func (s state) update(result checkResult) {
log.Println(result.name, cs)
}
+// age returns the time elapsed since the Epoch (Unix seconds) recorded for
+// the named check. If the state holds no entry for that name, it returns 0.
+func (s state) age(name string) time.Duration {
+	recorded, found := s.checks[name]
+	if !found {
+		return 0
+	}
+
+	lastRun := time.Unix(recorded.Epoch, 0)
+	return time.Since(lastRun)
+}
+
// To be used to merge the state of another server running Gogios
func (s state) merge(other state) error {
for name, cs := range other.checks {
@@ -105,7 +113,7 @@ func (s state) mergeFromBytes(bytes []byte) error {
func (s state) persist() error {
stateDir := filepath.Dir(s.stateFile)
if _, err := os.Stat(stateDir); os.IsNotExist(err) {
- if err := os.MkdirAll(stateDir, 0755); err != nil {
+ if err := os.MkdirAll(stateDir, 0o755); err != nil {
return err
}
}
@@ -180,8 +188,8 @@ func (s state) reportChanged(sb *strings.Builder) (changed bool) {
}
func (s state) reportUnhandled(sb *strings.Builder) (numCriticals, numWarnings,
- numUnknown, numOK int) {
-
+ numUnknown, numOK int,
+) {
numCriticals = s.reportBy(sb, false, false, func(cs checkState) bool {
return cs.Status == nagiosCritical
})
@@ -208,8 +216,8 @@ func (s state) reportStaleAlerts(sb *strings.Builder) int {
}
func (s state) reportBy(sb *strings.Builder, showStatusChange, isStaleReport bool,
- filter func(cs checkState) bool) (count int) {
-
+ filter func(cs checkState) bool,
+) (count int) {
for name, cs := range s.checks {
if !filter(cs) {
continue
@@ -254,4 +262,4 @@ func (s state) countBy(filter func(cs checkState) bool) (count int) {
}
}
return
-}
+} \ No newline at end of file
diff --git a/internal/state_test.go b/internal/state_test.go
new file mode 100644
index 0000000..aacc023
--- /dev/null
+++ b/internal/state_test.go
@@ -0,0 +1,24 @@
+package internal
+
+import (
+ "testing"
+ "time"
+)
+
+func TestAge(t *testing.T) {
+ state := state{checks: make(map[string]checkState)}
+
+ state.checks["Check Foo"] = checkState{Epoch: 0}
+ minAge := time.Duration(time.Now().Unix())
+
+ if reportedAge := state.age("Check Foo"); reportedAge < minAge {
+ t.Errorf("expected age >= %v, got %v", minAge, reportedAge)
+ }
+
+ maxAge := time.Duration(time.Now().Unix())
+ state.checks["Check Bar"] = checkState{Epoch: time.Now().Unix()}
+
+ if reportedAge := state.age("Check Bar"); reportedAge >= minAge {
+ t.Errorf("expected age < %v, got %v", maxAge, reportedAge)
+ }
+}