summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2023-04-25 00:16:39 +0300
committerPaul Buetow <paul@buetow.org>2023-04-25 00:16:39 +0300
commitb4b85046fef253d28f8e90ad76dca2e7552fe0ab (patch)
tree44e7f41cdcbabdbce2f134753be9828799f58066
parent3abf436db869c99586ff55af9bd562a33d114607 (diff)
add DependsOn
-rw-r--r--README.md20
-rw-r--r--internal/check.go13
-rw-r--r--internal/config.go11
-rw-r--r--internal/dependency.go51
-rw-r--r--internal/run.go4
-rw-r--r--internal/runchecks.go24
6 files changed, 113 insertions, 10 deletions
diff --git a/README.md b/README.md
index 0708a4b..76f84c0 100644
--- a/README.md
+++ b/README.md
@@ -102,13 +102,23 @@ To configure Gogios, create a JSON configuration file (e.g., `/etc/gogios.json`)
"CheckConcurrency": 2,
"StateDir": "/var/run/gogios",
"Checks": {
+ "Check ICMP4 www.foo.zone": {
+ "Plugin": "/usr/local/libexec/nagios/check_ping",
+ "Args": [ "-H", "www.foo.zone", "-4", "-w", "50,10%", "-c", "100,15%" ]
+ },
+ "Check ICMP6 www.foo.zone": {
+ "Plugin": "/usr/local/libexec/nagios/check_ping",
+ "Args": [ "-H", "www.foo.zone", "-6", "-w", "50,10%", "-c", "100,15%" ]
+ },
"www.foo.zone HTTP IPv4": {
"Plugin": "/usr/local/libexec/nagios/check_http",
- "Args": ["www.foo.zone", "-4"]
+ "Args": ["www.foo.zone", "-4"],
+ "DependsOn": ["Check ICMP4 www.foo.zone"]
},
"www.foo.zone HTTP IPv6": {
"Plugin": "/usr/local/libexec/nagios/check_http",
- "Args": ["www.foo.zone", "-6"]
+ "Args": ["www.foo.zone", "-6"],
+ "DependsOn": ["Check ICMP6 www.foo.zone"]
},
"Check NRPE Disk Usage foo.zone": {
"Plugin": "/usr/local/libexec/nagios/check_nrpe",
@@ -125,7 +135,11 @@ To configure Gogios, create a JSON configuration file (e.g., `/etc/gogios.json`)
* `StateDir`: Specifies the directory where Gogios stores its persistent state in a `state.json` file.
* `Checks`: Defines a list of checks to be performed, with each check having a unique name, plugin path, and arguments.
-Adjust the configuration file according to your needs, specifying the checks you want Gogios to perform. For remote checks, use the `check_nrpe` plugin. You also need to have the NRPE server set up correctly on the target host (out of scope for this document).
+Adjust the configuration file according to your needs, specifying the checks you want Gogios to perform.
+
+If you want a check to be executed only when another check succeeded (status OK), use `DependsOn`. In the example above, the HTTP checks won't be executed when the hosts aren't pingable; they will show up as `UNKNOWN` in the report.
+
+For remote checks, use the `check_nrpe` plugin. You also need to have the NRPE server set up correctly on the target host (out of scope for this document).
The `state.json` file mentioned above keeps track of the monitoring state and check results between Gogios runs, enabling Gogios to only send email notifications when there are changes in the check status.
diff --git a/internal/check.go b/internal/check.go
index 8a554ee..a9efd29 100644
--- a/internal/check.go
+++ b/internal/check.go
@@ -8,8 +8,9 @@ import (
)
type check struct {
- Plugin string
- Args []string
+ Plugin string
+ Args []string
+ DependsOn []string `json:"DependsOn,omitempty"`
}
type namedCheck struct {
@@ -43,6 +44,14 @@ func (c check) run(ctx context.Context, name string) checkResult {
return checkResult{name, output, nagiosCode(cmd.ProcessState.ExitCode())}
}
+// skip builds the result reported for a check that was not executed: it
+// carries the given name and output together with the unknown status.
+func (c check) skip(name, output string) checkResult {
+	skipped := checkResult{name, output, unknown}
+	return skipped
+}
+
// run executes the wrapped check, passing along the named check's own name.
func (c namedCheck) run(ctx context.Context) checkResult {
	result := c.check.run(ctx, c.name)
	return result
}
+
+// skip reports the wrapped check as not executed, using the named check's
+// own name and the given output text.
+func (c namedCheck) skip(output string) checkResult {
+	result := c.check.skip(c.name, output)
+	return result
+}
diff --git a/internal/config.go b/internal/config.go
index 85782cf..cb55ec8 100644
--- a/internal/config.go
+++ b/internal/config.go
@@ -53,3 +53,14 @@ func newConfig(configFile string) (config, error) {
return config, nil
}
+
+// sanityCheck verifies that every name listed in a check's DependsOn refers
+// to a check that exists in c.Checks.
+//
+// It returns an error naming the dependent check and the missing dependency
+// for the first broken reference it encounters, or nil when all dependencies
+// resolve. Map iteration order is random, so with several broken references
+// the one reported may differ between runs.
+func (c config) sanityCheck() error {
+	for name, check := range c.Checks {
+		for _, depName := range check.DependsOn {
+			if _, ok := c.Checks[depName]; ok {
+				continue
+			}
+			// name is the dependent check, depName the dependency that is missing.
+			return fmt.Errorf("check '%s' depends on non-existent check '%s'", name, depName)
+		}
+	}
+	return nil
+}
diff --git a/internal/dependency.go b/internal/dependency.go
new file mode 100644
index 0000000..020b4f5
--- /dev/null
+++ b/internal/dependency.go
@@ -0,0 +1,51 @@
+package internal
+
+import (
+ "context"
+ "fmt"
+)
+
+// dependency tracks check outcomes with two signalling channels per check
+// name, so that dependent checks can wait for a result.
+type dependency struct {
+ okMap map[string]chan struct{} // per check name; closed by ok(name)
+ nokMap map[string]chan struct{} // per check name; closed by notOk(name)
+}
+
+// newDependency creates the dependency tracker for all checks in config.
+// Every configured check name gets its own fresh, open channel in both maps.
+func newDependency(config config) dependency {
+	n := len(config.Checks)
+	okChans := make(map[string]chan struct{}, n)
+	nokChans := make(map[string]chan struct{}, n)
+
+	for checkName := range config.Checks {
+		okChans[checkName] = make(chan struct{})
+		nokChans[checkName] = make(chan struct{})
+	}
+
+	return dependency{okMap: okChans, nokMap: nokChans}
+}
+
+// ok marks the check called name as OK by closing its channel in okMap,
+// which unblocks every receive pending on that channel.
+func (d dependency) ok(name string) {
+	done := d.okMap[name]
+	close(done)
+}
+
+// notOk marks the check called name as not OK by closing its channel in
+// nokMap, which unblocks every receive pending on that channel.
+func (d dependency) notOk(name string) {
+	failed := d.nokMap[name]
+	close(failed)
+}
+
+// wait blocks until every check named in dependencies has reported a result.
+//
+// Dependencies are awaited one by one, in slice order. It returns nil once
+// all of them were marked OK, and an error as soon as one of them is marked
+// not OK or ctx is done. Names without an entry in okMap are skipped.
+func (d dependency) wait(ctx context.Context, dependencies []string) error {
+	for _, depName := range dependencies {
+		okCh, known := d.okMap[depName]
+		if !known {
+			// We sent an error mail already via config.sanityCheck for this case.
+			continue
+		}
+		nokCh := d.nokMap[depName]
+
+		select {
+		case <-okCh:
+			// This dependency is fine; move on to the next one.
+		case <-nokCh:
+			return fmt.Errorf("dependency '%s' is not OK!", depName)
+		case <-ctx.Done():
+			return fmt.Errorf("waited for too long for dependency '%s': %s", depName, ctx.Err().Error())
+		}
+	}
+	return nil
+}
diff --git a/internal/run.go b/internal/run.go
index 2fd1f7c..9583a69 100644
--- a/internal/run.go
+++ b/internal/run.go
@@ -8,6 +8,10 @@ func Run(ctx context.Context, configFile string, renotify bool) {
panic(err)
}
+ if err := config.sanityCheck(); err != nil {
+ notifyError(config, err)
+ }
+
state, err := readState(config)
if err != nil {
notifyError(config, err)
diff --git a/internal/runchecks.go b/internal/runchecks.go
index 360a1b5..48a80e8 100644
--- a/internal/runchecks.go
+++ b/internal/runchecks.go
@@ -11,6 +11,7 @@ func runChecks(globalCtx context.Context, state state, config config) state {
limiterCh := make(chan struct{}, config.CheckConcurrency)
inputCh := make(chan namedCheck)
outputCh := make(chan checkResult)
+ deps := newDependency(config)
go func() {
for name, check := range config.Checks {
@@ -34,17 +35,30 @@ func runChecks(globalCtx context.Context, state state, config config) state {
for check := range inputCh {
go func(check namedCheck) {
+ defer inputWg.Done()
+
+ if err := deps.wait(globalCtx, check.DependsOn); err != nil {
+ deps.notOk(check.name)
+ outputCh <- check.skip(err.Error())
+ return
+ }
+
limiterCh <- struct{}{}
- defer func() {
- <-limiterCh
- inputWg.Done()
- }()
+ defer func() { <-limiterCh }()
ctx, cancel := context.WithTimeout(globalCtx,
time.Duration(config.CheckTimeoutS)*time.Second)
defer cancel()
- outputCh <- check.run(ctx)
+ checkResult := check.run(ctx)
+
+ if checkResult.status == critical {
+ deps.notOk(check.name)
+ } else {
+ deps.ok(check.name)
+ }
+
+ outputCh <- checkResult
}(check)
}