summary refs log tree commit diff
path: root/integrationtests
diff options
context:
space:
mode:
author Paul Buetow <paul@buetow.org> 2026-02-21 20:16:19 +0200
committer Paul Buetow <paul@buetow.org> 2026-02-21 20:18:41 +0200
commit e51b8571bc192e7122f25a3d05a6407dfa8a6998 (patch)
tree 81e7e1fbe1c9e0a91d033b1aded00b4273502313 /integrationtests
parent 2f0ac27ec92840cab408e5f5a71d225be070cc0f (diff)
Add ior crash/timeout harness tests (task 343)
- Add waitBoth unit tests: ior exit error, ior timeout, both timeout, both succeed — using real processes (true/false/sleep)
- Add TestIorCrashReportsError: full harness test with fake ior binary that exits with error, verifying error mentions 'ior' and workload PID is returned
- Add TestIorStartFailureCleansUpWorkload: verifies workload process is killed when ior binary doesn't exist, checking with signal 0
- Refactor waitBoth to accept grace duration parameter for testability (production code passes iorShutdownGrace, tests use 500ms)
- Fix pipe drain in startWorkload: drain remaining stdout after reading PID so cmd.Wait() doesn't block on pending I/O
- Add writeScript helper to helpers_test.go for creating fake binaries

Co-authored-by: Amp <amp@ampcode.com>
Amp-Thread-ID: https://ampcode.com/threads/T-019c8162-c1cf-7612-b8f5-84c61e3d2021
Diffstat (limited to 'integrationtests')
-rw-r--r-- integrationtests/harness.go 9
-rw-r--r-- integrationtests/harness_test.go 157
-rw-r--r-- integrationtests/helpers_test.go 10
3 files changed, 173 insertions, 3 deletions
diff --git a/integrationtests/harness.go b/integrationtests/harness.go
index 7edde44..fde52e6 100644
--- a/integrationtests/harness.go
+++ b/integrationtests/harness.go
@@ -3,6 +3,7 @@ package integrationtests
import (
"bufio"
"fmt"
+ "io"
"os"
"os/exec"
"path/filepath"
@@ -41,7 +42,7 @@ func (h *TestHarness) Run(scenario string, duration int) (TestResult, int, error
return TestResult{}, workloadPID, err
}
- workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, duration)
+ workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, duration, iorShutdownGrace)
if iorErr != nil {
return TestResult{}, workloadPID, fmt.Errorf("ior: %w", iorErr)
@@ -92,6 +93,8 @@ func (h *TestHarness) startWorkload(scenario string) (*exec.Cmd, int, error) {
} else {
errCh <- fmt.Errorf("workload produced no output")
}
+ // Drain remaining pipe data so cmd.Wait() does not block.
+ io.Copy(io.Discard, stdout) //nolint:errcheck
}()
select {
@@ -132,14 +135,14 @@ func (h *TestHarness) startIor(pid int, scenario string, duration int) (*exec.Cm
// waitBoth waits for both the workload and ior commands concurrently.
// If ior does not finish within duration + grace period, it is killed.
-func waitBoth(workloadCmd, iorCmd *exec.Cmd, duration int) (workloadErr, iorErr error) {
+func waitBoth(workloadCmd, iorCmd *exec.Cmd, duration int, grace time.Duration) (workloadErr, iorErr error) {
workloadDone := make(chan error, 1)
iorDone := make(chan error, 1)
go func() { workloadDone <- workloadCmd.Wait() }()
go func() { iorDone <- iorCmd.Wait() }()
- timeout := time.After(time.Duration(duration)*time.Second + iorShutdownGrace)
+ timeout := time.After(time.Duration(duration)*time.Second + grace)
for workloadDone != nil || iorDone != nil {
select {
diff --git a/integrationtests/harness_test.go b/integrationtests/harness_test.go
index 813e9d6..6e076ad 100644
--- a/integrationtests/harness_test.go
+++ b/integrationtests/harness_test.go
@@ -1,8 +1,13 @@
package integrationtests
import (
+ "os"
+ "os/exec"
+ "path/filepath"
"strings"
+ "syscall"
"testing"
+ "time"
)
func TestWorkloadCrashReportsError(t *testing.T) {
@@ -21,3 +26,155 @@ func TestWorkloadCrashReportsError(t *testing.T) {
t.Errorf("expected no records from crashed workload, got %d", len(result.Records))
}
}
+
+func TestWaitBothIorExitError(t *testing.T) {
+ workloadCmd := exec.Command("true")
+ iorCmd := exec.Command("false")
+ if err := workloadCmd.Start(); err != nil {
+ t.Fatalf("start workload: %v", err)
+ }
+ if err := iorCmd.Start(); err != nil {
+ t.Fatalf("start ior: %v", err)
+ }
+
+ workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, 5, iorShutdownGrace)
+ if iorErr == nil {
+ t.Fatal("expected ior error, got nil")
+ }
+ if workloadErr != nil {
+ t.Errorf("expected nil workload error, got: %v", workloadErr)
+ }
+}
+
+func TestWaitBothIorTimeout(t *testing.T) {
+ workloadCmd := exec.Command("true")
+ iorCmd := exec.Command("sleep", "60")
+ if err := workloadCmd.Start(); err != nil {
+ t.Fatalf("start workload: %v", err)
+ }
+ if err := iorCmd.Start(); err != nil {
+ t.Fatalf("start ior: %v", err)
+ }
+
+ // Use duration=0 and a short grace period so timeout fires quickly.
+ // Workload ("true") exits instantly; ior ("sleep 60") exceeds the timeout.
+ workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, 0, 500*time.Millisecond)
+ if workloadErr != nil {
+ t.Errorf("expected nil workload error, got: %v", workloadErr)
+ }
+ if iorErr == nil {
+ t.Fatal("expected ior error from timeout, got nil")
+ }
+ if !strings.Contains(iorErr.Error(), "timed out") {
+ t.Errorf("expected timeout error, got: %v", iorErr)
+ }
+}
+
+func TestWaitBothBothTimeout(t *testing.T) {
+ workloadCmd := exec.Command("sleep", "60")
+ iorCmd := exec.Command("sleep", "60")
+ if err := workloadCmd.Start(); err != nil {
+ t.Fatalf("start workload: %v", err)
+ }
+ if err := iorCmd.Start(); err != nil {
+ t.Fatalf("start ior: %v", err)
+ }
+
+ workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, 0, 500*time.Millisecond)
+ if workloadErr == nil {
+ t.Fatal("expected workload timeout error, got nil")
+ }
+ if !strings.Contains(workloadErr.Error(), "timed out") {
+ t.Errorf("expected workload timeout error, got: %v", workloadErr)
+ }
+ if iorErr == nil {
+ t.Fatal("expected ior timeout error, got nil")
+ }
+ if !strings.Contains(iorErr.Error(), "timed out") {
+ t.Errorf("expected ior timeout error, got: %v", iorErr)
+ }
+}
+
+func TestWaitBothBothSucceed(t *testing.T) {
+ workloadCmd := exec.Command("true")
+ iorCmd := exec.Command("true")
+ if err := workloadCmd.Start(); err != nil {
+ t.Fatalf("start workload: %v", err)
+ }
+ if err := iorCmd.Start(); err != nil {
+ t.Fatalf("start ior: %v", err)
+ }
+
+ workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, 5, iorShutdownGrace)
+ if workloadErr != nil {
+ t.Errorf("expected nil workload error, got: %v", workloadErr)
+ }
+ if iorErr != nil {
+ t.Errorf("expected nil ior error, got: %v", iorErr)
+ }
+}
+
+func TestIorCrashReportsError(t *testing.T) {
+ tmpDir := t.TempDir()
+ outputDir := t.TempDir()
+
+ // Create a fake workload that prints its PID and exits cleanly.
+ workloadBin := writeScript(t, tmpDir, "workload", `echo $$`)
+
+ // Create a fake ior that exits with error immediately.
+ iorBin := writeScript(t, tmpDir, "ior", `exit 1`)
+
+ h := TestHarness{
+ IorBinary: iorBin,
+ WorkloadBinary: workloadBin,
+ BpfObject: filepath.Join(tmpDir, "fake.bpf.o"),
+ OutputDir: outputDir,
+ }
+
+ result, pid, err := h.Run("test", 5)
+ if err == nil {
+ t.Fatal("expected error when ior crashes, got nil")
+ }
+ if !strings.Contains(err.Error(), "ior") {
+ t.Errorf("error should mention ior, got: %v", err)
+ }
+ if pid == 0 {
+ t.Fatal("expected non-zero workload PID")
+ }
+ if len(result.Records) != 0 {
+ t.Errorf("expected no records from crashed ior, got %d", len(result.Records))
+ }
+}
+
+func TestIorStartFailureCleansUpWorkload(t *testing.T) {
+ tmpDir := t.TempDir()
+ outputDir := t.TempDir()
+
+ // Create a fake workload that prints PID and sleeps.
+ // Use exec to replace the shell so killing the process kills the sleep too.
+ workloadBin := writeScript(t, tmpDir, "workload", `echo $$; exec sleep 30`)
+
+ h := TestHarness{
+ IorBinary: "/nonexistent/ior",
+ WorkloadBinary: workloadBin,
+ BpfObject: filepath.Join(tmpDir, "fake.bpf.o"),
+ OutputDir: outputDir,
+ }
+
+ _, pid, err := h.Run("test", 5)
+ if err == nil {
+ t.Fatal("expected error when ior binary doesn't exist, got nil")
+ }
+ if pid == 0 {
+ t.Fatal("expected non-zero workload PID even when ior fails to start")
+ }
+ // Verify the workload process was cleaned up (killed).
+ // After Run returns, the workload should no longer be running.
+ // On Linux, FindProcess always succeeds, so we check with signal 0.
+ proc, procErr := os.FindProcess(pid)
+ if procErr == nil {
+ if signalErr := proc.Signal(syscall.Signal(0)); signalErr == nil {
+ t.Error("workload process is still running after ior start failure")
+ }
+ }
+}
diff --git a/integrationtests/helpers_test.go b/integrationtests/helpers_test.go
index edf57b9..7db54b2 100644
--- a/integrationtests/helpers_test.go
+++ b/integrationtests/helpers_test.go
@@ -36,6 +36,16 @@ func absPath(t *testing.T, rel string) string {
return p
}
+// writeScript creates an executable shell script in dir and returns its path.
+func writeScript(t *testing.T, dir, name, content string) string {
+ t.Helper()
+ path := filepath.Join(dir, name)
+ if err := os.WriteFile(path, []byte("#!/bin/sh\n"+content+"\n"), 0o755); err != nil {
+ t.Fatalf("write script %s: %v", name, err)
+ }
+ return path
+}
+
func runScenario(t *testing.T, scenario string, expected []ExpectedEvent) {
t.Helper()
h := newTestHarness(t)