diff options
| author | Paul Buetow <paul@buetow.org> | 2026-02-21 20:16:19 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-02-21 20:18:41 +0200 |
| commit | e51b8571bc192e7122f25a3d05a6407dfa8a6998 (patch) | |
| tree | 81e7e1fbe1c9e0a91d033b1aded00b4273502313 /integrationtests | |
| parent | 2f0ac27ec92840cab408e5f5a71d225be070cc0f (diff) | |
Add ior crash/timeout harness tests (task 343)
- Add waitBoth unit tests: ior exit error, ior timeout, both timeout,
  both succeed — using real processes (true/false/sleep)
- Add TestIorCrashReportsError: full harness test with fake ior binary
  that exits with error, verifying error mentions 'ior' and workload PID
  is returned
- Add TestIorStartFailureCleansUpWorkload: verifies workload process is
  killed when ior binary doesn't exist, checking with signal 0
- Refactor waitBoth to accept grace duration parameter for testability
  (production code passes iorShutdownGrace, tests use 500ms)
- Fix pipe drain in startWorkload: drain remaining stdout after reading
  PID so cmd.Wait() doesn't block on pending I/O
- Add writeScript helper to helpers_test.go for creating fake binaries
Co-authored-by: Amp <amp@ampcode.com>
Amp-Thread-ID: https://ampcode.com/threads/T-019c8162-c1cf-7612-b8f5-84c61e3d2021
Diffstat (limited to 'integrationtests')
| -rw-r--r-- | integrationtests/harness.go | 9 | ||||
| -rw-r--r-- | integrationtests/harness_test.go | 157 | ||||
| -rw-r--r-- | integrationtests/helpers_test.go | 10 |
3 files changed, 173 insertions, 3 deletions
diff --git a/integrationtests/harness.go b/integrationtests/harness.go index 7edde44..fde52e6 100644 --- a/integrationtests/harness.go +++ b/integrationtests/harness.go @@ -3,6 +3,7 @@ package integrationtests import ( "bufio" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -41,7 +42,7 @@ func (h *TestHarness) Run(scenario string, duration int) (TestResult, int, error return TestResult{}, workloadPID, err } - workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, duration) + workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, duration, iorShutdownGrace) if iorErr != nil { return TestResult{}, workloadPID, fmt.Errorf("ior: %w", iorErr) @@ -92,6 +93,8 @@ func (h *TestHarness) startWorkload(scenario string) (*exec.Cmd, int, error) { } else { errCh <- fmt.Errorf("workload produced no output") } + // Drain remaining pipe data so cmd.Wait() does not block. + io.Copy(io.Discard, stdout) //nolint:errcheck }() select { @@ -132,14 +135,14 @@ func (h *TestHarness) startIor(pid int, scenario string, duration int) (*exec.Cm // waitBoth waits for both the workload and ior commands concurrently. // If ior does not finish within duration + grace period, it is killed. 
-func waitBoth(workloadCmd, iorCmd *exec.Cmd, duration int) (workloadErr, iorErr error) { +func waitBoth(workloadCmd, iorCmd *exec.Cmd, duration int, grace time.Duration) (workloadErr, iorErr error) { workloadDone := make(chan error, 1) iorDone := make(chan error, 1) go func() { workloadDone <- workloadCmd.Wait() }() go func() { iorDone <- iorCmd.Wait() }() - timeout := time.After(time.Duration(duration)*time.Second + iorShutdownGrace) + timeout := time.After(time.Duration(duration)*time.Second + grace) for workloadDone != nil || iorDone != nil { select { diff --git a/integrationtests/harness_test.go b/integrationtests/harness_test.go index 813e9d6..6e076ad 100644 --- a/integrationtests/harness_test.go +++ b/integrationtests/harness_test.go @@ -1,8 +1,13 @@ package integrationtests import ( + "os" + "os/exec" + "path/filepath" "strings" + "syscall" "testing" + "time" ) func TestWorkloadCrashReportsError(t *testing.T) { @@ -21,3 +26,155 @@ func TestWorkloadCrashReportsError(t *testing.T) { t.Errorf("expected no records from crashed workload, got %d", len(result.Records)) } } + +func TestWaitBothIorExitError(t *testing.T) { + workloadCmd := exec.Command("true") + iorCmd := exec.Command("false") + if err := workloadCmd.Start(); err != nil { + t.Fatalf("start workload: %v", err) + } + if err := iorCmd.Start(); err != nil { + t.Fatalf("start ior: %v", err) + } + + workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, 5, iorShutdownGrace) + if iorErr == nil { + t.Fatal("expected ior error, got nil") + } + if workloadErr != nil { + t.Errorf("expected nil workload error, got: %v", workloadErr) + } +} + +func TestWaitBothIorTimeout(t *testing.T) { + workloadCmd := exec.Command("true") + iorCmd := exec.Command("sleep", "60") + if err := workloadCmd.Start(); err != nil { + t.Fatalf("start workload: %v", err) + } + if err := iorCmd.Start(); err != nil { + t.Fatalf("start ior: %v", err) + } + + // Use duration=0 and a short grace period so timeout fires quickly. 
+ // Workload ("true") exits instantly; ior ("sleep 60") exceeds the timeout. + workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, 0, 500*time.Millisecond) + if workloadErr != nil { + t.Errorf("expected nil workload error, got: %v", workloadErr) + } + if iorErr == nil { + t.Fatal("expected ior error from timeout, got nil") + } + if !strings.Contains(iorErr.Error(), "timed out") { + t.Errorf("expected timeout error, got: %v", iorErr) + } +} + +func TestWaitBothBothTimeout(t *testing.T) { + workloadCmd := exec.Command("sleep", "60") + iorCmd := exec.Command("sleep", "60") + if err := workloadCmd.Start(); err != nil { + t.Fatalf("start workload: %v", err) + } + if err := iorCmd.Start(); err != nil { + t.Fatalf("start ior: %v", err) + } + + workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, 0, 500*time.Millisecond) + if workloadErr == nil { + t.Fatal("expected workload timeout error, got nil") + } + if !strings.Contains(workloadErr.Error(), "timed out") { + t.Errorf("expected workload timeout error, got: %v", workloadErr) + } + if iorErr == nil { + t.Fatal("expected ior timeout error, got nil") + } + if !strings.Contains(iorErr.Error(), "timed out") { + t.Errorf("expected ior timeout error, got: %v", iorErr) + } +} + +func TestWaitBothBothSucceed(t *testing.T) { + workloadCmd := exec.Command("true") + iorCmd := exec.Command("true") + if err := workloadCmd.Start(); err != nil { + t.Fatalf("start workload: %v", err) + } + if err := iorCmd.Start(); err != nil { + t.Fatalf("start ior: %v", err) + } + + workloadErr, iorErr := waitBoth(workloadCmd, iorCmd, 5, iorShutdownGrace) + if workloadErr != nil { + t.Errorf("expected nil workload error, got: %v", workloadErr) + } + if iorErr != nil { + t.Errorf("expected nil ior error, got: %v", iorErr) + } +} + +func TestIorCrashReportsError(t *testing.T) { + tmpDir := t.TempDir() + outputDir := t.TempDir() + + // Create a fake workload that prints its PID and exits cleanly. 
+ workloadBin := writeScript(t, tmpDir, "workload", `echo $$`) + + // Create a fake ior that exits with error immediately. + iorBin := writeScript(t, tmpDir, "ior", `exit 1`) + + h := TestHarness{ + IorBinary: iorBin, + WorkloadBinary: workloadBin, + BpfObject: filepath.Join(tmpDir, "fake.bpf.o"), + OutputDir: outputDir, + } + + result, pid, err := h.Run("test", 5) + if err == nil { + t.Fatal("expected error when ior crashes, got nil") + } + if !strings.Contains(err.Error(), "ior") { + t.Errorf("error should mention ior, got: %v", err) + } + if pid == 0 { + t.Fatal("expected non-zero workload PID") + } + if len(result.Records) != 0 { + t.Errorf("expected no records from crashed ior, got %d", len(result.Records)) + } +} + +func TestIorStartFailureCleansUpWorkload(t *testing.T) { + tmpDir := t.TempDir() + outputDir := t.TempDir() + + // Create a fake workload that prints PID and sleeps. + // Use exec to replace the shell so killing the process kills the sleep too. + workloadBin := writeScript(t, tmpDir, "workload", `echo $$; exec sleep 30`) + + h := TestHarness{ + IorBinary: "/nonexistent/ior", + WorkloadBinary: workloadBin, + BpfObject: filepath.Join(tmpDir, "fake.bpf.o"), + OutputDir: outputDir, + } + + _, pid, err := h.Run("test", 5) + if err == nil { + t.Fatal("expected error when ior binary doesn't exist, got nil") + } + if pid == 0 { + t.Fatal("expected non-zero workload PID even when ior fails to start") + } + // Verify the workload process was cleaned up (killed). + // After Run returns, the workload should no longer be running. + // On Linux, FindProcess always succeeds, so we check with signal 0. 
+ proc, procErr := os.FindProcess(pid) + if procErr == nil { + if signalErr := proc.Signal(syscall.Signal(0)); signalErr == nil { + t.Error("workload process is still running after ior start failure") + } + } +} diff --git a/integrationtests/helpers_test.go b/integrationtests/helpers_test.go index edf57b9..7db54b2 100644 --- a/integrationtests/helpers_test.go +++ b/integrationtests/helpers_test.go @@ -36,6 +36,16 @@ func absPath(t *testing.T, rel string) string { return p } +// writeScript creates an executable shell script in dir and returns its path. +func writeScript(t *testing.T, dir, name, content string) string { + t.Helper() + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte("#!/bin/sh\n"+content+"\n"), 0o755); err != nil { + t.Fatalf("write script %s: %v", name, err) + } + return path +} + func runScenario(t *testing.T, scenario string, expected []ExpectedEvent) { t.Helper() h := newTestHarness(t) |
