diff options
| author | Paul Buetow <paul@buetow.org> | 2026-03-18 20:53:50 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-03-18 20:53:50 +0200 |
| commit | b421b2232351049277ee4ad5b31367bb2b6779bb (patch) | |
| tree | 6e185cbf1736bb0308231563b29eeab10160d682 | |
| parent | c234be24815452fa1143c9a523f4615e0d745fb7 (diff) | |
feat: add ParquetValidate mage target and ClickHouse querying docs
Adds a `mage parquetValidate` target that validates a Parquet recording
via clickhouse-local in Docker (schema presence, row count, seq/time_ns
sanity). Adds docs/parquet-querying.md with schema reference, invocation
pattern, and seven example queries with representative outputs.
Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
| -rw-r--r-- | Magefile.go | 146 | ||||
| -rw-r--r-- | docs/parquet-querying.md | 223 |
2 files changed, 369 insertions, 0 deletions
diff --git a/Magefile.go b/Magefile.go index ecb4060..95e4c0c 100644 --- a/Magefile.go +++ b/Magefile.go @@ -26,6 +26,7 @@ import ( ) const ( + clickhouseImage = "clickhouse/clickhouse-server:latest" binaryName = "ior" workloadBinaryName = "ioworkload" defaultLibbpfgoPath = "../libbpfgo" @@ -999,3 +1000,148 @@ func isIntegrationTest(testName string) (bool, error) { } return false, nil } + +// ParquetValidate validates a Parquet file using clickhouse-local in Docker. +// Set PARQUET_FILE to override the default (latest *.parquet in repo root). +// Checks schema column presence, row count > 0, and basic sanity on seq/time_ns. +func ParquetValidate() error { + path, err := resolveParquetFile() + if err != nil { + return err + } + if err := checkDockerAvailable(); err != nil { + return err + } + abs, err := filepath.Abs(path) + if err != nil { + return fmt.Errorf("resolve absolute path for %s: %w", path, err) + } + dir, file := filepath.Dir(abs), filepath.Base(abs) + fmt.Printf("Validating parquet file: %s\n", abs) + return runParquetChecks(dir, file) +} + +// resolveParquetFile returns the parquet file path from PARQUET_FILE env or +// globs for the latest *.parquet in the repo root. +func resolveParquetFile() (string, error) { + if path := os.Getenv("PARQUET_FILE"); path != "" { + if _, err := os.Stat(path); err != nil { + return "", fmt.Errorf("PARQUET_FILE=%s: %w", path, err) + } + return path, nil + } + matches, err := filepath.Glob("*.parquet") + if err != nil { + return "", fmt.Errorf("glob *.parquet: %w", err) + } + if len(matches) == 0 { + return "", fmt.Errorf("no *.parquet files found in repo root; set PARQUET_FILE to specify one") + } + // Use the last match (lexicographically latest, which matches timestamp-named files). + return matches[len(matches)-1], nil +} + +// checkDockerAvailable verifies that Docker is reachable via `docker info`. 
+func checkDockerAvailable() error { + cmd := exec.Command("docker", "info") + cmd.Stdout = io.Discard + cmd.Stderr = io.Discard + if err := cmd.Run(); err != nil { + return fmt.Errorf("docker is not available (is the daemon running?): %w", err) + } + return nil +} + +// runClickHouseQuery runs a SQL query against a parquet file using clickhouse-local +// in Docker. The file must be in dir; it is mounted read-only at /data inside the +// container. Returns the trimmed stdout output. +func runClickHouseQuery(dir, file, sql string) (string, error) { + // Mount dir as /data read-only; pass the SQL via -q so no shell quoting is needed. + out, err := sh.Output("docker", "run", "--rm", + "-v", dir+":/data:ro", + clickhouseImage, + "clickhouse", "local", "-q", sql, + ) + if err != nil { + return "", fmt.Errorf("clickhouse query %q: %w", sql, err) + } + return strings.TrimSpace(out), nil +} + +// expectedParquetColumns lists the 14 column names that the parquet schema must contain. +var expectedParquetColumns = []string{ + "seq", "time_ns", "gap_ns", "latency_ns", "comm", + "pid", "tid", "syscall", "fd", "ret", + "bytes", "file", "is_error", "filter_epoch", +} + +// runParquetChecks runs schema, row-count, and sanity checks against the parquet file. +// dir is the absolute directory containing file (mounted at /data in the container). +func runParquetChecks(dir, file string) error { + dataFile := "/data/" + file + + // --- Schema check --- + fmt.Println("--- Schema check ---") + schemaOut, err := runClickHouseQuery(dir, file, + fmt.Sprintf("DESCRIBE TABLE file('%s', Parquet)", dataFile)) + if err != nil { + return err + } + fmt.Println(schemaOut) + // Verify all expected column names appear somewhere in the DESCRIBE output. 
+ missing := []string{} + for _, col := range expectedParquetColumns { + if !strings.Contains(schemaOut, col) { + missing = append(missing, col) + } + } + if len(missing) > 0 { + return fmt.Errorf("schema check FAIL: missing columns: %s", strings.Join(missing, ", ")) + } + fmt.Printf("Schema check PASS: all %d expected columns present\n\n", len(expectedParquetColumns)) + + // --- Row count check --- + fmt.Println("--- Row count check ---") + countOut, err := runClickHouseQuery(dir, file, + fmt.Sprintf("SELECT count(*) FROM file('%s', Parquet)", dataFile)) + if err != nil { + return err + } + rowCount, err := strconv.ParseInt(strings.TrimSpace(countOut), 10, 64) + if err != nil { + return fmt.Errorf("parse row count %q: %w", countOut, err) + } + if rowCount <= 0 { + return fmt.Errorf("row count check FAIL: got %d rows, expected > 0", rowCount) + } + fmt.Printf("Row count check PASS: %d rows\n\n", rowCount) + + // --- Sanity check: seq and time_ns ranges --- + fmt.Println("--- Sanity check ---") + sanityOut, err := runClickHouseQuery(dir, file, + fmt.Sprintf("SELECT min(seq), max(seq), min(time_ns), countIf(is_error) FROM file('%s', Parquet)", dataFile)) + if err != nil { + return err + } + fmt.Println(sanityOut) + parts := strings.Split(sanityOut, "\t") + if len(parts) != 4 { + return fmt.Errorf("sanity check FAIL: unexpected output format (got %d tab-separated columns)", len(parts)) + } + minSeq, err1 := strconv.ParseUint(strings.TrimSpace(parts[0]), 10, 64) + maxSeq, err2 := strconv.ParseUint(strings.TrimSpace(parts[1]), 10, 64) + minTimeNs, err3 := strconv.ParseUint(strings.TrimSpace(parts[2]), 10, 64) + if err1 != nil || err2 != nil || err3 != nil { + return fmt.Errorf("sanity check FAIL: could not parse numeric values from: %s", sanityOut) + } + if maxSeq <= minSeq { + return fmt.Errorf("sanity check FAIL: seq not monotonically increasing (min=%d, max=%d)", minSeq, maxSeq) + } + if minTimeNs == 0 { + return fmt.Errorf("sanity check FAIL: min(time_ns) is zero") 
+ } + fmt.Printf("Sanity check PASS: seq range [%d, %d], min time_ns=%d, error_count=%s\n", + minSeq, maxSeq, minTimeNs, strings.TrimSpace(parts[3])) + + return nil +} diff --git a/docs/parquet-querying.md b/docs/parquet-querying.md new file mode 100644 index 0000000..e940f9c --- /dev/null +++ b/docs/parquet-querying.md @@ -0,0 +1,223 @@ +# Querying ior Parquet recordings with ClickHouse + +ior can record I/O events to a Parquet file (see `--record` flag). This document explains +how to explore those recordings interactively using `clickhouse local`, which is bundled +inside the standard `clickhouse/clickhouse-server` Docker image — no server, no persistent +state, no installation needed beyond Docker. + +--- + +## Prerequisites + +- Docker running locally (`docker info` must succeed) +- A recording file, e.g. `ior-recording-20260313-170234.parquet` + +--- + +## Schema + +| Column | Type | Description | +|---|---|---| +| `seq` | UInt64 | Monotonically increasing event sequence number | +| `time_ns` | UInt64 | Absolute timestamp (nanoseconds since boot) | +| `gap_ns` | UInt64 | Time since previous event (ns) | +| `latency_ns` | UInt64 | Syscall duration (ns) | +| `comm` | String | Process name | +| `pid` | UInt32 | Process ID | +| `tid` | UInt32 | Thread ID | +| `syscall` | String | Syscall name (e.g. `read`, `openat`) | +| `fd` | Int32 | File descriptor | +| `ret` | Int64 | Return value (negative = errno) | +| `bytes` | UInt64 | Bytes transferred (0 if not applicable) | +| `file` | String | File path (empty if not resolved) | +| `is_error` | Bool | True when `ret` is a negative errno | +| `filter_epoch` | UInt64 | Filter generation at capture time | + +--- + +## Running a query + +The general pattern mounts the directory containing the file as `/data` inside the container: + +```sh +docker run --rm \ + -v /path/to/recording/dir:/data:ro \ + clickhouse/clickhouse-server:latest \ + clickhouse local -q "SELECT ... 
FROM file('/data/recording.parquet', Parquet)" +``` + +For convenience, set shell variables: + +```sh +FILE="ior-recording-20260313-170234.parquet" +DIR="$(pwd)" # or the absolute directory containing the file +IMG="clickhouse/clickhouse-server:latest" + +docker run --rm -v "$DIR:/data:ro" "$IMG" clickhouse local -q \ + "SELECT ... FROM file('/data/$FILE', Parquet)" +``` + +--- + +## Example queries + +### Inspect the schema + +```sql +DESCRIBE TABLE file('/data/recording.parquet', Parquet) +``` + +``` +seq UInt64 +time_ns UInt64 +gap_ns UInt64 +latency_ns UInt64 +comm String +pid UInt32 +tid UInt32 +syscall String +fd Int32 +ret Int64 +bytes UInt64 +file String +is_error Bool +filter_epoch UInt64 +``` + +### Row count + +```sql +SELECT count(*) FROM file('/data/recording.parquet', Parquet) +``` + +### Top syscalls by call count + +```sql +SELECT syscall, count(*) AS n +FROM file('/data/recording.parquet', Parquet) +GROUP BY syscall +ORDER BY n DESC +LIMIT 15 +``` + +``` +read 61800 +ioctl 14642 +statx 10916 +close 8660 +openat 6310 +write 6053 +... +``` + +### Error breakdown + +```sql +SELECT syscall, ret, count(*) AS n +FROM file('/data/recording.parquet', Parquet) +WHERE is_error +GROUP BY syscall, ret +ORDER BY n DESC +LIMIT 15 +``` + +``` +read -11 23597 -- EAGAIN (non-blocking, normal) +statx -2 1216 -- ENOENT +ioctl -25 540 -- ENOTTY +openat -2 376 -- ENOENT +... +``` + +### Latency percentiles per syscall + +```sql +SELECT + syscall, + count(*) AS n, + quantile(0.50)(latency_ns) AS p50_ns, + quantile(0.90)(latency_ns) AS p90_ns, + quantile(0.99)(latency_ns) AS p99_ns +FROM file('/data/recording.parquet', Parquet) +GROUP BY syscall +ORDER BY p99_ns DESC +LIMIT 15 +``` + +``` +fdatasync 11 1745501 9444892 9994682 -- ~10ms p99, flushes to disk +fallocate 31 35062 487162 589146 +rename 3 126619 170921 180889 +ftruncate 59 5449 35776 100399 +ioctl 14642 1155 8845 63626 +... 
+``` + +### Top files by I/O bytes + +```sql +SELECT file, sum(bytes) AS total_bytes, count(*) AS ops +FROM file('/data/recording.parquet', Parquet) +WHERE file != '' +GROUP BY file +ORDER BY total_bytes DESC +LIMIT 15 +``` + +``` +/home/paul/.mozilla/firefox/.../cookies.sqlite-wal 7082880 432 +/dev/ptmx 1590757 1472 +/proc/3680458/smaps 1555387 419 +... +``` + +### Activity by process + +```sql +SELECT comm, pid, count(*) AS n, sum(bytes) AS total_bytes +FROM file('/data/recording.parquet', Parquet) +GROUP BY comm, pid +ORDER BY n DESC +LIMIT 15 +``` + +### Slow syscalls (above threshold) + +```sql +SELECT time_ns, comm, pid, syscall, file, latency_ns +FROM file('/data/recording.parquet', Parquet) +WHERE latency_ns > 1000000 -- 1ms +ORDER BY latency_ns DESC +LIMIT 20 +``` + +### Event timeline (10ms buckets) + +```sql +SELECT + intDiv(time_ns, 10000000) AS bucket_10ms, + count(*) AS events, + sum(bytes) AS bytes +FROM file('/data/recording.parquet', Parquet) +GROUP BY bucket_10ms +ORDER BY bucket_10ms +``` + +--- + +## Automated validation + +The `mage parquetValidate` target runs schema, row-count, and sanity checks +against the latest `*.parquet` in the repo root: + +```sh +env GOTOOLCHAIN=auto mage parquetValidate + +# Or against a specific file: +PARQUET_FILE=ior-recording-20260313-170234.parquet env GOTOOLCHAIN=auto mage parquetValidate +``` + +It checks: +1. All 14 expected columns are present +2. Row count > 0 +3. `seq` spans a non-trivial range (`max(seq) > min(seq)`) and `min(time_ns)` is non-zero |
