summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Buetow <paul@buetow.org>2026-03-18 20:53:50 +0200
committerPaul Buetow <paul@buetow.org>2026-03-18 20:53:50 +0200
commitb421b2232351049277ee4ad5b31367bb2b6779bb (patch)
tree6e185cbf1736bb0308231563b29eeab10160d682
parentc234be24815452fa1143c9a523f4615e0d745fb7 (diff)
feat: add ParquetValidate mage target and ClickHouse querying docs
Adds a `mage parquetValidate` target that validates a Parquet recording via clickhouse-local in Docker (schema presence, row count, seq/time_ns sanity). Adds docs/parquet-querying.md with schema reference, invocation pattern, and seven example queries with representative outputs. Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
-rw-r--r--Magefile.go146
-rw-r--r--docs/parquet-querying.md223
2 files changed, 369 insertions, 0 deletions
diff --git a/Magefile.go b/Magefile.go
index ecb4060..95e4c0c 100644
--- a/Magefile.go
+++ b/Magefile.go
@@ -26,6 +26,7 @@ import (
)
const (
+ clickhouseImage = "clickhouse/clickhouse-server:latest"
binaryName = "ior"
workloadBinaryName = "ioworkload"
defaultLibbpfgoPath = "../libbpfgo"
@@ -999,3 +1000,148 @@ func isIntegrationTest(testName string) (bool, error) {
}
return false, nil
}
+
+// ParquetValidate validates a Parquet file using clickhouse-local in Docker.
+// Set PARQUET_FILE to override the default (latest *.parquet in repo root).
+// Checks schema column presence, row count > 0, and basic sanity on seq/time_ns.
+func ParquetValidate() error {
+ path, err := resolveParquetFile()
+ if err != nil {
+ return err
+ }
+ if err := checkDockerAvailable(); err != nil {
+ return err
+ }
+ abs, err := filepath.Abs(path)
+ if err != nil {
+ return fmt.Errorf("resolve absolute path for %s: %w", path, err)
+ }
+ dir, file := filepath.Dir(abs), filepath.Base(abs)
+ fmt.Printf("Validating parquet file: %s\n", abs)
+ return runParquetChecks(dir, file)
+}
+
+// resolveParquetFile returns the parquet file path from PARQUET_FILE env or
+// globs for the latest *.parquet in the repo root.
+func resolveParquetFile() (string, error) {
+ if path := os.Getenv("PARQUET_FILE"); path != "" {
+ if _, err := os.Stat(path); err != nil {
+ return "", fmt.Errorf("PARQUET_FILE=%s: %w", path, err)
+ }
+ return path, nil
+ }
+ matches, err := filepath.Glob("*.parquet")
+ if err != nil {
+ return "", fmt.Errorf("glob *.parquet: %w", err)
+ }
+ if len(matches) == 0 {
+ return "", fmt.Errorf("no *.parquet files found in repo root; set PARQUET_FILE to specify one")
+ }
+ // Use the last match (lexicographically latest, which matches timestamp-named files).
+ return matches[len(matches)-1], nil
+}
+
// checkDockerAvailable verifies that Docker is reachable via `docker info`.
func checkDockerAvailable() error {
	probe := exec.Command("docker", "info")
	// Only the exit status matters; suppress all output.
	probe.Stdout, probe.Stderr = io.Discard, io.Discard
	if runErr := probe.Run(); runErr != nil {
		return fmt.Errorf("docker is not available (is the daemon running?): %w", runErr)
	}
	return nil
}
+
+// runClickHouseQuery runs a SQL query against a parquet file using clickhouse-local
+// in Docker. The file must be in dir; it is mounted read-only at /data inside the
+// container. Returns the trimmed stdout output.
+func runClickHouseQuery(dir, file, sql string) (string, error) {
+ // Mount dir as /data read-only; pass the SQL via -q so no shell quoting is needed.
+ out, err := sh.Output("docker", "run", "--rm",
+ "-v", dir+":/data:ro",
+ clickhouseImage,
+ "clickhouse", "local", "-q", sql,
+ )
+ if err != nil {
+ return "", fmt.Errorf("clickhouse query %q: %w", sql, err)
+ }
+ return strings.TrimSpace(out), nil
+}
+
// expectedParquetColumns lists the 14 column names that the parquet schema must contain.
// Keep in sync with the schema table in docs/parquet-querying.md.
var expectedParquetColumns = []string{
	"seq", "time_ns", "gap_ns", "latency_ns", "comm",
	"pid", "tid", "syscall", "fd", "ret",
	"bytes", "file", "is_error", "filter_epoch",
}
+
+// runParquetChecks runs schema, row-count, and sanity checks against the parquet file.
+// dir is the absolute directory containing file (mounted at /data in the container).
+func runParquetChecks(dir, file string) error {
+ dataFile := "/data/" + file
+
+ // --- Schema check ---
+ fmt.Println("--- Schema check ---")
+ schemaOut, err := runClickHouseQuery(dir, file,
+ fmt.Sprintf("DESCRIBE TABLE file('%s', Parquet)", dataFile))
+ if err != nil {
+ return err
+ }
+ fmt.Println(schemaOut)
+ // Verify all expected column names appear somewhere in the DESCRIBE output.
+ missing := []string{}
+ for _, col := range expectedParquetColumns {
+ if !strings.Contains(schemaOut, col) {
+ missing = append(missing, col)
+ }
+ }
+ if len(missing) > 0 {
+ return fmt.Errorf("schema check FAIL: missing columns: %s", strings.Join(missing, ", "))
+ }
+ fmt.Printf("Schema check PASS: all %d expected columns present\n\n", len(expectedParquetColumns))
+
+ // --- Row count check ---
+ fmt.Println("--- Row count check ---")
+ countOut, err := runClickHouseQuery(dir, file,
+ fmt.Sprintf("SELECT count(*) FROM file('%s', Parquet)", dataFile))
+ if err != nil {
+ return err
+ }
+ rowCount, err := strconv.ParseInt(strings.TrimSpace(countOut), 10, 64)
+ if err != nil {
+ return fmt.Errorf("parse row count %q: %w", countOut, err)
+ }
+ if rowCount <= 0 {
+ return fmt.Errorf("row count check FAIL: got %d rows, expected > 0", rowCount)
+ }
+ fmt.Printf("Row count check PASS: %d rows\n\n", rowCount)
+
+ // --- Sanity check: seq and time_ns ranges ---
+ fmt.Println("--- Sanity check ---")
+ sanityOut, err := runClickHouseQuery(dir, file,
+ fmt.Sprintf("SELECT min(seq), max(seq), min(time_ns), countIf(is_error) FROM file('%s', Parquet)", dataFile))
+ if err != nil {
+ return err
+ }
+ fmt.Println(sanityOut)
+ parts := strings.Split(sanityOut, "\t")
+ if len(parts) != 4 {
+ return fmt.Errorf("sanity check FAIL: unexpected output format (got %d tab-separated columns)", len(parts))
+ }
+ minSeq, err1 := strconv.ParseUint(strings.TrimSpace(parts[0]), 10, 64)
+ maxSeq, err2 := strconv.ParseUint(strings.TrimSpace(parts[1]), 10, 64)
+ minTimeNs, err3 := strconv.ParseUint(strings.TrimSpace(parts[2]), 10, 64)
+ if err1 != nil || err2 != nil || err3 != nil {
+ return fmt.Errorf("sanity check FAIL: could not parse numeric values from: %s", sanityOut)
+ }
+ if maxSeq <= minSeq {
+ return fmt.Errorf("sanity check FAIL: seq not monotonically increasing (min=%d, max=%d)", minSeq, maxSeq)
+ }
+ if minTimeNs == 0 {
+ return fmt.Errorf("sanity check FAIL: min(time_ns) is zero")
+ }
+ fmt.Printf("Sanity check PASS: seq range [%d, %d], min time_ns=%d, error_count=%s\n",
+ minSeq, maxSeq, minTimeNs, strings.TrimSpace(parts[3]))
+
+ return nil
+}
diff --git a/docs/parquet-querying.md b/docs/parquet-querying.md
new file mode 100644
index 0000000..e940f9c
--- /dev/null
+++ b/docs/parquet-querying.md
@@ -0,0 +1,223 @@
+# Querying ior Parquet recordings with ClickHouse
+
+ior can record I/O events to a Parquet file (see `--record` flag). This document explains
+how to explore those recordings interactively using `clickhouse local`, which is bundled
+inside the standard `clickhouse/clickhouse-server` Docker image — no server, no persistent
+state, no installation needed beyond Docker.
+
+---
+
+## Prerequisites
+
+- Docker running locally (`docker info` must succeed)
+- A recording file, e.g. `ior-recording-20260313-170234.parquet`
+
+---
+
+## Schema
+
+| Column | Type | Description |
+|---|---|---|
+| `seq` | UInt64 | Monotonically increasing event sequence number |
+| `time_ns` | UInt64 | Absolute timestamp (nanoseconds since boot) |
+| `gap_ns` | UInt64 | Time since previous event (ns) |
+| `latency_ns` | UInt64 | Syscall duration (ns) |
+| `comm` | String | Process name |
+| `pid` | UInt32 | Process ID |
+| `tid` | UInt32 | Thread ID |
+| `syscall` | String | Syscall name (e.g. `read`, `openat`) |
+| `fd` | Int32 | File descriptor |
+| `ret` | Int64 | Return value (negative = errno) |
+| `bytes` | UInt64 | Bytes transferred (0 if not applicable) |
+| `file` | String | File path (empty if not resolved) |
+| `is_error` | Bool | True when `ret` is a negative errno |
+| `filter_epoch` | UInt64 | Filter generation at capture time |
+
+---
+
+## Running a query
+
+The general pattern mounts the directory containing the file as `/data` inside the container:
+
+```sh
+docker run --rm \
+ -v /path/to/recording/dir:/data:ro \
+ clickhouse/clickhouse-server:latest \
+ clickhouse local -q "SELECT ... FROM file('/data/recording.parquet', Parquet)"
+```
+
+For convenience, set shell variables:
+
+```sh
+FILE="ior-recording-20260313-170234.parquet"
+DIR="$(pwd)" # or the absolute directory containing the file
+IMG="clickhouse/clickhouse-server:latest"
+
+docker run --rm -v "$DIR:/data:ro" "$IMG" clickhouse local -q \
+ "SELECT ... FROM file('/data/$FILE', Parquet)"
+```
+
+---
+
+## Example queries
+
+### Inspect the schema
+
+```sql
+DESCRIBE TABLE file('/data/recording.parquet', Parquet)
+```
+
+```
+seq UInt64
+time_ns UInt64
+gap_ns UInt64
+latency_ns UInt64
+comm String
+pid UInt32
+tid UInt32
+syscall String
+fd Int32
+ret Int64
+bytes UInt64
+file String
+is_error Bool
+filter_epoch UInt64
+```
+
+### Row count
+
+```sql
+SELECT count(*) FROM file('/data/recording.parquet', Parquet)
+```
+
+### Top syscalls by call count
+
+```sql
+SELECT syscall, count(*) AS n
+FROM file('/data/recording.parquet', Parquet)
+GROUP BY syscall
+ORDER BY n DESC
+LIMIT 15
+```
+
+```
+read 61800
+ioctl 14642
+statx 10916
+close 8660
+openat 6310
+write 6053
+...
+```
+
+### Error breakdown
+
+```sql
+SELECT syscall, ret, count(*) AS n
+FROM file('/data/recording.parquet', Parquet)
+WHERE is_error
+GROUP BY syscall, ret
+ORDER BY n DESC
+LIMIT 15
+```
+
+```
+read -11 23597 -- EAGAIN (non-blocking, normal)
+statx -2 1216 -- ENOENT
+ioctl -25 540 -- ENOTTY
+openat -2 376 -- ENOENT
+...
+```
+
+### Latency percentiles per syscall
+
+```sql
+SELECT
+ syscall,
+ count(*) AS n,
+ quantile(0.50)(latency_ns) AS p50_ns,
+ quantile(0.90)(latency_ns) AS p90_ns,
+ quantile(0.99)(latency_ns) AS p99_ns
+FROM file('/data/recording.parquet', Parquet)
+GROUP BY syscall
+ORDER BY p99_ns DESC
+LIMIT 15
+```
+
+```
+fdatasync 11 1745501 9444892 9994682 -- ~10ms p99, flushes to disk
+fallocate 31 35062 487162 589146
+rename 3 126619 170921 180889
+ftruncate 59 5449 35776 100399
+ioctl 14642 1155 8845 63626
+...
+```
+
+### Top files by I/O bytes
+
+```sql
+SELECT file, sum(bytes) AS total_bytes, count(*) AS ops
+FROM file('/data/recording.parquet', Parquet)
+WHERE file != ''
+GROUP BY file
+ORDER BY total_bytes DESC
+LIMIT 15
+```
+
+```
+/home/paul/.mozilla/firefox/.../cookies.sqlite-wal 7082880 432
+/dev/ptmx 1590757 1472
+/proc/3680458/smaps 1555387 419
+...
+```
+
+### Activity by process
+
+```sql
+SELECT comm, pid, count(*) AS n, sum(bytes) AS total_bytes
+FROM file('/data/recording.parquet', Parquet)
+GROUP BY comm, pid
+ORDER BY n DESC
+LIMIT 15
+```
+
+### Slow syscalls (above threshold)
+
+```sql
+SELECT time_ns, comm, pid, syscall, file, latency_ns
+FROM file('/data/recording.parquet', Parquet)
+WHERE latency_ns > 1000000 -- 1ms
+ORDER BY latency_ns DESC
+LIMIT 20
+```
+
+### Event timeline (10ms buckets)
+
+```sql
+SELECT
+ intDiv(time_ns, 10000000) AS bucket_10ms,
+ count(*) AS events,
+ sum(bytes) AS bytes
+FROM file('/data/recording.parquet', Parquet)
+GROUP BY bucket_10ms
+ORDER BY bucket_10ms
+```
+
+---
+
+## Automated validation
+
+The `mage parquetValidate` target runs schema, row-count, and sanity checks
+against the latest `*.parquet` in the repo root:
+
+```sh
+env GOTOOLCHAIN=auto mage parquetValidate
+
+# Or against a specific file:
+PARQUET_FILE=ior-recording-20260313-170234.parquet env GOTOOLCHAIN=auto mage parquetValidate
+```
+
+It checks:
+1. All 14 expected columns are present
+2. Row count > 0
+3. `seq` is monotonically ordered and `time_ns` is non-zero