diff options
| author | Paul Buetow <paul@buetow.org> | 2026-02-14 13:54:54 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-02-14 13:54:54 +0200 |
| commit | 3a6e01c1abd4a68810f1d85c9aa75293af47f579 (patch) | |
| tree | 2e3c066392cf2a292e89c90f259d039ce0afcb9b /scripts | |
| parent | f3ea9a7a1f466b6109271c76eb58189d2a799998 (diff) | |
docs: restructure documentation and move scripts to scripts/
- Add docs/ hierarchy: guides, backends, operations, reference, design
- Slim root README; add documentation index and links to docs/
- Add missing docs: csv-format-flexibility, dns-resolution, dtail-metrics-example, magefile
- Document Prometheus/VictoriaMetrics and ClickHouse backends
- Move all helper shell scripts to scripts/; update Magefile and doc references
- Add ASCII diagrams for watch mode (CSV watcher), auto mode, and ingestion paths
- Add .gitignore
Co-authored-by: Cursor <cursoragent@cursor.com>
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/backfill-historic-data.sh | 63 |
| -rw-r--r-- | scripts/benchmark-100mb.sh | 200 |
| -rw-r--r-- | scripts/benchmark-1gb.sh | 196 |
| -rw-r--r-- | scripts/cleanup-benchmark-data.sh | 89 |
| -rw-r--r-- | scripts/cleanup-benchmark-metrics.sh | 83 |
| -rw-r--r-- | scripts/generate-test-data.sh | 51 |
| -rw-r--r-- | scripts/run.sh | 37 |
| -rw-r--r-- | scripts/verify-clickhouse.sh | 52 |
8 files changed, 771 insertions, 0 deletions
diff --git a/scripts/backfill-historic-data.sh b/scripts/backfill-historic-data.sh new file mode 100644 index 0000000..c755da7 --- /dev/null +++ b/scripts/backfill-historic-data.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# Backfill historic data to Prometheus for Epimetheus dashboard +# Run from repo root: ./scripts/backfill-historic-data.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +echo "=== Epimetheus Historic Data Backfill ===" +echo "" +echo "This script will populate Prometheus with historic test data" +echo "going back 7 days, with data points every 12 hours." +echo "" + +# Port-forward to Prometheus +echo "Step 1: Setting up port-forward to Prometheus..." +kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/epimetheus-prom-pf.log 2>&1 & +PF_PID=$! +echo "Port-forward started (PID: $PF_PID)" + +# Wait for port-forward to be ready +sleep 5 + +# Run backfill +echo "" +echo "Step 2: Backfilling data from 7 days ago to now (12-hour intervals)..." +echo "" +./epimetheus -mode=backfill \ + -prometheus=http://localhost:9090/api/v1/write \ + -start-hours=168 \ + -end-hours=0 \ + -interval=12 + +EXIT_CODE=$? + +# Clean up +echo "" +echo "Step 3: Cleaning up port-forward..." +kill $PF_PID 2>/dev/null || true + +if [ $EXIT_CODE -eq 0 ]; then + echo "" + echo "✅ Historic data backfill complete!" 
+ echo "" + echo "The Grafana dashboard timeline should now show data from:" + echo " - 7 days ago" + echo " - 6 days ago" + echo " - 5 days ago" + echo " - 4 days ago" + echo " - 3 days ago" + echo " - 2 days ago" + echo " - 1 day ago" + echo " - 12 hours ago" + echo " - Now (from previous realtime push)" +else + echo "" + echo "❌ Backfill failed with exit code $EXIT_CODE" + echo "Check /tmp/epimetheus-prom-pf.log for port-forward logs" +fi + +exit $EXIT_CODE diff --git a/scripts/benchmark-100mb.sh b/scripts/benchmark-100mb.sh new file mode 100644 index 0000000..bda6476 --- /dev/null +++ b/scripts/benchmark-100mb.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# Benchmark script: Generate and ingest 100MB of historic metrics +# This tests Epimetheus performance with large-scale data ingestion +# Run from repo root: ./scripts/benchmark-100mb.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +# Optimize Go GC for better performance (Phase 3 optimization) +export GOGC=200 # Reduce GC frequency (default 100) +export GOMEMLIMIT=3GiB # Set memory limit for Go 1.19+ + +BENCHMARK_DIR="benchmark-results" +TIMESTAMP=$(date +%Y%m%d-%H%M%S) +RESULT_FILE="$BENCHMARK_DIR/benchmark-$TIMESTAMP.log" + +mkdir -p "$BENCHMARK_DIR" + +echo "=== Epimetheus 100MB Benchmark ===" | tee "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Timestamp: $(date)" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 1: Generate 100MB of test data +echo "Step 1: Generating 100MB of test data..." 
| tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Calculate: ~70 bytes per line, 100MB = ~1.5M lines +TARGET_SIZE_MB=100 +TARGET_BYTES=$((TARGET_SIZE_MB * 1024 * 1024)) +BYTES_PER_LINE=70 +TARGET_LINES=$((TARGET_BYTES / BYTES_PER_LINE)) + +echo "Target size: ${TARGET_SIZE_MB}MB" | tee -a "$RESULT_FILE" +echo "Estimated lines needed: $TARGET_LINES" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Generate data going back 7 days with 1-minute intervals +GENERATION_START=$(date +%s) + +NOW=$(date +%s)000 # Current time in milliseconds +ONE_HOUR_AGO=$((NOW - 3600000)) # Start from 1 hour ago to ensure all data is historic +SEVEN_DAYS_AGO=$((ONE_HOUR_AGO - 604800000)) # 7 days before that + +# CSV header +cat > benchmark-data-100mb.csv << 'EOF' +# Prometheus metrics - 100MB benchmark dataset +# Format: metric_name,labels,value,timestamp_ms +EOF + +# Generate metrics +METRICS=( + "epimetheus_benchmark_cpu_usage" + "epimetheus_benchmark_memory_bytes" + "epimetheus_benchmark_disk_io_bytes" + "epimetheus_benchmark_network_rx_bytes" + "epimetheus_benchmark_network_tx_bytes" + "epimetheus_benchmark_requests_total" + "epimetheus_benchmark_errors_total" + "epimetheus_benchmark_response_time_ms" + "epimetheus_benchmark_active_connections" + "epimetheus_benchmark_queue_depth" +) + +INSTANCES=( + "web-01" "web-02" "web-03" "web-04" "web-05" + "api-01" "api-02" "api-03" "api-04" "api-05" + "db-01" "db-02" "db-03" "worker-01" "worker-02" +) + +INTERVAL_MS=60000 # 1 minute interval +TOTAL_INTERVALS=10080 # 7 days of 1-minute intervals + +echo "Generating data..." 
| tee -a "$RESULT_FILE" +LINES_GENERATED=0 + +for ((i=0; i<TOTAL_INTERVALS; i++)); do + TIMESTAMP=$((SEVEN_DAYS_AGO + (i * INTERVAL_MS))) + + for METRIC in "${METRICS[@]}"; do + for INSTANCE in "${INSTANCES[@]}"; do + VALUE=$((RANDOM % 1000)) + echo "$METRIC,instance=$INSTANCE;env=benchmark,$VALUE,$TIMESTAMP" >> benchmark-data-100mb.csv + LINES_GENERATED=$((LINES_GENERATED + 1)) + done + done + + if [ $((i % 1000)) -eq 0 ]; then + PROGRESS=$((i * 100 / TOTAL_INTERVALS)) + echo -ne "\rProgress: $PROGRESS% ($LINES_GENERATED lines)" | tee -a "$RESULT_FILE" + fi +done + +echo "" | tee -a "$RESULT_FILE" + +GENERATION_END=$(date +%s) +GENERATION_TIME=$((GENERATION_END - GENERATION_START)) + +FILE_SIZE=$(stat -f%z benchmark-data-100mb.csv 2>/dev/null || stat -c%s benchmark-data-100mb.csv 2>/dev/null) +FILE_SIZE_MB=$((FILE_SIZE / 1024 / 1024)) + +echo "" | tee -a "$RESULT_FILE" +echo "Data generation complete:" | tee -a "$RESULT_FILE" +echo " Lines generated: $LINES_GENERATED" | tee -a "$RESULT_FILE" +echo " File size: ${FILE_SIZE_MB}MB ($FILE_SIZE bytes)" | tee -a "$RESULT_FILE" +echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 2: Start port-forward to Prometheus +echo "Step 2: Setting up port-forward to Prometheus..." | tee -a "$RESULT_FILE" +kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/benchmark-pf.log 2>&1 & +PF_PID=$! +echo "Port-forward started (PID: $PF_PID)" | tee -a "$RESULT_FILE" +sleep 8 +echo "" | tee -a "$RESULT_FILE" + +# Step 3: Get baseline Prometheus metrics +echo "Step 3: Collecting baseline Prometheus metrics..." 
| tee -a "$RESULT_FILE" +PROM_POD=$(kubectl get pod -n monitoring -l app.kubernetes.io/name=prometheus -o jsonpath='{.items[0].metadata.name}') +echo "Prometheus pod: $PROM_POD" | tee -a "$RESULT_FILE" +BASELINE_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}') +BASELINE_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}') +echo " Baseline memory: $BASELINE_MEMORY" | tee -a "$RESULT_FILE" +echo " Baseline CPU: $BASELINE_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 4: Run ingestion benchmark +echo "Step 4: Running ingestion benchmark..." | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +INGEST_START=$(date +%s.%N) + +echo "Parsing CSV and preparing for Remote Write ingestion..." | tee -a "$RESULT_FILE" +echo "WARNING: Using auto mode - this may fail if data is too recent" | tee -a "$RESULT_FILE" +echo "Continuing with Remote Write API for historic data..." | tee -a "$RESULT_FILE" + +/usr/bin/time -v ./epimetheus \ + -mode=auto \ + -file=benchmark-data-100mb.csv \ + -format=csv \ + -prometheus=http://localhost:9090/api/v1/write \ + -pushgateway=http://localhost:9091 \ + 2>&1 | tee -a "$RESULT_FILE" || true + +INGEST_END=$(date +%s.%N) +INGEST_TIME=$(echo "$INGEST_END - $INGEST_START" | bc) + +echo "" | tee -a "$RESULT_FILE" +echo "Ingestion complete:" | tee -a "$RESULT_FILE" +echo " Total time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE" +SAMPLES_PER_SECOND=$(echo "scale=2; $LINES_GENERATED / $INGEST_TIME" | bc) +MB_PER_SECOND=$(echo "scale=2; $FILE_SIZE_MB / $INGEST_TIME" | bc) +echo " Samples/second: $SAMPLES_PER_SECOND" | tee -a "$RESULT_FILE" +echo " MB/second: $MB_PER_SECOND" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 5: Post-ingestion metrics +echo "Step 5: Collecting post-ingestion Prometheus metrics..." 
| tee -a "$RESULT_FILE" +sleep 5 +POST_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}') +POST_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}') +echo " Post-ingestion memory: $POST_MEMORY" | tee -a "$RESULT_FILE" +echo " Post-ingestion CPU: $POST_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 6: Verify +echo "Step 6: Verifying data ingestion..." | tee -a "$RESULT_FILE" +QUERY_RESULT=$(curl -s "http://localhost:9090/api/v1/query?query=count(epimetheus_benchmark_cpu_usage)" | jq -r '.data.result[0].value[1]') +echo " Samples found for epimetheus_benchmark_cpu_usage: $QUERY_RESULT" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 7: Cleanup +echo "Step 7: Cleaning up..." | tee -a "$RESULT_FILE" +kill $PF_PID 2>/dev/null || true +echo "" | tee -a "$RESULT_FILE" + +echo "=== BENCHMARK SUMMARY ===" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Dataset:" | tee -a "$RESULT_FILE" +echo " Size: ${FILE_SIZE_MB}MB" | tee -a "$RESULT_FILE" +echo " Samples: $LINES_GENERATED" | tee -a "$RESULT_FILE" +echo " Time range: 7 days" | tee -a "$RESULT_FILE" +echo " Interval: 1 minute" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Performance:" | tee -a "$RESULT_FILE" +echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE" +echo " Ingestion time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE" +echo " Throughput: $SAMPLES_PER_SECOND samples/s" | tee -a "$RESULT_FILE" +echo " Throughput: $MB_PER_SECOND MB/s" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Resources:" | tee -a "$RESULT_FILE" +echo " Memory: $BASELINE_MEMORY -> $POST_MEMORY" | tee -a "$RESULT_FILE" +echo " CPU: $BASELINE_CPU -> $POST_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Results saved to: $RESULT_FILE" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "To view results: cat $RESULT_FILE" +echo "To analyze: less 
$RESULT_FILE" diff --git a/scripts/benchmark-1gb.sh b/scripts/benchmark-1gb.sh new file mode 100644 index 0000000..35176b0 --- /dev/null +++ b/scripts/benchmark-1gb.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# Benchmark script: Generate and ingest 1GB of historic metrics +# This tests Epimetheus performance with large-scale data ingestion +# Run from repo root: ./scripts/benchmark-1gb.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +# Optimize Go GC for better performance (Phase 3 optimization) +export GOGC=200 # Reduce GC frequency (default 100) +export GOMEMLIMIT=3GiB # Set memory limit for Go 1.19+ + +BENCHMARK_DIR="benchmark-results" +TIMESTAMP=$(date +%Y%m%d-%H%M%S) +RESULT_FILE="$BENCHMARK_DIR/benchmark-1gb-$TIMESTAMP.log" + +mkdir -p "$BENCHMARK_DIR" + +echo "=== Epimetheus 1GB Benchmark ===" | tee "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Timestamp: $(date)" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 1: Generate 1GB of test data +echo "Step 1: Generating 1GB of test data..." 
| tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +TARGET_SIZE_MB=1000 +TARGET_BYTES=$((TARGET_SIZE_MB * 1024 * 1024)) +BYTES_PER_LINE=80 +TARGET_LINES=$((TARGET_BYTES / BYTES_PER_LINE)) + +echo "Target size: ${TARGET_SIZE_MB}MB" | tee -a "$RESULT_FILE" +echo "Estimated lines needed: $TARGET_LINES" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +GENERATION_START=$(date +%s) + +NOW=$(date +%s)000 +ONE_HOUR_AGO=$((NOW - 3600000)) +THIRTY_DAYS_AGO=$((ONE_HOUR_AGO - 2592000000)) + +cat > benchmark-data-1gb.csv << 'EOF' +# Prometheus metrics - 1GB benchmark dataset +# Format: metric_name,labels,value,timestamp_ms +EOF + +METRICS=( + "epimetheus_benchmark_cpu_usage" + "epimetheus_benchmark_memory_bytes" + "epimetheus_benchmark_disk_io_bytes" + "epimetheus_benchmark_network_rx_bytes" + "epimetheus_benchmark_network_tx_bytes" + "epimetheus_benchmark_requests_total" + "epimetheus_benchmark_errors_total" + "epimetheus_benchmark_response_time_ms" + "epimetheus_benchmark_active_connections" + "epimetheus_benchmark_queue_depth" +) + +INSTANCES=( + "web-01" "web-02" "web-03" "web-04" "web-05" + "api-01" "api-02" "api-03" "api-04" "api-05" + "db-01" "db-02" "db-03" "worker-01" "worker-02" +) + +INTERVAL_MS=30000 +TOTAL_INTERVALS=86400 + +echo "Generating data..." 
| tee -a "$RESULT_FILE" +LINES_GENERATED=0 + +for ((i=0; i<TOTAL_INTERVALS; i++)); do + TIMESTAMP=$((THIRTY_DAYS_AGO + (i * INTERVAL_MS))) + + for METRIC in "${METRICS[@]}"; do + for INSTANCE in "${INSTANCES[@]}"; do + VALUE=$((RANDOM % 1000)) + echo "$METRIC,instance=$INSTANCE;env=benchmark,$VALUE,$TIMESTAMP" >> benchmark-data-1gb.csv + LINES_GENERATED=$((LINES_GENERATED + 1)) + done + done + + if [ $((i % 5000)) -eq 0 ]; then + PROGRESS=$((i * 100 / TOTAL_INTERVALS)) + echo -ne "\rProgress: $PROGRESS% ($LINES_GENERATED lines)" | tee -a "$RESULT_FILE" + fi +done + +echo "" | tee -a "$RESULT_FILE" + +GENERATION_END=$(date +%s) +GENERATION_TIME=$((GENERATION_END - GENERATION_START)) + +FILE_SIZE=$(stat -f%z benchmark-data-1gb.csv 2>/dev/null || stat -c%s benchmark-data-1gb.csv 2>/dev/null) +FILE_SIZE_MB=$((FILE_SIZE / 1024 / 1024)) + +echo "" | tee -a "$RESULT_FILE" +echo "Data generation complete:" | tee -a "$RESULT_FILE" +echo " Lines generated: $LINES_GENERATED" | tee -a "$RESULT_FILE" +echo " File size: ${FILE_SIZE_MB}MB ($FILE_SIZE bytes)" | tee -a "$RESULT_FILE" +echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 2: Port-forward +echo "Step 2: Setting up port-forward to Prometheus..." | tee -a "$RESULT_FILE" +kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/benchmark-pf.log 2>&1 & +PF_PID=$! +echo "Port-forward started (PID: $PF_PID)" | tee -a "$RESULT_FILE" +sleep 8 +echo "" | tee -a "$RESULT_FILE" + +# Step 3: Baseline +echo "Step 3: Collecting baseline Prometheus metrics..." 
| tee -a "$RESULT_FILE" +PROM_POD=$(kubectl get pod -n monitoring -l app.kubernetes.io/name=prometheus -o jsonpath='{.items[0].metadata.name}') +echo "Prometheus pod: $PROM_POD" | tee -a "$RESULT_FILE" +BASELINE_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}') +BASELINE_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}') +echo " Baseline memory: $BASELINE_MEMORY" | tee -a "$RESULT_FILE" +echo " Baseline CPU: $BASELINE_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 4: Ingest +echo "Step 4: Running ingestion benchmark..." | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +INGEST_START=$(date +%s.%N) + +echo "Parsing CSV and preparing for Remote Write ingestion..." | tee -a "$RESULT_FILE" +echo "WARNING: Using auto mode - this may fail if data is too recent" | tee -a "$RESULT_FILE" +echo "Continuing with Remote Write API for historic data..." | tee -a "$RESULT_FILE" + +/usr/bin/time -v ./epimetheus \ + -mode=auto \ + -file=benchmark-data-1gb.csv \ + -format=csv \ + -prometheus=http://localhost:9090/api/v1/write \ + -pushgateway=http://localhost:9091 \ + 2>&1 | tee -a "$RESULT_FILE" || true + +INGEST_END=$(date +%s.%N) +INGEST_TIME=$(echo "$INGEST_END - $INGEST_START" | bc) + +echo "" | tee -a "$RESULT_FILE" +echo "Ingestion complete:" | tee -a "$RESULT_FILE" +echo " Total time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE" +SAMPLES_PER_SECOND=$(echo "scale=2; $LINES_GENERATED / $INGEST_TIME" | bc) +MB_PER_SECOND=$(echo "scale=2; $FILE_SIZE_MB / $INGEST_TIME" | bc) +echo " Samples/second: $SAMPLES_PER_SECOND" | tee -a "$RESULT_FILE" +echo " MB/second: $MB_PER_SECOND" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 5: Post-ingestion +echo "Step 5: Collecting post-ingestion Prometheus metrics..." 
| tee -a "$RESULT_FILE" +sleep 5 +POST_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}') +POST_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}') +echo " Post-ingestion memory: $POST_MEMORY" | tee -a "$RESULT_FILE" +echo " Post-ingestion CPU: $POST_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 6: Verify +echo "Step 6: Verifying data ingestion..." | tee -a "$RESULT_FILE" +QUERY_RESULT=$(curl -s "http://localhost:9090/api/v1/query?query=count(epimetheus_benchmark_cpu_usage)" | jq -r '.data.result[0].value[1]') +echo " Samples found for epimetheus_benchmark_cpu_usage: $QUERY_RESULT" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 7: Cleanup +echo "Step 7: Cleaning up..." | tee -a "$RESULT_FILE" +kill $PF_PID 2>/dev/null || true +echo "" | tee -a "$RESULT_FILE" + +echo "=== BENCHMARK SUMMARY ===" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Dataset:" | tee -a "$RESULT_FILE" +echo " Size: ${FILE_SIZE_MB}MB" | tee -a "$RESULT_FILE" +echo " Samples: $LINES_GENERATED" | tee -a "$RESULT_FILE" +echo " Time range: 30 days" | tee -a "$RESULT_FILE" +echo " Interval: 30 seconds" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Performance:" | tee -a "$RESULT_FILE" +echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE" +echo " Ingestion time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE" +echo " Throughput: $SAMPLES_PER_SECOND samples/s" | tee -a "$RESULT_FILE" +echo " Throughput: $MB_PER_SECOND MB/s" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Resources:" | tee -a "$RESULT_FILE" +echo " Memory: $BASELINE_MEMORY -> $POST_MEMORY" | tee -a "$RESULT_FILE" +echo " CPU: $BASELINE_CPU -> $POST_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Results saved to: $RESULT_FILE" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "To view results: cat $RESULT_FILE" +echo "To analyze: less 
$RESULT_FILE" diff --git a/scripts/cleanup-benchmark-data.sh b/scripts/cleanup-benchmark-data.sh new file mode 100644 index 0000000..48ba187 --- /dev/null +++ b/scripts/cleanup-benchmark-data.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Cleanup script: Delete benchmark data from Prometheus +# This uses the Prometheus Admin API to selectively remove benchmark metrics +# Run from repo root: ./scripts/cleanup-benchmark-data.sh [prometheus_url] + +set -e + +PROMETHEUS_URL="${1:-http://localhost:9090}" + +echo "=== Prometheus Benchmark Data Cleanup ===" +echo "" +echo "Prometheus URL: $PROMETHEUS_URL" +echo "" + +# Check if port-forward is needed +if [[ "$PROMETHEUS_URL" == *"localhost"* ]]; then + echo "Note: Make sure you have port-forward running:" + echo " kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090" + echo "" +fi + +# Metrics to delete +METRICS=( + "epimetheus_benchmark_cpu_usage" + "epimetheus_benchmark_memory_bytes" + "epimetheus_benchmark_disk_io_bytes" + "epimetheus_benchmark_network_rx_bytes" + "epimetheus_benchmark_network_tx_bytes" + "epimetheus_benchmark_requests_total" + "epimetheus_benchmark_errors_total" + "epimetheus_benchmark_response_time_ms" + "epimetheus_benchmark_active_connections" + "epimetheus_benchmark_queue_depth" +) + +echo "Step 1: Deleting benchmark metrics..." 
+echo "" + +SUCCESS_COUNT=0 +ERROR_COUNT=0 + +for METRIC in "${METRICS[@]}"; do + echo " Deleting: $METRIC" + + # Delete series endpoint returns HTTP 204 No Content on success + HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST -g "${PROMETHEUS_URL}/api/v1/admin/tsdb/delete_series?match[]=${METRIC}") + + if [ "$HTTP_CODE" == "204" ] || [ "$HTTP_CODE" == "200" ]; then + echo " ✅ Success (HTTP $HTTP_CODE)" + SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) + else + echo " ❌ Error: HTTP $HTTP_CODE" + ERROR_COUNT=$((ERROR_COUNT + 1)) + fi +done + +echo "" +echo "Deletion summary: $SUCCESS_COUNT succeeded, $ERROR_COUNT failed" +echo "" + +if [ $ERROR_COUNT -eq 0 ]; then + echo "Step 2: Cleaning up tombstones..." + echo "" + + # Clean tombstones endpoint returns HTTP 204 No Content on success + CLEANUP_HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST "${PROMETHEUS_URL}/api/v1/admin/tsdb/clean_tombstones") + + if [ "$CLEANUP_HTTP_CODE" == "204" ] || [ "$CLEANUP_HTTP_CODE" == "200" ]; then + echo " ✅ Tombstones cleaned successfully (HTTP $CLEANUP_HTTP_CODE)" + echo "" + echo "🎉 Cleanup complete!" + echo "" + echo "Note: Prometheus may take a few moments to compact the database" + echo "and free up disk space." + else + echo " ❌ Error cleaning tombstones: HTTP $CLEANUP_HTTP_CODE" + exit 1 + fi +else + echo "⚠️ Some deletions failed. Skipping tombstone cleanup." 
+ echo "Check Prometheus admin API is enabled with:" + echo " kubectl get prometheus -n monitoring prometheus-kube-prometheus-prometheus -o yaml | grep -A5 additionalArgs" + exit 1 +fi + +echo "" +echo "To verify deletion, run:" +echo " curl -s '${PROMETHEUS_URL}/api/v1/label/__name__/values' | jq '.data | map(select(startswith(\"epimetheus_benchmark\")))'" +echo "" diff --git a/scripts/cleanup-benchmark-metrics.sh b/scripts/cleanup-benchmark-metrics.sh new file mode 100644 index 0000000..7b1ce4e --- /dev/null +++ b/scripts/cleanup-benchmark-metrics.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# Cleanup benchmark metrics from Prometheus +# This allows running benchmarks from a clean state +# Run from repo root: ./scripts/cleanup-benchmark-metrics.sh + +set -e + +echo "=== Prometheus Benchmark Metrics Cleanup ===" +echo "" + +# Port-forward to Prometheus +echo "Setting up port-forward to Prometheus..." +kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/cleanup-pf.log 2>&1 & +PF_PID=$! +echo "Port-forward started (PID: $PF_PID)" +sleep 5 + +# Check if Admin API is enabled +echo "" +echo "Checking if Prometheus Admin API is enabled..." +ADMIN_CHECK=$(curl -s -o /dev/null -w "%{http_code}" -X POST "http://localhost:9090/api/v1/admin/tsdb/delete_series?match[]=epimetheus_benchmark_cpu_usage") + +if [ "$ADMIN_CHECK" = "204" ] || [ "$ADMIN_CHECK" = "200" ]; then + echo "✅ Admin API is enabled" + echo "" + echo "Deleting benchmark metrics..." 
+ + # Delete all benchmark metrics + METRICS=( + "epimetheus_benchmark_cpu_usage" + "epimetheus_benchmark_memory_bytes" + "epimetheus_benchmark_disk_io_bytes" + "epimetheus_benchmark_network_rx_bytes" + "epimetheus_benchmark_network_tx_bytes" + "epimetheus_benchmark_requests_total" + "epimetheus_benchmark_errors_total" + "epimetheus_benchmark_response_time_ms" + "epimetheus_benchmark_active_connections" + "epimetheus_benchmark_queue_depth" + ) + + for METRIC in "${METRICS[@]}"; do + echo " Deleting: $METRIC" + curl -s -X POST "http://localhost:9090/api/v1/admin/tsdb/delete_series?match[]=$METRIC" > /dev/null + done + + echo "" + echo "Triggering tombstone cleanup (this removes deleted data from disk)..." + curl -s -X POST "http://localhost:9090/api/v1/admin/tsdb/clean_tombstones" > /dev/null + + echo "" + echo "✅ Cleanup complete!" +elif [ "$ADMIN_CHECK" = "405" ]; then + echo "❌ Admin API is NOT enabled" + echo "" + echo "To enable the Admin API, update your Prometheus configuration:" + echo "" + echo "In f3s/prometheus/persistence-values.yaml, add:" + echo "" + echo "prometheus:" + echo " prometheusSpec:" + echo " additionalArgs:" + echo " - name: web.enable-admin-api" + echo " value: \"\"" + echo "" + echo "Then upgrade Prometheus:" + echo " helm upgrade ... (or: just upgrade in your conf repo)" + echo "" + echo "WARNING: Admin API should only be enabled in development/test environments!" + echo "" + echo "Alternative: Delete benchmark data files manually:" + echo " kubectl exec -n monitoring prometheus-prometheus-kube-prometheus-prometheus-0 -- sh -c 'rm -rf /prometheus/data/wal/*'" + echo " kubectl delete pod -n monitoring prometheus-prometheus-kube-prometheus-prometheus-0" +else + echo "⚠️ Unexpected response: HTTP $ADMIN_CHECK" +fi + +echo "" +echo "Cleaning up port-forward..." +kill $PF_PID 2>/dev/null || true + +echo "" +echo "Done!" 
diff --git a/scripts/generate-test-data.sh b/scripts/generate-test-data.sh new file mode 100644 index 0000000..4db332e --- /dev/null +++ b/scripts/generate-test-data.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# Generate test data with actual timestamps for different time ranges +# Run from repo root: ./scripts/generate-test-data.sh + +NOW=$(date +%s)000 # Current time in milliseconds +ONE_HOUR_AGO=$((NOW - 3600000)) +ONE_DAY_AGO=$((NOW - 86400000)) +ONE_WEEK_AGO=$((NOW - 604800000)) +ONE_MONTH_AGO=$((NOW - 2592000000)) + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +cat > test-all-ages.csv << EOF +# Prometheus metrics in CSV format demonstrating all time ranges +# Format: metric_name,labels,value,timestamp_ms + +# CURRENT data (< 5min old - will use Pushgateway/Realtime) +app_requests_total,instance=current;env=prod,100,$NOW +app_temperature_celsius,instance=current;zone=us-east,22.5,$NOW +app_active_connections,instance=current;env=prod,50,$NOW + +# 1 HOUR OLD data (will use Remote Write/Historic) +app_requests_total,instance=1h_ago;env=prod,95,$ONE_HOUR_AGO +app_active_connections,instance=1h_ago;env=prod,45,$ONE_HOUR_AGO +app_temperature_celsius,instance=1h_ago;zone=us-east,21.8,$ONE_HOUR_AGO + +# 1 DAY OLD data (will use Remote Write/Historic) +app_requests_total,instance=1d_ago;env=prod,150,$ONE_DAY_AGO +app_temperature_celsius,instance=1d_ago;zone=eu-west,18.3,$ONE_DAY_AGO +app_active_connections,instance=1d_ago;env=prod,60,$ONE_DAY_AGO + +# 1 WEEK OLD data (will use Remote Write/Historic) +app_requests_total,instance=1w_ago;env=prod,200,$ONE_WEEK_AGO +app_jobs_processed_total,instance=1w_ago;env=prod;job_type=email;status=success,75,$ONE_WEEK_AGO +app_temperature_celsius,instance=1w_ago;zone=asia,25.2,$ONE_WEEK_AGO + +# 1 MONTH OLD data (will use Remote Write/Historic) +app_requests_total,instance=1m_ago;env=prod,180,$ONE_MONTH_AGO 
+app_active_connections,instance=1m_ago;env=prod,30,$ONE_MONTH_AGO +app_temperature_celsius,instance=1m_ago;zone=africa,28.7,$ONE_MONTH_AGO +EOF + +echo "Generated test-all-ages.csv with the following timestamps:" +echo " Current: $NOW ($(date -d @$((NOW/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((NOW/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" +echo " 1h ago: $ONE_HOUR_AGO ($(date -d @$((ONE_HOUR_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((ONE_HOUR_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" +echo " 1d ago: $ONE_DAY_AGO ($(date -d @$((ONE_DAY_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((ONE_DAY_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" +echo " 1w ago: $ONE_WEEK_AGO ($(date -d @$((ONE_WEEK_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((ONE_WEEK_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" +echo " 1m ago: $ONE_MONTH_AGO ($(date -d @$((ONE_MONTH_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((ONE_MONTH_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" diff --git a/scripts/run.sh b/scripts/run.sh new file mode 100644 index 0000000..d603639 --- /dev/null +++ b/scripts/run.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# Simple script to run Epimetheus +# Automatically sets up port-forwarding and runs the binary +# Run from repo root: ./scripts/run.sh + +set -e + +echo "Starting Epimetheus..." +echo "" +echo "Step 1: Setting up port-forward to Pushgateway..." +kubectl port-forward -n monitoring svc/pushgateway 9091:9091 > /tmp/pushgateway-port-forward.log 2>&1 & +PF_PID=$! + +# Wait for port-forward to be ready +sleep 2 + +echo "Step 2: Running epimetheus binary (realtime mode)..." +echo "Press Ctrl+C to stop" +echo "" + +# Run from repo root so ./epimetheus resolves +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +# Run the binary in realtime mode and capture its exit status +./epimetheus -mode=realtime -continuous +EXIT_CODE=$? 
+ +# Clean up port-forward +echo "" +echo "Cleaning up port-forward..." +kill $PF_PID 2>/dev/null || true + +echo "Done!" +exit $EXIT_CODE diff --git a/scripts/verify-clickhouse.sh b/scripts/verify-clickhouse.sh new file mode 100644 index 0000000..a9c3233 --- /dev/null +++ b/scripts/verify-clickhouse.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# Verify that epimetheus metrics were successfully ingested into ClickHouse. +# Usage: ./scripts/verify-clickhouse.sh [clickhouse_url] [table_name] +# Default: http://localhost:8123, epimetheus_metrics + +set -e + +CLICKHOUSE_URL="${1:-http://localhost:8123}" +TABLE="${2:-epimetheus_metrics}" + +echo "Verifying ClickHouse ingestion..." +echo " URL: $CLICKHOUSE_URL" +echo " Table: $TABLE" +echo "" + +# Check connectivity +if ! curl -sS "${CLICKHOUSE_URL}/ping" > /dev/null 2>&1; then + echo "ERROR: Cannot connect to ClickHouse at $CLICKHOUSE_URL" + echo " Make sure ClickHouse is running: sudo systemctl start clickhouse-server" + exit 1 +fi + +echo "✓ ClickHouse is reachable" +echo "" + +# Query 1: Row count +echo "--- Row count ---" +COUNT=$(curl -sS "${CLICKHOUSE_URL}/?query=SELECT%20count()%20FROM%20${TABLE}" 2>/dev/null | tail -1) +if [ -z "$COUNT" ] || [ "$COUNT" = "0" ]; then + echo "ERROR: Table $TABLE is empty or does not exist" + echo " Run: ./epimetheus -mode=watch -file=test-data/watch-clickhouse-test.csv -metric-name=watch_test -clickhouse=$CLICKHOUSE_URL -prometheus=" + exit 1 +fi +echo "Total rows: $COUNT" +echo "" + +# Query 2: Distinct metrics +echo "--- Metrics in table ---" +curl -sS "${CLICKHOUSE_URL}/?query=SELECT%20distinct%20metric%20FROM%20${TABLE}%20ORDER%20BY%20metric%20FORMAT%20PrettyCompact" 2>/dev/null +echo "" + +# Query 3: Sample data +echo "--- Sample rows (last 5) ---" +curl -sS "${CLICKHOUSE_URL}/?query=SELECT%20metric%2C%20labels%2C%20value%2C%20timestamp%20FROM%20${TABLE}%20ORDER%20BY%20timestamp%20DESC%20LIMIT%205%20FORMAT%20PrettyCompact" 2>/dev/null +echo "" + +# Query 4: Aggregation by metric +echo 
"--- Rows per metric ---" +curl -sS "${CLICKHOUSE_URL}/?query=SELECT%20metric%2C%20count()%20AS%20cnt%20FROM%20${TABLE}%20GROUP%20BY%20metric%20ORDER%20BY%20cnt%20DESC%20FORMAT%20PrettyCompact" 2>/dev/null +echo "" + +echo "✅ ClickHouse verification complete - data is present and queryable" |
