diff options
| author | Paul Buetow <paul@buetow.org> | 2026-02-14 13:54:54 +0200 |
|---|---|---|
| committer | Paul Buetow <paul@buetow.org> | 2026-02-14 13:54:54 +0200 |
| commit | 3a6e01c1abd4a68810f1d85c9aa75293af47f579 (patch) | |
| tree | 2e3c066392cf2a292e89c90f259d039ce0afcb9b /scripts | |
| parent | f3ea9a7a1f466b6109271c76eb58189d2a799998 (diff) | |
docs: restructure documentation and move scripts to scripts/
- Add docs/ hierarchy: guides, backends, operations, reference, design
- Slim root README; add documentation index and links to docs/
- Add missing docs: csv-format-flexibility, dns-resolution, dtail-metrics-example, magefile
- Document Prometheus/VictoriaMetrics and ClickHouse backends
- Move all helper shell scripts to scripts/; update Magefile and doc references
- Add ASCII diagrams for watch mode (CSV watcher), auto mode, and ingestion paths
- Add .gitignore
Co-authored-by: Cursor <cursoragent@cursor.com>
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/backfill-historic-data.sh | 63 |
| -rw-r--r-- | scripts/benchmark-100mb.sh | 200 |
| -rw-r--r-- | scripts/benchmark-1gb.sh | 196 |
| -rw-r--r-- | scripts/cleanup-benchmark-data.sh | 89 |
| -rw-r--r-- | scripts/cleanup-benchmark-metrics.sh | 83 |
| -rw-r--r-- | scripts/generate-test-data.sh | 51 |
| -rw-r--r-- | scripts/run.sh | 37 |
| -rw-r--r-- | scripts/verify-clickhouse.sh | 52 |
8 files changed, 771 insertions, 0 deletions
diff --git a/scripts/backfill-historic-data.sh b/scripts/backfill-historic-data.sh new file mode 100644 index 0000000..c755da7 --- /dev/null +++ b/scripts/backfill-historic-data.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# Backfill historic data to Prometheus for Epimetheus dashboard +# Run from repo root: ./scripts/backfill-historic-data.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +echo "=== Epimetheus Historic Data Backfill ===" +echo "" +echo "This script will populate Prometheus with historic test data" +echo "going back 7 days, with data points every 12 hours." +echo "" + +# Port-forward to Prometheus +echo "Step 1: Setting up port-forward to Prometheus..." +kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/epimetheus-prom-pf.log 2>&1 & +PF_PID=$! +echo "Port-forward started (PID: $PF_PID)" + +# Wait for port-forward to be ready +sleep 5 + +# Run backfill +echo "" +echo "Step 2: Backfilling data from 7 days ago to now (12-hour intervals)..." +echo "" +./epimetheus -mode=backfill \ + -prometheus=http://localhost:9090/api/v1/write \ + -start-hours=168 \ + -end-hours=0 \ + -interval=12 + +EXIT_CODE=$? + +# Clean up +echo "" +echo "Step 3: Cleaning up port-forward..." +kill $PF_PID 2>/dev/null || true + +if [ $EXIT_CODE -eq 0 ]; then + echo "" + echo "✅ Historic data backfill complete!" 
+ echo "" + echo "The Grafana dashboard timeline should now show data from:" + echo " - 7 days ago" + echo " - 6 days ago" + echo " - 5 days ago" + echo " - 4 days ago" + echo " - 3 days ago" + echo " - 2 days ago" + echo " - 1 day ago" + echo " - 12 hours ago" + echo " - Now (from previous realtime push)" +else + echo "" + echo "❌ Backfill failed with exit code $EXIT_CODE" + echo "Check /tmp/epimetheus-prom-pf.log for port-forward logs" +fi + +exit $EXIT_CODE diff --git a/scripts/benchmark-100mb.sh b/scripts/benchmark-100mb.sh new file mode 100644 index 0000000..bda6476 --- /dev/null +++ b/scripts/benchmark-100mb.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# Benchmark script: Generate and ingest 100MB of historic metrics +# This tests Epimetheus performance with large-scale data ingestion +# Run from repo root: ./scripts/benchmark-100mb.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +# Optimize Go GC for better performance (Phase 3 optimization) +export GOGC=200 # Reduce GC frequency (default 100) +export GOMEMLIMIT=3GiB # Set memory limit for Go 1.19+ + +BENCHMARK_DIR="benchmark-results" +TIMESTAMP=$(date +%Y%m%d-%H%M%S) +RESULT_FILE="$BENCHMARK_DIR/benchmark-$TIMESTAMP.log" + +mkdir -p "$BENCHMARK_DIR" + +echo "=== Epimetheus 100MB Benchmark ===" | tee "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Timestamp: $(date)" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 1: Generate 100MB of test data +echo "Step 1: Generating 100MB of test data..." 
| tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Calculate: ~70 bytes per line, 100MB = ~1.5M lines +TARGET_SIZE_MB=100 +TARGET_BYTES=$((TARGET_SIZE_MB * 1024 * 1024)) +BYTES_PER_LINE=70 +TARGET_LINES=$((TARGET_BYTES / BYTES_PER_LINE)) + +echo "Target size: ${TARGET_SIZE_MB}MB" | tee -a "$RESULT_FILE" +echo "Estimated lines needed: $TARGET_LINES" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Generate data going back 7 days with 1-minute intervals +GENERATION_START=$(date +%s) + +NOW=$(date +%s)000 # Current time in milliseconds +ONE_HOUR_AGO=$((NOW - 3600000)) # Start from 1 hour ago to ensure all data is historic +SEVEN_DAYS_AGO=$((ONE_HOUR_AGO - 604800000)) # 7 days before that + +# CSV header +cat > benchmark-data-100mb.csv << 'EOF' +# Prometheus metrics - 100MB benchmark dataset +# Format: metric_name,labels,value,timestamp_ms +EOF + +# Generate metrics +METRICS=( + "epimetheus_benchmark_cpu_usage" + "epimetheus_benchmark_memory_bytes" + "epimetheus_benchmark_disk_io_bytes" + "epimetheus_benchmark_network_rx_bytes" + "epimetheus_benchmark_network_tx_bytes" + "epimetheus_benchmark_requests_total" + "epimetheus_benchmark_errors_total" + "epimetheus_benchmark_response_time_ms" + "epimetheus_benchmark_active_connections" + "epimetheus_benchmark_queue_depth" +) + +INSTANCES=( + "web-01" "web-02" "web-03" "web-04" "web-05" + "api-01" "api-02" "api-03" "api-04" "api-05" + "db-01" "db-02" "db-03" "worker-01" "worker-02" +) + +INTERVAL_MS=60000 # 1 minute interval +TOTAL_INTERVALS=10080 # 7 days of 1-minute intervals + +echo "Generating data..." 
| tee -a "$RESULT_FILE" +LINES_GENERATED=0 + +for ((i=0; i<TOTAL_INTERVALS; i++)); do + TIMESTAMP=$((SEVEN_DAYS_AGO + (i * INTERVAL_MS))) + + for METRIC in "${METRICS[@]}"; do + for INSTANCE in "${INSTANCES[@]}"; do + VALUE=$((RANDOM % 1000)) + echo "$METRIC,instance=$INSTANCE;env=benchmark,$VALUE,$TIMESTAMP" >> benchmark-data-100mb.csv + LINES_GENERATED=$((LINES_GENERATED + 1)) + done + done + + if [ $((i % 1000)) -eq 0 ]; then + PROGRESS=$((i * 100 / TOTAL_INTERVALS)) + echo -ne "\rProgress: $PROGRESS% ($LINES_GENERATED lines)" | tee -a "$RESULT_FILE" + fi +done + +echo "" | tee -a "$RESULT_FILE" + +GENERATION_END=$(date +%s) +GENERATION_TIME=$((GENERATION_END - GENERATION_START)) + +FILE_SIZE=$(stat -f%z benchmark-data-100mb.csv 2>/dev/null || stat -c%s benchmark-data-100mb.csv 2>/dev/null) +FILE_SIZE_MB=$((FILE_SIZE / 1024 / 1024)) + +echo "" | tee -a "$RESULT_FILE" +echo "Data generation complete:" | tee -a "$RESULT_FILE" +echo " Lines generated: $LINES_GENERATED" | tee -a "$RESULT_FILE" +echo " File size: ${FILE_SIZE_MB}MB ($FILE_SIZE bytes)" | tee -a "$RESULT_FILE" +echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 2: Start port-forward to Prometheus +echo "Step 2: Setting up port-forward to Prometheus..." | tee -a "$RESULT_FILE" +kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/benchmark-pf.log 2>&1 & +PF_PID=$! +echo "Port-forward started (PID: $PF_PID)" | tee -a "$RESULT_FILE" +sleep 8 +echo "" | tee -a "$RESULT_FILE" + +# Step 3: Get baseline Prometheus metrics +echo "Step 3: Collecting baseline Prometheus metrics..." 
| tee -a "$RESULT_FILE" +PROM_POD=$(kubectl get pod -n monitoring -l app.kubernetes.io/name=prometheus -o jsonpath='{.items[0].metadata.name}') +echo "Prometheus pod: $PROM_POD" | tee -a "$RESULT_FILE" +BASELINE_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}') +BASELINE_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}') +echo " Baseline memory: $BASELINE_MEMORY" | tee -a "$RESULT_FILE" +echo " Baseline CPU: $BASELINE_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 4: Run ingestion benchmark +echo "Step 4: Running ingestion benchmark..." | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +INGEST_START=$(date +%s.%N) + +echo "Parsing CSV and preparing for Remote Write ingestion..." | tee -a "$RESULT_FILE" +echo "WARNING: Using auto mode - this may fail if data is too recent" | tee -a "$RESULT_FILE" +echo "Continuing with Remote Write API for historic data..." | tee -a "$RESULT_FILE" + +/usr/bin/time -v ./epimetheus \ + -mode=auto \ + -file=benchmark-data-100mb.csv \ + -format=csv \ + -prometheus=http://localhost:9090/api/v1/write \ + -pushgateway=http://localhost:9091 \ + 2>&1 | tee -a "$RESULT_FILE" || true + +INGEST_END=$(date +%s.%N) +INGEST_TIME=$(echo "$INGEST_END - $INGEST_START" | bc) + +echo "" | tee -a "$RESULT_FILE" +echo "Ingestion complete:" | tee -a "$RESULT_FILE" +echo " Total time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE" +SAMPLES_PER_SECOND=$(echo "scale=2; $LINES_GENERATED / $INGEST_TIME" | bc) +MB_PER_SECOND=$(echo "scale=2; $FILE_SIZE_MB / $INGEST_TIME" | bc) +echo " Samples/second: $SAMPLES_PER_SECOND" | tee -a "$RESULT_FILE" +echo " MB/second: $MB_PER_SECOND" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 5: Post-ingestion metrics +echo "Step 5: Collecting post-ingestion Prometheus metrics..." 
| tee -a "$RESULT_FILE" +sleep 5 +POST_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}') +POST_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}') +echo " Post-ingestion memory: $POST_MEMORY" | tee -a "$RESULT_FILE" +echo " Post-ingestion CPU: $POST_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 6: Verify +echo "Step 6: Verifying data ingestion..." | tee -a "$RESULT_FILE" +QUERY_RESULT=$(curl -s "http://localhost:9090/api/v1/query?query=count(epimetheus_benchmark_cpu_usage)" | jq -r '.data.result[0].value[1]') +echo " Samples found for epimetheus_benchmark_cpu_usage: $QUERY_RESULT" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 7: Cleanup +echo "Step 7: Cleaning up..." | tee -a "$RESULT_FILE" +kill $PF_PID 2>/dev/null || true +echo "" | tee -a "$RESULT_FILE" + +echo "=== BENCHMARK SUMMARY ===" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Dataset:" | tee -a "$RESULT_FILE" +echo " Size: ${FILE_SIZE_MB}MB" | tee -a "$RESULT_FILE" +echo " Samples: $LINES_GENERATED" | tee -a "$RESULT_FILE" +echo " Time range: 7 days" | tee -a "$RESULT_FILE" +echo " Interval: 1 minute" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Performance:" | tee -a "$RESULT_FILE" +echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE" +echo " Ingestion time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE" +echo " Throughput: $SAMPLES_PER_SECOND samples/s" | tee -a "$RESULT_FILE" +echo " Throughput: $MB_PER_SECOND MB/s" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Resources:" | tee -a "$RESULT_FILE" +echo " Memory: $BASELINE_MEMORY -> $POST_MEMORY" | tee -a "$RESULT_FILE" +echo " CPU: $BASELINE_CPU -> $POST_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Results saved to: $RESULT_FILE" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "To view results: cat $RESULT_FILE" +echo "To analyze: less 
$RESULT_FILE" diff --git a/scripts/benchmark-1gb.sh b/scripts/benchmark-1gb.sh new file mode 100644 index 0000000..35176b0 --- /dev/null +++ b/scripts/benchmark-1gb.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# Benchmark script: Generate and ingest 1GB of historic metrics +# This tests Epimetheus performance with large-scale data ingestion +# Run from repo root: ./scripts/benchmark-1gb.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +# Optimize Go GC for better performance (Phase 3 optimization) +export GOGC=200 # Reduce GC frequency (default 100) +export GOMEMLIMIT=3GiB # Set memory limit for Go 1.19+ + +BENCHMARK_DIR="benchmark-results" +TIMESTAMP=$(date +%Y%m%d-%H%M%S) +RESULT_FILE="$BENCHMARK_DIR/benchmark-1gb-$TIMESTAMP.log" + +mkdir -p "$BENCHMARK_DIR" + +echo "=== Epimetheus 1GB Benchmark ===" | tee "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Timestamp: $(date)" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 1: Generate 1GB of test data +echo "Step 1: Generating 1GB of test data..." 
| tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +TARGET_SIZE_MB=1000 +TARGET_BYTES=$((TARGET_SIZE_MB * 1024 * 1024)) +BYTES_PER_LINE=80 +TARGET_LINES=$((TARGET_BYTES / BYTES_PER_LINE)) + +echo "Target size: ${TARGET_SIZE_MB}MB" | tee -a "$RESULT_FILE" +echo "Estimated lines needed: $TARGET_LINES" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +GENERATION_START=$(date +%s) + +NOW=$(date +%s)000 +ONE_HOUR_AGO=$((NOW - 3600000)) +THIRTY_DAYS_AGO=$((ONE_HOUR_AGO - 2592000000)) + +cat > benchmark-data-1gb.csv << 'EOF' +# Prometheus metrics - 1GB benchmark dataset +# Format: metric_name,labels,value,timestamp_ms +EOF + +METRICS=( + "epimetheus_benchmark_cpu_usage" + "epimetheus_benchmark_memory_bytes" + "epimetheus_benchmark_disk_io_bytes" + "epimetheus_benchmark_network_rx_bytes" + "epimetheus_benchmark_network_tx_bytes" + "epimetheus_benchmark_requests_total" + "epimetheus_benchmark_errors_total" + "epimetheus_benchmark_response_time_ms" + "epimetheus_benchmark_active_connections" + "epimetheus_benchmark_queue_depth" +) + +INSTANCES=( + "web-01" "web-02" "web-03" "web-04" "web-05" + "api-01" "api-02" "api-03" "api-04" "api-05" + "db-01" "db-02" "db-03" "worker-01" "worker-02" +) + +INTERVAL_MS=30000 +TOTAL_INTERVALS=86400 + +echo "Generating data..." 
| tee -a "$RESULT_FILE" +LINES_GENERATED=0 + +for ((i=0; i<TOTAL_INTERVALS; i++)); do + TIMESTAMP=$((THIRTY_DAYS_AGO + (i * INTERVAL_MS))) + + for METRIC in "${METRICS[@]}"; do + for INSTANCE in "${INSTANCES[@]}"; do + VALUE=$((RANDOM % 1000)) + echo "$METRIC,instance=$INSTANCE;env=benchmark,$VALUE,$TIMESTAMP" >> benchmark-data-1gb.csv + LINES_GENERATED=$((LINES_GENERATED + 1)) + done + done + + if [ $((i % 5000)) -eq 0 ]; then + PROGRESS=$((i * 100 / TOTAL_INTERVALS)) + echo -ne "\rProgress: $PROGRESS% ($LINES_GENERATED lines)" | tee -a "$RESULT_FILE" + fi +done + +echo "" | tee -a "$RESULT_FILE" + +GENERATION_END=$(date +%s) +GENERATION_TIME=$((GENERATION_END - GENERATION_START)) + +FILE_SIZE=$(stat -f%z benchmark-data-1gb.csv 2>/dev/null || stat -c%s benchmark-data-1gb.csv 2>/dev/null) +FILE_SIZE_MB=$((FILE_SIZE / 1024 / 1024)) + +echo "" | tee -a "$RESULT_FILE" +echo "Data generation complete:" | tee -a "$RESULT_FILE" +echo " Lines generated: $LINES_GENERATED" | tee -a "$RESULT_FILE" +echo " File size: ${FILE_SIZE_MB}MB ($FILE_SIZE bytes)" | tee -a "$RESULT_FILE" +echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 2: Port-forward +echo "Step 2: Setting up port-forward to Prometheus..." | tee -a "$RESULT_FILE" +kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/benchmark-pf.log 2>&1 & +PF_PID=$! +echo "Port-forward started (PID: $PF_PID)" | tee -a "$RESULT_FILE" +sleep 8 +echo "" | tee -a "$RESULT_FILE" + +# Step 3: Baseline +echo "Step 3: Collecting baseline Prometheus metrics..." 
| tee -a "$RESULT_FILE" +PROM_POD=$(kubectl get pod -n monitoring -l app.kubernetes.io/name=prometheus -o jsonpath='{.items[0].metadata.name}') +echo "Prometheus pod: $PROM_POD" | tee -a "$RESULT_FILE" +BASELINE_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}') +BASELINE_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}') +echo " Baseline memory: $BASELINE_MEMORY" | tee -a "$RESULT_FILE" +echo " Baseline CPU: $BASELINE_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 4: Ingest +echo "Step 4: Running ingestion benchmark..." | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +INGEST_START=$(date +%s.%N) + +echo "Parsing CSV and preparing for Remote Write ingestion..." | tee -a "$RESULT_FILE" +echo "WARNING: Using auto mode - this may fail if data is too recent" | tee -a "$RESULT_FILE" +echo "Continuing with Remote Write API for historic data..." | tee -a "$RESULT_FILE" + +/usr/bin/time -v ./epimetheus \ + -mode=auto \ + -file=benchmark-data-1gb.csv \ + -format=csv \ + -prometheus=http://localhost:9090/api/v1/write \ + -pushgateway=http://localhost:9091 \ + 2>&1 | tee -a "$RESULT_FILE" || true + +INGEST_END=$(date +%s.%N) +INGEST_TIME=$(echo "$INGEST_END - $INGEST_START" | bc) + +echo "" | tee -a "$RESULT_FILE" +echo "Ingestion complete:" | tee -a "$RESULT_FILE" +echo " Total time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE" +SAMPLES_PER_SECOND=$(echo "scale=2; $LINES_GENERATED / $INGEST_TIME" | bc) +MB_PER_SECOND=$(echo "scale=2; $FILE_SIZE_MB / $INGEST_TIME" | bc) +echo " Samples/second: $SAMPLES_PER_SECOND" | tee -a "$RESULT_FILE" +echo " MB/second: $MB_PER_SECOND" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 5: Post-ingestion +echo "Step 5: Collecting post-ingestion Prometheus metrics..." 
| tee -a "$RESULT_FILE" +sleep 5 +POST_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}') +POST_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}') +echo " Post-ingestion memory: $POST_MEMORY" | tee -a "$RESULT_FILE" +echo " Post-ingestion CPU: $POST_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 6: Verify +echo "Step 6: Verifying data ingestion..." | tee -a "$RESULT_FILE" +QUERY_RESULT=$(curl -s "http://localhost:9090/api/v1/query?query=count(epimetheus_benchmark_cpu_usage)" | jq -r '.data.result[0].value[1]') +echo " Samples found for epimetheus_benchmark_cpu_usage: $QUERY_RESULT" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" + +# Step 7: Cleanup +echo "Step 7: Cleaning up..." | tee -a "$RESULT_FILE" +kill $PF_PID 2>/dev/null || true +echo "" | tee -a "$RESULT_FILE" + +echo "=== BENCHMARK SUMMARY ===" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Dataset:" | tee -a "$RESULT_FILE" +echo " Size: ${FILE_SIZE_MB}MB" | tee -a "$RESULT_FILE" +echo " Samples: $LINES_GENERATED" | tee -a "$RESULT_FILE" +echo " Time range: 30 days" | tee -a "$RESULT_FILE" +echo " Interval: 30 seconds" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Performance:" | tee -a "$RESULT_FILE" +echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE" +echo " Ingestion time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE" +echo " Throughput: $SAMPLES_PER_SECOND samples/s" | tee -a "$RESULT_FILE" +echo " Throughput: $MB_PER_SECOND MB/s" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Resources:" | tee -a "$RESULT_FILE" +echo " Memory: $BASELINE_MEMORY -> $POST_MEMORY" | tee -a "$RESULT_FILE" +echo " CPU: $BASELINE_CPU -> $POST_CPU" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "Results saved to: $RESULT_FILE" | tee -a "$RESULT_FILE" +echo "" | tee -a "$RESULT_FILE" +echo "To view results: cat $RESULT_FILE" +echo "To analyze: less 
$RESULT_FILE" diff --git a/scripts/cleanup-benchmark-data.sh b/scripts/cleanup-benchmark-data.sh new file mode 100644 index 0000000..48ba187 --- /dev/null +++ b/scripts/cleanup-benchmark-data.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Cleanup script: Delete benchmark data from Prometheus +# This uses the Prometheus Admin API to selectively remove benchmark metrics +# Run from repo root: ./scripts/cleanup-benchmark-data.sh [prometheus_url] + +set -e + +PROMETHEUS_URL="${1:-http://localhost:9090}" + +echo "=== Prometheus Benchmark Data Cleanup ===" +echo "" +echo "Prometheus URL: $PROMETHEUS_URL" +echo "" + +# Check if port-forward is needed +if [[ "$PROMETHEUS_URL" == *"localhost"* ]]; then + echo "Note: Make sure you have port-forward running:" + echo " kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090" + echo "" +fi + +# Metrics to delete +METRICS=( + "epimetheus_benchmark_cpu_usage" + "epimetheus_benchmark_memory_bytes" + "epimetheus_benchmark_disk_io_bytes" + "epimetheus_benchmark_network_rx_bytes" + "epimetheus_benchmark_network_tx_bytes" + "epimetheus_benchmark_requests_total" + "epimetheus_benchmark_errors_total" + "epimetheus_benchmark_response_time_ms" + "epimetheus_benchmark_active_connections" + "epimetheus_benchmark_queue_depth" +) + +echo "Step 1: Deleting benchmark metrics..." 
+echo "" + +SUCCESS_COUNT=0 +ERROR_COUNT=0 + +for METRIC in "${METRICS[@]}"; do + echo " Deleting: $METRIC" + + # Delete series endpoint returns HTTP 204 No Content on success + HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST -g "${PROMETHEUS_URL}/api/v1/admin/tsdb/delete_series?match[]=${METRIC}") + + if [ "$HTTP_CODE" == "204" ] || [ "$HTTP_CODE" == "200" ]; then + echo " ✅ Success (HTTP $HTTP_CODE)" + SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) + else + echo " ❌ Error: HTTP $HTTP_CODE" + ERROR_COUNT=$((ERROR_COUNT + 1)) + fi +done + +echo "" +echo "Deletion summary: $SUCCESS_COUNT succeeded, $ERROR_COUNT failed" +echo "" + +if [ $ERROR_COUNT -eq 0 ]; then + echo "Step 2: Cleaning up tombstones..." + echo "" + + # Clean tombstones endpoint returns HTTP 204 No Content on success + CLEANUP_HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST "${PROMETHEUS_URL}/api/v1/admin/tsdb/clean_tombstones") + + if [ "$CLEANUP_HTTP_CODE" == "204" ] || [ "$CLEANUP_HTTP_CODE" == "200" ]; then + echo " ✅ Tombstones cleaned successfully (HTTP $CLEANUP_HTTP_CODE)" + echo "" + echo "🎉 Cleanup complete!" + echo "" + echo "Note: Prometheus may take a few moments to compact the database" + echo "and free up disk space." + else + echo " ❌ Error cleaning tombstones: HTTP $CLEANUP_HTTP_CODE" + exit 1 + fi +else + echo "⚠️ Some deletions failed. Skipping tombstone cleanup." 
+ echo "Check Prometheus admin API is enabled with:" + echo " kubectl get prometheus -n monitoring prometheus-kube-prometheus-prometheus -o yaml | grep -A5 additionalArgs" + exit 1 +fi + +echo "" +echo "To verify deletion, run:" +echo " curl -s '${PROMETHEUS_URL}/api/v1/label/__name__/values' | jq '.data | map(select(startswith(\"epimetheus_benchmark\")))'" +echo "" diff --git a/scripts/cleanup-benchmark-metrics.sh b/scripts/cleanup-benchmark-metrics.sh new file mode 100644 index 0000000..7b1ce4e --- /dev/null +++ b/scripts/cleanup-benchmark-metrics.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# Cleanup benchmark metrics from Prometheus +# This allows running benchmarks from a clean state +# Run from repo root: ./scripts/cleanup-benchmark-metrics.sh + +set -e + +echo "=== Prometheus Benchmark Metrics Cleanup ===" +echo "" + +# Port-forward to Prometheus +echo "Setting up port-forward to Prometheus..." +kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/cleanup-pf.log 2>&1 & +PF_PID=$! +echo "Port-forward started (PID: $PF_PID)" +sleep 5 + +# Check if Admin API is enabled +echo "" +echo "Checking if Prometheus Admin API is enabled..." +ADMIN_CHECK=$(curl -s -o /dev/null -w "%{http_code}" -X POST "http://localhost:9090/api/v1/admin/tsdb/delete_series?match[]=epimetheus_benchmark_cpu_usage") + +if [ "$ADMIN_CHECK" = "204" ] || [ "$ADMIN_CHECK" = "200" ]; then + echo "✅ Admin API is enabled" + echo "" + echo "Deleting benchmark metrics..." 
+ + # Delete all benchmark metrics + METRICS=( + "epimetheus_benchmark_cpu_usage" + "epimetheus_benchmark_memory_bytes" + "epimetheus_benchmark_disk_io_bytes" + "epimetheus_benchmark_network_rx_bytes" + "epimetheus_benchmark_network_tx_bytes" + "epimetheus_benchmark_requests_total" + "epimetheus_benchmark_errors_total" + "epimetheus_benchmark_response_time_ms" + "epimetheus_benchmark_active_connections" + "epimetheus_benchmark_queue_depth" + ) + + for METRIC in "${METRICS[@]}"; do + echo " Deleting: $METRIC" + curl -s -X POST "http://localhost:9090/api/v1/admin/tsdb/delete_series?match[]=$METRIC" > /dev/null + done + + echo "" + echo "Triggering tombstone cleanup (this removes deleted data from disk)..." + curl -s -X POST "http://localhost:9090/api/v1/admin/tsdb/clean_tombstones" > /dev/null + + echo "" + echo "✅ Cleanup complete!" +elif [ "$ADMIN_CHECK" = "405" ]; then + echo "❌ Admin API is NOT enabled" + echo "" + echo "To enable the Admin API, update your Prometheus configuration:" + echo "" + echo "In f3s/prometheus/persistence-values.yaml, add:" + echo "" + echo "prometheus:" + echo " prometheusSpec:" + echo " additionalArgs:" + echo " - name: web.enable-admin-api" + echo " value: \"\"" + echo "" + echo "Then upgrade Prometheus:" + echo " helm upgrade ... (or: just upgrade in your conf repo)" + echo "" + echo "WARNING: Admin API should only be enabled in development/test environments!" + echo "" + echo "Alternative: Delete benchmark data files manually:" + echo " kubectl exec -n monitoring prometheus-prometheus-kube-prometheus-prometheus-0 -- sh -c 'rm -rf /prometheus/data/wal/*'" + echo " kubectl delete pod -n monitoring prometheus-prometheus-kube-prometheus-prometheus-0" +else + echo "⚠️ Unexpected response: HTTP $ADMIN_CHECK" +fi + +echo "" +echo "Cleaning up port-forward..." +kill $PF_PID 2>/dev/null || true + +echo "" +echo "Done!" 
diff --git a/scripts/generate-test-data.sh b/scripts/generate-test-data.sh new file mode 100644 index 0000000..4db332e --- /dev/null +++ b/scripts/generate-test-data.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# Generate test data with actual timestamps for different time ranges +# Run from repo root: ./scripts/generate-test-data.sh + +NOW=$(date +%s)000 # Current time in milliseconds +ONE_HOUR_AGO=$((NOW - 3600000)) +ONE_DAY_AGO=$((NOW - 86400000)) +ONE_WEEK_AGO=$((NOW - 604800000)) +ONE_MONTH_AGO=$((NOW - 2592000000)) + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +cat > test-all-ages.csv << EOF +# Prometheus metrics in CSV format demonstrating all time ranges +# Format: metric_name,labels,value,timestamp_ms + +# CURRENT data (< 5min old - will use Pushgateway/Realtime) +app_requests_total,instance=current;env=prod,100,$NOW +app_temperature_celsius,instance=current;zone=us-east,22.5,$NOW +app_active_connections,instance=current;env=prod,50,$NOW + +# 1 HOUR OLD data (will use Remote Write/Historic) +app_requests_total,instance=1h_ago;env=prod,95,$ONE_HOUR_AGO +app_active_connections,instance=1h_ago;env=prod,45,$ONE_HOUR_AGO +app_temperature_celsius,instance=1h_ago;zone=us-east,21.8,$ONE_HOUR_AGO + +# 1 DAY OLD data (will use Remote Write/Historic) +app_requests_total,instance=1d_ago;env=prod,150,$ONE_DAY_AGO +app_temperature_celsius,instance=1d_ago;zone=eu-west,18.3,$ONE_DAY_AGO +app_active_connections,instance=1d_ago;env=prod,60,$ONE_DAY_AGO + +# 1 WEEK OLD data (will use Remote Write/Historic) +app_requests_total,instance=1w_ago;env=prod,200,$ONE_WEEK_AGO +app_jobs_processed_total,instance=1w_ago;env=prod;job_type=email;status=success,75,$ONE_WEEK_AGO +app_temperature_celsius,instance=1w_ago;zone=asia,25.2,$ONE_WEEK_AGO + +# 1 MONTH OLD data (will use Remote Write/Historic) +app_requests_total,instance=1m_ago;env=prod,180,$ONE_MONTH_AGO 
+app_active_connections,instance=1m_ago;env=prod,30,$ONE_MONTH_AGO +app_temperature_celsius,instance=1m_ago;zone=africa,28.7,$ONE_MONTH_AGO +EOF + +echo "Generated test-all-ages.csv with the following timestamps:" +echo " Current: $NOW ($(date -d @$((NOW/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((NOW/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" +echo " 1h ago: $ONE_HOUR_AGO ($(date -d @$((ONE_HOUR_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((ONE_HOUR_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" +echo " 1d ago: $ONE_DAY_AGO ($(date -d @$((ONE_DAY_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((ONE_DAY_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" +echo " 1w ago: $ONE_WEEK_AGO ($(date -d @$((ONE_WEEK_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((ONE_WEEK_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" +echo " 1m ago: $ONE_MONTH_AGO ($(date -d @$((ONE_MONTH_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -r $((ONE_MONTH_AGO/1000)) '+%Y-%m-%d %H:%M:%S' 2>/dev/null))" diff --git a/scripts/run.sh b/scripts/run.sh new file mode 100644 index 0000000..d603639 --- /dev/null +++ b/scripts/run.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# Simple script to run Epimetheus +# Automatically sets up port-forwarding and runs the binary +# Run from repo root: ./scripts/run.sh + +set -e + +echo "Starting Epimetheus..." +echo "" +echo "Step 1: Setting up port-forward to Pushgateway..." +kubectl port-forward -n monitoring svc/pushgateway 9091:9091 > /tmp/pushgateway-port-forward.log 2>&1 & +PF_PID=$! + +# Wait for port-forward to be ready +sleep 2 + +echo "Step 2: Running epimetheus binary (realtime mode)..." +echo "Press Ctrl+C to stop" +echo "" + +# Run from repo root so ./epimetheus resolves +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" + +# Run the binary in realtime mode and capture its exit status +./epimetheus -mode=realtime -continuous +EXIT_CODE=$? 
+ +# Clean up port-forward +echo "" +echo "Cleaning up port-forward..." +kill $PF_PID 2>/dev/null || true + +echo "Done!" +exit $EXIT_CODE diff --git a/scripts/verify-clickhouse.sh b/scripts/verify-clickhouse.sh new file mode 100644 index 0000000..a9c3233 --- /dev/null +++ b/scripts/verify-clickhouse.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# Verify that epimetheus metrics were successfully ingested into ClickHouse. +# Usage: ./scripts/verify-clickhouse.sh [clickhouse_url] [table_name] +# Default: http://localhost:8123, epimetheus_metrics + +set -e + +CLICKHOUSE_URL="${1:-http://localhost:8123}" +TABLE="${2:-epimetheus_metrics}" + +echo "Verifying ClickHouse ingestion..." +echo " URL: $CLICKHOUSE_URL" +echo " Table: $TABLE" +echo "" + +# Check connectivity +if ! curl -sS "${CLICKHOUSE_URL}/ping" > /dev/null 2>&1; then + echo "ERROR: Cannot connect to ClickHouse at $CLICKHOUSE_URL" + echo " Make sure ClickHouse is running: sudo systemctl start clickhouse-server" + exit 1 +fi + +echo "✓ ClickHouse is reachable" +echo "" + +# Query 1: Row count +echo "--- Row count ---" +COUNT=$(curl -sS "${CLICKHOUSE_URL}/?query=SELECT%20count()%20FROM%20${TABLE}" 2>/dev/null | tail -1) +if [ -z "$COUNT" ] || [ "$COUNT" = "0" ]; then + echo "ERROR: Table $TABLE is empty or does not exist" + echo " Run: ./epimetheus -mode=watch -file=test-data/watch-clickhouse-test.csv -metric-name=watch_test -clickhouse=$CLICKHOUSE_URL -prometheus=" + exit 1 +fi +echo "Total rows: $COUNT" +echo "" + +# Query 2: Distinct metrics +echo "--- Metrics in table ---" +curl -sS "${CLICKHOUSE_URL}/?query=SELECT%20distinct%20metric%20FROM%20${TABLE}%20ORDER%20BY%20metric%20FORMAT%20PrettyCompact" 2>/dev/null +echo "" + +# Query 3: Sample data +echo "--- Sample rows (last 5) ---" +curl -sS "${CLICKHOUSE_URL}/?query=SELECT%20metric%2C%20labels%2C%20value%2C%20timestamp%20FROM%20${TABLE}%20ORDER%20BY%20timestamp%20DESC%20LIMIT%205%20FORMAT%20PrettyCompact" 2>/dev/null +echo "" + +# Query 4: Aggregation by metric +echo 
"--- Rows per metric ---" +curl -sS "${CLICKHOUSE_URL}/?query=SELECT%20metric%2C%20count()%20AS%20cnt%20FROM%20${TABLE}%20GROUP%20BY%20metric%20ORDER%20BY%20cnt%20DESC%20FORMAT%20PrettyCompact" 2>/dev/null +echo "" + +echo "✅ ClickHouse verification complete - data is present and queryable" |
