summaryrefslogtreecommitdiff
path: root/scripts/benchmark-100mb.sh
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/benchmark-100mb.sh')
-rw-r--r--scripts/benchmark-100mb.sh200
1 file changed, 200 insertions, 0 deletions
diff --git a/scripts/benchmark-100mb.sh b/scripts/benchmark-100mb.sh
new file mode 100644
index 0000000..bda6476
--- /dev/null
+++ b/scripts/benchmark-100mb.sh
@@ -0,0 +1,200 @@
#!/bin/bash
# Benchmark script: Generate and ingest 100MB of historic metrics
# This tests Epimetheus performance with large-scale data ingestion
# Run from repo root: ./scripts/benchmark-100mb.sh

# Fail fast: -e aborts on error, -u errors on unset variables, and
# pipefail keeps a failing stage in the many `cmd | tee` pipelines
# below from being masked by tee's success.
set -euo pipefail

# Resolve the repo root from this script's own location so the script
# works no matter where it is invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$REPO_ROOT"

# Optimize Go GC for better performance (Phase 3 optimization)
export GOGC=200 # Reduce GC frequency (default 100)
export GOMEMLIMIT=3GiB # Set memory limit for Go 1.19+

# All output is mirrored into a timestamped log under benchmark-results/.
BENCHMARK_DIR="benchmark-results"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
RESULT_FILE="$BENCHMARK_DIR/benchmark-$TIMESTAMP.log"

mkdir -p "$BENCHMARK_DIR"

echo "=== Epimetheus 100MB Benchmark ===" | tee "$RESULT_FILE"
echo "" | tee -a "$RESULT_FILE"
echo "Timestamp: $(date)" | tee -a "$RESULT_FILE"
echo "" | tee -a "$RESULT_FILE"
+
# Step 1: Generate 100MB of test data
echo "Step 1: Generating 100MB of test data..." | tee -a "$RESULT_FILE"
echo "" | tee -a "$RESULT_FILE"

# Rough sizing only: each CSV row is ~70 bytes, so 100MB needs ~1.5M rows.
# The actual row count is driven by the interval/series loop below.
TARGET_SIZE_MB=100
BYTES_PER_LINE=70
TARGET_BYTES=$((TARGET_SIZE_MB * 1024 * 1024))
TARGET_LINES=$((TARGET_BYTES / BYTES_PER_LINE))

printf '%s\n' \
  "Target size: ${TARGET_SIZE_MB}MB" \
  "Estimated lines needed: $TARGET_LINES" \
  "" | tee -a "$RESULT_FILE"

# Generate data going back 7 days with 1-minute intervals.
GENERATION_START=$(date +%s)

# All timestamps are milliseconds. Anchor the window at least one hour in
# the past so every sample is guaranteed to be historic.
NOW=$(date +%s)000
ONE_HOUR_AGO=$((NOW - 3600000))
SEVEN_DAYS_AGO=$((ONE_HOUR_AGO - 604800000))

# CSV header (comment lines; literal, no expansion)
printf '%s\n' \
  '# Prometheus metrics - 100MB benchmark dataset' \
  '# Format: metric_name,labels,value,timestamp_ms' \
  > benchmark-data-100mb.csv
+
# Generate metrics: 10 metric names x 15 instances = 150 series,
# one sample per series per interval.
METRICS=(
  "epimetheus_benchmark_cpu_usage"
  "epimetheus_benchmark_memory_bytes"
  "epimetheus_benchmark_disk_io_bytes"
  "epimetheus_benchmark_network_rx_bytes"
  "epimetheus_benchmark_network_tx_bytes"
  "epimetheus_benchmark_requests_total"
  "epimetheus_benchmark_errors_total"
  "epimetheus_benchmark_response_time_ms"
  "epimetheus_benchmark_active_connections"
  "epimetheus_benchmark_queue_depth"
)

INSTANCES=(
  "web-01" "web-02" "web-03" "web-04" "web-05"
  "api-01" "api-02" "api-03" "api-04" "api-05"
  "db-01" "db-02" "db-03" "worker-01" "worker-02"
)

INTERVAL_MS=60000 # 1 minute interval
TOTAL_INTERVALS=10080 # 7 days of 1-minute intervals

echo "Generating data..." | tee -a "$RESULT_FILE"
LINES_GENERATED=0

# The whole loop's stdout is redirected to the CSV exactly once (see the
# `done >>` below). The original `>> file` on every sample re-opened the
# file ~1.5 million times, which dominates generation time.
# NB: SAMPLE_TS, not TIMESTAMP — the global TIMESTAMP names the run.
for ((i=0; i<TOTAL_INTERVALS; i++)); do
  SAMPLE_TS=$((SEVEN_DAYS_AGO + (i * INTERVAL_MS)))

  for METRIC in "${METRICS[@]}"; do
    for INSTANCE in "${INSTANCES[@]}"; do
      VALUE=$((RANDOM % 1000)) # synthetic value, 0..999
      echo "$METRIC,instance=$INSTANCE;env=benchmark,$VALUE,$SAMPLE_TS"
      LINES_GENERATED=$((LINES_GENERATED + 1))
    done
  done

  if [ $((i % 1000)) -eq 0 ]; then
    PROGRESS=$((i * 100 / TOTAL_INTERVALS))
    # Progress goes to stderr only: carriage-return updates are for the
    # terminal, and tee-ing them into the log corrupted it with \r runs
    # (stderr also bypasses the stdout->CSV redirection).
    printf '\rProgress: %s%% (%s lines)' "$PROGRESS" "$LINES_GENERATED" >&2
  fi
done >> benchmark-data-100mb.csv
printf '\n' >&2 # finish the progress line on the terminal
echo "" | tee -a "$RESULT_FILE"

GENERATION_END=$(date +%s)
GENERATION_TIME=$((GENERATION_END - GENERATION_START)) # seconds

# `wc -c` is portable (BSD stat wants -f%z, GNU stat wants -c%s); the
# arithmetic expansion also strips the column padding BSD wc emits.
FILE_SIZE=$(( $(wc -c < benchmark-data-100mb.csv) ))
FILE_SIZE_MB=$((FILE_SIZE / 1024 / 1024))

echo "" | tee -a "$RESULT_FILE"
echo "Data generation complete:" | tee -a "$RESULT_FILE"
echo " Lines generated: $LINES_GENERATED" | tee -a "$RESULT_FILE"
echo " File size: ${FILE_SIZE_MB}MB ($FILE_SIZE bytes)" | tee -a "$RESULT_FILE"
echo " Generation time: ${GENERATION_TIME}s" | tee -a "$RESULT_FILE"
echo "" | tee -a "$RESULT_FILE"
+
+# Step 2: Start port-forward to Prometheus
+echo "Step 2: Setting up port-forward to Prometheus..." | tee -a "$RESULT_FILE"
+kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 > /tmp/benchmark-pf.log 2>&1 &
+PF_PID=$!
+echo "Port-forward started (PID: $PF_PID)" | tee -a "$RESULT_FILE"
+sleep 8
+echo "" | tee -a "$RESULT_FILE"
+
+# Step 3: Get baseline Prometheus metrics
+echo "Step 3: Collecting baseline Prometheus metrics..." | tee -a "$RESULT_FILE"
+PROM_POD=$(kubectl get pod -n monitoring -l app.kubernetes.io/name=prometheus -o jsonpath='{.items[0].metadata.name}')
+echo "Prometheus pod: $PROM_POD" | tee -a "$RESULT_FILE"
+BASELINE_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}')
+BASELINE_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}')
+echo " Baseline memory: $BASELINE_MEMORY" | tee -a "$RESULT_FILE"
+echo " Baseline CPU: $BASELINE_CPU" | tee -a "$RESULT_FILE"
+echo "" | tee -a "$RESULT_FILE"
+
+# Step 4: Run ingestion benchmark
+echo "Step 4: Running ingestion benchmark..." | tee -a "$RESULT_FILE"
+echo "" | tee -a "$RESULT_FILE"
+INGEST_START=$(date +%s.%N)
+
+echo "Parsing CSV and preparing for Remote Write ingestion..." | tee -a "$RESULT_FILE"
+echo "WARNING: Using auto mode - this may fail if data is too recent" | tee -a "$RESULT_FILE"
+echo "Continuing with Remote Write API for historic data..." | tee -a "$RESULT_FILE"
+
+/usr/bin/time -v ./epimetheus \
+ -mode=auto \
+ -file=benchmark-data-100mb.csv \
+ -format=csv \
+ -prometheus=http://localhost:9090/api/v1/write \
+ -pushgateway=http://localhost:9091 \
+ 2>&1 | tee -a "$RESULT_FILE" || true
+
+INGEST_END=$(date +%s.%N)
+INGEST_TIME=$(echo "$INGEST_END - $INGEST_START" | bc)
+
+echo "" | tee -a "$RESULT_FILE"
+echo "Ingestion complete:" | tee -a "$RESULT_FILE"
+echo " Total time: ${INGEST_TIME}s" | tee -a "$RESULT_FILE"
+SAMPLES_PER_SECOND=$(echo "scale=2; $LINES_GENERATED / $INGEST_TIME" | bc)
+MB_PER_SECOND=$(echo "scale=2; $FILE_SIZE_MB / $INGEST_TIME" | bc)
+echo " Samples/second: $SAMPLES_PER_SECOND" | tee -a "$RESULT_FILE"
+echo " MB/second: $MB_PER_SECOND" | tee -a "$RESULT_FILE"
+echo "" | tee -a "$RESULT_FILE"
+
+# Step 5: Post-ingestion metrics
+echo "Step 5: Collecting post-ingestion Prometheus metrics..." | tee -a "$RESULT_FILE"
+sleep 5
+POST_MEMORY=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $3}')
+POST_CPU=$(kubectl top pod -n monitoring "$PROM_POD" --no-headers | awk '{print $2}')
+echo " Post-ingestion memory: $POST_MEMORY" | tee -a "$RESULT_FILE"
+echo " Post-ingestion CPU: $POST_CPU" | tee -a "$RESULT_FILE"
+echo "" | tee -a "$RESULT_FILE"
+
+# Step 6: Verify
+echo "Step 6: Verifying data ingestion..." | tee -a "$RESULT_FILE"
+QUERY_RESULT=$(curl -s "http://localhost:9090/api/v1/query?query=count(epimetheus_benchmark_cpu_usage)" | jq -r '.data.result[0].value[1]')
+echo " Samples found for epimetheus_benchmark_cpu_usage: $QUERY_RESULT" | tee -a "$RESULT_FILE"
+echo "" | tee -a "$RESULT_FILE"
+
+# Step 7: Cleanup
+echo "Step 7: Cleaning up..." | tee -a "$RESULT_FILE"
+kill $PF_PID 2>/dev/null || true
+echo "" | tee -a "$RESULT_FILE"
+
# Final summary: one heredoc through tee mirrors the whole report to
# stdout and appends it to the result log in a single write.
tee -a "$RESULT_FILE" <<SUMMARY
=== BENCHMARK SUMMARY ===

Dataset:
 Size: ${FILE_SIZE_MB}MB
 Samples: $LINES_GENERATED
 Time range: 7 days
 Interval: 1 minute

Performance:
 Generation time: ${GENERATION_TIME}s
 Ingestion time: ${INGEST_TIME}s
 Throughput: $SAMPLES_PER_SECOND samples/s
 Throughput: $MB_PER_SECOND MB/s

Resources:
 Memory: $BASELINE_MEMORY -> $POST_MEMORY
 CPU: $BASELINE_CPU -> $POST_CPU

Results saved to: $RESULT_FILE

SUMMARY
# Usage hints go to stdout only, not into the log.
printf '%s\n' "To view results: cat $RESULT_FILE"
printf '%s\n' "To analyze: less $RESULT_FILE"