summaryrefslogtreecommitdiff
path: root/cleanup-benchmark-data.sh
blob: a5409f1f5055aea12f0c8c7b04b8be8f6ebfaae9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
# Remove benchmark series from a Prometheus server.
# Goes through the Prometheus Admin API so that only the benchmark metrics
# are deleted.
#
# Usage: cleanup-benchmark-data.sh [PROMETHEUS_URL]
#   PROMETHEUS_URL  base URL of the Prometheus server
#                   (default: http://localhost:9090)

set -e

# First positional argument overrides the default server address.
PROMETHEUS_URL="${1:-http://localhost:9090}"

# Banner: title, blank line, target URL, blank line.
printf '%s\n' \
  "=== Prometheus Benchmark Data Cleanup ===" \
  "" \
  "Prometheus URL: $PROMETHEUS_URL" \
  ""

# When the URL mentions localhost, remind the operator that a port-forward
# has to be running for the requests to go anywhere.
case "$PROMETHEUS_URL" in
  *localhost*)
    printf '%s\n' \
      "Note: Make sure you have port-forward running:" \
      "  kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090" \
      ""
    ;;
esac

# Series names whose data will be deleted. The order here is the order in
# which the delete requests are issued.
METRICS=()
# cpu / memory / disk
METRICS+=(
  "epimetheus_benchmark_cpu_usage"
  "epimetheus_benchmark_memory_bytes"
  "epimetheus_benchmark_disk_io_bytes"
)
# network
METRICS+=(
  "epimetheus_benchmark_network_rx_bytes"
  "epimetheus_benchmark_network_tx_bytes"
)
# requests / errors / response time
METRICS+=(
  "epimetheus_benchmark_requests_total"
  "epimetheus_benchmark_errors_total"
  "epimetheus_benchmark_response_time_ms"
)
# connections / queue
METRICS+=(
  "epimetheus_benchmark_active_connections"
  "epimetheus_benchmark_queue_depth"
)

echo "Step 1: Deleting benchmark metrics..."
echo ""

SUCCESS_COUNT=0
ERROR_COUNT=0

#######################################
# POST a delete_series request for one metric and print the HTTP status code.
# Globals:   PROMETHEUS_URL (read)
# Arguments: $1 - metric name used as the match[] selector
# Outputs:   the status code on stdout; "000" when curl produced no code
# Returns:   always 0, so a transport failure is reported through the
#            printed code instead of aborting the script under `set -e`
#######################################
delete_series_status() {
  local metric=$1
  local code
  # curl exits non-zero when it cannot complete the request (for example
  # connection refused). Without `|| true` that exit status would propagate
  # through the assignment and `set -e` would terminate the script silently.
  # -g stops curl from treating the [] in match[] as a URL glob.
  code=$(curl -s -o /dev/null -w "%{http_code}" -X POST -g \
    "${PROMETHEUS_URL}/api/v1/admin/tsdb/delete_series?match[]=${metric}") || true
  printf '%s\n' "${code:-000}"
}

for METRIC in "${METRICS[@]}"; do
  echo "  Deleting: $METRIC"

  # Delete series endpoint returns HTTP 204 No Content on success
  HTTP_CODE=$(delete_series_status "$METRIC")

  if [[ "$HTTP_CODE" == "204" || "$HTTP_CODE" == "200" ]]; then
    echo "    ✅ Success (HTTP $HTTP_CODE)"
    SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
  else
    echo "    ❌ Error: HTTP $HTTP_CODE"
    if [[ "$HTTP_CODE" == "000" ]]; then
      # 000 means no HTTP response was received at all.
      echo "    (no HTTP response - is Prometheus reachable at $PROMETHEUS_URL?)"
    fi
    ERROR_COUNT=$((ERROR_COUNT + 1))
  fi
done

echo ""
echo "Deletion summary: $SUCCESS_COUNT succeeded, $ERROR_COUNT failed"
echo ""

# Tombstone cleanup only runs when every delete request succeeded; otherwise
# the script stops with a hint on how to check that the admin API is enabled.
if [ "$ERROR_COUNT" -eq 0 ]; then
  echo "Step 2: Cleaning up tombstones..."
  echo ""

  # Clean tombstones endpoint returns HTTP 204 No Content on success.
  # curl exits non-zero when it cannot complete the request; `|| true` keeps
  # `set -e` from terminating the script before the error branch below can
  # report the failure.
  CLEANUP_HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
    "${PROMETHEUS_URL}/api/v1/admin/tsdb/clean_tombstones") || true
  # Fall back to a placeholder if curl printed no status code at all.
  CLEANUP_HTTP_CODE="${CLEANUP_HTTP_CODE:-000}"

  if [[ "$CLEANUP_HTTP_CODE" == "204" || "$CLEANUP_HTTP_CODE" == "200" ]]; then
    echo "  ✅ Tombstones cleaned successfully (HTTP $CLEANUP_HTTP_CODE)"
    echo ""
    echo "🎉 Cleanup complete!"
    echo ""
    echo "Note: Prometheus may take a few moments to compact the database"
    echo "and free up disk space."
  else
    echo "  ❌ Error cleaning tombstones: HTTP $CLEANUP_HTTP_CODE"
    exit 1
  fi
else
  echo "⚠️  Some deletions failed. Skipping tombstone cleanup."
  echo "Check Prometheus admin API is enabled with:"
  echo "  kubectl get prometheus -n monitoring prometheus-kube-prometheus-prometheus -o yaml | grep -A5 additionalArgs"
  exit 1
fi

# Closing hint: a ready-to-paste command that lists any benchmark metric
# names still known to the server. The delimiter is unquoted so that
# PROMETHEUS_URL is expanded into the printed command.
cat <<EOF

To verify deletion, run:
  curl -s '${PROMETHEUS_URL}/api/v1/label/__name__/values' | jq '.data | map(select(startswith("epimetheus_benchmark")))'

EOF