author     Paul Buetow <paul@buetow.org>  2026-05-16 16:06:14 +0300
committer  Paul Buetow <paul@buetow.org>  2026-05-16 16:06:14 +0300
commit     4b4cde4fe3848c30e9f1cf1efc8cbc46fd50da83 (patch)
tree       d93511d43226331605c095fa0993cc15b7169e8a /f3s/argocd-apps/monitoring/alloy.yaml
parent     107ccb68af18cf3f4bd04bc93bdde1d7c1169f93 (diff)
f3s/monitoring: disable grafana, loki, tempo; reduce alloy to no-op (HEAD -> master)
Grafana's SQLite-on-NFS persistence is unreliable across restarts (the
new pod can't reacquire a clean exclusive lock after any NFS bounce),
and with Loki + Tempo also gone there's nothing left for it to
visualize. Keeping Prometheus alone for metrics + alerting.

Changes:

- prometheus.yaml: add grafana.enabled=false in the kube-prometheus-stack
  values so the subchart no longer renders the grafana deployment/pvc
  (values sketch below).
- loki.yaml, tempo.yaml, grafana-ingress.yaml: renamed to .disabled (same
  pattern as commit 03a18c6) so 'kubectl apply -f argocd-apps/' stops
  re-creating them; the cluster Applications were also deleted, which
  cascade-removes the helm resources via the resources-finalizer (shell
  sketch below).
- alloy.yaml: drop the loki.write and otelcol.* blocks (no destinations
  to ship to). The DaemonSet stays deployed with a minimal 'logging'
  block so the chart can be re-enabled by restoring the blocks here.

Prometheus TSDB was also wiped (corrupted zero-byte WAL segments from
the same NFS blip that took grafana down) — done separately, not part
of this commit.
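The prometheus.yaml change is a single values key. A minimal sketch,
assuming the kube-prometheus-stack Application keeps its values under
spec.source.helm.valuesObject the same way alloy.yaml does (the
surrounding fields are illustrative, not copied from the repo):

    # prometheus.yaml (hypothetical excerpt)
    spec:
      source:
        helm:
          valuesObject:
            grafana:
              enabled: false  # subchart stops rendering the grafana deployment/pvc

The rename-plus-delete step could look like the following shell session
(hypothetical: the Application names and the argocd namespace are
assumptions, not taken from this commit):

    cd f3s/argocd-apps/monitoring
    mv loki.yaml loki.yaml.disabled
    mv tempo.yaml tempo.yaml.disabled
    mv grafana-ingress.yaml grafana-ingress.yaml.disabled

    # Deleting the Applications lets Argo CD cascade-remove the deployed
    # helm resources via resources-finalizer.argocd.argoproj.io.
    kubectl -n argocd delete application loki tempo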
Diffstat (limited to 'f3s/argocd-apps/monitoring/alloy.yaml')
-rw-r--r--  f3s/argocd-apps/monitoring/alloy.yaml  |  99
1 file changed, 6 insertions(+), 93 deletions(-)
diff --git a/f3s/argocd-apps/monitoring/alloy.yaml b/f3s/argocd-apps/monitoring/alloy.yaml
index c5574b1..e2105e3 100644
--- a/f3s/argocd-apps/monitoring/alloy.yaml
+++ b/f3s/argocd-apps/monitoring/alloy.yaml
@@ -15,101 +15,14 @@ spec:
       releaseName: alloy
       valuesObject:
         alloy:
-          service:
-            ports:
-              otlp-grpc:
-                enabled: true
-                port: 4317
-                targetPort: 4317
-                protocol: TCP
-              otlp-http:
-                enabled: true
-                port: 4318
-                targetPort: 4318
-                protocol: TCP
-
+          # Log shipping (to Loki) and trace forwarding (to Tempo) are
+          # disabled — Loki and Tempo apps are off. The DaemonSet stays
+          # deployed with a no-op config so the chart can be re-enabled
+          # by restoring the discovery/loki/otelcol blocks here.
           configMap:
             content: |
-              discovery.kubernetes "pods" {
-                role = "pod"
-              }
-
-              discovery.relabel "pods" {
-                targets = discovery.kubernetes.pods.targets
-
-                rule {
-                  source_labels = ["__meta_kubernetes_namespace"]
-                  target_label = "namespace"
-                }
-
-                rule {
-                  source_labels = ["__meta_kubernetes_pod_name"]
-                  target_label = "pod"
-                }
-
-                rule {
-                  source_labels = ["__meta_kubernetes_pod_container_name"]
-                  target_label = "container"
-                }
-
-                rule {
-                  source_labels = ["__meta_kubernetes_pod_label_app"]
-                  target_label = "app"
-                }
-              }
-
-              loki.source.kubernetes "pods" {
-                targets = discovery.relabel.pods.output
-                forward_to = [loki.write.default.receiver]
-              }
-
-              loki.write "default" {
-                endpoint {
-                  url = "http://loki.monitoring.svc.cluster.local:3100/loki/api/v1/push"
-                }
-              }
-
-              // ========================================
-              // TRACES COLLECTION
-              // ========================================
-
-              // OTLP receiver for traces via gRPC and HTTP
-              otelcol.receiver.otlp "default" {
-                grpc {
-                  endpoint = "0.0.0.0:4317"
-                }
-
-                http {
-                  endpoint = "0.0.0.0:4318"
-                }
-
-                output {
-                  traces = [otelcol.processor.batch.default.input]
-                }
-              }
-
-              // Batch processor for efficient trace forwarding
-              otelcol.processor.batch "default" {
-                timeout = "5s"
-                send_batch_size = 100
-                send_batch_max_size = 200
-
-                output {
-                  traces = [otelcol.exporter.otlp.tempo.input]
-                }
-              }
-
-              // OTLP exporter to send traces to Tempo
-              otelcol.exporter.otlp "tempo" {
-                client {
-                  endpoint = "tempo.monitoring.svc.cluster.local:4317"
-
-                  tls {
-                    insecure = true
-                  }
-
-                  compression = "gzip"
-                }
+              logging {
+                level = "info"
               }
   destination: