Created
May 8, 2025 18:45
-
-
Save gangtao/27cf5ae0d5a94fbfc6c4b94f9525d09e to your computer and use it in GitHub Desktop.
gpu-monitor-vector-config
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Vector configuration for collecting NVIDIA GPU metrics
# Enable Vector's API on port 8686.
# NOTE(review): 0.0.0.0 listens on every network interface; bind to
# 127.0.0.1 instead if the API does not need to be reachable remotely.
[api]
enabled = true
address = "0.0.0.0:8686"
# Scrape Prometheus-format metrics from the DCGM exporter every 15 seconds.
[sources.dcgm_metrics]
type = "prometheus_scrape"
endpoints = ["http://dcgm-exporter:9400/metrics"]
scrape_interval_secs = 15
# Keep only DCGM metrics and make sure every kept metric has a timestamp.
#
# `prometheus_scrape` emits metric events, so the metric's name is read from
# `.name` (metric events have no `.metric_name` field). VRL ignores the value
# of a program's final expression, so evaluating `null` does not discard an
# event; `abort` combined with `drop_on_abort` does.
[transforms.gpu_metrics_processor]
type = "remap"
inputs = ["dcgm_metrics"]
drop_on_abort = true
source = '''
# Convert to string with a fallback; an empty name fails the prefix check below.
metric_name = to_string(.name) ?? ""

# Drop everything that is not a DCGM metric.
if !starts_with(metric_name, "DCGM") {
  abort
}

# Add a timestamp if missing.
if !exists(.timestamp) {
  .timestamp = now()
}
'''
# Publish the processed metrics, JSON-encoded, to the "gpu-metrics" topic on
# Redpanda/Kafka.
[sinks.kafka_output]
type = "kafka"
inputs = ["gpu_metrics_processor"]
bootstrap_servers = "redpanda:9092"
topic = "gpu-metrics"
encoding.codec = "json"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment