---
version: '3'

services:
  # Exports GPU metrics from DCGM (NVIDIA Data Center GPU Manager).
  # The host must have DCGM installed at a version equal to or newer than the
  # one shipped in this container image: https://github.com/NVIDIA/DCGM
  nvidia_exporter:
    image: nvcr.io/nvidia/k8s/dcgm-exporter:2.3.2-2.6.2-ubuntu20.04
    container_name: nvidia_exporter
    # Run under the NVIDIA container runtime so the GPUs selected below are
    # available inside the container.
    runtime: nvidia
    cap_add:
      - SYS_ADMIN
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=all
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - "9400:9400"
    restart: unless-stopped

# NVIDIA Data Center GPU Manager: to export data from the DCGM host to
# Prometheus, DCGM must be installed on the host, as well as the NVIDIA
# Container Toolkit.
#   https://github.com/NVIDIA/DCGM
#   https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#install-guide
#   https://github.com/NVIDIA/dcgm-exporter
#   https://docs.nvidia.com/datacenter/cloud-native/gpu-telemetry/dcgm-exporter.html