Skip to content

Commit

Permalink
DCGM-Exporter release 3.3.9-3.6.1 (#420)
Browse files (browse the repository at this point in the history)
  • Loading branch information
glowkey authored Nov 18, 2024
1 parent 965b2de commit b97b763
Show file tree
Hide file tree
Showing 9 changed files with 22 additions and 21 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Official documentation for DCGM-Exporter can be found on [docs.nvidia.com](https
To gather metrics on a GPU node, simply start the `dcgm-exporter` container:

```shell
docker run -d --gpus all --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.8-3.6.0-ubuntu22.04
docker run -d --gpus all --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.9-3.6.1-ubuntu22.04
curl localhost:9400/metrics
# HELP DCGM_FI_DEV_SM_CLOCK SM clock frequency (in MHz).
# TYPE DCGM_FI_DEV_SM_CLOCK gauge
Expand Down
12 changes: 6 additions & 6 deletions dcgm-exporter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,23 @@ metadata:
name: "dcgm-exporter"
labels:
app.kubernetes.io/name: "dcgm-exporter"
app.kubernetes.io/version: "3.6.0"
app.kubernetes.io/version: "3.6.1"
spec:
updateStrategy:
type: RollingUpdate
selector:
matchLabels:
app.kubernetes.io/name: "dcgm-exporter"
app.kubernetes.io/version: "3.6.0"
app.kubernetes.io/version: "3.6.1"
template:
metadata:
labels:
app.kubernetes.io/name: "dcgm-exporter"
app.kubernetes.io/version: "3.6.0"
app.kubernetes.io/version: "3.6.1"
name: "dcgm-exporter"
spec:
containers:
- image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.8-3.6.0-ubuntu22.04"
- image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.9-3.6.1-ubuntu22.04"
env:
- name: "DCGM_EXPORTER_LISTEN"
value: ":9400"
Expand Down Expand Up @@ -66,11 +66,11 @@ metadata:
name: "dcgm-exporter"
labels:
app.kubernetes.io/name: "dcgm-exporter"
app.kubernetes.io/version: "3.6.0"
app.kubernetes.io/version: "3.6.1"
spec:
selector:
app.kubernetes.io/name: "dcgm-exporter"
app.kubernetes.io/version: "3.6.0"
app.kubernetes.io/version: "3.6.1"
ports:
- name: "metrics"
port: 9400
2 changes: 1 addition & 1 deletion deployment/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: dcgm-exporter
description: A Helm chart for DCGM exporter
version: "3.7.0"
kubeVersion: ">= 1.19.0-0"
appVersion: "3.6.0"
appVersion: "3.6.1"
sources:
- https://github.com/nvidia/dcgm-exporter
home: https://github.com/nvidia/dcgm-exporter/
Expand Down
2 changes: 1 addition & 1 deletion deployment/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ image:
pullPolicy: IfNotPresent
# Image tag defaults to AppVersion, but you can use the tag key
# to set the image tag explicitly, e.g.:
tag: 3.3.8-3.6.0-ubuntu22.04
tag: 3.3.9-3.6.1-ubuntu22.04

# Change the following reference to "/etc/dcgm-exporter/default-counters.csv"
# to stop profiling metrics from DCGM
Expand Down
7 changes: 4 additions & 3 deletions docker/Dockerfile.ubi9
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM nvcr.io/nvidia/cuda:12.6.1-base-ubi9 AS builder
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9 AS builder
ARG GOLANG_VERSION=1.22.4
WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter
RUN set -eux; \
Expand Down Expand Up @@ -40,7 +40,7 @@ COPY . .

RUN make binary check-format

FROM nvcr.io/nvidia/cuda:12.6.1-base-ubi9
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9
ARG DCGM_VERSION
ARG VERSION
ARG DIST_DIR
Expand All @@ -63,7 +63,8 @@ RUN dnf update --disablerepo=* --enablerepo=ubi-9-appstream-rpms --enablerepo=ub
&& rm -rf /usr/local/dcgm/scripts \
&& rm -f /usr/include/*.h /usr/bin/DcgmProfTesterKernels.ptx /usr/bin/dcgmproftester* \
&& rm -rf /var/lib/rpm/rpmdb.sqlite /var/cache/* /var/lib/dnf/history.* /var/log/* /tmp/* /var/tmp/* \
&& rm -rf /usr/share/doc && rm -rf /usr/share/man
&& rm -rf /usr/share/doc && rm -rf /usr/share/man \
&& dnf remove openssl

COPY ./LICENSE ./licenses/LICENSE
COPY --from=builder /go/src/github.com/NVIDIA/dcgm-exporter/cmd/dcgm-exporter/dcgm-exporter /usr/bin/
Expand Down
4 changes: 2 additions & 2 deletions docker/Dockerfile.ubuntu22.04
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM nvcr.io/nvidia/cuda:12.6.1-base-ubuntu22.04 AS builder
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 AS builder
ARG GOLANG_VERSION=1.22.4
WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter
RUN set -eux; \
Expand Down Expand Up @@ -45,7 +45,7 @@ COPY . .

RUN make binary check-format

FROM nvcr.io/nvidia/cuda:12.6.1-base-ubuntu22.04
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04

ARG VERSION
ARG DCGM_VERSION
Expand Down
8 changes: 4 additions & 4 deletions hack/VERSION
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
OLD_DCGM_VERSION=3.3.7
OLD_EXPORTER_VERSION=3.5.0
NEW_DCGM_VERSION=3.3.8
NEW_EXPORTER_VERSION=3.6.0
OLD_DCGM_VERSION=3.3.8
OLD_EXPORTER_VERSION=3.6.0
NEW_DCGM_VERSION=3.3.9
NEW_EXPORTER_VERSION=3.6.1
4 changes: 2 additions & 2 deletions service-monitor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ metadata:
name: "dcgm-exporter"
labels:
app.kubernetes.io/name: "dcgm-exporter"
app.kubernetes.io/version: "3.6.0"
app.kubernetes.io/version: "3.6.1"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "dcgm-exporter"
app.kubernetes.io/version: "3.6.0"
app.kubernetes.io/version: "3.6.1"
endpoints:
- port: "metrics"
path: "/metrics"
2 changes: 1 addition & 1 deletion tests/e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ GO_CMD ?= go
NAMESPACE ?= "dcgm-exporter"
CHART ?= "./../../deployment/"
IMAGE_REPOSITORY ?= "nvcr.io/nvidia/k8s/dcgm-exporter"
IMAGE_TAG ?= "3.3.8-3.6.0-ubuntu22.04"
IMAGE_TAG ?= "3.3.9-3.6.1-ubuntu22.04"
KUBECONFIG ?= "~/.kube/config"

define TEST_CMD
Expand Down

0 comments on commit b97b763

Please sign in to comment.