From 2ec2f9a813becd16601c38bd953345d340c48e05 Mon Sep 17 00:00:00 2001 From: Micah Nagel Date: Wed, 24 Jul 2024 10:56:14 -0600 Subject: [PATCH 01/15] wip: swap promtail to grafana alloy --- .github/filters.yaml | 4 +- .vscode/settings.json | 5 +- README.md | 2 +- compliance/oscal-component.yaml | 2 +- docs/application-baseline.md | 2 +- packages/standard/zarf.yaml | 6 +- pkg/sumdb/sum.golang.org/latest | 5 + renovate.json | 6 +- src/alloy/README.md | 3 + src/{promtail => alloy}/chart/.helmignore | 0 src/{promtail => alloy}/chart/Chart.yaml | 4 +- .../chart/templates/_helpers.tpl | 20 +-- src/alloy/chart/templates/alloy-config.yaml | 130 ++++++++++++++++++ src/alloy/chart/templates/uds-exemption.yaml | 23 ++++ src/alloy/chart/templates/uds-package.yaml | 28 ++++ src/{promtail => alloy}/chart/values.yaml | 0 src/alloy/common/zarf.yaml | 32 +++++ src/{promtail => alloy}/oscal-component.yaml | 34 ++--- src/alloy/tasks.yaml | 10 ++ src/alloy/values/registry1-values.yaml | 10 ++ src/alloy/values/unicorn-values.yaml | 15 ++ src/alloy/values/upstream-values.yaml | 10 ++ src/alloy/values/values.yaml | 12 ++ src/{promtail => alloy}/zarf.yaml | 30 ++-- src/istio/oscal-component.yaml | 2 +- src/loki/chart/templates/uds-package.yaml | 6 +- .../controllers/exemptions/exemptions.spec.ts | 38 ++--- src/pepr/policies/exemptions/index.spec.ts | 2 +- src/promtail/README.md | 1 - src/promtail/chart/templates/service.yaml | 18 --- .../chart/templates/uds-exemption.yaml | 24 ---- src/promtail/chart/templates/uds-package.yaml | 44 ------ src/promtail/common/zarf.yaml | 32 ----- src/promtail/tasks.yaml | 10 -- src/promtail/values/registry1-values.yaml | 10 -- src/promtail/values/unicorn-values.yaml | 10 -- src/promtail/values/upstream-values.yaml | 10 -- src/promtail/values/values.yaml | 116 ---------------- 38 files changed, 355 insertions(+), 361 deletions(-) create mode 100644 pkg/sumdb/sum.golang.org/latest create mode 100644 src/alloy/README.md rename src/{promtail => alloy}/chart/.helmignore (100%) rename src/{promtail => alloy}/chart/Chart.yaml (91%) rename src/{promtail => alloy}/chart/templates/_helpers.tpl (71%) create mode 100644 src/alloy/chart/templates/alloy-config.yaml create mode 100644 src/alloy/chart/templates/uds-exemption.yaml create mode 100644 src/alloy/chart/templates/uds-package.yaml rename src/{promtail => alloy}/chart/values.yaml (100%) create mode 100644 src/alloy/common/zarf.yaml rename src/{promtail => alloy}/oscal-component.yaml (87%) create mode 100644 src/alloy/tasks.yaml create mode 100644 src/alloy/values/registry1-values.yaml create mode 100644 src/alloy/values/unicorn-values.yaml create mode 100644 src/alloy/values/upstream-values.yaml create mode 100644 src/alloy/values/values.yaml rename src/{promtail => alloy}/zarf.yaml (59%) delete mode 100644 src/promtail/README.md delete mode 100644 src/promtail/chart/templates/service.yaml delete mode 100644 src/promtail/chart/templates/uds-exemption.yaml delete mode 100644 src/promtail/chart/templates/uds-package.yaml delete mode 100644 src/promtail/common/zarf.yaml delete mode 100644 src/promtail/tasks.yaml delete mode 100644 src/promtail/values/registry1-values.yaml delete mode 100644 src/promtail/values/unicorn-values.yaml delete mode 100644 src/promtail/values/upstream-values.yaml delete mode 100644 src/promtail/values/values.yaml diff --git a/.github/filters.yaml b/.github/filters.yaml index 0e97ddef8..88e3a9534 100644 --- a/.github/filters.yaml +++ b/.github/filters.yaml @@ -19,8 +19,8 @@ neuvector: - "src/neuvector/**" 
prometheus-stack: - "src/prometheus-stack/**" -promtail: - - "src/promtail/**" +alloy: + - "src/alloy/**" tempo: - "src/tempo/**" velero: diff --git a/.vscode/settings.json b/.vscode/settings.json index 81f75a82c..dcad4ac2a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,7 +12,6 @@ "https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.12.0/uds.schema.json": [ "uds-bundle.yaml" ], - // renovate: datasource=github-tags depName=defenseunicorns/uds-cli versioning=semver "https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.12.0/tasks.schema.json": [ "tasks.yaml", @@ -37,7 +36,7 @@ "MITM", "neuvector", "opensource", - "promtail", + "alloy", "Quarkus", "Quickstart", "seccomp", @@ -47,7 +46,7 @@ "cSpell.enabled": true, "[typescript]": { "editor.codeActionsOnSave": { - "source.organizeImports": "always" + "source.organizeImports": "always" } }, } diff --git a/README.md b/README.md index 2679ac06a..350e84587 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ UDS Core establishes a secure baseline for cloud-native systems and ships with c - [Neuvector](https://open-docs.neuvector.com/) - Container Security - [Pepr](https://pepr.dev) - UDS policy engine & operator - [Prometheus Stack](https://github.com/prometheus-operator/kube-prometheus) - Monitoring -- [Promtail](https://grafana.com/docs/loki/latest/send-data/promtail/) - Log Aggregation +- [Grafana Alloy](https://grafana.com/docs/alloy/latest/) - Log Aggregation - [Velero](https://velero.io/) - Backup & Restore #### Future Applications diff --git a/compliance/oscal-component.yaml b/compliance/oscal-component.yaml index ecb88933e..25782e144 100644 --- a/compliance/oscal-component.yaml +++ b/compliance/oscal-component.yaml @@ -19,7 +19,7 @@ component-definition: - href: 'file://./../src/loki/oscal-component.yaml' - href: 'file://./../src/neuvector/oscal-component.yaml' - href: 'file://./../src/prometheus-stack/oscal-component.yaml' - - href: 'file://./../src/promtail/oscal-component.yaml' + - href: 'file://./../src/alloy/oscal-component.yaml' - href: 'file://./../src/velero/oscal-component.yaml' capabilities: diff --git a/docs/application-baseline.md b/docs/application-baseline.md index 27d507175..a36b90bdc 100644 --- a/docs/application-baseline.md +++ b/docs/application-baseline.md @@ -18,7 +18,7 @@ For optimal deployment and operational efficiency, it is important to deliver a | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **Service Mesh** | **[Istio](https://istio.io/):** A powerful service mesh that provides traffic management, load balancing, security, and observability features. | | **Monitoring** | **[Metrics Server](https://kubernetes-sigs.github.io/metrics-server/):** Provides container resource utilization metrics API for Kubernetes clusters.

**[Prometheus](https://prometheus.io/):** Scrapes Metrics Server API and application metrics and stores the data in a time-series database for insights into application health and performance.

**[Grafana](https://grafana.com/grafana/):** Provides visualization and alerting capabilities based on Prometheus's time-series database of metrics. | -| **Logging** | **[Promtail](https://grafana.com/docs/loki/latest/send-data/promtail/):** A companion agent that efficiently gathers and sends container logs to Loki, simplifying log monitoring, troubleshooting, and compliance auditing, enhancing the overall observability of the mission environment.

**[Loki](https://grafana.com/docs/loki/latest/):** A log aggregation system that allows users to store, search, and analyze logs across their applications. | +| **Logging** | **[Grafana Alloy](https://grafana.com/docs/alloy/latest/):** A companion agent that efficiently gathers and sends container logs to Loki, simplifying log monitoring, troubleshooting, and compliance auditing and enhancing the overall observability of the mission environment.

**[Loki](https://grafana.com/docs/loki/latest/):** A log aggregation system that allows users to store, search, and analyze logs across their applications. | | **Security and Compliance** | **[NeuVector](https://open-docs.neuvector.com/):** Offers container-native security, protecting applications against threats and vulnerabilities.

**[Pepr](https://pepr.dev/):** UDS policy engine and operator for enhanced security and compliance.| | **Identity and Access Management** | **[Keycloak](https://www.keycloak.org/):** A robust open-source Identity and Access Management solution, providing centralized authentication, authorization, and user management for enhanced security and control over access to mission-critical resources.| | **Backup and Restore** | **[Velero](https://velero.io/):** Provides backup and restore capabilities for Kubernetes clusters, ensuring data protection and disaster recovery.| diff --git a/packages/standard/zarf.yaml b/packages/standard/zarf.yaml index 59e243f64..4daff972f 100644 --- a/packages/standard/zarf.yaml +++ b/packages/standard/zarf.yaml @@ -77,11 +77,11 @@ components: import: path: ../../src/prometheus-stack - # Promtail - - name: promtail + # Alloy + - name: alloy required: true import: - path: ../../src/promtail + path: ../../src/alloy # Grafana - name: grafana diff --git a/pkg/sumdb/sum.golang.org/latest b/pkg/sumdb/sum.golang.org/latest new file mode 100644 index 000000000..e6dc1e023 --- /dev/null +++ b/pkg/sumdb/sum.golang.org/latest @@ -0,0 +1,5 @@ +go.sum database tree +28378881 +sFs4sytcTINd4fSkuqjsUBbH50QT58Sd76cWsuYhf3E= + +— sum.golang.org Az3grlCypsn6e3I3N5QpM5WZAiJkKj823SuTJrkg9OA9CgWCGAsnFSkUMlZTbQtHq7oj5bZWG5KwauOhDcqF3j6tMAk= diff --git a/renovate.json b/renovate.json index 80ac376bc..c6b15f4b6 100644 --- a/renovate.json +++ b/renovate.json @@ -57,9 +57,9 @@ "commitMessageTopic": "istio" }, { - "matchFileNames": ["src/promtail/**"], - "groupName": "promtail", - "commitMessageTopic": "promtail" + "matchFileNames": ["src/alloy/**"], + "groupName": "alloy", + "commitMessageTopic": "alloy" }, { "matchFileNames": ["src/velero/**"], diff --git a/src/alloy/README.md b/src/alloy/README.md new file mode 100644 index 000000000..5950881b0 --- /dev/null +++ b/src/alloy/README.md @@ -0,0 +1,3 @@ +## Grafana Alloy + +Grafana Alloy is a distribution of the OpenTelemetry (OTel) Collector. Within UDS Core it is primarily used for log collection and shipping to destinations (like Loki and S3). diff --git a/src/promtail/chart/.helmignore b/src/alloy/chart/.helmignore similarity index 100% rename from src/promtail/chart/.helmignore rename to src/alloy/chart/.helmignore diff --git a/src/promtail/chart/Chart.yaml b/src/alloy/chart/Chart.yaml similarity index 91% rename from src/promtail/chart/Chart.yaml rename to src/alloy/chart/Chart.yaml index 84403fdd5..2b1ce75fa 100644 --- a/src/promtail/chart/Chart.yaml +++ b/src/alloy/chart/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 -name: uds-promtail-config -description: Promtail configuration for UDS +name: uds-alloy-config +description: Grafana Alloy configuration for UDS # A chart can be either an 'application' or a 'library' chart. # diff --git a/src/promtail/chart/templates/_helpers.tpl b/src/alloy/chart/templates/_helpers.tpl similarity index 71% rename from src/promtail/chart/templates/_helpers.tpl rename to src/alloy/chart/templates/_helpers.tpl index e2736937a..c257f3ba2 100644 --- a/src/promtail/chart/templates/_helpers.tpl +++ b/src/alloy/chart/templates/_helpers.tpl @@ -1,7 +1,7 @@ {{/* Expand the name of the chart. */}} -{{- define "uds-promtail-config.name" -}} +{{- define "uds-alloy-config.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} @@ -10,7 +10,7 @@ Create a default fully qualified app name. 
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). If release name contains chart name it will be used as a full name. */}} -{{- define "uds-promtail-config.fullname" -}} +{{- define "uds-alloy-config.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name. {{/* Create chart name and version as used by the chart label. */}} -{{- define "uds-promtail-config.chart" -}} +{{- define "uds-alloy-config.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} -{{- define "uds-promtail-config.labels" -}} -helm.sh/chart: {{ include "uds-promtail-config.chart" . }} -{{ include "uds-promtail-config.selectorLabels" . }} +{{- define "uds-alloy-config.labels" -}} +helm.sh/chart: {{ include "uds-alloy-config.chart" . }} +{{ include "uds-alloy-config.selectorLabels" . }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} @@ -45,17 +45,17 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} {{/* Selector labels */}} -{{- define "uds-promtail-config.selectorLabels" -}} -app.kubernetes.io/name: {{ include "uds-promtail-config.name" . }} +{{- define "uds-alloy-config.selectorLabels" -}} +app.kubernetes.io/name: {{ include "uds-alloy-config.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* Create the name of the service account to use */}} -{{- define "uds-promtail-config.serviceAccountName" -}} +{{- define "uds-alloy-config.serviceAccountName" -}} {{- if .Values.serviceAccount.create }} -{{- default (include "uds-promtail-config.fullname" .) .Values.serviceAccount.name }} +{{- default (include "uds-alloy-config.fullname" .) .Values.serviceAccount.name }} {{- else }} {{- default "default" .Values.serviceAccount.name }} {{- end }} diff --git a/src/alloy/chart/templates/alloy-config.yaml b/src/alloy/chart/templates/alloy-config.yaml new file mode 100644 index 000000000..8f8382cc4 --- /dev/null +++ b/src/alloy/chart/templates/alloy-config.yaml @@ -0,0 +1,130 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: alloy-config + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: alloy + app.kubernetes.io/component: config +data: + config: |- + loki.write "default" { + endpoint { + url = "http://loki-gateway.loki.svc.cluster.local:80/loki/api/v1/push" + } + } + + // Host/Node Logs + discovery.relabel "node_logs" { + targets = [{ + __path__ = "/var/log/*", + host = env("HOSTNAME"), + job = "varlogs", + }] + + rule { + source_labels = ["__journal_systemd_unit"] + target_label = "systemd_unit" + } + + rule { + source_labels = ["__journal_hostname"] + target_label = "nodename" + } + + rule { + source_labels = ["__journal_syslog_identifier"] + target_label = "syslog_identifier" + } + } + local.file_match "node_logs" { + path_targets = discovery.relabel.node_logs.output + } + + // Pod Logs + discovery.kubernetes "pod_logs" { + role = "pod" + } + discovery.relabel "pod_logs" { + targets = discovery.kubernetes.pod_logs.targets + rule { + source_labels = ["__meta_kubernetes_pod_controller_name"] + regex = "([0-9a-z-.]+?)(-[0-9a-f]{8,10})?" 
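+ // capture group 1 is the controller name with any trailing 8-10 character hex hash (e.g. a ReplicaSet suffix) stripped; it feeds the target label below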
+ target_label = "__tmp_controller_name" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app", "__tmp_controller_name", "__meta_kubernetes_pod_name"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "app" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance", "__meta_kubernetes_pod_label_instance"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "instance" + } + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "__meta_kubernetes_pod_label_component"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "component" + } + rule { + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "node_name" + } + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + rule { + source_labels = ["namespace", "app"] + separator = "/" + target_label = "job" + } + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + rule { + source_labels = ["__meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash", "__meta_kubernetes_pod_annotation_kubernetes_io_config_hash", "__meta_kubernetes_pod_container_name"] + separator = "/" + regex = "true/(.*)" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + } + local.file_match "pod_logs" { + path_targets = discovery.relabel.pod_logs.output + } + + // K8s Logs (ex: audit log) + local.file_match "k8s_logs" { + path_targets = [{ + __path__ = "/var/log/kubernetes/**/*.log", + job = "kubernetes-logs", + host = env("HOSTNAME"), + }] + } + + // Forward all logs to Loki + loki.source.file "node_logs" { + targets = local.file_match.node_logs.targets + forward_to = [loki.write.default.receiver] + } + loki.source.file "pod_logs" { + targets = local.file_match.pod_logs.targets + forward_to = [loki.write.default.receiver] + } + loki.source.file "k8s_logs" { + targets = local.file_match.k8s_logs.targets + forward_to = [loki.write.default.receiver] + } diff --git a/src/alloy/chart/templates/uds-exemption.yaml b/src/alloy/chart/templates/uds-exemption.yaml new file mode 100644 index 000000000..a433838f8 --- /dev/null +++ b/src/alloy/chart/templates/uds-exemption.yaml @@ -0,0 +1,23 @@ +apiVersion: uds.dev/v1alpha1 +kind: Exemption +metadata: + name: alloy + namespace: uds-policy-exemptions +spec: + exemptions: + - policies: + - DisallowPrivileged + - RequireNonRootUser + - RestrictSELinuxType + - RestrictHostPathWrite + - RestrictVolumeTypes + matcher: + namespace: alloy + name: "^alloy-.*" + title: "alloy exemptions" + description: | + alloy mounts the following hostPaths: + - `/var/log`: to tail system/pod logs + - `/var/lib/docker/containers`: to tail container logs + Since logs can have sensitive information, it is better to exclude + alloy from the policy than add the paths as allowable mounts diff --git a/src/alloy/chart/templates/uds-package.yaml b/src/alloy/chart/templates/uds-package.yaml new file mode 100644 index 000000000..6943169e9 --- /dev/null +++ b/src/alloy/chart/templates/uds-package.yaml @@ -0,0 +1,28 @@ +apiVersion: uds.dev/v1alpha1 +kind: Package +metadata: + name: alloy + namespace: {{ .Release.Namespace }} +spec: + network: + allow: + # Permit 
intra-namespace communication + - direction: Ingress + remoteGenerated: IntraNamespace + + - direction: Egress + remoteGenerated: IntraNamespace + + - direction: Egress + remoteGenerated: KubeAPI + selector: + app.kubernetes.io/name: alloy + + - direction: Egress + selector: + app.kubernetes.io/name: alloy + remoteNamespace: loki + remoteSelector: + app.kubernetes.io/name: loki + port: 8080 + description: "Write Logs to Loki" diff --git a/src/promtail/chart/values.yaml b/src/alloy/chart/values.yaml similarity index 100% rename from src/promtail/chart/values.yaml rename to src/alloy/chart/values.yaml diff --git a/src/alloy/common/zarf.yaml b/src/alloy/common/zarf.yaml new file mode 100644 index 000000000..f1f04aecb --- /dev/null +++ b/src/alloy/common/zarf.yaml @@ -0,0 +1,32 @@ +kind: ZarfPackageConfig +metadata: + name: uds-core-alloy-common + description: "UDS Core Alloy Common" + url: "https://grafana.com/docs/alloy/latest/" + source: "https://github.com/grafana/alloy/tree/main/operations/helm/charts/alloy" + +components: + - name: alloy + required: true + charts: + - name: uds-alloy-config + namespace: alloy + version: 0.1.0 + localPath: ../chart + - name: alloy + url: https://grafana.github.io/helm-charts + version: 0.5.1 + namespace: alloy + valuesFiles: + - ../values/values.yaml + actions: + onDeploy: + after: + - description: Validate Alloy Package + maxTotalSeconds: 300 + wait: + cluster: + kind: Packages + name: alloy + namespace: alloy + condition: "'{.status.phase}'=Ready" diff --git a/src/promtail/oscal-component.yaml b/src/alloy/oscal-component.yaml similarity index 87% rename from src/promtail/oscal-component.yaml rename to src/alloy/oscal-component.yaml index 012159d33..54aa4c4c5 100644 --- a/src/promtail/oscal-component.yaml +++ b/src/alloy/oscal-component.yaml @@ -1,7 +1,7 @@ component-definition: uuid: ff959bdb-7be9-49b3-9dc2-c41b34e7017d metadata: - title: Promtail + title: Alloy last-modified: "2024-01-31T16:44:35Z" version: "20240132" oscal-version: 1.1.1 @@ -15,7 +15,7 @@ component-definition: components: - uuid: 3ca1e9a3-a566-48d1-93af-200abd1245e3 type: software - title: Promtail + title: Alloy description: | Log collector purpose: Collects logs from the cluster @@ -26,7 +26,7 @@ component-definition: control-implementations: - uuid: d2afb4c4-2cd8-5305-a6cc-d1bc7b388d0c source: https://raw.githubusercontent.com/GSA/fedramp-automation/93ca0e20ff5e54fc04140613476fba80f08e3c7d/dist/content/rev5/baselines/json/FedRAMP_rev5_HIGH-baseline-resolved-profile_catalog.json - description: Controls implemented by Promtail for inheritance by applications + description: Controls implemented by Alloy for inheritance by applications implemented-requirements: - uuid: 954ba9c8-452c-4503-a43f-c880a01b828d control-id: ac-6.9 @@ -36,7 +36,7 @@ component-definition: Auditing the use of privileged functions is one way to detect such misuse, and in doing so, help mitigate the risk from insider threats and the advanced persistent threat (APT). # Control Implementation - Promtail can be configured to collect all logs from Kubernetes and underlying operating systems, allowing the aggregation of privileged function calls. + Alloy can be configured to collect all logs from Kubernetes and underlying operating systems, allowing the aggregation of privileged function calls. remarks: This control is fully implemented by this tool. 
links: - href: "#98b97ec9-a9ce-4444-83d8-71066270a424" @@ -78,9 +78,7 @@ component-definition: Event outcomes can include indicators of event success or failure and event-specific results (e.g., the security state of the information system after the event occurred). # Control Implementation - Logs are captured by promtail from the node. The node logs will contain the necessary log data from all pods/applications inside the selected nodes. - Validating `logfmt` as the config.logFormat would be the goal. This is currently a secret mounted to /etc/promtail/promtail.yaml in the promtail container. We will ensure the promtail.yaml file is at a minimum the target config. - https://grafana.com/docs/loki/latest/send-data/promtail/stages/logfmt/ + Logs are captured by alloy from the node. The node logs will contain the necessary log data from all pods/applications inside the selected nodes. [Alloy Kubernetes Discovery](https://grafana.com/docs/alloy/latest/reference/components/discovery/discovery.kubernetes/) is used to ensure logs have the necessary labels of file name, application, etc. remarks: This control is fully implemented by this tool. links: - href: "#98b97ec9-a9ce-4444-83d8-71066270a424" @@ -105,8 +103,6 @@ component-definition: * time of the event (UTC). * source of event (pod, namespace, container id). Applications are responsible for providing all other information. - Validating `logfmt` as the config.logFormat would be the goal. This is currently a secret mounted to /etc/promtail/promtail.yaml in the promtail container. We will ensure the promtail.yaml file is at a minimum the target config. - https://grafana.com/docs/loki/latest/send-data/promtail/stages/logfmt/ remarks: This control is fully implemented by this tool. links: - href: "#98b97ec9-a9ce-4444-83d8-71066270a424" @@ -119,13 +115,9 @@ component-definition: back-matter: resources: - uuid: D552C935-E40C-4A03-B5CC-4605EBD95B6D - title: Promtail + title: Alloy rlinks: - - href: https://grafana.com/docs/loki/latest/clients/promtail/ - - uuid: 211C474B-E11A-4DD2-8075-50CDAC507CDC - title: Big Bang Promtail package - rlinks: - - href: https://repo1.dso.mil/platform-one/big-bang/apps/sandbox/promtail + - href: https://grafana.com/docs/alloy/latest/ - uuid: 98b97ec9-a9ce-4444-83d8-71066270a424 title: Lula Validation rlinks: @@ -142,7 +134,7 @@ component-definition: Group: apps Version: v1 Resource: daemonsets - Namespaces: [promtail] + Namespaces: [alloy] rego: | package validate @@ -174,7 +166,7 @@ component-definition: Group: Version: v1 Resource: pods - Namespaces: [promtail] + Namespaces: [alloy] rego: | package validate @@ -210,7 +202,7 @@ component-definition: Group: Version: v1 Resource: pods - Namespaces: [promtail] + Namespaces: [alloy] rego: | package validate @@ -247,7 +239,7 @@ component-definition: Group: Version: v1 Resource: pods - Namespaces: [promtail] + Namespaces: [alloy] rego: | package validate @@ -258,8 +250,8 @@ component-definition: containers := pod.spec.containers some container in containers - container.name == "promtail" + container.name == "alloy" some i - container.args[i] == "-config.file=/etc/promtail/promtail.yaml" + container.args[i] == "--volume-dir=/etc/alloy" } } diff --git a/src/alloy/tasks.yaml b/src/alloy/tasks.yaml new file mode 100644 index 000000000..aa9d75f78 --- /dev/null +++ b/src/alloy/tasks.yaml @@ -0,0 +1,10 @@ +tasks: + - name: validate + actions: + - description: Validate alloy + wait: + cluster: + kind: Pod + name: app.kubernetes.io/instance=alloy + namespace: alloy + condition: 
Ready diff --git a/src/alloy/values/registry1-values.yaml b/src/alloy/values/registry1-values.yaml new file mode 100644 index 000000000..352f51fc6 --- /dev/null +++ b/src/alloy/values/registry1-values.yaml @@ -0,0 +1,10 @@ +image: + registry: registry1.dso.mil + repository: ironbank/opensource/grafana/alloy + tag: v1.2.1 + +configReloader: + image: + registry: registry1.dso.mil + repository: ironbank/opensource/jimmidyson/configmap-reload + tag: v0.13.1 diff --git a/src/alloy/values/unicorn-values.yaml b/src/alloy/values/unicorn-values.yaml new file mode 100644 index 000000000..41afce497 --- /dev/null +++ b/src/alloy/values/unicorn-values.yaml @@ -0,0 +1,15 @@ +image: + registry: docker.io + repository: grafana/alloy + tag: v1.2.1 + +# image: +# registry: cgr.dev +# repository: du-uds-defenseunicorns/grafana-alloy-fips +# tag: v1.2.1 + +configReloader: + image: + registry: cgr.dev + repository: du-uds-defenseunicorns/configmap-reload-fips + tag: "0.12.0" diff --git a/src/alloy/values/upstream-values.yaml b/src/alloy/values/upstream-values.yaml new file mode 100644 index 000000000..355aa275b --- /dev/null +++ b/src/alloy/values/upstream-values.yaml @@ -0,0 +1,10 @@ +image: + registry: docker.io + repository: grafana/alloy + tag: v1.2.1 + +configReloader: + image: + registry: ghcr.io + repository: jimmidyson/configmap-reload + tag: v0.13.1 diff --git a/src/alloy/values/values.yaml b/src/alloy/values/values.yaml new file mode 100644 index 000000000..b7b604608 --- /dev/null +++ b/src/alloy/values/values.yaml @@ -0,0 +1,12 @@ +alloy: + mounts: + varlog: true + configMap: + create: false + name: alloy-config + key: config + # Disable telemetry that doesn't function in the airgap + enableReporting: false + +serviceMonitor: + enabled: true diff --git a/src/promtail/zarf.yaml b/src/alloy/zarf.yaml similarity index 59% rename from src/promtail/zarf.yaml rename to src/alloy/zarf.yaml index e1310ed58..5a0bfa0af 100644 --- a/src/promtail/zarf.yaml +++ b/src/alloy/zarf.yaml @@ -1,51 +1,51 @@ kind: ZarfPackageConfig metadata: - name: uds-core-promtail - description: "UDS Core Promtail" - url: "https://grafana.com/docs/loki/latest/" + name: uds-core-alloy + description: "UDS Core Alloy" + url: "https://grafana.com/docs/alloy/latest/" components: - - name: promtail + - name: alloy required: true - description: "Deploy Promtail" + description: "Deploy Alloy" only: flavor: upstream import: path: common charts: - - name: promtail + - name: alloy valuesFiles: - values/upstream-values.yaml images: + - docker.io/grafana/alloy:v1.2.1 - ghcr.io/jimmidyson/configmap-reload:v0.13.1 - - docker.io/grafana/promtail:3.1.0 - - name: promtail + - name: alloy required: true - description: "Deploy Promtail" + description: "Deploy Alloy" only: flavor: registry1 import: path: common charts: - - name: promtail + - name: alloy valuesFiles: - values/registry1-values.yaml images: + - registry1.dso.mil/ironbank/opensource/grafana/alloy:v1.2.1 - registry1.dso.mil/ironbank/opensource/jimmidyson/configmap-reload:v0.13.1 - - registry1.dso.mil/ironbank/opensource/grafana/promtail:v3.1.0 - - name: promtail + - name: alloy required: true - description: "Deploy Promtail" + description: "Deploy Alloy" only: flavor: unicorn import: path: common charts: - - name: promtail + - name: alloy valuesFiles: - values/unicorn-values.yaml images: + - docker.io/grafana/alloy:v1.2.1 # no chainguard image yet - cgr.dev/du-uds-defenseunicorns/configmap-reload-fips:0.12.0 - - cgr.dev/du-uds-defenseunicorns/promtail:3.1.0 diff --git 
a/src/istio/oscal-component.yaml b/src/istio/oscal-component.yaml index c6bb07f23..b2876725d 100644 --- a/src/istio/oscal-component.yaml +++ b/src/istio/oscal-component.yaml @@ -538,7 +538,7 @@ component-definition: # Expected values expected_istiod_port := 15012 expected_istiod_protocol := "TCP" - required_namespaces := {"authservice", "grafana", "keycloak", "loki", "metrics-server", "monitoring", "neuvector", "promtail", "velero"} + required_namespaces := {"authservice", "grafana", "keycloak", "loki", "metrics-server", "monitoring", "neuvector", "alloy", "velero"} # Validate NetworkPolicy for Istiod in required namespaces validate { diff --git a/src/loki/chart/templates/uds-package.yaml b/src/loki/chart/templates/uds-package.yaml index 8f30a3d0c..0559bde01 100644 --- a/src/loki/chart/templates/uds-package.yaml +++ b/src/loki/chart/templates/uds-package.yaml @@ -37,12 +37,12 @@ spec: - direction: Ingress selector: app.kubernetes.io/name: loki - remoteNamespace: promtail + remoteNamespace: alloy remoteSelector: - app.kubernetes.io/name: promtail + app.kubernetes.io/name: alloy ports: - 8080 - description: "Promtail Log Storage" + description: "Alloy Log Storage" # Todo: wide open for now for pushing to s3 - direction: Egress diff --git a/src/pepr/operator/controllers/exemptions/exemptions.spec.ts b/src/pepr/operator/controllers/exemptions/exemptions.spec.ts index 8c276d879..d33f898bd 100644 --- a/src/pepr/operator/controllers/exemptions/exemptions.spec.ts +++ b/src/pepr/operator/controllers/exemptions/exemptions.spec.ts @@ -19,13 +19,13 @@ const prometheusMatcher = { name: "^neuvector-prometheus-exporter-pod.*", kind: MatcherKind.Pod, }; -const promtailMatcher = { namespace: "promtail", name: "^promtail-.*", kind: MatcherKind.Pod }; +const alloyMatcher = { namespace: "alloy", name: "^alloy-.*", kind: MatcherKind.Pod }; const exemption1UID = "exemption-1-uid"; const exemption2UID = "exemption-2-uid"; const storedEnforcerMatcher = { ...enforcerMatcher, owner: exemption1UID }; const storedControllerMatcher = { ...controllerMatcher, owner: exemption1UID }; const storedPrometheusMatcher = { ...prometheusMatcher, owner: exemption1UID }; -const storedPromtailMatcher = { ...promtailMatcher, owner: exemption2UID }; +const storedAlloyMatcher = { ...alloyMatcher, owner: exemption2UID }; const neuvectorMockExemption = { metadata: { uid: exemption1UID, @@ -89,7 +89,7 @@ describe("Test processExemptions() no duplicate matchers in same CR", () => { // remove RequireNonRootUser from enforcerMatcher // remove prometheusMatcher // add DisallowHostNamespaces to controllerMatcher - // add promtailMatcher with RequireNonRootUser + // add alloyMatcher with RequireNonRootUser const updatedNeuvectorExemption = { metadata: { uid: exemption1UID, @@ -109,7 +109,7 @@ describe("Test processExemptions() no duplicate matchers in same CR", () => { ], }, { - matcher: promtailMatcher, + matcher: alloyMatcher, policies: [Policy.RequireNonRootUser], }, ], @@ -119,7 +119,7 @@ describe("Test processExemptions() no duplicate matchers in same CR", () => { processExemptions(neuvectorMockExemption, WatchPhase.Added); processExemptions(updatedNeuvectorExemption, WatchPhase.Modified); expect(ExemptionStore.getByPolicy(Policy.RequireNonRootUser)).toEqual([ - { ...storedPromtailMatcher, owner: exemption1UID }, + { ...storedAlloyMatcher, owner: exemption1UID }, ]); expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([ storedEnforcerMatcher, @@ -359,14 +359,14 @@ describe("Test processExemptions(); phase DELETED", () 
=> { }); it("Does not remove exemptions set by separate CR from the one being deleted", async () => { - const promtailMockExemption = { + const alloyMockExemption = { metadata: { uid: exemption2UID, }, spec: { exemptions: [ { - matcher: promtailMatcher, + matcher: alloyMatcher, policies: [ Policy.DisallowPrivileged, Policy.DropAllCapabilities, @@ -378,12 +378,12 @@ describe("Test processExemptions(); phase DELETED", () => { } as Exemption; processExemptions(neuvectorMockExemption, WatchPhase.Added); - processExemptions(promtailMockExemption, WatchPhase.Added); + processExemptions(alloyMockExemption, WatchPhase.Added); processExemptions(neuvectorMockExemption, WatchPhase.Deleted); - expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([storedPromtailMatcher]); - expect(ExemptionStore.getByPolicy(Policy.DropAllCapabilities)).toEqual([storedPromtailMatcher]); - expect(ExemptionStore.getByPolicy(Policy.RequireNonRootUser)).toEqual([storedPromtailMatcher]); + expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([storedAlloyMatcher]); + expect(ExemptionStore.getByPolicy(Policy.DropAllCapabilities)).toEqual([storedAlloyMatcher]); + expect(ExemptionStore.getByPolicy(Policy.RequireNonRootUser)).toEqual([storedAlloyMatcher]); }); it("Does not delete duplicate exemptions if set by separate CRs", async () => { @@ -447,28 +447,28 @@ describe("Test processExemptions(); phase DELETED", () => { }, } as Exemption; - const promtailMockExemption = { + const alloyMockExemption = { metadata: { uid: exemption2UID, }, spec: { exemptions: [ { - matcher: promtailMatcher, + matcher: alloyMatcher, policies: [Policy.DisallowPrivileged], }, ], }, } as Exemption; - const promtailUpdatedMockExemption = { + const alloyUpdatedMockExemption = { metadata: { uid: exemption2UID, }, spec: { exemptions: [ { - matcher: promtailMatcher, + matcher: alloyMatcher, policies: [Policy.DisallowPrivileged, Policy.RequireNonRootUser], }, ], @@ -476,14 +476,14 @@ describe("Test processExemptions(); phase DELETED", () => { } as Exemption; processExemptions(neuvectorMockExemption, WatchPhase.Added); - processExemptions(promtailMockExemption, WatchPhase.Added); - processExemptions(promtailUpdatedMockExemption, WatchPhase.Modified); + processExemptions(alloyMockExemption, WatchPhase.Added); + processExemptions(alloyUpdatedMockExemption, WatchPhase.Modified); expect(ExemptionStore.getByPolicy(Policy.RequireNonRootUser)).toEqual([ storedEnforcerMatcher, - storedPromtailMatcher, + storedAlloyMatcher, ]); expect(ExemptionStore.getByPolicy(Policy.DropAllCapabilities)).toEqual([storedEnforcerMatcher]); - expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([storedPromtailMatcher]); + expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([storedAlloyMatcher]); }); }); diff --git a/src/pepr/policies/exemptions/index.spec.ts b/src/pepr/policies/exemptions/index.spec.ts index 2ab36dd25..632f084cb 100644 --- a/src/pepr/policies/exemptions/index.spec.ts +++ b/src/pepr/policies/exemptions/index.spec.ts @@ -34,7 +34,7 @@ describe("test registering exemptions", () => { const req = { Raw: { metadata: { - name: "promtail", + name: "alloy", namespace: "monitoring", }, }, diff --git a/src/promtail/README.md b/src/promtail/README.md deleted file mode 100644 index 447959057..000000000 --- a/src/promtail/README.md +++ /dev/null @@ -1 +0,0 @@ -## Promtail diff --git a/src/promtail/chart/templates/service.yaml b/src/promtail/chart/templates/service.yaml deleted file mode 100644 index 
23c6a4429..000000000 --- a/src/promtail/chart/templates/service.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Upstream chart can create this service but it is conditionally tied to the serviceMonitor which would cause errors in single package testing -# This would be resolved by https://github.com/grafana/helm-charts/pull/3083 when merged and released -apiVersion: v1 -kind: Service -metadata: - name: promtail-metrics - namespace: {{ .Release.Namespace }} - labels: - app.kubernetes.io/name: promtail -spec: - clusterIP: None - ports: - - name: http-metrics - port: 3101 - targetPort: http-metrics - protocol: TCP - selector: - app.kubernetes.io/name: promtail diff --git a/src/promtail/chart/templates/uds-exemption.yaml b/src/promtail/chart/templates/uds-exemption.yaml deleted file mode 100644 index 9b8bca9cf..000000000 --- a/src/promtail/chart/templates/uds-exemption.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: uds.dev/v1alpha1 -kind: Exemption -metadata: - name: promtail - namespace: uds-policy-exemptions -spec: - exemptions: - - policies: - - DisallowPrivileged - - RequireNonRootUser - - RestrictSELinuxType - - RestrictHostPathWrite - - RestrictVolumeTypes - matcher: - namespace: promtail - name: "^promtail-.*" - title: "promtail exemptions" - description: "Promtail mounts the following hostPaths: - - `/var/log/pods`: to tail pod logs - - `/var/lib/docker/containers`: to tail container logs - - `/run/promtail`: for Promtail's buffering and persistent state - Since logs can have sensitive information, it is better to exclude - Promtail from the policy than add the paths as allowable mounts - https://github.com/grafana/helm-charts/blob/main/charts/promtail/templates/daemonset.yaml#L120" diff --git a/src/promtail/chart/templates/uds-package.yaml b/src/promtail/chart/templates/uds-package.yaml deleted file mode 100644 index 4875d82ec..000000000 --- a/src/promtail/chart/templates/uds-package.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: uds.dev/v1alpha1 -kind: Package -metadata: - name: promtail - namespace: {{ .Release.Namespace }} -spec: - monitor: - - selector: - app.kubernetes.io/name: promtail - targetPort: 3101 - portName: http-metrics - description: Metrics - - network: - allow: - - direction: Ingress - podSelector: - app.kubernetes.io/name: promtail - remoteNamespace: monitoring - remoteSelector: - app.kubernetes.io/name: prometheus - port: 3101 - description: "Prometheus Metrics" - - - direction: Egress - podSelector: - app.kubernetes.io/name: promtail - remoteGenerated: KubeAPI - - - direction: Egress - remoteNamespace: tempo - remoteSelector: - app.kubernetes.io/name: tempo - port: 9411 - description: "Tempo" - - - direction: Egress - selector: - app.kubernetes.io/name: promtail - remoteNamespace: loki - remoteSelector: - app.kubernetes.io/name: loki - port: 8080 - description: "Write Logs to Loki" diff --git a/src/promtail/common/zarf.yaml b/src/promtail/common/zarf.yaml deleted file mode 100644 index b3df11848..000000000 --- a/src/promtail/common/zarf.yaml +++ /dev/null @@ -1,32 +0,0 @@ -kind: ZarfPackageConfig -metadata: - name: uds-core-promtail-common - description: "UDS Core Promtail Common" - url: "https://grafana.com/docs/loki/latest/" - -components: - - name: promtail - required: true - charts: - - name: uds-promtail-config - namespace: promtail - version: 0.1.0 - localPath: ../chart - - name: promtail - url: https://grafana.github.io/helm-charts/ - version: 6.16.3 - namespace: promtail - gitPath: charts/promtail - valuesFiles: - - ../values/values.yaml - actions: - onDeploy: 
- after: - - description: Validate Promtail Package - maxTotalSeconds: 300 - wait: - cluster: - kind: Packages - name: promtail - namespace: promtail - condition: "'{.status.phase}'=Ready" diff --git a/src/promtail/tasks.yaml b/src/promtail/tasks.yaml deleted file mode 100644 index 8117f590a..000000000 --- a/src/promtail/tasks.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: - - name: validate - actions: - - description: Validate promtail - wait: - cluster: - kind: Pod - name: app.kubernetes.io/instance=promtail - namespace: promtail - condition: Ready diff --git a/src/promtail/values/registry1-values.yaml b/src/promtail/values/registry1-values.yaml deleted file mode 100644 index 63511bc69..000000000 --- a/src/promtail/values/registry1-values.yaml +++ /dev/null @@ -1,10 +0,0 @@ -image: - registry: registry1.dso.mil - repository: ironbank/opensource/grafana/promtail - tag: v3.1.0 -sidecar: - configReloader: - image: - registry: registry1.dso.mil - repository: ironbank/opensource/jimmidyson/configmap-reload - tag: v0.13.1 diff --git a/src/promtail/values/unicorn-values.yaml b/src/promtail/values/unicorn-values.yaml deleted file mode 100644 index 4f4ac593e..000000000 --- a/src/promtail/values/unicorn-values.yaml +++ /dev/null @@ -1,10 +0,0 @@ -image: - registry: cgr.dev - repository: du-uds-defenseunicorns/promtail - tag: 3.1.0 -sidecar: - configReloader: - image: - registry: cgr.dev - repository: du-uds-defenseunicorns/configmap-reload-fips - tag: 0.12.0 diff --git a/src/promtail/values/upstream-values.yaml b/src/promtail/values/upstream-values.yaml deleted file mode 100644 index 1813158fb..000000000 --- a/src/promtail/values/upstream-values.yaml +++ /dev/null @@ -1,10 +0,0 @@ -image: - registry: docker.io - repository: grafana/promtail - tag: 3.1.0 -sidecar: - configReloader: - image: - registry: ghcr.io - repository: jimmidyson/configmap-reload - tag: v0.13.1 diff --git a/src/promtail/values/values.yaml b/src/promtail/values/values.yaml deleted file mode 100644 index d7bb9af71..000000000 --- a/src/promtail/values/values.yaml +++ /dev/null @@ -1,116 +0,0 @@ -config: - clients: - - url: 'http://loki-gateway.loki.svc.cluster.local:80/loki/api/v1/push' - - snippets: - scrapeConfigs: | - # Upstream Defaults https://github.com/grafana/helm-charts/blob/main/charts/promtail/values.yaml - # See also https://github.com/grafana/loki/blob/master/production/ksonnet/promtail/scrape_config.libsonnet for reference - - job_name: kubernetes-pods - pipeline_stages: - {{- toYaml .Values.config.snippets.pipelineStages | nindent 4 }} - kubernetes_sd_configs: - - role: pod - relabel_configs: - - source_labels: - - __meta_kubernetes_pod_controller_name - regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})? 
- action: replace - target_label: __tmp_controller_name - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_name - - __meta_kubernetes_pod_label_app - - __tmp_controller_name - - __meta_kubernetes_pod_name - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: app - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_instance - - __meta_kubernetes_pod_label_instance - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: instance - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_component - - __meta_kubernetes_pod_label_component - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: component - {{- if .Values.config.snippets.addScrapeJobLabel }} - - replacement: kubernetes-pods - target_label: scrape_job - {{- end }} - {{- toYaml .Values.config.snippets.common | nindent 4 }} - {{- with .Values.config.snippets.extraRelabelConfigs }} - {{- toYaml . | nindent 4 }} - {{- end }} - # UDS CORE Defaults - - job_name: systemd-messages - static_configs: - - targets: [localhost] - labels: - job: varlogs - host: "${NODE_HOSTNAME}" - __path__: /var/log/* - relabel_configs: - - source_labels: - - __journal_systemd_unit - target_label: systemd_unit - - source_labels: - - __journal_hostname - target_label: nodename - - source_labels: - - __journal_syslog_identifier - target_label: syslog_identifier - - job_name: kubernetes-logs - static_configs: - - targets: [localhost] - labels: - job: kubernetes-logs - host: "${NODE_HOSTNAME}" - __path__: /var/log/kubernetes/**/*.log - -containerSecurityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - readOnlyRootFilesystem: true - runAsUser: 0 - seLinuxOptions: - type: spc_t -extraArgs: - - '-config.expand-env=true' - -extraEnv: - - name: NODE_HOSTNAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - -extraVolumes: - - hostPath: - path: /var/log - name: varlog - - hostPath: - path: /etc - name: machine-id - -extraVolumeMounts: - - mountPath: /var/log - name: varlog - readOnly: true - - mountPath: /etc/machine-id - name: machine-id - readOnly: true - subPath: machine-id - -resources: - limits: - cpu: 500m - memory: 750Mi - requests: - cpu: 100m - memory: 256Mi From 323ed65571e7fa4397ca1d711d89d4529e587f2d Mon Sep 17 00:00:00 2001 From: Micah Nagel Date: Wed, 24 Jul 2024 16:56:45 -0600 Subject: [PATCH 02/15] wip: add s3 exporter with batching --- src/alloy/chart/templates/alloy-config.yaml | 88 +++++++++++++++++++- src/alloy/chart/templates/env-secret.yaml | 12 +++ src/alloy/chart/templates/uds-exemption.yaml | 1 - src/alloy/chart/templates/uds-package.yaml | 8 ++ src/alloy/chart/values.schema.json | 80 ++++++++++++++++++ src/alloy/chart/values.yaml | 55 ++++++++++++ src/alloy/values/values.yaml | 5 ++ 7 files changed, 244 insertions(+), 5 deletions(-) create mode 100644 src/alloy/chart/templates/env-secret.yaml create mode 100644 src/alloy/chart/values.schema.json diff --git a/src/alloy/chart/templates/alloy-config.yaml b/src/alloy/chart/templates/alloy-config.yaml index 8f8382cc4..29071fa9e 100644 --- a/src/alloy/chart/templates/alloy-config.yaml +++ b/src/alloy/chart/templates/alloy-config.yaml @@ -11,6 +11,8 @@ data: loki.write "default" { endpoint { url = "http://loki-gateway.loki.svc.cluster.local:80/loki/api/v1/push" + batch_wait = "{{ .Values.loki.batch.timeout }}" + batch_size = "{{ .Values.loki.batch.size }}" } } @@ -101,6 +103,11 @@ data: target_label = "__path__" replacement = "/var/log/pods/*$1/*.log" } + rule { + source_labels = ["job"] 
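+ // "job" is assembled above as "<namespace>/<app>", so "alloy/alloy" matches this agent's own pods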
+ regex = "alloy/alloy" // Drop any logs from Alloy itself to prevent log loops + action = "drop" + } } local.file_match "pod_logs" { path_targets = discovery.relabel.pod_logs.output @@ -115,16 +122,89 @@ data: }] } - // Forward all logs to Loki + // Forward all logs to Destinations loki.source.file "node_logs" { targets = local.file_match.node_logs.targets - forward_to = [loki.write.default.receiver] + forward_to = [ + loki.write.default.receiver, + {{- if .Values.s3Exporter.enabled }} + otelcol.receiver.loki.s3.receiver, + {{- end }} + {{- range .Values.additionalForwards }} + {{ . }}, + {{- end }} + ] } loki.source.file "pod_logs" { targets = local.file_match.pod_logs.targets - forward_to = [loki.write.default.receiver] + forward_to = [ + loki.write.default.receiver, + {{- if .Values.s3Exporter.enabled }} + otelcol.receiver.loki.s3.receiver, + {{- end }} + {{- range .Values.additionalForwards }} + {{ . }}, + {{- end }} + ] } loki.source.file "k8s_logs" { targets = local.file_match.k8s_logs.targets - forward_to = [loki.write.default.receiver] + forward_to = [ + loki.write.default.receiver, + {{- if .Values.s3Exporter.enabled }} + otelcol.receiver.loki.s3.receiver, + {{- end }} + {{- range .Values.additionalForwards }} + {{ . }}, + {{- end }} + ] + } + + {{- with .Values.s3Exporter }} + {{ if .enabled }} + {{- if not (and .region .bucket .path) }} + {{ fail "You must provide region, bucket, and path if enabling s3 exporting from Alloy."}} + {{- end }} + // Export to s3 + otelcol.receiver.loki "s3" { + output { + logs = [otelcol.processor.batch.s3.input] + } + } + + // Batch updates because we aren't crazy people + otelcol.processor.batch "s3" { + send_batch_size = {{ .batch.size }} + timeout = "{{ .batch.timeout }}" + output { + logs = [otelcol.exporter.awss3.logs.input] + } + } + + otelcol.exporter.awss3 "logs" { + s3_uploader { + region = "{{ .region }}" + s3_bucket = "{{ .bucket }}" + s3_prefix = "{{ .path }}" + {{- if .role_arn }} + role_arn = "{{ .role_arn }}" + {{- end }} + {{- if .endpoint }} + endpoint = "{{ .endpoint }}" + {{- end }} + s3_force_path_style = {{ .s3_force_path_style }} + {{- if .compression }} + compression = "gzip" + {{- end }} + } + marshaler { + type = "{{ .format }}" + } + debug_metrics {} } + {{- end }} + {{- end }} + {{ with .Values.extraConfig }} + // User provided additional config + {{- tpl . 
$.Values | nindent 4 }} + {{- end }} diff --git a/src/alloy/chart/templates/env-secret.yaml b/src/alloy/chart/templates/env-secret.yaml new file mode 100644 index 000000000..31100dfc0 --- /dev/null +++ b/src/alloy/chart/templates/env-secret.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Secret +metadata: + name: uds-config-env + namespace: {{ .Release.Namespace }} +type: Opaque +stringData: + UDS_CONFIG: "placeholder" # Used as a placeholder to ensure valid secret with keys + {{- if and .Values.s3Exporter.enabled .Values.s3Exporter.accessKey .Values.s3Exporter.secretKey }} + AWS_ACCESS_KEY_ID: "{{ .Values.s3Exporter.accessKey }}" + AWS_SECRET_ACCESS_KEY: "{{ .Values.s3Exporter.secretKey }}" + {{- end }} diff --git a/src/alloy/chart/templates/uds-exemption.yaml b/src/alloy/chart/templates/uds-exemption.yaml index a433838f8..187a3a4d3 100644 --- a/src/alloy/chart/templates/uds-exemption.yaml +++ b/src/alloy/chart/templates/uds-exemption.yaml @@ -18,6 +18,5 @@ spec: description: | alloy mounts the following hostPaths: - `/var/log`: to tail system/pod logs - - `/var/lib/docker/containers`: to tail container logs Since logs can have sensitive information, it is better to exclude alloy from the policy than add the paths as allowable mounts diff --git a/src/alloy/chart/templates/uds-package.yaml b/src/alloy/chart/templates/uds-package.yaml index 6943169e9..1b8fda232 100644 --- a/src/alloy/chart/templates/uds-package.yaml +++ b/src/alloy/chart/templates/uds-package.yaml @@ -26,3 +26,11 @@ spec: app.kubernetes.io/name: loki port: 8080 description: "Write Logs to Loki" + + {{- if .Values.s3Exporter.enabled }} + # Todo: wide open for now for pushing to s3 + - direction: Egress + selector: + app.kubernetes.io/name: alloy + remoteGenerated: Anywhere + {{- end }} diff --git a/src/alloy/chart/values.schema.json b/src/alloy/chart/values.schema.json new file mode 100644 index 000000000..2d8d12d9c --- /dev/null +++ b/src/alloy/chart/values.schema.json @@ -0,0 +1,80 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "additionalForwards": { + "items": { + "type": "string" + }, + "type": "array" + }, + "extraConfig": { + "type": "string" + }, + "loki": { + "properties": { + "batch": { + "properties": { + "size": { + "type": "string" + }, + "timeout": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "s3Exporter": { + "properties": { + "accessKey": { + "type": "string" + }, + "batch": { + "properties": { + "size": { + "type": "integer" + }, + "timeout": { + "type": "string" + } + }, + "type": "object" + }, + "bucket": { + "type": "string" + }, + "compression": { + "type": "boolean" + }, + "enabled": { + "type": "boolean" + }, + "endpoint": { + "type": "string" + }, + "format": { + "type": "string" + }, + "path": { + "type": "string" + }, + "region": { + "type": "string" + }, + "role_arn": { + "type": "string" + }, + "s3_force_path_style": { + "type": "boolean" + }, + "secretKey": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" +} diff --git a/src/alloy/chart/values.yaml b/src/alloy/chart/values.yaml index e69de29bb..5b1143f25 100644 --- a/src/alloy/chart/values.yaml +++ b/src/alloy/chart/values.yaml @@ -0,0 +1,55 @@ +loki: + # -- Batch size to send to Loki + batch: + # -- Maximum batch size of logs to accumulate before sending + size: 1MiB # Upstream default + # -- Maximum amount of time to wait before sending a batch + timeout: 1s # Upstream default + +# -- Export logs to an s3 (or s3-compatible) bucket - 
https://grafana.com/docs/alloy/latest/reference/components/otelcol/otelcol.exporter.awss3/ +s3Exporter: + enabled: true + region: "us-gov-west-1" + bucket: "uds" + # -- Path in the bucket to place logs under + path: "otlp-logs" + # -- The Role ARN to be assumed + role_arn: "" + # -- Modify endpoint if using non-s3 storage (ex: minio) + endpoint: "http://minio.uds-dev-stack.svc.cluster.local:9000" + s3_force_path_style: false + # -- Log format, acceptable values are otlp_json, otlp_proto, sumo_ic, or body + # See https://grafana.com/docs/alloy/latest/reference/components/otelcol/otelcol.exporter.awss3/#marshaler-block + format: "otlp_json" + # -- Configure with access/secret key - leave blank if using IRSA + accessKey: "uds" + secretKey: "uds-secret" + # -- Batch size to send to s3 + batch: + size: 8192 # Upstream default + timeout: 5m # Time before sending a batch regardless of size + # -- Enable gzip compression, not supported with `sumo_ic` format + compression: false + +# -- List of additional `forward_to` receivers for all default scraped logs (pod, node, k8s) +additionalForwards: [] +# Example: +# additionalForwards: +# - foobar.receiver +# - barfoor.receiver + +# -- Support for extra additional config, anything supported as alloy configuration - https://grafana.com/docs/alloy/latest/reference/ +# Should be provided as a multi-line string, supports templating other helm values +extraConfig: "" +# Example: +# extraConfig: | +# otelcol.exporter.awss3 "logs" { +# s3_uploader { +# region = "{{ .Values.s3Exporter.region }}" +# s3_bucket = "my-bucket" +# s3_prefix = "prefix" +# } +# marshaler { +# type = "body" +# } +# } diff --git a/src/alloy/values/values.yaml b/src/alloy/values/values.yaml index b7b604608..58d679652 100644 --- a/src/alloy/values/values.yaml +++ b/src/alloy/values/values.yaml @@ -7,6 +7,11 @@ alloy: key: config # Disable telemetry that doesn't function in the airgap enableReporting: false + envFrom: + - secretRef: + name: uds-config-env + # Enables support for experimental components + stabilityLevel: "experimental" serviceMonitor: enabled: true From 4432b492a747264394d5758b0ffcd755a4a60ed8 Mon Sep 17 00:00:00 2001 From: Micah Nagel Date: Thu, 25 Jul 2024 11:31:35 -0600 Subject: [PATCH 03/15] fix/docs: quick doc on decision, minio fix --- src/alloy/README.md | 36 +++++++++++++++++++++- src/alloy/chart/templates/uds-package.yaml | 1 + src/alloy/chart/values.yaml | 2 +- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/alloy/README.md b/src/alloy/README.md index 5950881b0..b5e13adbe 100644 --- a/src/alloy/README.md +++ b/src/alloy/README.md @@ -1,3 +1,37 @@ -## Grafana Alloy +# Grafana Alloy Grafana Alloy is a distribution of the OpenTelemetry (OTel) Collector. Within UDS Core it is primarily used for log collection and shipping to destinations (like Loki and S3). + +## Switching from Promtail to Alloy + +Within UDS Core we have made the decision to switch from Promtail (historically the log collector/shipper of choice) to Grafana Alloy. The below contains primary motivating factors and impacts of this choice. + +### Motivations + +Promtail has historically been the tool of choice for log collection/shipping when using Loki. It provides a very lightweight layer to scrape logs from pods and hosts, label them with additional metadata, and ship them to Loki. + +One of the main issues that has arisen with Promtail is its limited output/export options. Promtail only supports sending logs to one or more Loki instances. 
A common requirement in production environments is to ship logs to a secondary destination for collection/analysis by security teams and SIEM tools. Promtail is currently listed as [feature complete](https://grafana.com/docs/loki/latest/send-data/promtail/) so there is no expectation that additional export functionality would be added. + +### Goals and Options + +In choosing an alternative to Promtail we have a few primary objectives: +1. The chosen tool must be capable of gathering host and pod logs: This has been our primary usage of Promtail in the past - gathering pod logs and host logs (to include k8s audit logs, control plane logs, etc.). +1. Provide a tool that has a generic export option, as well as direct Loki integration: Generally we are looking for S3/S3-compatible object storage integrations as this is a common ask from end users. While specific SIEM tool integrations can be nice, it's more valuable to have something generic that most other tools can pick logs up from. In addition, a direct integration with Loki that makes it easy to label logs for indexing/querying in Loki is highly desirable. +1. Choose a tool that does not require major changes in our logging stack, but is flexible for future adjustments to the stack: As we do have active users of our product we want to be careful in switching tools, so ideally we would like a tool that is a "drop-in" replacement. However, we don't want to rule out future changes to other pieces of the stack (i.e. Loki), so choosing a tool that doesn't lock us into Loki is important. +1. Focus on the log collection/shipping problem: While there are a number of tools that offer far more than just logging pipelines (metrics, traces, etc.), we don't currently see a need to focus on those capabilities. They are a nice-to-have, but they are not the focus of this evaluation. + +Three tools in the space of log collection were considered: +1. [Vector](https://vector.dev/): Open source and maintained by Datadog, Vector provides input integrations with Kubernetes logs, arbitrary files, and [other sources](https://vector.dev/docs/reference/configuration/sources/). It has the necessary export integrations with Loki, S3, and a [number of other targets](https://vector.dev/docs/reference/configuration/sinks/). Vector is a newer tool that has not yet reached a 1.0 release, but has risen in popularity due to its performance improvements over other tools. +1. [Fluent Bit](https://fluentbit.io/): Fluent Bit was historically used in Big Bang and supports file-based inputs as well as [other sources](https://docs.fluentbit.io/manual/pipeline/inputs). It also supports the necessary export integrations (Loki, S3, OpenTelemetry, and [others](https://docs.fluentbit.io/manual/pipeline/outputs)). Fluent Bit is relatively mature and sits within the CNCF as a sub-project of the graduated Fluentd project. +1. [Grafana Alloy](https://grafana.com/docs/alloy/latest/): Alloy is a distribution of the OpenTelemetry Collector, open source and maintained by Grafana Labs. It supports the necessary [inputs and outputs](https://grafana.com/docs/alloy/latest/reference/components/) (local file/k8s logs, Loki, and S3). As a distribution of OTel it supports vendor-agnostic logging formats and can be integrated with numerous other tools through the OTel ecosystem. While Alloy itself is relatively new, it is built on the previous codebase of Grafana Agent and the existing OTel framework. + +### Decision and Impact + +Grafana Alloy has been chosen as our replacement for Promtail. Primary motivations include: +1.
+1. It is positioned as the "successor"/more feature-rich alternative to Promtail, to include [migration documentation](https://grafana.com/docs/alloy/latest/set-up/migrate/from-promtail/) focused on the switch. This makes it very easy to switch from Promtail to Alloy with little end user impact.
+1. As Alloy is part of the Grafana ecosystem it has good integrations with Loki to provide the enriched log data we have come to expect from Promtail (simple approach to labelling logs, etc).
+1. Through Alloy's S3 integration we can export logs to an additional storage location in generic formats (raw logs, OTel JSON) without any modification - no need to provide Alloy with edit/delete permissions.
+1. By choosing a distribution of the OTel Collector we have flexibility in the future to switch to a different distribution of the OTel Collector and/or easily swap out our logging backend (Loki) for something else that fits in the OTel framework without needing to ensure specific tool compatibility. OTel is part of the CNCF (Incubating) and seems to be on a good trajectory.
+1. Since Alloy is within the Grafana ecosystem there are good integrations and options for enterprise support from Grafana Labs across our entire logging stack. While this may not be common, it is worth calling out as a benefit.
+
+As with any tooling decision in core, this can always be reevaluated in the future as different tools or factors affect how we look at our logging stack.
diff --git a/src/alloy/chart/templates/uds-package.yaml b/src/alloy/chart/templates/uds-package.yaml
index 1b8fda232..93dcd84e6 100644
--- a/src/alloy/chart/templates/uds-package.yaml
+++ b/src/alloy/chart/templates/uds-package.yaml
@@ -33,4 +33,5 @@ spec:
         selector:
           app.kubernetes.io/name: alloy
         remoteGenerated: Anywhere
+        description: "Object Storage"
 {{- end }}
diff --git a/src/alloy/chart/values.yaml b/src/alloy/chart/values.yaml
index 5b1143f25..8f45ff9ca 100644
--- a/src/alloy/chart/values.yaml
+++ b/src/alloy/chart/values.yaml
@@ -17,7 +17,7 @@ s3Exporter:
   role_arn: ""
   # -- Modify endpoint if using non-s3 storage (ex: minio)
   endpoint: "http://minio.uds-dev-stack.svc.cluster.local:9000"
-  s3_force_path_style: false
+  s3_force_path_style: true
   # -- Log format, acceptable values are otlp_json, otlp_proto, sumo_ic, or body
   # See https://grafana.com/docs/alloy/latest/reference/components/otelcol/otelcol.exporter.awss3/#marshaler-block
   format: "otlp_json"

From 7a8257f147814902795ef639c90d51430df359b5 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Thu, 25 Jul 2024 11:33:55 -0600
Subject: [PATCH 04/15] chore: cleanup dev values, move to separate file

---
 src/alloy/chart/dev-values.yaml | 10 ++++++++++
 src/alloy/chart/values.yaml     | 14 +++++++-------
 2 files changed, 17 insertions(+), 7 deletions(-)
 create mode 100644 src/alloy/chart/dev-values.yaml

diff --git a/src/alloy/chart/dev-values.yaml b/src/alloy/chart/dev-values.yaml
new file mode 100644
index 000000000..31d1107c1
--- /dev/null
+++ b/src/alloy/chart/dev-values.yaml
@@ -0,0 +1,10 @@
+# Values for the uds-dev-stack minio
+s3Exporter:
+  enabled: true
+  region: "minio"
+  bucket: "uds"
+  path: "otlp-logs"
+  endpoint: "http://minio.uds-dev-stack.svc.cluster.local:9000"
+  s3_force_path_style: true
+  accessKey: "uds"
+  secretKey: "uds-secret"
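The dev values above are meant as an overlay on the chart defaults when testing against the uds-dev-stack MinIO. A hedged sketch of how they could be wired into the Zarf component, mirroring the commented-out `valuesFiles` that shows up in a later commit (the chart name here is illustrative):

```yaml
# src/alloy/common/zarf.yaml (sketch, not the committed config)
charts:
  - name: uds-alloy-config # illustrative name
    namespace: alloy
    version: 0.1.0
    localPath: ../chart
    valuesFiles:
      - ../chart/dev-values.yaml # dev-only MinIO overlay
```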
diff --git a/src/alloy/chart/values.yaml b/src/alloy/chart/values.yaml
index 8f45ff9ca..e22d0d843 100644
--- a/src/alloy/chart/values.yaml
+++ b/src/alloy/chart/values.yaml
@@ -8,22 +8,22 @@ loki:
 # -- Export logs to an s3 (or s3-compatible) bucket - https://grafana.com/docs/alloy/latest/reference/components/otelcol/otelcol.exporter.awss3/
 s3Exporter:
-  enabled: true
+  enabled: false
   region: "us-gov-west-1"
-  bucket: "uds"
+  bucket: ""
   # -- Path in the bucket to place logs under
-  path: "otlp-logs"
+  path: ""
   # -- The Role ARN to be assumed
   role_arn: ""
   # -- Modify endpoint if using non-s3 storage (ex: minio)
-  endpoint: "http://minio.uds-dev-stack.svc.cluster.local:9000"
-  s3_force_path_style: true
+  endpoint: ""
+  s3_force_path_style: false
   # -- Log format, acceptable values are otlp_json, otlp_proto, sumo_ic, or body
   # See https://grafana.com/docs/alloy/latest/reference/components/otelcol/otelcol.exporter.awss3/#marshaler-block
   format: "otlp_json"
   # -- Configure with access/secret key - leave blank if using IRSA
-  accessKey: "uds"
-  secretKey: "uds-secret"
+  accessKey: ""
+  secretKey: ""
   # -- Batch size to send to s3
   batch:
     size: 8192 # Upstream default
     timeout: 5m # Time before sending a batch regardless of size

From bc18e2253bc15c8cdf9788ea5603652ca0391d2a Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Thu, 25 Jul 2024 11:34:11 -0600
Subject: [PATCH 05/15] ci: [skip ci]

From ef5227ccb705976db58d49f6c64c7874c0444270 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Thu, 25 Jul 2024 11:41:12 -0600
Subject: [PATCH 06/15] chore: lightweight upgrade swap [ci skip]

---
 src/alloy/common/zarf.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/alloy/common/zarf.yaml b/src/alloy/common/zarf.yaml
index f1f04aecb..ca1f3c8cf 100644
--- a/src/alloy/common/zarf.yaml
+++ b/src/alloy/common/zarf.yaml
@@ -21,6 +21,11 @@ components:
           - ../values/values.yaml
     actions:
       onDeploy:
+        before:
+          - description: Remove Promtail Components if necessary
+            cmd: |
+              ./zarf package remove core --components promtail --confirm || true # Ensure this doesn't error on installs
+              ./zarf tools kubectl delete ns promtail || true # Ensure this doesn't error on installs
         after:
           - description: Validate Alloy Package
             maxTotalSeconds: 300

From 07557f6c6620bf6da202841cf798c85ea8a8a2b1 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Thu, 25 Jul 2024 11:42:32 -0600
Subject: [PATCH 07/15] chore: remove gosum [ci skip]

---
 pkg/sumdb/sum.golang.org/latest | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 pkg/sumdb/sum.golang.org/latest

diff --git a/pkg/sumdb/sum.golang.org/latest b/pkg/sumdb/sum.golang.org/latest
deleted file mode 100644
index e6dc1e023..000000000
--- a/pkg/sumdb/sum.golang.org/latest
+++ /dev/null
@@ -1,5 +0,0 @@
-go.sum database tree
-28378881
-sFs4sytcTINd4fSkuqjsUBbH50QT58Sd76cWsuYhf3E=
-
-— sum.golang.org Az3grlCypsn6e3I3N5QpM5WZAiJkKj823SuTJrkg9OA9CgWCGAsnFSkUMlZTbQtHq7oj5bZWG5KwauOhDcqF3j6tMAk=

From 0bb338f49ce9c8686574b2e12ab157ec6d0cd4c1 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Thu, 25 Jul 2024 11:57:21 -0600
Subject: [PATCH 08/15] chore: dev values [ci skip]

---
 src/alloy/common/zarf.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/alloy/common/zarf.yaml b/src/alloy/common/zarf.yaml
index ca1f3c8cf..666ba59e1 100644
--- a/src/alloy/common/zarf.yaml
+++ b/src/alloy/common/zarf.yaml
@@ -13,6 +13,8 @@ components:
         namespace: alloy
         version: 0.1.0
         localPath: ../chart
+        # valuesFiles:
+        #   - ../chart/dev-values.yaml
       - name: alloy
         url: https://grafana.github.io/helm-charts
         version: 0.5.1
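The next commit moves Alloy metrics collection off the upstream chart's `serviceMonitor` toggle and onto the UDS Package CR's `monitor` field. In UDS Core the operator reconciles each `monitor` entry into a Prometheus Operator ServiceMonitor, roughly along these lines (the generated resource name and exact label handling are illustrative, not confirmed by this patch):

```yaml
# Approximate shape of the ServiceMonitor the UDS operator derives from the Package spec
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: alloy-http-metrics # illustrative generated name
  namespace: alloy
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: alloy # from monitor[].selector
  endpoints:
    - port: http-metrics # from monitor[].portName
```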
From 25ce313b837a29bffafd0adb62a4a22c79104a0c Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Fri, 26 Jul 2024 09:54:42 -0600
Subject: [PATCH 09/15] fix: alloy metrics scraping

---
 src/alloy/chart/templates/uds-package.yaml | 7 +++++++
 src/alloy/values/values.yaml               | 3 ---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/alloy/chart/templates/uds-package.yaml b/src/alloy/chart/templates/uds-package.yaml
index 93dcd84e6..3478e911f 100644
--- a/src/alloy/chart/templates/uds-package.yaml
+++ b/src/alloy/chart/templates/uds-package.yaml
@@ -4,6 +4,13 @@ metadata:
   name: alloy
   namespace: {{ .Release.Namespace }}
 spec:
+  monitor:
+    - selector:
+        app.kubernetes.io/name: alloy
+      targetPort: 12345
+      portName: http-metrics
+      description: Metrics
+
   network:
     allow:
       # Permit intra-namespace communication
diff --git a/src/alloy/values/values.yaml b/src/alloy/values/values.yaml
index 58d679652..6216a2a6e 100644
--- a/src/alloy/values/values.yaml
+++ b/src/alloy/values/values.yaml
@@ -12,6 +12,3 @@ alloy:
         name: uds-config-env
   # Enables support for experimental components
   stabilityLevel: "experimental"
-
-serviceMonitor:
-  enabled: true

From 45a2a58cd200dfb3dea1c3a71310901932fcd846 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Fri, 26 Jul 2024 11:46:10 -0600
Subject: [PATCH 10/15] fix: image, positions access

---
 src/alloy/chart/templates/alloy-config.yaml  |  3 +++
 src/alloy/chart/templates/uds-exemption.yaml |  2 ++
 src/alloy/values/unicorn-values.yaml         |  2 +-
 src/alloy/values/values.yaml                 | 24 ++++++++++++++++++++
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/alloy/chart/templates/alloy-config.yaml b/src/alloy/chart/templates/alloy-config.yaml
index 29071fa9e..4fcf553da 100644
--- a/src/alloy/chart/templates/alloy-config.yaml
+++ b/src/alloy/chart/templates/alloy-config.yaml
@@ -125,6 +125,7 @@ data:
     // Forward all logs to Destinations
     loki.source.file "node_logs" {
       targets = local.file_match.node_logs.targets
+      legacy_positions_file = "/run/promtail/positions.yaml"
       forward_to = [
         loki.write.default.receiver,
 {{- if .Values.s3Exporter.enabled }}
@@ -137,6 +138,7 @@ data:
     }
     loki.source.file "pod_logs" {
       targets = local.file_match.pod_logs.targets
+      legacy_positions_file = "/run/promtail/positions.yaml"
       forward_to = [
         loki.write.default.receiver,
 {{- if .Values.s3Exporter.enabled }}
@@ -149,6 +151,7 @@ data:
     }
    loki.source.file "k8s_logs" {
       targets = local.file_match.k8s_logs.targets
+      legacy_positions_file = "/run/promtail/positions.yaml"
       forward_to = [
         loki.write.default.receiver,
 {{- if .Values.s3Exporter.enabled }}
diff --git a/src/alloy/chart/templates/uds-exemption.yaml b/src/alloy/chart/templates/uds-exemption.yaml
index 187a3a4d3..267003fcf 100644
--- a/src/alloy/chart/templates/uds-exemption.yaml
+++ b/src/alloy/chart/templates/uds-exemption.yaml
@@ -18,5 +18,7 @@ spec:
       description: |
         alloy mounts the following hostPaths:
         - `/var/log`: to tail system/pod logs
+        - `/run/alloy`: for Alloy persistent config/write-ahead-log
+        - `/run/promtail`: for accessing Promtail legacy persistent config/write-ahead-log
         Since logs can have sensitive information, it is better to exclude
         alloy from the policy than add the paths as allowable mounts
diff --git a/src/alloy/values/unicorn-values.yaml b/src/alloy/values/unicorn-values.yaml
index 41afce497..79924b52e 100644
--- a/src/alloy/values/unicorn-values.yaml
+++ b/src/alloy/values/unicorn-values.yaml
@@ -12,4 +12,4 @@ configReloader:
   image:
     registry: cgr.dev
     repository: du-uds-defenseunicorns/configmap-reload-fips
-    tag: "0.12.0"
+    tag: "0.13.1"
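The `legacy_positions_file` directives above let Alloy seed its read offsets from Promtail's positions file, so the cutover neither re-ingests nor skips logs that Promtail already shipped. For context, a Promtail positions file is just a map of file path to byte offset; an illustrative example (paths and offsets are hypothetical):

```yaml
# /run/promtail/positions.yaml (illustrative contents)
positions:
  /var/log/pods/example-ns_example-pod_1234/app/0.log: "57812"
  /var/log/kube-apiserver/audit.log: "104857"
```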
diff --git a/src/alloy/values/values.yaml b/src/alloy/values/values.yaml
index 6216a2a6e..90c96846a 100644
--- a/src/alloy/values/values.yaml
+++ b/src/alloy/values/values.yaml
@@ -1,6 +1,24 @@
+controller:
+  volumes:
+    extra:
+      # Mount alloy-storage from a persistent host path
+      - hostPath:
+          path: /run/alloy
+        name: alloy-storage
+      # Mount the legacy promtail positions file so that we can track log scraping pre-alloy
+      - hostPath:
+          path: /run/promtail
+        name: promtail-legacy
+
 alloy:
   mounts:
     varlog: true
+    extra:
+      - mountPath: /run/alloy
+        name: alloy-storage
+      # Mount the legacy promtail positions file so that we can track log scraping pre-alloy
+      - mountPath: /run/promtail
+        name: promtail-legacy
   configMap:
     create: false
     name: alloy-config
@@ -12,3 +30,9 @@ alloy:
         name: uds-config-env
   # Enables support for experimental components
   stabilityLevel: "experimental"
+  # Persist write-ahead-log and other files to hostpath
+  storagePath: /run/alloy
+
+# Disable PodLogs CRD since it will hit the KubeAPI for logs, causing strain on the control plane
+crds:
+  create: false

From a9fca830b9cd2bb6dd7922878ec9235694456b79 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Fri, 26 Jul 2024 13:35:19 -0600
Subject: [PATCH 11/15] fix: securitycontext for registry1

---
 src/alloy/values/values.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/alloy/values/values.yaml b/src/alloy/values/values.yaml
index 90c96846a..f6344cb1b 100644
--- a/src/alloy/values/values.yaml
+++ b/src/alloy/values/values.yaml
@@ -11,6 +11,10 @@ controller:
         name: promtail-legacy
 
 alloy:
+  # Alloy runs as root to allow reading log files and writing WAL to the host
+  securityContext:
+    runAsUser: 0
+    runAsGroup: 0
   mounts:
     varlog: true

From 97d4a0b0cc0f415a7c3fab218e01e4a2c781a2c7 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Fri, 9 Aug 2024 15:37:37 -0600
Subject: [PATCH 12/15] fix: values tag [ci skip]

---
 src/alloy/chart/templates/env-secret.yaml | 2 +-
 src/alloy/values/unicorn-values.yaml      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/alloy/chart/templates/env-secret.yaml b/src/alloy/chart/templates/env-secret.yaml
index 31100dfc0..bebef09dc 100644
--- a/src/alloy/chart/templates/env-secret.yaml
+++ b/src/alloy/chart/templates/env-secret.yaml
@@ -5,7 +5,7 @@ metadata:
   namespace: {{ .Release.Namespace }}
 type: Opaque
 stringData:
-  UDS_CONFIG: "placeholder" # Used as a placeholder to ensure valid secret with keys
+  UDS_CONFIG: "values" # Used as a placeholder to ensure valid secret with keys
 {{- if and .Values.s3Exporter.enabled .Values.s3Exporter.accessKey .Values.s3Exporter.secretKey }}
   AWS_ACCESS_KEY_ID: "{{ .Values.s3Exporter.accessKey }}"
   AWS_SECRET_ACCESS_KEY: "{{ .Values.s3Exporter.secretKey }}"
diff --git a/src/alloy/values/unicorn-values.yaml b/src/alloy/values/unicorn-values.yaml
index ef87fa634..816c67375 100644
--- a/src/alloy/values/unicorn-values.yaml
+++ b/src/alloy/values/unicorn-values.yaml
@@ -1,7 +1,7 @@
 image:
   registry: cgr.dev
   repository: du-uds-defenseunicorns/grafana-alloy-fips
-  tag: v1.3.0
+  tag: "1.3.0"
 
 configReloader:
   image:
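The next commit adds a `secretEnv` map to the config chart. Because the Alloy pods already load the `uds-config-env` secret via `envFrom`, anything placed in `secretEnv` surfaces as an environment variable that Alloy configuration can read. A hedged sketch of the intended usage (the key, value, and exact stdlib function name are illustrative; recent Alloy versions expose environment lookups as `sys.env`):

```yaml
# Hypothetical user values: lands in the uds-config-env secret,
# then becomes an env var on the Alloy pods
secretEnv:
  CUSTOM_TENANT: "example"
# extraConfig could then reference it, e.g. tenant_id = sys.env("CUSTOM_TENANT")
```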
From 7265364681464a438808714d5ef7e3ae374f3a8c Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Tue, 27 Aug 2024 12:06:14 -0600
Subject: [PATCH 13/15] chore: comments, envs [skip ci]

---
 src/alloy/chart/templates/env-secret.yaml | 3 +++
 src/alloy/chart/values.yaml               | 6 ++++++
 src/alloy/values/values.yaml              | 3 ++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/alloy/chart/templates/env-secret.yaml b/src/alloy/chart/templates/env-secret.yaml
index bebef09dc..bb4edc4d5 100644
--- a/src/alloy/chart/templates/env-secret.yaml
+++ b/src/alloy/chart/templates/env-secret.yaml
@@ -10,3 +10,6 @@ stringData:
   AWS_ACCESS_KEY_ID: "{{ .Values.s3Exporter.accessKey }}"
   AWS_SECRET_ACCESS_KEY: "{{ .Values.s3Exporter.secretKey }}"
 {{- end }}
+  {{- range $key, $value := .Values.secretEnv }}
+  {{ $key }}: "{{ $value }}"
+  {{- end }}
diff --git a/src/alloy/chart/values.yaml b/src/alloy/chart/values.yaml
index e22d0d843..8185c3faf 100644
--- a/src/alloy/chart/values.yaml
+++ b/src/alloy/chart/values.yaml
@@ -53,3 +53,9 @@ extraConfig: ""
 #     type = "body"
 #   }
 # }
+
+# -- Additional secret env vars to add to Alloy
+secretEnv: {}
+# Example:
+# secretEnv:
+#   FOO: "BAR"
diff --git a/src/alloy/values/values.yaml b/src/alloy/values/values.yaml
index f6344cb1b..bfdbabd7a 100644
--- a/src/alloy/values/values.yaml
+++ b/src/alloy/values/values.yaml
@@ -27,8 +27,9 @@ alloy:
     create: false
     name: alloy-config
     key: config
-  # Disable telemetry that doesn't function in the airgap
+  # Disable telemetry
   enableReporting: false
+  # Add env from our config chart
   envFrom:
     - secretRef:
         name: uds-config-env

From 4150c6e0915ee7f1b64e35ee03f9a493f7ecf283 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Tue, 27 Aug 2024 16:58:13 -0600
Subject: [PATCH 14/15] chore: config/mute [ci skip]

---
 src/alloy/common/zarf.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/alloy/common/zarf.yaml b/src/alloy/common/zarf.yaml
index 47d117619..9f96b1f49 100644
--- a/src/alloy/common/zarf.yaml
+++ b/src/alloy/common/zarf.yaml
@@ -26,10 +26,15 @@ components:
       onDeploy:
         before:
           - description: Remove Promtail Components if necessary
+            mute: true
            cmd: |
               ./zarf package remove core --components promtail --confirm || true # Ensure this doesn't error on installs
               ./zarf tools kubectl delete ns promtail || true # Ensure this doesn't error on installs
         after:
+          - description: Annotate Alloy daemonset with config secret
+            cmd: |
+              CONFIG_CHECKSUM=$(./zarf tools kubectl get configmap alloy-config -o json -n alloy | sha256sum | cut -d' ' -f1)
+              ./zarf tools kubectl patch daemonset alloy -n alloy -p "{\"spec\":{\"template\":{\"metadata\":{\"annotations\":{\"checksum/uds-config\":\"$CONFIG_CHECKSUM\"}}}}}"
           - description: Validate Alloy Package
             maxTotalSeconds: 300
             wait:
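The checksum annotation above is the standard Helm-style trick for restarting pods on config change: hashing the rendered ConfigMap and stamping the hash onto the pod template means any config update alters the template, which triggers a DaemonSet rollout. The patched template ends up looking roughly like this (hash value is illustrative):

```yaml
# Effect of the kubectl patch on the alloy DaemonSet
spec:
  template:
    metadata:
      annotations:
        checksum/uds-config: "3b4c9d..." # sha256 of the alloy-config ConfigMap JSON
```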
From 0e302005ed9941b6a8e2c3d99d8f3e8ecfea8ca5 Mon Sep 17 00:00:00 2001
From: Micah Nagel
Date: Wed, 28 Aug 2024 10:59:08 -0600
Subject: [PATCH 15/15] fix: validate on upgrade, comments

---
 src/alloy/common/zarf.yaml |  9 +++-----
 src/alloy/tasks.yaml       |  7 +------
 src/pepr/zarf.yaml         | 42 ++++++++++++++------------------
 3 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/src/alloy/common/zarf.yaml b/src/alloy/common/zarf.yaml
index 9f96b1f49..6bb9401c3 100644
--- a/src/alloy/common/zarf.yaml
+++ b/src/alloy/common/zarf.yaml
@@ -13,9 +13,6 @@ components:
         namespace: alloy
         version: 0.1.0
         localPath: ../chart
-        # Dev-values for using s3 storage
-        # valuesFiles:
-        #   - ../chart/dev-values.yaml
       - name: alloy
         url: https://grafana.github.io/helm-charts
         version: 0.6.1
@@ -28,10 +25,10 @@ components:
         - description: Remove Promtail Components if necessary
           mute: true
           cmd: |
-            ./zarf package remove core --components promtail --confirm || true # Ensure this doesn't error on installs
-            ./zarf tools kubectl delete ns promtail || true # Ensure this doesn't error on installs
+            ./zarf package remove core --components promtail --confirm || true # Ensure this doesn't error on installs and upgrades when Promtail no longer exists
+            ./zarf tools kubectl delete ns promtail || true # Ensure this doesn't error on installs and upgrades when Promtail no longer exists
       after:
-        - description: Annotate Alloy daemonset with config secret
+        - description: Annotate Alloy Daemonset with config checksum
          cmd: |
            CONFIG_CHECKSUM=$(./zarf tools kubectl get configmap alloy-config -o json -n alloy | sha256sum | cut -d' ' -f1)
            ./zarf tools kubectl patch daemonset alloy -n alloy -p "{\"spec\":{\"template\":{\"metadata\":{\"annotations\":{\"checksum/uds-config\":\"$CONFIG_CHECKSUM\"}}}}}"
diff --git a/src/alloy/tasks.yaml b/src/alloy/tasks.yaml
index aa9d75f78..acad54789 100644
--- a/src/alloy/tasks.yaml
+++ b/src/alloy/tasks.yaml
@@ -2,9 +2,4 @@ tasks:
   - name: validate
     actions:
       - description: Validate alloy
-        wait:
-          cluster:
-            kind: Pod
-            name: app.kubernetes.io/instance=alloy
-            namespace: alloy
-            condition: Ready
+        cmd: ./zarf tools kubectl rollout status daemonset alloy -n alloy --timeout 300s
diff --git a/src/pepr/zarf.yaml b/src/pepr/zarf.yaml
index 5dafad221..8bbbaa6db 100644
--- a/src/pepr/zarf.yaml
+++ b/src/pepr/zarf.yaml
@@ -52,31 +52,19 @@ components:
     actions:
       onDeploy:
         before:
-          - cmd: ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-api-token meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-module meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-tls meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate serviceaccount -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate clusterrolebinding pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate clusterrole pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate role -n pepr-system pepr-uds-core-store meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate rolebinding -n pepr-system pepr-uds-core-store meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate service -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate service -n pepr-system pepr-uds-core-watcher meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate deployment -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate deployment -n pepr-system pepr-uds-core-watcher meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate mutatingwebhookconfiguration -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
-            mute: true
-          - cmd: ./zarf tools kubectl annotate validatingwebhookconfiguration -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
+          - cmd: |
+              ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-api-token meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-module meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-tls meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate serviceaccount -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate clusterrolebinding pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate clusterrole pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate role -n pepr-system pepr-uds-core-store meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate rolebinding -n pepr-system pepr-uds-core-store meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate service -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate service -n pepr-system pepr-uds-core-watcher meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate deployment -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate deployment -n pepr-system pepr-uds-core-watcher meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate mutatingwebhookconfiguration -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
+              ./zarf tools kubectl annotate validatingwebhookconfiguration -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true
             mute: true