Merge pull request #18 from yadneshk/secure_image

Secure image
yadneshk · Dec 4, 2024 · 75c74a5 · 75c74a5
2 parents f848e79 + cd9777b
commit 75c74a5
Show file tree

Hide file tree

Showing 14 changed files with 134 additions and 13 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -9,16 +9,19 @@ RUN CGO_ENABLED=0 GOOS=linux go build -o metricly cmd/collector/main.go
 
 # Use a minimal base image for the final image
 FROM quay.io/jitesoft/alpine:3.20.3
-WORKDIR /root
-COPY --from=builder /app/metricly .
+COPY --from=builder /app/metricly /bin/metricly
+WORKDIR /metricly
 COPY ./config/healthcheck .
-RUN chmod +x /root/healthcheck
+
 RUN mkdir /etc/metricly
+RUN chown -R nobody:nobody /etc/metricly /metricly
+
 # Expose the port
 EXPOSE 8080
+USER nobody
 
 # Run the metrics collector
-ENTRYPOINT ["./metricly"]
+ENTRYPOINT ["/bin/metricly"]
 
 # Default args
 CMD ["--config", "/etc/metricly/config.yaml"]
diff --git a/Makefile b/Makefile
@@ -33,7 +33,7 @@ run_container:
 	-p 8080:8080 \
 	-v ./config/config.yaml:/etc/metricly/config.yaml:ro,z \
 	-v /:/host/root:ro,slave \
-	--health-cmd "/root/healthcheck metricly" \
+	--health-cmd "/bin/sh /metricly/healthcheck metricly" \
 	-e HOSTNAME=${HOSTNAME} \
 	-e PROC_CPU_STAT=/host/root/proc/stat \
 	-e PROC_MEMORY_INFO=/host/root/proc/meminfo \

diff --git a/README.md b/README.md
@@ -23,6 +23,8 @@
   - Logs incoming and outgoing API requests with support for multiple log levels (INFO, DEBUG, ERROR).
 - **Metrics Visualization**
   - Provides an inbuilt `Grafana` dashboard to visualize all metrics.
+- **Alerting Mechanism**
+  - Provides inbuilt alert rules and an `Alertmanager` configuration that sends email notifications via Gmail.
 ---
 
 ## **Getting Started**
@@ -371,6 +373,17 @@ The Metricly exporter provides the following API endpoints:
       }
       ```      
 
+### **Alertmanager Configuration** ###
+Metricly provides a few inbuilt alerts that detect high CPU, memory, and disk utilization.
+
+![Sample Alerts](doc/alerts.png)
+
+When the condition for any alert is met, an email notification is sent to the receiver configured in `config/alertmanager/alertmanager.yml`.
+
+![High CPU Alert](doc/high_cpu_alert.png)
+
+To include more alerts, take a look at `config/prometheus/alerts/`. Similar alerts can be built and added to the same directory.
+
 ### **Development**
 
 #### **Testing**

diff --git a/alertmanager.yml b/alertmanager.yml
@@ -0,0 +1,15 @@
+global:
+  resolve_timeout: 5m
+
+route:
+  receiver: 'default'  # no child routes: every alert goes to the 'default' receiver
+
+receivers:
+  - name: 'default'
+    email_configs:
+      - to: '[email protected]'           # receiver's address
+        from: '[email protected]'
+        smarthost: 'smtp.gmail.com:587'  # Gmail SMTP endpoint (host:port)
+        auth_username: '[email protected]' # sender's address
+        auth_identity: '[email protected]' # sender's address
+        auth_password: 'xxxxxxxxxxxxx'    # placeholder only: never commit a real Gmail app password (revoke any that was pushed); set it locally or use auth_password_file
diff --git a/config/alertmanager/alertmanager.yml b/config/alertmanager/alertmanager.yml
@@ -0,0 +1,15 @@
+global:
+  resolve_timeout: 5m
+
+route:
+  receiver: 'default'  # no child routes: every alert goes to the 'default' receiver
+
+receivers:
+  - name: 'default'
+    email_configs:
+      - to: '[email protected]'           # receiver's address
+        from: '[email protected]'   # sender's address
+        smarthost: 'smtp.gmail.com:587'  # Gmail SMTP endpoint (host:port)
+        auth_username: '[email protected]' # sender's address
+        auth_identity: '[email protected]' # sender's address
+        auth_password: 'xxxxxxxxxxxxx'    # placeholder: replace locally with the Gmail app password generated by the sender; do not commit the real value
diff --git a/config/go-runtime.json → ...ig/grafana/visualizations/go-runtime.json b/config/go-runtime.json → ...ig/grafana/visualizations/go-runtime.json
diff --git a/config/infrastructure.json → ...rafana/visualizations/infrastructure.json b/config/infrastructure.json → ...rafana/visualizations/infrastructure.json
diff --git a/config/prometheus/alerts/cpu_alerts.yml b/config/prometheus/alerts/cpu_alerts.yml
@@ -0,0 +1,20 @@
+groups:
+  - name: cpu_alerts  # two-tier CPU alerting: warning above 60, critical above 80
+    rules:
+      - alert: CPUUsage > 60%
+        expr: avg_over_time(metricly_cpu_total[5m]) > 60  # mean of metricly_cpu_total over a 5m window, compared against 60
+        for: 1m  # expression must stay true for 1m before the alert fires
+        labels:
+          severity: warning
+        annotations:
+          summary: "High CPU usage detected"
+          description: "CPU usage is above 60% for the last 5 minutes on host {{ $labels.host }}"  # NOTE(review): assumes metricly_cpu_total carries a 'host' label; confirm
+
+      - alert: CPUUsage > 80%
+        expr: avg_over_time(metricly_cpu_total[15m]) > 80  # same metric, longer 15m window and higher threshold
+        for: 1m  # expression must stay true for 1m before the alert fires
+        labels:
+          severity: critical
+        annotations:
+          summary: "High CPU usage detected"
+          description: "CPU usage is above 80% for the last 15 minutes on host {{ $labels.host }}"  # NOTE(review): assumes metricly_cpu_total carries a 'host' label; confirm
diff --git a/config/prometheus/alerts/disk_alerts.yml b/config/prometheus/alerts/disk_alerts.yml
@@ -0,0 +1,20 @@
+groups:
+  - name: disk_alerts  # two-tier disk alerting: warning above 60%, critical above 80%
+    rules:
+      - alert: Disk Usage > 60%
+        expr: 100*metricly_disk_used_bytes/metricly_disk_total_bytes > 60  # used/total expressed as a percentage
+        for: 1m  # expression must stay true for 1m before the alert fires
+        labels:
+          severity: warning
+        annotations:
+          summary: "High Disk usage detected"
+          description: "Disk usage is above 60%"
+
+      - alert: Disk Usage > 80%
+        expr: 100*metricly_disk_used_bytes/metricly_disk_total_bytes > 80  # same ratio as the warning rule, so both rules match above 80%
+        for: 1m  # expression must stay true for 1m before the alert fires
+        labels:
+          severity: critical
+        annotations:
+          summary: "High Disk usage detected"
+          description: "Disk usage is above 80%"
diff --git a/config/prometheus/alerts/memory_alerts.yml b/config/prometheus/alerts/memory_alerts.yml
@@ -0,0 +1,20 @@
+groups:
+  - name: memory_alerts  # two-tier memory alerting: warning above 60%, critical above 80%
+    rules:
+      - alert: Memory Usage > 60%
+        expr: 100*(metricly_memory_total_bytes-metricly_memory_available_bytes)/metricly_memory_total_bytes > 60  # (total - available)/total expressed as a percentage
+        for: 1m  # expression must stay true for 1m before the alert fires
+        labels:
+          severity: warning
+        annotations:
+          summary: "High Memory usage detected"
+          description: "Memory usage is above 60%"
+
+      - alert: Memory Usage > 80%
+        expr: 100*(metricly_memory_total_bytes-metricly_memory_available_bytes)/metricly_memory_total_bytes > 80  # same ratio as the warning rule, so both rules match above 80%
+        for: 1m  # expression must stay true for 1m before the alert fires
+        labels:
+          severity: critical
+        annotations:
+          summary: "High Memory usage detected"
+          description: "Memory usage is above 80%"
diff --git a/config/prometheus/prometheus.yml b/config/prometheus/prometheus.yml
@@ -8,3 +8,12 @@ scrape_configs:
     static_configs:
       - targets:
           - '127.0.0.1:8080' # Target where your app exposes metrics
+
+rule_files:
+  - "/etc/prometheus/alerts/*.yml"
+
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets:
+            - '127.0.0.1:9093'
diff --git a/doc/alerts.png b/doc/alerts.png
diff --git a/doc/high_cpu_alert.png b/doc/high_cpu_alert.png
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -8,11 +8,6 @@ services:
     network_mode: host
     volumes:
       - ./config/config.yaml:/etc/metricly/config.yaml:ro,z
-      # - /proc/stat:/proc/stat:ro
-      # - /proc/meminfo:/proc/meminfo:ro
-      # - /proc/diskstats:/proc/diskstats:ro
-      # - /proc/net/dev:/mnt/metricly/dev:ro
-      # - /proc/self/mounts:/mnt/metricly/mounts:ro
       - /:/host/root:ro,rslave # Changes in the source (host) are reflected in the container, not vice-versa
     environment:
       - HOSTNAME=${HOSTNAME}
@@ -22,7 +17,7 @@ services:
       - PROC_DISK_MOUNTS=/host/root/proc/mounts
       - PROC_DISK_STATS=/host/root/proc/diskstats
     healthcheck:
-      test: ["CMD", "/root/healthcheck metricly"]
+      test: ["CMD", "/bin/sh", "/metricly/healthcheck", "metricly"]  # exec form: one list item per argument; a single "cmd arg" string is treated as one executable path
       interval: 30s   
       timeout: 5s     
       retries: 3      
@@ -34,6 +29,7 @@ services:
     network_mode: host
     volumes:
       - ./config/prometheus/:/etc/prometheus/:ro,z
+      - ./config/prometheus/alerts:/etc/prometheus/alerts:ro,z
     command:
       - '--config.file=/etc/prometheus/prometheus.yml'
       - '--storage.tsdb.path=/prometheus'
@@ -52,8 +48,18 @@ services:
       - GF_SECURITY_ADMIN_PASSWORD=admin
     volumes:
       - ./config/grafana/provisioning/:/etc/grafana/provisioning/:ro,z
-      - ./config/infrastructure.json:/var/lib/grafana/dashboards/infrastructure.json:ro,z
-      - ./config/go-runtime.json:/var/lib/grafana/dashboards/go-runtime.json:ro,z
+      - ./config/grafana/visualizations/infrastructure.json:/var/lib/grafana/dashboards/infrastructure.json:ro,z
+      - ./config/grafana/visualizations/go-runtime.json:/var/lib/grafana/dashboards/go-runtime.json:ro,z
     restart: always
     depends_on:
       - prometheus
+
+  alertmanager:
+    container_name: metricly_alertmanager
+    image: docker.io/prom/alertmanager:v0.27.0
+    network_mode: host
+    volumes:
+      - ./config/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro,z
+    restart: always
+    depends_on:
+      - prometheus