Skip to content

Commit

Permalink
Merge pull request #18 from yadneshk/secure_image
Browse files Browse the repository at this point in the history
Secure image
  • Loading branch information
yadneshk authored Dec 4, 2024
2 parents f848e79 + cd9777b commit 75c74a5
Show file tree
Hide file tree
Showing 14 changed files with 134 additions and 13 deletions.
11 changes: 7 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,19 @@ RUN CGO_ENABLED=0 GOOS=linux go build -o metricly cmd/collector/main.go

# # Use a minimal base image for the final image
FROM quay.io/jitesoft/alpine:3.20.3
WORKDIR /root
COPY --from=builder /app/metricly .
COPY --from=builder /app/metricly /bin/metricly
WORKDIR /metricly
COPY ./config/healthcheck .
RUN chmod +x /root/healthcheck

RUN mkdir /etc/metricly
RUN chown -R nobody:nobody /etc/metricly /metricly

# Expose the port
EXPOSE 8080
USER nobody

# Run the metrics collector
ENTRYPOINT ["./metricly"]
ENTRYPOINT ["/bin/metricly"]

# Default args
CMD ["--config", "/etc/metricly/config.yaml"]
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ run_container:
-p 8080:8080 \
-v ./config/config.yaml:/etc/metricly/config.yaml:ro,z \
-v /:/host/root:ro,slave \
--health-cmd "/root/healthcheck metricly" \
--health-cmd "/bin/sh /metricly/healthcheck metricly" \
-e HOSTNAME=${HOSTNAME} \
-e PROC_CPU_STAT=/host/root/proc/stat \
-e PROC_MEMORY_INFO=/host/root/proc/meminfo \
Expand Down
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
- Logs incoming and outgoing API requests with support for multiple log levels (INFO, DEBUG, ERROR).
- **Metrics Visualization**
- Provides an inbuilt `Grafana` dashboard to visualize all metrics.
- **Alerting Mechanism**
- Provides inbuilt alerting rules that send email (Gmail) notifications through `Alertmanager`.
---

## **Getting Started**
Expand Down Expand Up @@ -371,6 +373,17 @@ The Metricly exporter provides the following API endpoints:
}
```

### **Alertmanager Configuration** ###
Metricly provides a few inbuilt alerts that fire on high CPU, memory, and disk utilization.

![Sample Alerts](doc/alerts.png)

When the condition for any alert is met, an email notification is sent to the receiver configured in `config/alertmanager/alertmanager.yml`.

![High CPU Alert](doc/high_cpu_alert.png)

To include more alerts, take a look at `config/prometheus/alerts/`. Similar alerts can be built and added to the same directory.

### **Development**

#### **Testing**
Expand Down
15 changes: 15 additions & 0 deletions alertmanager.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Alertmanager configuration: every alert is routed to one email receiver.
global:
  resolve_timeout: 5m

route:
  receiver: 'default'

receivers:
  - name: 'default'
    email_configs:
      - to: '[email protected]'  # receiver's address
        from: '[email protected]'
        smarthost: 'smtp.gmail.com:587'
        auth_username: '[email protected]'  # sender's address
        auth_identity: '[email protected]'  # sender's address
        # SECURITY: never commit a real credential to version control.
        # The app password previously committed on this line is exposed in
        # the repository history and must be revoked and regenerated.
        # Fill in the real value only at deploy time (or reference a secret
        # file via `auth_password_file` instead of inlining it).
        auth_password: 'xxxxxxxxxxxxx'  # gmail app password generated by sender
15 changes: 15 additions & 0 deletions config/alertmanager/alertmanager.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Template Alertmanager configuration: a single route delivering every alert
# to the "default" email receiver. Replace the placeholder values below
# before use.
global:
  resolve_timeout: 5m

route:
  receiver: "default"

receivers:
  - name: "default"
    email_configs:
      # Receiver's address.
      - to: "[email protected]"
        from: "[email protected]"
        smarthost: "smtp.gmail.com:587"
        # Sender's address (used for both username and identity).
        auth_username: "[email protected]"
        auth_identity: "[email protected]"
        # Gmail app password generated by the sender.
        auth_password: "xxxxxxxxxxxxx"
File renamed without changes.
File renamed without changes.
20 changes: 20 additions & 0 deletions config/prometheus/alerts/cpu_alerts.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# CPU alerting rules. Both rules must hold for 1m before firing.
groups:
  - name: cpu_alerts
    rules:
      # Warning: 5-minute average of metricly_cpu_total exceeds 60.
      - alert: 'CPUUsage > 60%'
        expr: 'avg_over_time(metricly_cpu_total[5m]) > 60'
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: 'High CPU usage detected'
          description: 'CPU usage is above 60% for the last 5 minutes on host {{ $labels.host }}'

      # Critical: 15-minute average of metricly_cpu_total exceeds 80.
      - alert: 'CPUUsage > 80%'
        expr: 'avg_over_time(metricly_cpu_total[15m]) > 80'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: 'High CPU usage detected'
          description: 'CPU usage is above 80% for the last 15 minutes on host {{ $labels.host }}'
20 changes: 20 additions & 0 deletions config/prometheus/alerts/disk_alerts.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Disk alerting rules. Usage is computed as used bytes over total bytes,
# expressed as a percentage; each rule must hold for 1m before firing.
groups:
  - name: disk_alerts
    rules:
      # Warning threshold: more than 60% of the disk in use.
      - alert: 'Disk Usage > 60%'
        expr: '100*metricly_disk_used_bytes/metricly_disk_total_bytes > 60'
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: 'High Disk usage detected'
          description: 'Disk usage is above 60%'

      # Critical threshold: more than 80% of the disk in use.
      - alert: 'Disk Usage > 80%'
        expr: '100*metricly_disk_used_bytes/metricly_disk_total_bytes > 80'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: 'High Disk usage detected'
          description: 'Disk usage is above 80%'
20 changes: 20 additions & 0 deletions config/prometheus/alerts/memory_alerts.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Memory alerting rules. Usage is computed as (total - available) / total,
# expressed as a percentage; each rule must hold for 1m before firing.
groups:
  - name: memory_alerts
    rules:
      # Warning threshold: more than 60% of memory in use.
      - alert: 'Memory Usage > 60%'
        expr: '100*(metricly_memory_total_bytes-metricly_memory_available_bytes)/metricly_memory_total_bytes > 60'
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: 'High Memory usage detected'
          description: 'Memory usage is above 60%'

      # Critical threshold: more than 80% of memory in use.
      - alert: 'Memory Usage > 80%'
        expr: '100*(metricly_memory_total_bytes-metricly_memory_available_bytes)/metricly_memory_total_bytes > 80'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: 'High Memory usage detected'
          description: 'Memory usage is above 80%'
9 changes: 9 additions & 0 deletions config/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,12 @@ scrape_configs:
static_configs:
- targets:
- '127.0.0.1:8080' # Target where your app exposes metrics

# Load every rule file matching the glob below.
rule_files:
  - '/etc/prometheus/alerts/*.yml'

# Alertmanager endpoint that alerts are sent to.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "127.0.0.1:9093"
Binary file added doc/alerts.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/high_cpu_alert.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 14 additions & 8 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@ services:
network_mode: host
volumes:
- ./config/config.yaml:/etc/metricly/config.yaml:ro,z
# - /proc/stat:/proc/stat:ro
# - /proc/meminfo:/proc/meminfo:ro
# - /proc/diskstats:/proc/diskstats:ro
# - /proc/net/dev:/mnt/metricly/dev:ro
# - /proc/self/mounts:/mnt/metricly/mounts:ro
- /:/host/root:ro,rslave # Changes in the source (host) are reflected in the container, not vice-versa
environment:
- HOSTNAME=${HOSTNAME}
Expand All @@ -22,7 +17,7 @@ services:
- PROC_DISK_MOUNTS=/host/root/proc/mounts
- PROC_DISK_STATS=/host/root/proc/diskstats
healthcheck:
test: ["CMD", "/root/healthcheck metricly"]
# Exec form ("CMD"): every argument must be its own list element, otherwise
# the whole string is treated as the path of the executable and the check fails.
test: ["CMD", "/bin/sh", "/metricly/healthcheck", "metricly"]
interval: 30s
timeout: 5s
retries: 3
Expand All @@ -34,6 +29,7 @@ services:
network_mode: host
volumes:
- ./config/prometheus/:/etc/prometheus/:ro,z
- ./config/prometheus/alerts:/etc/prometheus/alerts:ro,z
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
Expand All @@ -52,8 +48,18 @@ services:
- GF_SECURITY_ADMIN_PASSWORD=admin
volumes:
- ./config/grafana/provisioning/:/etc/grafana/provisioning/:ro,z
- ./config/infrastructure.json:/var/lib/grafana/dashboards/infrastructure.json:ro,z
- ./config/go-runtime.json:/var/lib/grafana/dashboards/go-runtime.json:ro,z
- ./config/grafana/visualizations/infrastructure.json:/var/lib/grafana/dashboards/infrastructure.json:ro,z
- ./config/grafana/visualizations/go-runtime.json:/var/lib/grafana/dashboards/go-runtime.json:ro,z
restart: always
depends_on:
- prometheus

# Alertmanager service: shares the host network and is started after the
# prometheus service. Its configuration file is mounted read-only.
alertmanager:
  image: docker.io/prom/alertmanager:v0.27.0
  container_name: metricly_alertmanager
  restart: always
  network_mode: host
  depends_on:
    - prometheus
  volumes:
    - ./config/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro,z

0 comments on commit 75c74a5

Please sign in to comment.