-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16 from Make-Zenerator/develop
[Feat] V2 pull request
- Loading branch information
Showing
237 changed files
with
42,733 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# CI/CD for the model-serving image.
#   ci: builds serving/ on every PR and push to develop; publishes only on push.
#   cd: on push only, redeploys the published image on the target server via SSH.
name: Model Serving

on:
  pull_request:
    branches: ['develop']  # runs when a pull request targets the develop branch
  push:
    branches: ['develop']  # runs when commits are pushed to the develop branch

jobs:
  ci:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      # Building on pull requests still validates the Dockerfile, but nothing is
      # published until the change is actually merged (push event).
      - name: Build docker image
        run: docker build -t makezenerator/serving:latest serving/
      - name: Login to Docker Hub
        if: github.event_name == 'push'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Push docker image
        if: github.event_name == 'push'
        run: docker push makezenerator/serving:latest

  cd:
    runs-on: ubuntu-latest
    needs: [ci]
    # Never deploy unmerged pull-request code to the server.
    if: github.event_name == 'push'
    steps:
      - name: Connect to server
        # Pinned to a release tag instead of the moving master branch.
        uses: appleboy/ssh-action@v1.0.3
        env:
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }}
        with:
          host: ${{ secrets.SERVER3_HOST }}
          username: ${{ secrets.SERVER_USERNAME }}
          key: ${{ secrets.SERVER3_KEY }}
          port: ${{ secrets.SERVER_PORT }}
          # Forward the credentials as environment variables so they are not
          # spliced into the script text or exposed on a command line.
          envs: DOCKERHUB_USERNAME,DOCKERHUB_PASSWORD
          script: |
            printf '%s' "$DOCKERHUB_PASSWORD" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
            docker pull makezenerator/serving:latest
            # Replace only the serving container (whatever currently publishes
            # port 5050, plus any leftover container named "serving") instead of
            # stopping and removing every container on the host.
            stale=$(docker ps -aq --filter "publish=5050")
            if [ -n "$stale" ]; then docker rm -f $stale; fi
            docker rm -f serving 2>/dev/null || true
            docker run -d --name serving -p 5050:5050 makezenerator/serving:latest
            docker image prune -f
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Image for the MLflow tracking server: Python 3.9 with MLflow, the PostgreSQL
# driver, boto3 for S3-compatible artifact storage, and the MinIO client (mc).
FROM amd64/python:3.9-slim

# OS-level tools. ca-certificates is listed explicitly because
# --no-install-recommends would otherwise not guarantee it for the HTTPS
# download below. The apt lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies; --no-cache-dir keeps pip's download cache out of the image.
# NOTE(review): psycopg2-binary is not pinned, unlike boto3 and mlflow - consider pinning.
RUN pip install --no-cache-dir -U pip && \
    pip install --no-cache-dir boto3==1.26.8 mlflow==1.30.0 psycopg2-binary

# MinIO client, downloaded straight to its final location.
# NOTE(review): this fetches the latest release without a checksum; pin a
# specific release and verify its sha256 for reproducible builds.
RUN wget -nv -O /usr/bin/mc https://dl.min.io/client/mc/release/linux-amd64/mc && \
    chmod +x /usr/bin/mc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# MLflow stack: PostgreSQL backend store, MinIO artifact store, MLflow server.
# NOTE(review): the passwords below are development defaults committed in
# plain text; override them (e.g. through an .env file) for any shared host.
version: "3"

services:
  # Metadata store (experiments, runs, params, metrics).
  mlflow-backend-store:
    image: postgres:14.0
    container_name: mlflow-backend-store
    environment:
      POSTGRES_USER: mlflowuser
      POSTGRES_PASSWORD: mlflowpassword
      POSTGRES_DB: mlflowdatabase
    healthcheck:
      test: ["CMD", "pg_isready", "-q", "-U", "mlflowuser", "-d", "mlflowdatabase"]
      interval: 10s
      timeout: 5s
      retries: 5

  # S3-compatible artifact store; 9000 is the API port, 9001 the web console.
  mlflow-artifact-store:
    image: minio/minio:RELEASE.2024-01-18T22-51-28Z
    container_name: mlflow-artifact-store
    ports:
      - 9000:9000
      - 9001:9001
    environment:
      MINIO_ROOT_USER: minio
      MINIO_ROOT_PASSWORD: miniostorage
    command: server /data/minio --console-address :9001
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 5s
      timeout: 5s
      retries: 5

  mlflow-server:
    build:
      context: .
      dockerfile: DockerFile_mlflow
    container_name: mlflow-server
    # Start only after both stores report healthy.
    depends_on:
      mlflow-backend-store:
        condition: service_healthy
      mlflow-artifact-store:
        condition: service_healthy
    ports:
      - 5001:5000
    environment:
      # boto3 reads AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY. The S3 endpoint
      # below is the local MinIO service, so these must be the MinIO
      # credentials, not a cloud provider key pair.
      # SECURITY: the access key pair that was previously committed here must
      # be treated as leaked and rotated.
      AWS_ACCESS_KEY_ID: minio
      AWS_SECRET_ACCESS_KEY: miniostorage
      MLFLOW_S3_ENDPOINT_URL: http://mlflow-artifact-store:9000
    command:
      - /bin/sh
      - -c
      - |
        # Register the MinIO endpoint and make sure the artifact bucket exists.
        # "mc alias set" replaces the legacy "mc config host add" form.
        mc alias set mlflowminio http://mlflow-artifact-store:9000 minio miniostorage &&
        mc mb --ignore-existing mlflowminio/mlflow
        # exec so the MLflow server replaces the shell and receives stop signals.
        exec mlflow server \
          --backend-store-uri postgresql://mlflowuser:mlflowpassword@mlflow-backend-store/mlflowdatabase \
          --default-artifact-root s3://mlflow/ \
          --host 0.0.0.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Alertmanager configuration: groups alerts by name and instance and forwards
# them to a single Slack-compatible webhook receiver.
global:
  resolve_timeout: 1m

route:
  group_by: ['alertname', 'instance']
  group_wait: 10s
  group_interval: 5m
  repeat_interval: 1h
  receiver: 'slack-notifications'
  routes:
    - match:
        severity: 'critical'
      receiver: 'slack-notifications'

receivers:
  - name: 'slack-notifications'
    slack_configs:
      # The webhook URL contains a secret token, so it is read from a file that
      # must NOT be committed. Create the file inside the directory mounted at
      # /etc/alertmanager/ with the URL as its only content.
      # For a Discord webhook, use the Slack-compatible endpoint, i.e. the
      # webhook URL with "/slack" appended.
      # SECURITY: the webhook URL that was previously committed here must be
      # treated as leaked; delete it and create a new webhook.
      - api_url_file: '/etc/alertmanager/slack_api_url'
        channel: '#alerts'
        send_resolved: true
        title: '[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] Monitoring Alert - {{ .CommonLabels.alertname }} for {{ .CommonLabels.instance }}'
        # Literal block scalar: the line breaks below are real newlines in the
        # message (a folded scalar with "\n" would print the two characters
        # backslash-n verbatim).
        text: |-
          {{ range .Alerts }}*Alert:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Details:*
          {{ range .Labels.SortedPairs }} • *{{ .Name }}:* {{ .Value }}
          {{ end }}
          {{ end }}

# While a critical alert is firing, mute warning alerts that share the same
# alertname and instance.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Monitoring stack: Prometheus, Grafana and Alertmanager on one bridge network.
version: '3.7'

services:
  # Metrics collection and rule evaluation; UI/API on host port 9090.
  prometheus:
    container_name: prometheus
    image: prom/prometheus
    user: root
    restart: always
    command:
      - '--web.enable-lifecycle'
      - '--config.file=/etc/prometheus/prometheus.yml'
    ports:
      - '9090:9090'
    volumes:
      - './prometheus/config/:/etc/prometheus/'
      - './prometheus/prometheus-volume:/prometheus'
    networks:
      - promnet

  # Dashboards; container port 3000 is published on host port 3300.
  grafana:
    container_name: grafana
    image: grafana/grafana
    user: root
    restart: always
    ports:
      - '3300:3000'
    volumes:
      - './grafana-volume:/var/lib/grafana'
    networks:
      - promnet

  # Alert routing and notification; UI/API on host port 9093.
  alertmanager:
    container_name: alertmanager
    image: prom/alertmanager
    user: root
    restart: always
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
    ports:
      - '9093:9093'
    volumes:
      - './alertmanager/config/:/etc/alertmanager/'
    networks:
      - promnet

networks:
  promnet:
    driver: bridge
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Metric exporters for a monitored host: node-exporter (host metrics) and
# NVIDIA dcgm-exporter (GPU metrics).
version: '3.7'
services:
  node:
    image: prom/node-exporter
    container_name: node-exporter
    # Without the host root filesystem and PID namespace the exporter reports
    # on the container rather than on the host it is supposed to monitor.
    command:
      - '--path.rootfs=/host'
    pid: host
    volumes:
      - '/:/host:ro,rslave'
    # NOTE(review): with a published port on a bridge network, network-device
    # metrics still describe the container's namespace; switching to
    # "network_mode: host" would cover those too.
    ports:
      - 9100:9100
    networks:
      - promnet
  dcgm:
    image: nvcr.io/nvidia/k8s/dcgm-exporter:3.2.6-3.1.9-ubuntu20.04
    container_name: dcgm-exporter
    ports:
      - 9400:9400
    # Reserve one NVIDIA GPU for the exporter container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    networks:
      - promnet
networks:
  promnet:
    driver: bridge
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Global defaults - every setting in this section is optional.
global:
  scrape_interval: 15s      # default interval between scrapes of a target / default = 1m
  scrape_timeout: 15s       # how long a scrape request may run before it times out / default = 10s
  evaluation_interval: 2m   # how often rules are evaluated / default = 1m

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'  # label attached by default
  # query_log_file: <path-to-log-file>.log  # records Prometheus query logs; nothing is recorded when unset

# Rules are loaded from these files and evaluated periodically according to
# 'evaluation_interval'.
rule_files:
  - "rule.yml"  # located in the same directory as prometheus.yml

# Deliver firing alerts to Alertmanager. Without this section the rules in
# rule.yml are evaluated but no notification is ever sent.
# "alertmanager" is the container name of the Alertmanager service that shares
# the promnet network with Prometheus.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']

# Endpoints to collect metrics from: node exporters on the inference, web and
# MinIO hosts, and the GPU exporter on the inference host.
scrape_configs:
  - job_name: 'inference_node_exporter'
    metrics_path: /metrics
    static_configs:
      - targets: ['175.45.193.25:9100']
  - job_name: 'web_node_exporter'
    metrics_path: /metrics
    static_configs:
      - targets: ['175.45.194.59:9100']
  - job_name: 'minio_node_exporter'
    metrics_path: /metrics
    static_configs:
      - targets: ['223.130.133.236:9100']
  - job_name: 'gpu_exporter'
    metrics_path: /metrics
    static_configs:
      - targets: ['175.45.193.25:9400']

    # Further options: authorization settings
    # and service discovery (sd) settings.

    # Settings for the actual scrape targets
    # static_configs:
    #   - targets: ['192.168.0.80:3300', 'localhost:9100', 'localhost:80'] // prometheus, node-exporter, cadvisor
    #     labels:  # optional - labels attached to every metric scraped from these targets
    #       service : 'monitor-1'

    # relabel_config - rewrites labels before the scrape happens
    # metric_relabel_configs - dynamically rewrites labels of the scraped series (drop, replace, labeldrop)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Alerting rules evaluated by Prometheus.
# Severity labels use the values the Alertmanager configuration routes and
# inhibits on ('critical' and 'warning').
groups:
  - name: example  # must be unique within this file
    rules:

      # Alert for any instance that is unreachable for >5 minutes.
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."

      # Alert for any instance that has a median request latency >1s.
      # NOTE(review): no scrape job visible here is known to export
      # api_http_request_latencies_second - confirm the metric exists.
      - alert: APIHighRequestLatency
        expr: api_http_request_latencies_second{quantile="0.5"} > 1
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High request latency on {{ $labels.instance }}"
          description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Inference image: PyTorch CUDA runtime plus the Python packages installed below.
FROM pytorch/pytorch:2.2.1-cuda11.8-cudnn8-runtime

ENV base_path=/workspace/

# gcc is installed before the pip step.
# update + install share one layer so a cached, stale package index is never
# reused, and the apt lists are removed in that same layer.
# Recommended packages are kept on purpose: they provide the C library headers
# a compiler needs, and dropping them could break native builds.
RUN apt-get update && apt-get install -y gcc \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies (sorted); --no-cache-dir keeps pip's cache out of the image.
# NOTE(review): versions are unpinned - pin them for reproducible builds.
RUN pip install --no-cache-dir \
        boto3 \
        flask \
        flask_cors \
        glog \
        gunicorn \
        minio \
        mlflow \
        pydub \
        python-dotenv \
        python_speech_features \
        webrtcvad

# Application code is copied after the dependency layers so that a source
# change does not force the packages above to be reinstalled.
WORKDIR ${base_path}
COPY . ${base_path}

EXPOSE 3002

# NOTE(review): no CMD/ENTRYPOINT is set here, so the container falls back to
# the base image's default command - confirm how the service is started.
# CMD [ "python", "inference.py" ]
Oops, something went wrong.