meokgu-skku · sinkyoungdeok · May 8, 2024 · May 8, 2024 · May 8, 2024 · May 8, 2024
diff --git a/.github/workflows/es-batch-job.yml b/.github/workflows/es-batch-job.yml
@@ -0,0 +1,43 @@
+# Builds the skku-es-batch image, pushes it to Docker Hub, then runs it once on
+# the production host over SSH. Runs on manual dispatch, pushes to main, and daily.
+name: ElasticSearch Batch Job
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  schedule:
+    # GitHub Actions evaluates cron in UTC: every day at 01:00 UTC.
+    - cron: '0 1 * * *'
+
+jobs:
+  batch-job:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Docker Build And Push
+        # The password is piped to --password-stdin instead of being passed
+        # with -p, so it does not show up in the docker process arguments.
+        run: |
+          echo "${{ secrets.PASSWORD }}" | docker login -u "${{ secrets.USERNAME }}" --password-stdin
+          docker build -f es.Dockerfile -t skku-es-batch .
+          docker tag skku-es-batch sinkyoungdeok/skku-es-batch
+          docker push sinkyoungdeok/skku-es-batch
+      - name: Deploy Prod
+        # NOTE(review): this reference was garbled ("[email protected]") in the
+        # reviewed copy. The inputs below match appleboy/ssh-action; confirm the
+        # version tag before merging.
+        uses: appleboy/ssh-action@v1.0.3
+        with:
+          key: ${{ secrets.SSH_KEY }}
+          host: ${{ secrets.HOST_NAME }}
+          username: ubuntu
+          port: 22
+          # --rm deletes the container once the batch exits, so repeated runs
+          # do not leave stopped containers behind on the host.
+          script: |
+            echo "${{ secrets.PASSWORD }}" | docker login -u "${{ secrets.USERNAME }}" --password-stdin
+            docker pull sinkyoungdeok/skku-es-batch
+
+            docker run --rm --net ubuntu_default sinkyoungdeok/skku-es-batch
diff --git a/csv-to-es.py b/csv-to-es.py
@@ -0,0 +1,83 @@
+"""Rebuild the restaurant search index in Elasticsearch from restaurants.csv.
+
+Each run creates a new timestamped index, bulk-loads every CSV row into it,
+points the "restaurant" alias at it, and deletes the indices the alias
+pointed to before.
+"""
+import datetime
+
+import pandas as pd
+from elasticsearch import Elasticsearch, helpers
+
+ALIAS_NAME = "restaurant"
+
+
+def _json_safe(value):
+  """Return None for a missing CSV cell (pandas NaN/NA), else the value as-is.
+
+  NaN is not valid JSON, so Elasticsearch rejects documents that contain it.
+  """
+  return None if pd.isna(value) else value
+
+
+file_path = 'restaurants.csv'
+df = pd.read_csv(file_path)
+
+now = datetime.datetime.now()
+index_name = f"restaurant_{now.strftime('%Y_%m_%d_%H-%M')}"
+
+# Elasticsearch client configuration
+es = Elasticsearch("http://es-singlenode:9200")
+
+# Create the new index with its mapping, remembering whether this run made it
+created_index = False
+if not es.indices.exists(index=index_name):
+  es.indices.create(index=index_name, mappings={
+    "properties": {
+      "name": {"type": "text"},
+      "category": {"type": "text"},
+    }
+  })
+  created_index = True
+
+# Index all rows through the bulk API instead of one HTTP request per row
+actions = (
+  {
+    "_index": index_name,
+    "_source": {
+      "name": _json_safe(row['name']),
+      "category": _json_safe(row['category']),
+    },
+  }
+  for _, row in df.iterrows()
+)
+try:
+  indexed_count, _ = helpers.bulk(es, actions)
+except Exception:
+  # Do not leave a half-filled index behind; the alias has not been touched
+  # yet at this point.
+  if created_index:
+    es.indices.delete(index=index_name, ignore_unavailable=True)
+  raise
+print(f"Indexed {indexed_count} documents into {index_name}")
+
+# Collect the indices the alias currently points to (none on the first run)
+old_indices = []
+if es.indices.exists_alias(name=ALIAS_NAME):
+  old_indices = [
+    idx for idx in es.indices.get_alias(name=ALIAS_NAME).keys()
+    if idx != index_name
+  ]
+
+# Move the alias to the new index in a single update_aliases request
+alias_actions = [
+  {"remove": {"index": idx, "alias": ALIAS_NAME}} for idx in old_indices
+]
+alias_actions.append({"add": {"index": index_name, "alias": ALIAS_NAME}})
+es.indices.update_aliases(actions=alias_actions)
+
+# The old indices are no longer referenced by the alias; drop them
+for idx in old_indices:
+  es.indices.delete(index=idx)
+
+print("Indexing complete, and alias updated.")
diff --git a/es-requirements.txt b/es-requirements.txt
@@ -0,0 +1,2 @@
+pandas
+elasticsearch==8.2.0
diff --git a/es.Dockerfile b/es.Dockerfile
@@ -0,0 +1,12 @@
+# Image for the one-shot CSV -> Elasticsearch batch job (csv-to-es.py).
+FROM python:3.8-slim
+
+# Install dependencies before copying the script and data so this layer stays
+# cached when only csv-to-es.py or restaurants.csv change.
+COPY es-requirements.txt es-requirements.txt
+RUN pip install --no-cache-dir -r es-requirements.txt
+
+COPY restaurants.csv restaurants.csv
+COPY csv-to-es.py csv-to-es.py
+
+ENTRYPOINT ["python3", "csv-to-es.py"]