diff --git a/.github/workflows/es-batch-job.yml b/.github/workflows/es-batch-job.yml
new file mode 100644
index 0000000..0bcce83
--- /dev/null
+++ b/.github/workflows/es-batch-job.yml
@@ -0,0 +1,45 @@
+name: ElasticSearch Batch Job
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  schedule:
+    # Daily at 01:00 (GitHub evaluates cron schedules in UTC).
+    - cron: '0 1 * * *'
+
+jobs:
+  batch-job:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Docker Build And Push
+        env:
+          DOCKER_USERNAME: ${{ secrets.USERNAME }}
+          DOCKER_PASSWORD: ${{ secrets.PASSWORD }}
+        # The password is piped on stdin so it never shows up in process arguments.
+        run: |
+          echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
+          docker build -f es.Dockerfile -t skku-es-batch .
+          docker tag skku-es-batch sinkyoungdeok/skku-es-batch
+          docker push sinkyoungdeok/skku-es-batch
+      - name: Deploy Prod
+        uses: appleboy/ssh-action@v0.1.4
+        env:
+          DOCKER_USERNAME: ${{ secrets.USERNAME }}
+          DOCKER_PASSWORD: ${{ secrets.PASSWORD }}
+        with:
+          key: ${{ secrets.SSH_KEY }}
+          host: ${{ secrets.HOST_NAME }}
+          username: ubuntu
+          port: 22
+          # Credentials are forwarded to the remote shell through `envs` and piped
+          # to `docker login` on stdin. `docker run --rm` removes the finished
+          # container so the daily runs do not pile up exited containers.
+          envs: DOCKER_USERNAME,DOCKER_PASSWORD
+          script: |
+            echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
+            docker pull sinkyoungdeok/skku-es-batch
+
+            docker run --rm --net ubuntu_default sinkyoungdeok/skku-es-batch
diff --git a/csv-to-es.py b/csv-to-es.py
new file mode 100644
index 0000000..251d361
--- /dev/null
+++ b/csv-to-es.py
@@ -0,0 +1,76 @@
+"""Rebuild the restaurant search index from restaurants.csv.
+
+Each run bulk-loads the CSV into a new timestamped index, points the
+"restaurant" alias at it, and deletes the indices the alias used before.
+"""
+import datetime
+
+import pandas as pd
+from elasticsearch import Elasticsearch
+from elasticsearch.helpers import bulk
+
+FILE_PATH = 'restaurants.csv'
+ES_URL = "http://es-singlenode:9200"
+ALIAS = "restaurant"
+
+
+def read_documents(file_path):
+    """Return the CSV rows as a list of {"name", "category"} dicts."""
+    # Read every cell as a string and disable NA detection: an empty cell
+    # becomes "" instead of a float NaN, which would serialize to invalid JSON.
+    df = pd.read_csv(file_path, dtype=str, keep_default_na=False)
+    return df[["name", "category"]].to_dict(orient="records")
+
+
+def main():
+    """Create the new index, load it, and move the alias onto it."""
+    documents = read_documents(FILE_PATH)
+
+    now = datetime.datetime.now()
+    index_name = f"restaurant_{now.strftime('%Y_%m_%d_%H-%M')}"
+
+    # Configure the Elasticsearch client.
+    es = Elasticsearch(ES_URL)
+
+    # Create the new index with its mapping.
+    if not es.indices.exists(index=index_name):
+        es.indices.create(index=index_name, mappings={
+            "properties": {
+                "name": {"type": "text"},
+                "category": {"type": "text"},
+            }
+        })
+
+    # Index all rows through the bulk API instead of one request per row.
+    # bulk() raises BulkIndexError if any document is rejected, so the alias
+    # is only switched after a complete load.
+    indexed, _ = bulk(
+        es,
+        ({"_index": index_name, "_source": doc} for doc in documents),
+    )
+    print(f"Indexed {indexed} documents into {index_name}")
+
+    # Make the new documents searchable before the alias starts serving them.
+    es.indices.refresh(index=index_name)
+
+    # Check the alias and point it at the new index.
+    if not es.indices.exists_alias(name=ALIAS):
+        # No alias yet: create it on the new index.
+        es.indices.put_alias(index=index_name, name=ALIAS)
+    else:
+        # The alias already exists: move it to the new index in one atomic
+        # request, then delete the indices it pointed to before.
+        old_indices = list(es.indices.get_alias(name=ALIAS).keys())
+        es.indices.update_aliases(actions=[
+            {"remove": {"index": "*", "alias": ALIAS}},
+            {"add": {"index": index_name, "alias": ALIAS}},
+        ])
+        for idx in old_indices:
+            if idx != index_name:
+                es.indices.delete(index=idx)
+
+    print("Indexing complete, and alias updated.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/es-requirements.txt b/es-requirements.txt
new file mode 100644
index 0000000..64425f5
--- /dev/null
+++ b/es-requirements.txt
@@ -0,0 +1,2 @@
+pandas
+elasticsearch==8.2.0
\ No newline at end of file
diff --git a/es.Dockerfile b/es.Dockerfile
new file mode 100644
index 0000000..8631070
--- /dev/null
+++ b/es.Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.8-slim
+
+# Install dependencies before copying the data and script so this layer is
+# reused from cache when only restaurants.csv or csv-to-es.py changes.
+COPY es-requirements.txt es-requirements.txt
+RUN pip install --no-cache-dir -r es-requirements.txt
+
+COPY restaurants.csv restaurants.csv
+COPY csv-to-es.py csv-to-es.py
+
+ENTRYPOINT ["python3", "csv-to-es.py"]