From 9853709947556579b9beca26ebf61dfdcdedba49 Mon Sep 17 00:00:00 2001
From: sinkyoungdeok
Date: Thu, 9 May 2024 01:36:19 +0900
Subject: [PATCH 1/7] =?UTF-8?q?[KAN-42]=20es=20batch=20=EC=9E=91=EC=97=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/es-batch-job.yml | 33 ++++++++++++++++++++
 csv-to-es.py                       | 49 ++++++++++++++++++++++++++++++
 es-requirements.txt                |  2 ++
 es.Dockerfile                      |  9 ++++++
 4 files changed, 93 insertions(+)
 create mode 100644 .github/workflows/es-batch-job.yml
 create mode 100644 csv-to-es.py
 create mode 100644 es-requirements.txt
 create mode 100644 es.Dockerfile

diff --git a/.github/workflows/es-batch-job.yml b/.github/workflows/es-batch-job.yml
new file mode 100644
index 0000000..0432508
--- /dev/null
+++ b/.github/workflows/es-batch-job.yml
@@ -0,0 +1,33 @@
+name: ElasticSearch Batch Job
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  schedule:
+    - cron: '0 1 * * *'
+
+jobs:
+  batch-job:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Docker Build And Push
+        run: |
+          docker login -u ${{ secrets.USERNAME }} -p ${{ secrets.PASSWORD }}
+          docker build -t skku-es-batch .
+          docker tag skku-es-batch sinkyoungdeok/skku-es-batch
+          docker push sinkyoungdeok/skku-es-batch
+      - name: Deploy Prod
+        uses: appleboy/ssh-action@v0.1.4
+        with:
+          key: ${{ secrets.SSH_KEY }}
+          host: ${{ secrets.HOST_NAME }}
+          username: ubuntu
+          port: 22
+          script: |
+            docker login -u ${{ secrets.USERNAME }} -p ${{ secrets.PASSWORD }}
+            docker pull sinkyoungdeok/skku-es-batch
+
+            docker run --net ubuntu_default sinkyoungdeok/skku-es-batch
\ No newline at end of file
diff --git a/csv-to-es.py b/csv-to-es.py
new file mode 100644
index 0000000..37a49c2
--- /dev/null
+++ b/csv-to-es.py
@@ -0,0 +1,49 @@
+import datetime
+
+import pandas as pd
+from elasticsearch import Elasticsearch
+
+file_path = 'restaurants.csv'
+df = pd.read_csv(file_path)
+
+now = datetime.datetime.now()
+index_name = f"restaurant_{now.strftime('%Y_%m_%d_%H-%M')}"
+
+# Elasticsearch 클라이언트 설정
+es = Elasticsearch("http://localhost:9200")
+
+# 새 인덱스 생성 및 매핑 설정
+if not es.indices.exists(index=index_name):
+    es.indices.create(index=index_name, mappings={
+        "properties": {
+            "name": {"type": "text"},
+            "category": {"type": "text"},
+        }
+    })
+
+# 데이터 인덱싱
+for _, row in df.iterrows():
+    response = es.index(index=index_name, document={
+        "name": row['name'],
+        "category": row['category'],
+    })
+    print(f"Indexed document ID: {response['_id']}, Result: {response['result']}")
+
+# 앨리어스 확인 및 설정
+if not es.indices.exists_alias(name="restaurant"):
+    # 앨리어스가 없으면 새 인덱스에 앨리어스 생성
+    es.indices.put_alias(index=index_name, name="restaurant")
+else:
+    # 기존에 앨리어스가 있다면, 앨리어스를 새 인덱스로 업데이트하고, 기존 인덱스 삭제
+    old_indices = list(es.indices.get_alias(name="restaurant").keys())
+    es.indices.update_aliases(body={
+        "actions": [
+            {"remove": {"index": "*", "alias": "restaurant"}},
+            {"add": {"index": index_name, "alias": "restaurant"}}
+        ]
+    })
+    for idx in old_indices:
+        if idx != index_name:
+            es.indices.delete(index=idx)
+
+print("Indexing complete, and alias updated.")
diff --git a/es-requirements.txt b/es-requirements.txt
new file mode 100644
index 0000000..64425f5
--- /dev/null
+++ b/es-requirements.txt
@@ -0,0 +1,2 @@
+pandas
+elasticsearch==8.2.0
\ No newline at end of file
diff --git a/es.Dockerfile b/es.Dockerfile
new file mode 100644
index 0000000..9ea3641
--- /dev/null
+++ b/es.Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.8-slim
+
+COPY requirements.txt requirements.txt
+COPY restaurants.csv restaurants.csv
+COPY csv-to-es.py csv-to-es.py
+
+RUN pip install -r es-requirements.txt
+
+ENTRYPOINT ["python3", "csv-to-es.py"]
\ No newline at end of file

From ae201be8e4006abfc87ea611bfdf499ec4f186fb Mon Sep 17 00:00:00 2001
From: sinkyoungdeok
Date: Thu, 9 May 2024 01:36:40 +0900
Subject: [PATCH 2/7] =?UTF-8?q?[KAN-42]=20es=20batch=20=EC=9E=91=EC=97=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/es-batch-job.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/es-batch-job.yml b/.github/workflows/es-batch-job.yml
index 0432508..9c57683 100644
--- a/.github/workflows/es-batch-job.yml
+++ b/.github/workflows/es-batch-job.yml
@@ -1,6 +1,7 @@
 name: ElasticSearch Batch Job
 
 on:
+  pull_request:
   workflow_dispatch:
   push:
     branches:

From d556d3692825d894290445bc0f7a47c02a9d9758 Mon Sep 17 00:00:00 2001
From: sinkyoungdeok
Date: Thu, 9 May 2024 01:40:15 +0900
Subject: [PATCH 3/7] =?UTF-8?q?[KAN-42]=20es=20batch=20=EC=9E=91=EC=97=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/es-batch-job.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/es-batch-job.yml b/.github/workflows/es-batch-job.yml
index 9c57683..381c8b4 100644
--- a/.github/workflows/es-batch-job.yml
+++ b/.github/workflows/es-batch-job.yml
@@ -17,7 +17,7 @@ jobs:
       - name: Docker Build And Push
         run: |
           docker login -u ${{ secrets.USERNAME }} -p ${{ secrets.PASSWORD }}
-          docker build -t skku-es-batch .
+          docker build -f es.Dockerfile -t skku-es-batch .
           docker tag skku-es-batch sinkyoungdeok/skku-es-batch
           docker push sinkyoungdeok/skku-es-batch
       - name: Deploy Prod

From 5fcfae70b99b886061b5e44148dadd300280576b Mon Sep 17 00:00:00 2001
From: sinkyoungdeok
Date: Thu, 9 May 2024 01:41:09 +0900
Subject: [PATCH 4/7] =?UTF-8?q?[KAN-42]=20es=20batch=20=EC=9E=91=EC=97=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 es.Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/es.Dockerfile b/es.Dockerfile
index 9ea3641..8631070 100644
--- a/es.Dockerfile
+++ b/es.Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3.8-slim
 
-COPY requirements.txt requirements.txt
+COPY es-requirements.txt es-requirements.txt
 COPY restaurants.csv restaurants.csv
 COPY csv-to-es.py csv-to-es.py
 

From d72f293467119af1c864cbe9f8ba8284beebf389 Mon Sep 17 00:00:00 2001
From: sinkyoungdeok
Date: Thu, 9 May 2024 01:43:33 +0900
Subject: [PATCH 5/7] =?UTF-8?q?[KAN-42]=20es=20batch=20=EC=9E=91=EC=97=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 csv-to-es.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csv-to-es.py b/csv-to-es.py
index 37a49c2..b5521ee 100644
--- a/csv-to-es.py
+++ b/csv-to-es.py
@@ -10,7 +10,7 @@
 index_name = f"restaurant_{now.strftime('%Y_%m_%d_%H-%M')}"
 
 # Elasticsearch 클라이언트 설정
-es = Elasticsearch("http://localhost:9200")
+es = Elasticsearch("es-singlenode:9200")
 
 # 새 인덱스 생성 및 매핑 설정
 if not es.indices.exists(index=index_name):

From f3e69969f266e664d23203ab1ead895e2f54b3fb Mon Sep 17 00:00:00 2001
From: sinkyoungdeok
Date: Thu, 9 May 2024 01:44:57 +0900
Subject: [PATCH 6/7] =?UTF-8?q?[KAN-42]=20es=20batch=20=EC=9E=91=EC=97=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 csv-to-es.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csv-to-es.py b/csv-to-es.py
index b5521ee..251d361 100644
--- a/csv-to-es.py
+++ b/csv-to-es.py
@@ -10,7 +10,7 @@
 index_name = f"restaurant_{now.strftime('%Y_%m_%d_%H-%M')}"
 
 # Elasticsearch 클라이언트 설정
-es = Elasticsearch("es-singlenode:9200")
+es = Elasticsearch("http://es-singlenode:9200")
 
 # 새 인덱스 생성 및 매핑 설정
 if not es.indices.exists(index=index_name):

From 7b45f76a601aace76ccf0f2ecb8da3b482369a0a Mon Sep 17 00:00:00 2001
From: sinkyoungdeok
Date: Thu, 9 May 2024 01:46:19 +0900
Subject: [PATCH 7/7] =?UTF-8?q?[KAN-42]=20es=20batch=20=EC=9E=91=EC=97=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/es-batch-job.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/es-batch-job.yml b/.github/workflows/es-batch-job.yml
index 381c8b4..0bcce83 100644
--- a/.github/workflows/es-batch-job.yml
+++ b/.github/workflows/es-batch-job.yml
@@ -1,7 +1,6 @@
 name: ElasticSearch Batch Job
 
 on:
-  pull_request:
   workflow_dispatch:
   push:
     branches: