diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 5c17b16..d178dd7 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,5 +1,6 @@ name: CI/CD Pipeline +# Trigger the workflow on push events to the main branch on: push: branches: @@ -10,37 +11,50 @@ jobs: runs-on: ubuntu-latest steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 - with: - region: ap-northeast-2 - access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - - - name: Build and push Docker image - id: build-image - run: | - IMAGE_TAG=$(date +%Y%m%d%H%M) - docker build -t ${{ secrets.ECR_REPOSITORY }}:$IMAGE_TAG . - docker tag ${{ secrets.ECR_REPOSITORY }}:$IMAGE_TAG ${{ secrets.ECR_REPOSITORY }}:latest - docker push ${{ secrets.ECR_REPOSITORY }}:$IMAGE_TAG - docker push ${{ secrets.ECR_REPOSITORY }}:latest - env: - DATABASE_URL: ${{ secrets.DATABASE_URL }} - - - name: Deploy to EC2 - run: | - ssh -o StrictHostKeyChecking=no -i ${{ secrets.EC2_SSH_KEY }} ec2-user@${{ secrets.EC2_INSTANCE_IP }} << 'EOF' - docker pull ${{ secrets.ECR_REPOSITORY }}:latest - docker-compose down - docker-compose up -d - EOF - env: - DATABASE_URL: ${{ secrets.DATABASE_URL }} + # Step 1: Check out the code from the repository + - name: Checkout code + uses: actions/checkout@v2 + + # Step 2: Set up Docker Buildx + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + # Step 3: Configure AWS credentials for the workflow + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ap-northeast-2 + + # Step 4: Log in to Amazon ECR (Elastic Container Registry) + - name: Login to Amazon 
ECR + run: | + aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws + + # Step 5: Build and push Docker image to Amazon ECR + - name: Build and push Docker image + id: build-image + run: | + IMAGE_TAG=$(date +%Y%m%d%H%M) + docker build -t public.ecr.aws/h7p2f6d8/ticats_ai:$IMAGE_TAG . + docker tag public.ecr.aws/h7p2f6d8/ticats_ai:$IMAGE_TAG public.ecr.aws/h7p2f6d8/ticats_ai:latest + docker push public.ecr.aws/h7p2f6d8/ticats_ai:$IMAGE_TAG + docker push public.ecr.aws/h7p2f6d8/ticats_ai:latest + env: + DATABASE_URL: ${{ secrets.DATABASE_URL }} + + # Step 6: Deploy to EC2 instance + - name: Run SSH Command + uses: appleboy/ssh-action@master + with: + host: ${{ secrets.EC2_INSTANCE_IP }} + username: ec2-user + key: ${{ secrets.EC2_SSH_KEY }} + port: 22 + script: | + docker stop ticats_ai || true + docker rm ticats_ai || true + docker image rm public.ecr.aws/h7p2f6d8/ticats_ai:latest || true + docker pull public.ecr.aws/h7p2f6d8/ticats_ai:latest + docker run -d -p 8000:8000 --name ticats_ai -e DATABASE_URL="${{ secrets.DATABASE_URL }}" public.ecr.aws/h7p2f6d8/ticats_ai:latest \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 4a9af80..1be3206 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,15 @@ FROM python:3.9 +# 작업 디렉토리 설정 WORKDIR /code +# 종속성 파일 복사 및 설치 +COPY ./requirements.txt /code/ +RUN pip install --no-cache-dir --upgrade -r requirements.txt + + +# 애플리케이션 코드 복사 COPY ./ /code/ -RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt \ No newline at end of file +ENTRYPOINT ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] + diff --git a/README.md b/README.md index 9c4cf00..522dc2c 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,16 @@ │ └─routers ``` -## Git Convention +## AI 협업 방법 +1. 가상환경 활성화(3.9버전) +2. pip install -r requirements.txt +3. uvicorn main:app --reload +4. 127.0.0.1:8000/docs 열기 +5. ocr 분석 실행 +6. run_ocr 잘 되는지 확인 +7. 
git push 또는 pr 올리기 +## Git Convention - github-flow를 따름 - branch Convention ```angular2html diff --git a/app/database.py b/app/database.py index 2e19cf4..7b974f2 100644 --- a/app/database.py +++ b/app/database.py @@ -13,4 +13,10 @@ Base = declarative_base() # 읽기 전용 세션 설정 (autocommit 제거) -SessionReadOnly = sessionmaker(bind=engine, autoflush=False) \ No newline at end of file +SessionReadOnly = sessionmaker(bind=engine, autoflush=False) +def get_db(): + db = SessionReadOnly() + try: + yield db + finally: + db.close() \ No newline at end of file diff --git a/app/dependencies.py b/app/dependencies.py deleted file mode 100644 index 76cf341..0000000 --- a/app/dependencies.py +++ /dev/null @@ -1,8 +0,0 @@ -from database import SessionReadOnly - -def get_db(): - db = SessionReadOnly() - try: - yield db - finally: - db.close() \ No newline at end of file diff --git a/app/routers/__init__.py b/app/routers/__init__.py index e69de29..8627205 100644 --- a/app/routers/__init__.py +++ b/app/routers/__init__.py @@ -0,0 +1 @@ +from app.services.ocr_service import OcrService diff --git a/app/routers/ocr_router.py b/app/routers/ocr_router.py index ea074ec..d137afa 100644 --- a/app/routers/ocr_router.py +++ b/app/routers/ocr_router.py @@ -1,11 +1,19 @@ -# Fast Api -from fastapi import APIRouter +from fastapi import APIRouter, Depends +from pydantic import BaseModel -# Schemas -from app.schemas.ocr_schema import OcrRequestSchema, OcrResponseSchema +from app.services.ocr_service import OcrService + + +# Dependency injection function +def get_ocr_service(): + return OcrService() + + +# Response schema definition +class OcrResponseSchema(BaseModel): + result: str + title: str -# Services -from app.services import ocr_service # Router router = APIRouter( @@ -14,7 +22,7 @@ ) -# Main Section @router.post("", response_model=OcrResponseSchema, summary="OCR 분석", description="") -async def upload_ocr_photo(ocr_photo: OcrRequestSchema): - return ocr_service.get_ocr_values(ocr_photo=ocr_photo) 
+async def upload_ocr_photo(ocr_service: OcrService = Depends(get_ocr_service)): + ocr_result = ocr_service.run_ocr() + return {"result": "hello", "title": "OCR Analysis Result"} diff --git a/app/routers/recommend_router.py b/app/routers/recommend_router.py index 22293a1..e4bd7ad 100644 --- a/app/routers/recommend_router.py +++ b/app/routers/recommend_router.py @@ -5,10 +5,10 @@ from fastapi import APIRouter from fastapi.params import Query from typing import List -from app.dependencies import get_db from fastapi import Depends from sqlalchemy.orm import Session +from app.database import get_db from app.services.recommend_service import RecommendService # Router diff --git a/app/services/keyword_service.py b/app/services/keyword_service.py index 82ce086..bcc4775 100644 --- a/app/services/keyword_service.py +++ b/app/services/keyword_service.py @@ -6,13 +6,10 @@ from torch.autograd.profiler import record_function np.bool = np.bool_ -from collections import Counter import warnings; warnings.filterwarnings('ignore') -from sklearn.metrics.pairwise import cosine_similarity -from sklearn.feature_extraction.text import CountVectorizer from torch.utils.data import Dataset from constants import STOP_WORDS import numpy as np @@ -137,43 +134,6 @@ def united_Processor(self, target: str) -> (str, str): return topics, sentiments - def content_recommender(self, base_df, content_id_lst, - return_type='return_id'): # 다중 인풋 처리용, id를 리스트 형태로 넣으면 됨(=content_id_lst). 
- count_vect = CountVectorizer(min_df=0, ngram_range=(1, 2), lowercase=False) # 유사도 측정을 위한 피처 백터화 - genre_mat = count_vect.fit_transform(base_df['sentiment']) # standard 인자 추가 가능 - - genre_sim = cosine_similarity(genre_mat, genre_mat) # 코사인 유사도 측정 - genre_sim_sorted_idx = genre_sim.argsort()[:, ::-1] # 유사도가 높은 순으로 인덱스 나열(각 인덱스(작품)별로) - - sentiment_candidates = [] - for content_id in content_id_lst: - content_idx = base_df[base_df['culturalEventId'] == content_id].index.values - similar_indexes = genre_sim_sorted_idx[content_idx, :4] # top3 만큼 가져옴(기준 인덱스 포함하여 4) - similar_indexes = similar_indexes[similar_indexes != content_idx].reshape(-1) - for idx in similar_indexes: - sentiment_candidates.append(base_df.loc[idx, 'sentiment']) # 감정 추출 - - sentiment_candidates = ', '.join(sentiment_candidates).split(',') - counter = Counter(sentiment_candidates) - recommend_sentiment = ', '.join([item[0].strip() for item in counter.most_common(2)]) # 빈도 수 높은 감정 상위 2개 추출 - - base_content = base_df[base_df['sentiment'] == recommend_sentiment].sample( - n=1).index.values # 같은 감정을 가진 작품 랜덤으로 선택 - similar_indexes = genre_sim_sorted_idx[base_content, :] # base와 비슷한 작품 추천 - similar_indexes = similar_indexes[similar_indexes != content_idx].reshape(-1) - - if return_type == 'return_id': # id - idx_to_id_lst = [] - for idx in similar_indexes: - idx_to_id_lst.append(base_df['culturalEventId'].iloc[idx]) - return idx_to_id_lst - - elif return_type == 'return_idx': # index - return similar_indexes - - else: - print('return_type error') - def sentiment_prediction(self, target_sentence, tok, vocab, model, device): start_time = time.time() # 시작 시간 기록 diff --git a/app/services/ocr_service.py b/app/services/ocr_service.py index 8ba2441..12bb12e 100644 --- a/app/services/ocr_service.py +++ b/app/services/ocr_service.py @@ -1,10 +1,8 @@ -# Fast-app -from app.schemas.ocr_schema import OcrRequestSchema - - # Main Section -def get_ocr_values(ocr_photo: OcrRequestSchema): - file = ocr_photo.file +class 
OcrService: + def run_ocr(self): + self.other_function() + return "hello" - values = {"title": "title example"} - return values + def other_function(self): + pass diff --git a/requirements.txt b/requirements.txt index acf2dc2..bb296ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,47 @@ +# 웹 프레임워크 및 API 관련 +fastapi==0.111.0 +fastapi-cli==0.0.4 +starlette==0.37.2 +uvicorn==0.30.1 +uvloop==0.19.0 +watchfiles==0.22.0 +websockets==12.0 + +# 데이터베이스 관련 alembic==1.13.2 -annotated-types==0.7.0 -anyio==4.4.0 +mysql-connector-python==9.0.0 +SQLAlchemy==2.0.31 +SQLAlchemy-Utils==0.41.2 + +# 데이터 과학 및 머신러닝 +numpy==1.25.2 +pandas==2.2.2 +scikit-learn==1.5.1 +scipy==1.13.1 +joblib==1.4.2 +threadpoolctl==3.5.0 +sympy==1.13.0 +mxnet==1.7.0.post2 +torch==1.10.1 +transformers==4.8.1 +huggingface-hub==0.0.12 +sentencepiece==0.1.96 +tokenizers==0.10.3 +onnxruntime==1.8.0 +tqdm==4.66.4 +gluonnlp==0.10.0 + +# 한국어 처리 +konlpy==0.6.0 +kobert @ git+https://git@github.com/SKTBrain/KoBERT.git@47a69af87928fc24e20f571fe10c3cc9dd9af9a3 +kobert-tokenizer @ git+https://github.com/SKTBrain/KoBERT.git@47a69af87928fc24e20f571fe10c3cc9dd9af9a3#subdirectory=kobert_hf + +# AWS 및 클라우드 관련 boto3==1.15.18 botocore==1.18.18 +s3transfer==0.3.7 + +# 유틸리티 및 도구 certifi==2024.7.4 charset-normalizer==3.3.2 click==8.1.7 @@ -11,42 +50,27 @@ Cython==3.0.10 dnspython==2.6.1 email_validator==2.2.0 exceptiongroup==1.2.1 -fastapi==0.111.0 -fastapi-cli==0.0.4 filelock==3.15.4 -flatbuffers==24.3.25 fsspec==2024.6.1 -gluonnlp==0.10.0 graphviz==0.8.4 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httptools==0.6.1 httpx==0.27.0 -huggingface-hub==0.0.12 humanfriendly==10.0 idna==3.7 Jinja2==3.1.4 jmespath==0.10.0 -joblib==1.4.2 JPype1==1.5.0 -kobert @ git+https://git@github.com/SKTBrain/KoBERT.git@47a69af87928fc24e20f571fe10c3cc9dd9af9a3 -kobert-tokenizer @ git+https://github.com/SKTBrain/KoBERT.git@47a69af87928fc24e20f571fe10c3cc9dd9af9a3#subdirectory=kobert_hf -konlpy==0.6.0 lxml==5.2.2 Mako==1.3.5 
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 mdurl==0.1.2
 mpmath==1.3.0
-mxnet==1.7.0.post2
-mysql-connector-python==9.0.0
-networkx==3.2.1
-numpy==1.25.2
-onnxruntime==1.8.0
 orjson==3.10.6
 packaging==24.1
-pandas==2.2.2
 protobuf==5.27.2
 pydantic==2.8.2
 pydantic_core==2.20.1
@@ -59,28 +83,12 @@ PyYAML==6.0.1
 regex==2024.5.15
 requests==2.32.3
 rich==13.7.1
-s3transfer==0.3.7
 sacremoses==0.1.1
-scikit-learn==1.5.1
-scipy==1.13.1
-sentencepiece==0.1.96
 shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.1
-SQLAlchemy==2.0.31
-starlette==0.37.2
-sympy==1.13.0
-threadpoolctl==3.5.0
-tokenizers==0.10.3
-torch==1.10.1
-tqdm==4.66.4
-transformers==4.8.1
-typer==0.12.3
 typing_extensions==4.12.2
 tzdata==2024.1
 ujson==5.10.0
 urllib3==1.25.11
-uvicorn==0.30.1
-uvloop==0.19.0
-watchfiles==0.22.0
-websockets==12.0
+typer==0.12.3