Merge pull request #25 from CAUSOLDOUTMEN/feat/24-multipart

Feat: S3에서 Multipart로 변경 및 fuzzywuzzy 수정
CAUSOLDOUTMEN · Nov 17, 2023 · d0b1544
2 parents 3fef976 + e60dbaa
commit d0b1544
Show file tree

Hide file tree

Showing 5 changed files with 16 additions and 33 deletions.
diff --git a/cache/g.txt b/cache/g.txt
diff --git a/main.py b/main.py
@@ -5,9 +5,10 @@
 import boto3
 import configparser
 
+import numpy as np
 from starlette.responses import JSONResponse
 from utils.image_preprocess import PreProcessor
-from fastapi import FastAPI, HTTPException, Form
+from fastapi import FastAPI, HTTPException, File, UploadFile
 from botocore.exceptions import ClientError
 from pydantic import BaseModel
 import logging
@@ -28,19 +29,6 @@ class ImageRequest(BaseModel):
 ocr = PororoOcr()
 preprocessor = PreProcessor()
 
-aws_access_key = os.environ.get('AWS_ACCESS_KEY')
-aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
-bucket_name = os.environ.get('BUCKET_NAME')
-region_name = os.environ.get('REGION_NAME')
-
-
-s3_client = boto3.client(
-    's3',
-    aws_access_key_id=aws_access_key,
-    aws_secret_access_key=aws_secret_access_key,
-    region_name=region_name,
-)
-
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -57,16 +45,11 @@ def handle_unexpected_error(request, exc: Exception):
     )
 
 @app.post("/parse_nutrients", status_code=201)
-async def read_item(request: ImageRequest):
-    image_name = request.image_key
-    file_name = f"./cache/temp_{image_name}"
-    if not os.path.exists(file_name):
-        logger.info("Download image from s3")
-        try:
-            s3_client.download_file(bucket_name, image_name, file_name)
-        except ClientError:
-            raise HTTPException(status_code=404, detail='Image not found in S3')
-    image = cv2.imread(file_name, cv2.IMREAD_COLOR)
+async def read_item(file: UploadFile = File(...)):
+    contents = await file.read()
+    nparr = np.frombuffer(contents, np.uint8)
+
+    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
     result = nutrition_run(image)
     if not result:

diff --git a/test_image/output/cropped_table_enhanced.jpg b/test_image/output/cropped_table_enhanced.jpg
diff --git a/utils/nutrition_parser.py b/utils/nutrition_parser.py
@@ -2,10 +2,10 @@
 from fuzzywuzzy import process
 
 def correct_ocr_text(text):
-    target_words = ["칼로리", "탄수화물", "단백질", "지방"]
+    target_words = ["kcal", "탄수화물", "단백질", "지방"]
     corrected_text = text
     for word in target_words:
-        extracted_words = process.extractBests(word, text.split(), score_cutoff=75, limit=10)
+        extracted_words = process.extractBests(word, text.split(), score_cutoff=60, limit=10)
         for extracted_word, score in extracted_words:
             if abs(len(extracted_word) - len(word)) <= 1:
                 corrected_text = corrected_text.replace(extracted_word, word)
@@ -34,26 +34,26 @@ def parse_nutrients_from_text(text):
 
     for match in matches:
         if match[0] == '물': # 물로 끝나면 탄수화물이라고 판단
-            nutrient_dict['탄수화물'] = float(match[1])
+            nutrient_dict['carbohydrate'] = float(match[1])
         elif match[0] == '류': # 류로 끝나면 당류라고 판단
             nutrient_dict['당류'] = float(match[1])
         elif match[0] == '질': # 질로 끝나면 단백질이라고 판단
             if match[1].startswith('0') and len(match[1]) > 1: # 0으로 시작하는데 소수점을 잃은 경우(02g 등)에 대한 예외처리
-                nutrient_dict['단백질'] = float(match[1]) / 10
+                nutrient_dict['protein'] = float(match[1]) / 10
                 continue
-            nutrient_dict['단백질'] = float(match[1])
+            nutrient_dict['protein'] = float(match[1])
         elif match[0] == '방': # 방으로 끝나면 지방, 포화지방, 트랜스지방 이라고 판단 (합계)
             if match[1].startswith('0') and len(match[1]) > 1: # 0으로 시작하는데 소수점을 잃은 경우(02g 등)에 대한 예외처리
                 fats.append(float(match[1]) / 10)
                 continue
             fats.append(float(match[1]))
 
     if len(fats) != 0:
-        nutrient_dict['지방'] = max(fats) # "방"으로 끝나는 것들의 숫자를 파싱한 값 중 가장 큰 값을 지방으로 판단 (포함의 관계이므로)
+        nutrient_dict['fat'] = max(fats) # "방"으로 끝나는 것들의 숫자를 파싱한 값 중 가장 큰 값을 지방으로 판단 (포함의 관계이므로)
     kcal_matches = re.findall(kcal_pattern, text) # 칼로리는 kcal로 끝나는 숫자
     if not kcal_matches:
-        nutrient_dict['칼로리'] = -1
+        nutrient_dict['kcal'] = -1
     else:
-        nutrient_dict['칼로리'] = float(kcal_matches[0])
+        nutrient_dict['kcal'] = float(kcal_matches[0])
 
     return nutrient_dict
diff --git a/utils/nutrition_runner.py b/utils/nutrition_runner.py
@@ -23,7 +23,7 @@ def nutrition_run(image):
         realdata += d
     print('before correcting: ', realdata)
 
-    final_key = {'칼로리', '탄수화물', '단백질', '지방'}
+    final_key = {'kcal', 'carbohydrate', 'protein', 'fat'}
     final_dict = {key: -1 for key in final_key}
 
     if not realdata: