Skip to content

Commit

Permalink
Merge pull request #25 from CAUSOLDOUTMEN/feat/24-multipart
Browse files: browse the repository at this point in the history
Feat: S3에서 Multipart로 변경 및 fuzzywuzzy 수정
  • Loading branch information
win-luck authored Nov 17, 2023
2 parents 3fef976 + e60dbaa commit d0b1544
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 33 deletions.
Empty file added cache/g.txt
Empty file.
31 changes: 7 additions & 24 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import boto3
import configparser

import numpy as np
from starlette.responses import JSONResponse
from utils.image_preprocess import PreProcessor
from fastapi import FastAPI, HTTPException, Form
from fastapi import FastAPI, HTTPException, File, UploadFile
from botocore.exceptions import ClientError
from pydantic import BaseModel
import logging
Expand All @@ -28,19 +29,6 @@ class ImageRequest(BaseModel):
ocr = PororoOcr()
preprocessor = PreProcessor()

aws_access_key = os.environ.get('AWS_ACCESS_KEY')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
bucket_name = os.environ.get('BUCKET_NAME')
region_name = os.environ.get('REGION_NAME')


s3_client = boto3.client(
's3',
aws_access_key_id=aws_access_key,
aws_secret_access_key=aws_secret_access_key,
region_name=region_name,
)


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Expand All @@ -57,16 +45,11 @@ def handle_unexpected_error(request, exc: Exception):
)

@app.post("/parse_nutrients", status_code=201)
async def read_item(request: ImageRequest):
image_name = request.image_key
file_name = f"./cache/temp_{image_name}"
if not os.path.exists(file_name):
logger.info("Download image from s3")
try:
s3_client.download_file(bucket_name, image_name, file_name)
except ClientError:
raise HTTPException(status_code=404, detail='Image not found in S3')
image = cv2.imread(file_name, cv2.IMREAD_COLOR)
async def read_item(file: UploadFile = File(...)):
contents = await file.read()
nparr = np.frombuffer(contents, np.uint8)

image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

result = nutrition_run(image)
if not result:
Expand Down
Binary file modified test_image/output/cropped_table_enhanced.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 8 additions & 8 deletions utils/nutrition_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from fuzzywuzzy import process

def correct_ocr_text(text):
target_words = ["칼로리", "탄수화물", "단백질", "지방"]
target_words = ["kcal", "탄수화물", "단백질", "지방"]
corrected_text = text
for word in target_words:
extracted_words = process.extractBests(word, text.split(), score_cutoff=75, limit=10)
extracted_words = process.extractBests(word, text.split(), score_cutoff=60, limit=10)
for extracted_word, score in extracted_words:
if abs(len(extracted_word) - len(word)) <= 1:
corrected_text = corrected_text.replace(extracted_word, word)
Expand Down Expand Up @@ -34,26 +34,26 @@ def parse_nutrients_from_text(text):

for match in matches:
if match[0] == '물': # 물로 끝나면 탄수화물이라고 판단
nutrient_dict['탄수화물'] = float(match[1])
nutrient_dict['carbohydrate'] = float(match[1])
elif match[0] == '류': # 류로 끝나면 당류라고 판단
nutrient_dict['당류'] = float(match[1])
elif match[0] == '질': # 질로 끝나면 단백질이라고 판단
if match[1].startswith('0') and len(match[1]) > 1: # 0으로 시작하는데 소수점을 잃은 경우(02g 등)에 대한 예외처리
nutrient_dict['단백질'] = float(match[1]) / 10
nutrient_dict['protein'] = float(match[1]) / 10
continue
nutrient_dict['단백질'] = float(match[1])
nutrient_dict['protein'] = float(match[1])
elif match[0] == '방': # 방으로 끝나면 지방, 포화지방, 트랜스지방 이라고 판단 (합계)
if match[1].startswith('0') and len(match[1]) > 1: # 0으로 시작하는데 소수점을 잃은 경우(02g 등)에 대한 예외처리
fats.append(float(match[1]) / 10)
continue
fats.append(float(match[1]))

if len(fats) != 0:
nutrient_dict['지방'] = max(fats) # "방"으로 끝나는 것들의 숫자를 파싱한 값 중 가장 큰 값을 지방으로 판단 (포함의 관계이므로)
nutrient_dict['fat'] = max(fats) # "방"으로 끝나는 것들의 숫자를 파싱한 값 중 가장 큰 값을 지방으로 판단 (포함의 관계이므로)
kcal_matches = re.findall(kcal_pattern, text) # 칼로리는 kcal로 끝나는 숫자
if not kcal_matches:
nutrient_dict['칼로리'] = -1
nutrient_dict['kcal'] = -1
else:
nutrient_dict['칼로리'] = float(kcal_matches[0])
nutrient_dict['kcal'] = float(kcal_matches[0])

return nutrient_dict
2 changes: 1 addition & 1 deletion utils/nutrition_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def nutrition_run(image):
realdata += d
print('before correcting: ', realdata)

final_key = {'칼로리', '탄수화물', '단백질', '지방'}
final_key = {'kcal', 'carbohydrate', 'protein', 'fat'}
final_dict = {key: -1 for key in final_key}

if not realdata:
Expand Down

0 comments on commit d0b1544

Please sign in to comment.