Skip to content

Commit

Permalink
Merge pull request #33 from CAUSOLDOUTMEN/feat/32-clova-ocr
Browse files Browse the repository at this point in the history
feat: clova ocr로 변경 (#32)
  • Loading branch information
win-luck authored Jan 25, 2024
2 parents 62e0d2e + 26feeed commit 5c596b7
Show file tree
Hide file tree
Showing 10 changed files with 151 additions and 25 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
*.env
*.jpeg
*.png
clova.conf.dev
3 changes: 3 additions & 0 deletions clova.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[clova_credentials]
API_URL = ${API_URL}
SECRET_KEY = ${SECERT_KEY}
7 changes: 7 additions & 0 deletions k8s/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Kustomize entry point for the k8s/ directory of the diareat-ocr service.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
# Manifests assembled by this kustomization.
resources:
- manifest.yaml
# Image entry for synoti21/diareat-ocr. newName equals name, so the image
# name itself is left unchanged by this entry.
# NOTE(review): presumably the deploy pipeline adds a tag override here — confirm.
images:
- name: synoti21/diareat-ocr
newName: synoti21/diareat-ocr
67 changes: 67 additions & 0 deletions k8s/manifest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Deployment, Service and Traefik IngressRoute for the diareat-ocr API.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: diareat-ocr
  namespace: diareat
spec:
  # revisionHistoryLimit is a Deployment-spec field (old ReplicaSets kept for
  # rollback); it is not valid inside the pod template spec.
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: diareat-ocr
  template:
    metadata:
      labels:
        app: diareat-ocr
    spec:
      containers:
        - name: diareat-ocr
          image: synoti21/diareat-ocr:latest
          imagePullPolicy: Always
          resources:
            requests:
              memory: "512Mi"
              cpu: "0.2"
            limits:
              memory: "1Gi"
              cpu: "0.8"
          # envFrom is a list of sources; every key of the secret is exposed
          # to the container as an environment variable.
          envFrom:
            - secretRef:
                name: diareat-ocr-secret
          ports:
            - containerPort: 8000
          # The pod receives traffic only after GET /docs on port 8000 succeeds.
          readinessProbe:
            httpGet:
              path: /docs
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 6
---
# Cluster-internal Service: port 8800 forwards to container port 8000.
apiVersion: v1
kind: Service
metadata:
  name: diareat-ocr
  namespace: diareat
spec:
  selector:
    app: diareat-ocr
  ports:
    - port: 8800
      targetPort: 8000
---
# HTTPS route for the OCR host name.
apiVersion: traefik.containo.us/v1alpha1
kind: IngressRoute
metadata:
  name: diareatocr-route
  namespace: diareat
spec:
  entryPoints:
    - websecure
  routes:
    - match: Host(`diareat-ocr.thisiswandol.com`)
      kind: Rule
      services:
        # Must name the Service declared in this manifest (diareat-ocr:8800);
        # the previous value "diareat-svc" is not defined in this file.
        - name: diareat-ocr
          port: 8800
  tls:
    certResolver: myresolver
7 changes: 3 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import numpy as np
import uvicorn
from starlette.responses import JSONResponse

from utils.clova import clova_ocr
from utils.image_preprocess import PreProcessor
from fastapi import FastAPI, HTTPException, File, UploadFile
from pydantic import BaseModel
Expand Down Expand Up @@ -39,11 +41,8 @@ def handle_unexpected_error(request, exc: Exception):
@app.post("/parse_nutrients", status_code=201)
async def read_item(file: UploadFile = File(...)):
contents = await file.read()
nparr = np.frombuffer(contents, np.uint8)

image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

result = nutrition_run(image)
result = clova_ocr(file, contents)

if not result:
raise HTTPException(status_code=422, detail='Text Recognition Fail')
Expand Down
Binary file modified pororo/__pycache__/pororo.cpython-311.pyc
Binary file not shown.
Binary file modified pororo/models/brainOCR/__pycache__/detection.cpython-311.pyc
Binary file not shown.
Binary file modified test_image/output/cropped_table_enhanced.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
53 changes: 53 additions & 0 deletions utils/clova.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import requests
import uuid
import time
import json
import configparser


from utils.nutrition_parser import parse_nutrients_from_text



def clova_ocr(file, file_content, timeout=30):
    """Run CLOVA OCR on an uploaded image and parse nutrients from its text.

    Credentials are read from the ``clova_credentials`` section of
    ``./clova.conf``. The image is posted as a multipart request and every
    recognised ``inferText`` is joined (each followed by a space) before
    being handed to ``parse_nutrients_from_text``.

    Args:
        file: Upload object; only ``filename`` and ``content_type`` are read.
        file_content: Raw content of the uploaded file.
        timeout: Seconds to wait for the OCR service before giving up.

    Returns:
        The value returned by ``parse_nutrients_from_text``, or ``None``
        when the OCR response carries no ``images[0].fields`` entry, so
        the caller can report a recognition failure.

    Raises:
        configparser.Error: The config section or an option is missing.
        requests.RequestException: The HTTP call failed or timed out.
        ValueError: The response body is not valid JSON.
    """
    section = "clova_credentials"
    parser = configparser.ConfigParser()
    parser.read("./clova.conf")

    api_url = parser.get(section, "API_URL")
    # The clova.conf template committed with this code spells the option
    # "SECERT_KEY"; accept that spelling so either file works.
    secret_key = parser.get(section, "SECRET_KEY", fallback=None)
    if secret_key is None:
        secret_key = parser.get(section, "SECERT_KEY")

    # Declare the real image format when the file extension is one the OCR
    # API accepts; otherwise keep the previous default of 'png'.
    filename = file.filename or ''
    extension = filename.rsplit('.', 1)[-1].lower() if '.' in filename else ''
    accepted_formats = {'jpg', 'jpeg', 'png', 'pdf', 'tif', 'tiff'}
    image_format = extension if extension in accepted_formats else 'png'

    request_json = {
        'images': [
            {
                'format': image_format,
                'name': 'demo',
            },
        ],
        'requestId': str(uuid.uuid4()),
        'version': 'V2',
        'timestamp': int(round(time.time() * 1000)),
    }

    files = {
        'message': (None, json.dumps(request_json), 'application/json'),
        'file': (file.filename, file_content, file.content_type),
    }
    headers = {'X-OCR-SECRET': secret_key}

    # Without a timeout a stalled OCR service would block the request forever.
    response = requests.post(api_url, headers=headers, files=files,
                             timeout=timeout)

    res = json.loads(response.text)
    print(res)

    # Error responses have no images/fields; return None instead of raising
    # KeyError/IndexError so the caller's "no result" check can handle it.
    images = res.get('images') if isinstance(res, dict) else None
    if not images:
        return None
    fields = images[0].get('fields')
    if fields is None:
        return None

    # Keep only the recognised text, each piece followed by a space.
    answer = ''.join(field['inferText'] + ' ' for field in fields)
    return parse_nutrients_from_text(answer)
38 changes: 17 additions & 21 deletions utils/nutrition_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,42 +21,38 @@ def fix_nine_to_g(text):
text = text[:i] + 'g' + text[i + 1:] # 9를 g로 바꿔줌
return text

# NOTE(review): this span is a rendered diff of parse_nutrients_from_text;
# removed and added lines appear interleaved without +/- markers, so
# several statements show up twice (old key names in English, new key
# names in Korean). Only comments were touched here.
def parse_nutrients_from_text(text):
text = text.replace(',', '') # remove ',' characters that get in the way of parsing
text = text.replace("'", '') # remove "'"
text = fix_nine_to_g(text)

nutrient_pattern = r'(율|물|집|질|방)\s*(\d+(?:\.\d+)?)\s*g?'
def parse_nutrients_from_text(text): # part that already existed in the previous code
nutrient_pattern = r'(물|질|방)\s*(\d+(?:\.\d+)?)\s?g'
kcal_pattern = r'(\d+)\s*k'

matches = re.findall(nutrient_pattern, text)
print(matches)
nutrient_dict = {}
fats = [] # collects fat, saturated fat and trans fat; the largest value is taken as fat
nutrient_dict = {'탄수화물': 0, '단백질': 0, '지방': 0, '칼로리': 0}
fats = [] # collects fat, saturated fat and trans fat; the largest value is taken as fat

for match in matches:
if match[0] == '물' or match[0] == '율': # a label ending in '물' is treated as carbohydrate
# print(match[0])
if match[0] == '물': # a label ending in '물' is treated as carbohydrate
if match[1].startswith('0') and len(match[1]) > 1 and match[1][1] != '.':
nutrient_dict['carbohydrate'] = float(match[1]) / 10
nutrient_dict['탄수화물'] = float(match[1]) / 10
continue
nutrient_dict['carbohydrate'] = float(match[1])
elif match[0] == '질' or match[0] == '집': # a label ending in '질' is treated as protein
if match[1].startswith('0') and len(match[1]) > 1 and match[1][1] != '.': # handles a leading 0 whose decimal point was lost (e.g. 02g)
nutrient_dict['protein'] = float(match[1]) / 10
nutrient_dict['탄수화물'] = float(match[1])
elif match[0] == '질': # a label ending in '질' is treated as protein
if match[1].startswith('0') and len(match[1]) > 1 and match[1][1] != '.':
nutrient_dict['단백질'] = float(match[1]) / 10
continue
nutrient_dict['protein'] = float(match[1])
elif match[0] == '방': # a label ending in '방' is treated as fat, saturated fat or trans fat (total)
nutrient_dict['단백질'] = float(match[1])
elif match[0] == '방': # a label ending in '방' is treated as fat, saturated fat or trans fat (total)
if match[1].startswith('0') and len(match[1]) > 1 and match[1][1] != '.':
fats.append(float(match[1]) / 10)
continue
fats.append(float(match[1]))

if len(fats) != 0:
nutrient_dict['fat'] = max(fats) # the largest number parsed after a '방' label is taken as fat (the others are subsets of it)
kcal_matches = re.findall(kcal_pattern, text) # calories are the number followed by kcal
nutrient_dict['지방'] = max(fats) # the largest number parsed after a '방' label is taken as fat (the others are subsets of it)
kcal_matches = re.findall(kcal_pattern, text) # calories are the number followed by kcal
if not kcal_matches:
nutrient_dict['kcal'] = -1
nutrient_dict['칼로리'] = -1
else:
nutrient_dict['kcal'] = float(kcal_matches[0])
nutrient_dict['칼로리'] = float(kcal_matches[0])

return nutrient_dict

0 comments on commit 5c596b7

Please sign in to comment.