Merge pull request #33 from CAUSOLDOUTMEN/feat/32-clova-ocr

feat: clova ocr로 변경 (#32)
CAUSOLDOUTMEN · Jan 25, 2024 · 5c596b7 · 5c596b7
2 parents 62e0d2e + 26feeed
commit 5c596b7
Show file tree

Hide file tree

Showing 10 changed files with 151 additions and 25 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@
 *.env
 *.jpeg
 *.png
+clova.conf.dev
diff --git a/clova.conf b/clova.conf
@@ -0,0 +1,3 @@
+[clova_credentials]
+API_URL = ${API_URL}
+; Option name must match the "SECRET_KEY" lookup in utils/clova.py.
+; The placeholder keeps its original spelling because the variable it names
+; is defined outside this file -- rename both together if that is a typo too.
+SECRET_KEY = ${SECERT_KEY}
diff --git a/k8s/kustomization.yaml b/k8s/kustomization.yaml
@@ -0,0 +1,7 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+- manifest.yaml
+images:
+- name: synoti21/diareat-ocr
+  newName: synoti21/diareat-ocr
diff --git a/k8s/manifest.yaml b/k8s/manifest.yaml
@@ -0,0 +1,67 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: diareat-ocr
+  namespace: diareat
+spec:
+  # revisionHistoryLimit is a Deployment-level field, not part of the pod spec.
+  revisionHistoryLimit: 3
+  selector:
+    matchLabels:
+      app: diareat-ocr
+  template:
+    metadata:
+      labels:
+        app: diareat-ocr
+    spec:
+      containers:
+      - name: diareat-ocr
+        image: synoti21/diareat-ocr:latest
+        imagePullPolicy: Always
+        resources:
+          requests:
+            memory: "512Mi"
+            cpu: "0.2"
+          limits:
+            memory: "1Gi"
+            cpu: "0.8"
+        # envFrom belongs to the container (not to resources) and takes a
+        # list of sources.
+        envFrom:
+        - secretRef:
+            name: diareat-ocr-secret
+        ports:
+        - containerPort: 8000
+        readinessProbe:
+          httpGet:
+            path: /docs
+            port: 8000
+          initialDelaySeconds: 5
+          periodSeconds: 5
+          failureThreshold: 6
+---
+# Service selecting the pods labelled app=diareat-ocr.
+apiVersion: v1
+kind: Service
+metadata:
+  name: diareat-ocr
+  namespace: diareat
+spec:
+  selector:
+    app: diareat-ocr
+  ports:
+  # Service port 8800 forwards to port 8000 on the selected pods.
+  - port: 8800
+    targetPort: 8000
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+  name: diareatocr-route
+  namespace: diareat
+spec:
+  entryPoints:
+    - websecure
+  routes:
+    - match: Host(`diareat-ocr.thisiswandol.com`)
+      kind: Rule
+      services:
+        # Must name the Service defined in this manifest (diareat-ocr, port 8800).
+        - name: diareat-ocr
+          port: 8800
+  tls:
+    certResolver: myresolver
diff --git a/main.py b/main.py
@@ -2,6 +2,8 @@
 import numpy as np
 import uvicorn
 from starlette.responses import JSONResponse
+
+from utils.clova import clova_ocr
 from utils.image_preprocess import PreProcessor
 from fastapi import FastAPI, HTTPException, File, UploadFile
 from pydantic import BaseModel
@@ -39,11 +41,8 @@ def handle_unexpected_error(request, exc: Exception):
 @app.post("/parse_nutrients", status_code=201)
 async def read_item(file: UploadFile = File(...)):
     contents = await file.read()
-    nparr = np.frombuffer(contents, np.uint8)
-
-    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
 
-    result = nutrition_run(image)
+    result = clova_ocr(file, contents)
 
     if not result:
         raise HTTPException(status_code=422, detail='Text Recognition Fail')

diff --git a/pororo/__pycache__/pororo.cpython-311.pyc b/pororo/__pycache__/pororo.cpython-311.pyc
diff --git a/pororo/models/brainOCR/__pycache__/detection.cpython-311.pyc b/pororo/models/brainOCR/__pycache__/detection.cpython-311.pyc
diff --git a/test_image/output/cropped_table_enhanced.jpg b/test_image/output/cropped_table_enhanced.jpg
diff --git a/utils/clova.py b/utils/clova.py
@@ -0,0 +1,53 @@
+import requests
+import uuid
+import time
+import json
+import configparser
+
+
+from utils.nutrition_parser import parse_nutrients_from_text
+
+
+
+def clova_ocr(file, file_content):
+    parser = configparser.ConfigParser()
+    parser.read("./clova.conf")
+
+    api_url = parser.get("clova_credentials", "API_URL")
+    secret_key = parser.get("clova_credentials", "SECRET_KEY")
+
+    request_json = {
+        'images': [
+            {
+                'format': 'png',
+                'name': 'demo'
+            }
+        ],
+        'requestId': str(uuid.uuid4()),
+        'version': 'V2',
+        'timestamp': int(round(time.time() * 1000))
+    }
+
+    files = {
+        'message': (None, json.dumps(request_json), 'application/json'),
+        'file': (file.filename, file_content, file.content_type)
+    }
+
+    headers = {
+        'X-OCR-SECRET': secret_key
+    }
+    payload = {'message': json.dumps(request_json).encode('UTF-8')}
+
+    response = requests.request("POST", api_url, headers=headers, files=files)
+
+    res = json.loads(response.text.encode('utf8'))
+    print(res)
+
+    # 결과에서 Text만 추출하여 출력하기 위한 코드
+    text = res['images'][0]['fields']
+    answer = ''
+
+    # fileds 배열의 길이만큼 반복하면서 inferText 값을 담고 공백을 붙여줌
+    for i in range(len(text)):
+        answer += text[i]['inferText'] + ' '
+    return parse_nutrients_from_text(answer)
diff --git a/utils/nutrition_parser.py b/utils/nutrition_parser.py
@@ -21,42 +21,38 @@ def fix_nine_to_g(text):
                 text = text[:i] + 'g' + text[i + 1:] # 9를 g로 바꿔줌
     return text
 
-def parse_nutrients_from_text(text):
-    text = text.replace(',', '') # 파싱을 방해하는 , 문자 제거
-    text = text.replace("'", '') # ' 제거
-    text = fix_nine_to_g(text)
-
-    nutrient_pattern = r'(율|물|집|질|방)\s*(\d+(?:\.\d+)?)\s*g?'
+def parse_nutrients_from_text(text):  # part carried over from the previous code
+    """Extract carbohydrate, protein, fat and calorie values from OCR text.
+
+    Returns a dict keyed by '탄수화물', '단백질', '지방' and '칼로리'. The first
+    three stay 0 when nothing matches; '칼로리' is -1 when no number followed
+    by 'k' is found.
+    """
+    nutrient_pattern = r'(물|질|방)\s*(\d+(?:\.\d+)?)\s?g'
     kcal_pattern = r'(\d+)\s*k'
 
     matches = re.findall(nutrient_pattern, text)
-    print(matches)
-    nutrient_dict = {}
-    fats = [] # 지방, 포화지방, 트랜스지방을 담아 가장 큰 값을 지방으로 판단
+    nutrient_dict = {'탄수화물': 0, '단백질': 0, '지방': 0, '칼로리': 0}
+    fats = []  # collects fat, saturated fat and trans fat values; the largest is taken as fat
 
     for match in matches:
-        if match[0] == '물' or match[0] == '율': # 물로 끝나면 탄수화물이라고 판단
+        # print(match[0])
+        if match[0] == '물':  # a label ending in '물' is treated as carbohydrate
+            # A leading zero with no decimal point (e.g. '02') is read as a
+            # lost decimal point, hence the division by 10.
             if match[1].startswith('0') and len(match[1]) > 1 and match[1][1] != '.':
-                nutrient_dict['carbohydrate'] = float(match[1]) / 10
+                nutrient_dict['탄수화물'] = float(match[1]) / 10
                 continue
-            nutrient_dict['carbohydrate'] = float(match[1])
-        elif match[0] == '질' or match[0] == '집': # 질로 끝나면 단백질이라고 판단
-            if match[1].startswith('0') and len(match[1]) > 1 and match[1][1] != '.': # 0으로 시작하는데 소수점을 잃은 경우(02g 등)에 대한 예외처리
-                nutrient_dict['protein'] = float(match[1]) / 10
+            nutrient_dict['탄수화물'] = float(match[1])
+        elif match[0] == '질':  # a label ending in '질' is treated as protein
+            if match[1].startswith('0') and len(match[1]) > 1 and match[1][1] != '.':
+                nutrient_dict['단백질'] = float(match[1]) / 10
                 continue
-            nutrient_dict['protein'] = float(match[1])
-        elif match[0] == '방': # 방으로 끝나면 지방, 포화지방, 트랜스지방 이라고 판단 (합계)
+            nutrient_dict['단백질'] = float(match[1])
+        elif match[0] == '방':  # a label ending in '방' is treated as fat, saturated fat or trans fat (total)
             if match[1].startswith('0') and len(match[1]) > 1 and match[1][1] != '.':
                 fats.append(float(match[1]) / 10)
                 continue
             fats.append(float(match[1]))
 
     if len(fats) != 0:
-        nutrient_dict['fat'] = max(fats) # "방"으로 끝나는 것들의 숫자를 파싱한 값 중 가장 큰 값을 지방으로 판단 (포함의 관계이므로)
-    kcal_matches = re.findall(kcal_pattern, text) # 칼로리는 kcal로 끝나는 숫자
+        nutrient_dict['지방'] = max(fats)  # the largest value parsed after a '방' label is taken as fat (total fat includes the others)
+    kcal_matches = re.findall(kcal_pattern, text)  # calories are the number followed by kcal
     if not kcal_matches:
-        nutrient_dict['kcal'] = -1
+        nutrient_dict['칼로리'] = -1
     else:
-        nutrient_dict['kcal'] = float(kcal_matches[0])
+        nutrient_dict['칼로리'] = float(kcal_matches[0])
 
     return nutrient_dict
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,3 +5,4 @@ @@
     *.env
     *.jpeg
     *.png
+    clova.conf.dev