-
Notifications
You must be signed in to change notification settings - Fork 1
/
imageOCR.py
68 lines (51 loc) · 1.53 KB
/
imageOCR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import cv2
import requests
import io
import json
from matplotlib import pyplot as plt
# Azure region hosting the Computer Vision resource -- change it here.
region = "westus"

# OCR endpoint of the Computer Vision v2.0 REST API in that region.
URL = "https://{}.api.cognitive.microsoft.com/vision/v2.0/ocr".format(region)

# Headers sent with every OCR request; the body is posted as raw bytes.
HEADERS = {
    "Content-Type": "application/octet-stream",
    # Fill in the subscription key of the Computer Vision resource here.
    "Ocp-Apim-Subscription-Key": "YOUR_SUBSCRIPTION_KEY",
}
def getImageData(image):
    """Encode an in-memory image as PNG and return the encoded bytes.

    The image is encoded with OpenCV rather than matplotlib: the caller in
    this file passes the array returned by ``cv2.imread``, which stores
    colour channels in BGR order, whereas ``matplotlib.pyplot.imsave``
    interprets a three-channel array as RGB and would therefore write a
    PNG with red and blue swapped.

    Args:
        image: Image array in OpenCV layout (for example the result of
            ``cv2.imread``).

    Returns:
        bytes: The contents of a PNG file representing ``image``.

    Raises:
        ValueError: If OpenCV reports that the image could not be encoded.
    """
    # imencode returns a success flag plus a 1-D uint8 buffer with the file.
    ok, encoded = cv2.imencode(".png", image)
    if not ok:
        raise ValueError("could not encode image as PNG")
    return encoded.tobytes()
def saveToJSON(result, filename):
    """Write ``result`` to ``filename`` as JSON and report completion.

    Args:
        result: JSON-serialisable object to store.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, mode="w") as json_file:
        json.dump(result, fp=json_file)
    # Progress message for whoever is running the script.
    print("Done.")
def getOCRFromImage(path, timeout=30):
    """Run the OCR service on the image stored at ``path``.

    The image is loaded with OpenCV, re-encoded through ``getImageData``
    and posted as the raw request body to ``URL`` with ``HEADERS``.

    Args:
        path: Location of the image file to load.
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The decoded JSON body of the service response.

    Raises:
        OSError: If OpenCV cannot load an image from ``path``.
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread reports failure by returning None instead of raising.
    if img is None:
        raise OSError("could not read image file: {!r}".format(path))
    img_data = getImageData(img)
    r = requests.post(URL, data=img_data, headers=HEADERS, timeout=timeout)
    # Fail here on 4xx/5xx instead of handing an error payload to callers
    # that expect OCR regions.
    r.raise_for_status()
    return r.json()
def getStructuredText(data):
    """Reduce an OCR response to plain text lines grouped by region.

    Args:
        data: Parsed OCR response: a dict whose "regions" list holds dicts
            with a "lines" list, each line holding a "words" list of dicts
            that carry the recognised string under "text".

    Returns:
        dict: ``{"regions": [{"lines": [str, ...]}, ...]}`` with one string
        per OCR line, its words joined by single spaces (no trailing
        space; a line without words becomes ``""``).

    Raises:
        KeyError: If an expected key is missing from ``data``.
    """
    structured_text = {"regions": []}
    # Named ocr_region so the module-level ``region`` constant is not shadowed.
    for ocr_region in data["regions"]:
        text_lines = [
            " ".join(word["text"] for word in ocr_line["words"])
            for ocr_line in ocr_region["lines"]
        ]
        structured_text["regions"].append({"lines": text_lines})
    # Echo the result so a script run shows what was extracted.
    print(structured_text)
    return structured_text
# Script section: OCR the sample image and store the extracted text.
# NOTE(review): these statements run whenever the module is loaded;
# consider an ``if __name__ == "__main__":`` guard if it is ever imported.
path = "imgs-2.jpg"
data = getOCRFromImage(path=path)
text = getStructuredText(data=data)
saveToJSON(result=text, filename="imgs-2.json")