This repository has been archived by the owner on Nov 15, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhtn_ml.py
149 lines (130 loc) · 5.25 KB
/
htn_ml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -*- coding: utf-8 -*-
"""HTN-ML"""
# !pip install --upgrade azure-cognitiveservices-vision-computervision
# !pip install dateparser
# pip install azure-storage-blob
# !pip install pillow
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
import requests
from array import array
import os
from PIL import Image
import sys
import time
import re
from dateparser.search import search_dates
import datetime
import io
from PIL import Image
from azure.storage.blob import BlobClient
# Azure Computer Vision settings consumed by extract_info: the key is wrapped
# in CognitiveServicesCredentials and the endpoint is passed to
# ComputerVisionClient. Both are blank in this file.
# NOTE(review): presumably these must be filled in (or injected from the
# environment) before extract_info can authenticate -- confirm with the owner.
subscription_key = ""
endpoint = ""
def _line_height(line):
    """Return the mean of the two vertical spans of an OCR line's box.

    ``line.bounding_box`` is read as a flat sequence whose positions 1, 3, 5
    and 7 are the y-coordinates of the four corners.
    NOTE(review): presumably corners run clockwise from top-left (Azure Read
    API convention), making the spans the left and right edges -- confirm.
    """
    box = line.bounding_box
    first_span = box[7] - box[1]
    second_span = box[5] - box[3]
    return (first_span + second_span) / 2


def _at_clock(day, clock):
    """Return a naive datetime on *day*'s date at *clock*'s hour and minute."""
    return datetime.datetime(
        day.year, day.month, day.day, clock.hour, clock.minute)


def extract_info(path):
    """Upload an image, OCR it, and pull event details out of the text.

    The file at *path* is uploaded to the ``dropzone`` blob container, the
    resulting URL is submitted to the Computer Vision read operation, and the
    recognised lines are split into a title (the tallest lines), an optional
    weekly recurrence marker, and body text that is searched for dates/times.

    Args:
        path: Local filesystem path of the image to process.

    Returns:
        A 5-tuple ``(title, start, end, end_date, freq)``:

        * ``title`` -- text of every line whose height is within 5 units of
          the tallest line, each followed by a single space.
        * ``start`` -- the first date found (combined with the first time
          found when more than one match exists), or ``None``.
        * ``end`` -- with zero or one match, the same value as ``start``;
          otherwise a datetime on the start day at the end time, or an
          ``"HH:MM:SS"`` string when no start date was found.
        * ``end_date`` -- the last additional date found combined with the end
          time, or ``None`` (always ``None`` with zero or one match).
        * ``freq`` -- ``"WEEKLY"`` if a line reads "every <weekday>",
          otherwise ``None``.
    """
    blob_name = os.path.basename(path)
    blob = BlobClient.from_connection_string(
        conn_str="", container_name="dropzone", blob_name=blob_name)
    # NOTE(review): upload_blob is called without overwrite=True, so a second
    # upload of the same file name is expected to raise -- confirm intent.
    with open(path, "rb") as data:
        blob.upload_blob(data)
    remote_image_url = (
        "https://htnuploader.blob.core.windows.net/dropzone/" + blob_name)

    computervision_client = ComputerVisionClient(
        endpoint, CognitiveServicesCredentials(subscription_key))
    submitted = computervision_client.read(remote_image_url, raw=True)
    # The operation id is the last path segment of the Operation-Location URL.
    operation_id = submitted.headers["Operation-Location"].split("/")[-1]

    # Poll once per second until the read operation leaves its pending states.
    while True:
        read_result = computervision_client.get_read_result(operation_id)
        if read_result.status not in ['notStarted', 'running']:
            break
        time.sleep(1)

    # Flatten all pages into one list so the result is traversed only once.
    ocr_lines = []
    if read_result.status == OperationStatusCodes.succeeded:
        for page in read_result.analyze_result.read_results:
            ocr_lines.extend(page.lines)

    heights = [_line_height(line) for line in ocr_lines]
    tallest = max(heights + [0])

    weekly_phrases = (
        "every monday", "every tuesday", "every wednesday", "every thursday",
        "every friday", "every saturday", "every sunday")
    title_parts = []
    body_parts = []
    freq = None
    for line, height in zip(ocr_lines, heights):
        if line.text.lower() in weekly_phrases:
            # Recurrence markers belong to neither the title nor the body.
            freq = "WEEKLY"
            continue
        if tallest <= height + 5:
            title_parts.append(line.text + " ")
        else:
            body_parts.append(line.text + "\n")
    title = "".join(title_parts)
    body_text = "".join(body_parts)

    # search_dates returns None (not an empty list) when nothing is found.
    matches = search_dates(body_text) or []

    midnight = datetime.time(0, 0)
    start_clock = midnight
    end_clock = midnight
    start_date = None
    end_date = None
    seen_time = False
    for matched_text, parsed in matches:
        # Short matches whose first four characters contain a digit, '^' or
        # ':' are treated as bare times; everything else is a date.
        is_time_only = (
            len(matched_text) <= 8
            and re.search("[0-9^:]", matched_text[0:4]) is not None)
        if is_time_only:
            # Keep the parsed clock value itself instead of round-tripping it
            # through the locale-dependent "%X" text format.
            if seen_time:
                end_clock = parsed.time()
            else:
                start_clock = parsed.time()
                seen_time = True
        elif start_date is None:
            start_date = parsed
        else:
            end_date = parsed

    if len(matches) <= 1:
        return (title, start_date, start_date, None, freq)

    if start_date is not None:
        start_date = _at_clock(start_date, start_clock)
        end_value = _at_clock(start_date, end_clock)
    else:
        # No day to anchor the end time to: hand back the bare clock text.
        end_value = end_clock.strftime("%H:%M:%S")
    if end_date is not None:
        end_date = _at_clock(end_date, end_clock)
    return (title, start_date, end_value, end_date, freq)
# print(extract_info("http://drive.google.com/uc?export=view&id=13dhN8lcrE9iWyLnIIhqFlfygxIzjbaNI"))