-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathworkers.py
157 lines (123 loc) · 7 KB
/
workers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
from distutils.log import error
from xmlrpc.client import DateTime
from helpers.dbConnect import DatabaseConnect
from helpers.dbOperations import scrapeSingle, createRangeLogTable, addToApproved
from helpers.getCases import casesNotUpdatedToday, NearApprovalAndFreshOrUnscanned, getCaseObj
from helpers.conversions import getStatusCode, handleUnknownCaseType
from helpers.checks import checkType, rangeLogTableExist, caseNotApproved
import numpy
from random import randint as rand, sample as sample
from time import sleep
import datetime
from constants import SAMPLE_SIZE
from helpers.conversions import scrapeAll
#goal: delete invalid cases from the table that stores the range's queryable cases, populate initial status code
def batchScrape(rangeId, frequency:str = None):
# print("rangeId from init:" + rangeId)
with DatabaseConnect("QueryableCases") as (cnx, cursor):
if cnx!=None:
if scrapeAll(0.5):
list = casesNotUpdatedToday(cursor, rangeId)
else:
list= NearApprovalAndFreshOrUnscanned(cursor, rangeId)
while len(list) !=0:
print(len(list))
caseNumber = list.pop()
try:
caseResult = scrapeSingle(caseNumber)
now = datetime.datetime.now()
dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
#not an invalid case #get scrape result
if caseResult!=None:
newTitle=caseResult['title']
newContent=caseResult['content']
newStatusCode = getStatusCode(newTitle)
newCaseType = checkType("", newContent)
newCaseType = handleUnknownCaseType(newStatusCode, newCaseType)
#add case to approved list if it got approved today
if caseNotApproved(cursor, rangeId, caseNumber) and newStatusCode in [9,10,11,15]:
print("caseNotApproved and new status is approved")
caseTup = getCaseObj(cursor, rangeId, caseNumber)
print(caseTup[0])
currType = caseTup[0]
inputType = newCaseType if (newCaseType!=None and newCaseType!="") else currType
addToApproved(caseNumber, inputType)
query ="UPDATE " +rangeId+ " SET statusCode = %s, lastFetched = %s, caseType = %s WHERE CaseNumber = %s"
cursor.execute(query, (newStatusCode, dt_string, newCaseType, caseNumber))
#an invalid case, only update lastFetched
else:
query ="UPDATE " +rangeId+ " SET caseType = 'invalid', lastFetched = %s WHERE CaseNumber = %s"
cursor.execute(query, (dt_string, caseNumber))
cnx.commit()
except:
print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!EXCEPTIONNN")
sleep(10)
batchScrape(rangeId)
else:
print('initial Batch scan failed due to database connection issues')
batchScrape(rangeId)
def checkAndFillRange(rangeId):
if not rangeLogTableExist(rangeId):
createRangeLogTable(rangeId)
with DatabaseConnect("RangeLog") as (cnx, cursor):
tableName = "R"+rangeId
now = datetime.datetime.now()
caseTypes = {"I-140":0,"I-765":0,"I-821":0,"I-131":0,"I-129":0,
"I-539":0,"I-130":0,"I-90":0,"I-485":0,"N-400":0,"I-751":0,
"I-824":0, "Approv":0, "OtherS":0}
for caseType in caseTypes.keys():
with DatabaseConnect("QueryableCases") as (cnx2,cursor2):
query="Select StatusCode from "+rangeId+" where CaseType=%s"
cursor2.execute(query, (caseType,))
statusCodesTups = cursor2.fetchall()
statusCodesDict ={"Received":0, "ActiveReview":0, "RFEreq":0,
"RFErec":0, "IntReady":0, "IntSched":0, "Denied":0,
"Approved":0, "Other":0, "FingTaken":0, "Transferred":0}
for tup in statusCodesTups:
if tup[0]==1:
statusCodesDict["Received"]+=1
if tup[0]==2:
statusCodesDict["ActiveReview"]+=1
if tup[0]==3:
statusCodesDict["RFEreq"]+=1
if tup[0]==4:
statusCodesDict["RFErec"]+=1
if tup[0]==5:
statusCodesDict["IntReady"]+=1
if tup[0]==6:
statusCodesDict["IntSched"]+=1
if tup[0]==7:
statusCodesDict["Denied"]+=1
if tup[0]==9 or tup[0]==10 or tup[0]==11 or tup[0]==12 or tup[0]==13 or tup[0]==15:
statusCodesDict["Approved"]+=1
if tup[0]==14:
statusCodesDict["Other"]+=1
if tup[0]==8:
statusCodesDict["FingTaken"]+=1
if tup[0]==16:
statusCodesDict["Transferred"]+=1
# initial insert unless today's already filled
insertQueryWhenNoDuplicate= "\
INSERT INTO "+tableName+" (CollectionDate, CaseType, Received, \
ActiveReview, RFEreq, RFErec, IntReady, IntSched, Denied, Approved, Other, FingTaken, Transferred) \
select %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s \
where NOT EXISTS(select * from " +tableName+" where CollectionDate=%s and CaseType=%s)"
cursor.execute(insertQueryWhenNoDuplicate, (now.strftime("%Y-%m-%d"), caseType,
statusCodesDict["Received"],statusCodesDict["ActiveReview"],
statusCodesDict["RFEreq"],statusCodesDict["RFErec"],
statusCodesDict["IntReady"], statusCodesDict["IntSched"],
statusCodesDict["Denied"], statusCodesDict["Approved"], statusCodesDict["Other"],
statusCodesDict["FingTaken"], statusCodesDict["Transferred"],
now.strftime("%Y-%m-%d"), caseType))
#if filled, update
insertQueryWhenDuplicate ="UPDATE "+tableName+" set Received=%s, \
ActiveReview=%s, RFEreq=%s, RFErec=%s, IntReady=%s, IntSched=%s, \
Denied=%s, Approved=%s, Other=%s, FingTaken=%s, Transferred=%s \
where CollectionDate=%s and CaseType=%s"
cursor.execute(insertQueryWhenDuplicate, (
statusCodesDict["Received"],statusCodesDict["ActiveReview"],
statusCodesDict["RFEreq"],statusCodesDict["RFErec"],
statusCodesDict["IntReady"], statusCodesDict["IntSched"],
statusCodesDict["Denied"], statusCodesDict["Approved"], statusCodesDict["Other"],
statusCodesDict["FingTaken"], statusCodesDict["Transferred"],
now.strftime("%Y-%m-%d"), caseType))