-
Notifications
You must be signed in to change notification settings - Fork 7
/
request.py
276 lines (229 loc) · 9.62 KB
/
request.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# -*- coding: utf-8 -*-
"""
This module defines the classes used to describe and perform requests to individual VAMDC database nodes.
A successfully performed request returns an instance of results.Result.
"""
try:
from lxml import objectify
is_available_xml_objectify = True
except ImportError:
is_available_xml_objectify = False
from xml.etree import ElementTree
import urllib2
from settings import *
import query as q
import results as r
import nodes
from urlparse import urlparse
from httplib import HTTPConnection, HTTPSConnection, urlsplit, HTTPException, socket
from dateutil.parser import parse
# Presumably the XML namespace URI of the XSAMS 1.0 schema (TODO confirm);
# it is not referenced by the code visible in this module.
XSD = "http://vamdc.org/xml/xsams/1.0"
class TimeOutError(HTTPException):
    """
    Raised when a database node does not answer before the socket times out.

    The exception arguments are always (408, "Timeout"); the attribute
    'strerror' carries the text "Timeout".
    """
    def __init__(self):
        super(TimeOutError, self).__init__(408, "Timeout")
        self.strerror = "Timeout"
class NoContentError(Exception):
    """
    Raised when an operation needs content but none is available.

    expr: Expression (e.g. the name of the operation) that triggered the error
    msg:  Fixed, human readable description of the error
    """
    def __init__(self, expr):
        # The base-class initialiser is intentionally not invoked here, exactly
        # as before; only the two attributes below are set.
        self.msg = "No content to perform operation on"
        self.expr = expr
class Request(object):
    """
    A Request instance represents one request to a specified VAMDC database node.

    Attributes set by the methods of this class:
    status:       HTTP status of the last response (0 until a request was sent, 408 on socket timeout)
    reason:       Reason phrase belonging to 'status' ("INIT" until a request was sent)
    node:         nodes.Node instance given to 'setnode'
    baseurl:      Url of the node's synchronous endpoint, always ending in 'sync?'
    query:        query.Query instance given to (or created by) 'setquery'
    querypath:    Url-encoded query parameters which are appended to 'baseurl'
    xml:          Raw response body of the last parsed 'dorequest' call
    headers:      Dictionary filled by 'doheadrequest'
    lastmodified: Value determined by 'getlastmodified'
    """

    # Statistics stored by 'doheadrequest' whenever the node does not answer
    # the HEAD request with status 200.
    _EMPTY_STATISTICS = (
        ("vamdc-count-species", 0),
        ("vamdc-count-states", 0),
        ("vamdc-truncated", 0),
        ("vamdc-count-molecules", 0),
        ("vamdc-count-sources", 0),
        ("vamdc-approx-size", 0),
        ("vamdc-count-radiative", 0),
        ("vamdc-count-atoms", 0),
    )

    def __init__(self, node = None, query = None):
        """
        Initialize a request instance.

        node: Database-Node to which the request will be sent
        query: Query which will be performed on the database.
        """
        self.status = 0
        self.reason = "INIT"

        if node is not None:
            self.setnode(node)

        if query is not None:
            self.setquery(query)

    def setnode(self, node):
        """
        Sets the node to which the request will be sent. If the node has not been specified already during the
        initialization of the instance, it has to be specified before the request will be performed in order to
        obtain the Base-Url of the database node. Alternatively, the Base-Url can be set directly with the
        method 'setbaseurl'.

        Objects which are not nodes.Node instances are ignored. A node without (or with an empty) url is
        stored, but leaves the Base-Url untouched.
        """
        self.status = 0
        self.reason = "INIT"

        if not isinstance(node, nodes.Node):
            return

        self.node = node
        url = getattr(node, 'url', None)
        if url:
            self.setbaseurl(url)

    def setbaseurl(self, baseurl):
        """
        Sets the Base-Url to which the query will be sent. Usually this method is called internally via the
        method 'setnode' and is only called directly if requests shall be sent to nodes which are not
        registered in the VAMDC registry.

        The path of the synchronous endpoint ('sync?') is appended to the given url.
        """
        # endswith() (instead of indexing the last character) also copes with an empty string.
        if not baseurl.endswith('/'):
            baseurl += '/'
        self.baseurl = baseurl + 'sync?'

    def setquery(self, query):
        """
        Sets the query which shall be performed on the database node. Query can either be a query.Query
        instance or a string. The query has to be specified before the request can be performed.

        Values of any other type are ignored.
        """
        self.status = 0
        self.reason = "INIT"

        if isinstance(query, q.Query):
            self.query = query
        elif isinstance(query, (str, unicode)):
            self.query = q.Query(Query = query)
        else:
            # Neither a query object nor a query string: keep the previous query.
            return

        self.__setquerypath()

    def __setquerypath(self):
        """
        Sets the querypath which is appended to the node's 'base'-url.
        """
        self.querypath = "REQUEST=%s&LANG=%s&FORMAT=%s&QUERY=%s" % (
            self.query.Request,
            self.query.Lang,
            self.query.Format,
            urllib2.quote(self.query.Query))

    def _openconnection(self, timeout):
        """
        Creates the (not yet connected) HTTP(S) connection for the current Base-Url and querypath.

        Returns a tuple (connection, target) where 'target' is the path including the query string
        which has to be requested via the connection.
        """
        urlobj = urlsplit(self.baseurl + self.querypath)

        if urlobj.scheme == 'https':
            conn = HTTPSConnection(urlobj.netloc, timeout = timeout)
        else:
            conn = HTTPConnection(urlobj.netloc, timeout = timeout)

        return conn, urlobj.path + "?" + urlobj.query

    def _getresponse(self, conn):
        """
        Waits for the response on 'conn' and stores its status and reason in the instance.

        Raises TimeOutError (after setting status 408) if the socket times out while waiting.
        """
        try:
            res = conn.getresponse()
        except socket.timeout:
            self.status = 408
            self.reason = "Socket timeout"
            raise TimeOutError()

        self.status = res.status
        self.reason = res.reason
        return res

    def dorequest(self, timeout = TIMEOUT, HttpMethod = "POST", parsexsams = True):
        """
        Sends the request to the database node and returns a results.Result instance. The
        request uses 'POST' requests by default. If the node answers a 'POST' request with status 400,
        the request is repeated once as 'GET' request (using the same timeout).

        timeout:    Socket timeout which is passed to the HTTP connection
        HttpMethod: HTTP method used for the request ('POST' or 'GET')
        parsexsams: If True (default), the response body is stored in 'Xml' of the result and the model
                    is populated via 'populate_model'. Otherwise the body is only stored in 'Content'.

        Returns None if the node answered with any status other than 200.
        Raises TimeOutError if the socket timed out while waiting for the response.
        """
        self.xml = None

        conn, target = self._openconnection(timeout)
        try:
            conn.putrequest(HttpMethod, target)
            conn.endheaders()
            res = self._getresponse(conn)
            status = res.status
            # The body has to be read before the connection is closed.
            content = res.read() if status == 200 else None
        finally:
            conn.close()

        if status == 200:
            if parsexsams:
                self.xml = content
                result = r.Result()
                result.Xml = self.xml
                result.populate_model()
            else:
                result = r.Result()
                result.Content = content
            return result

        if status == 400 and HttpMethod == 'POST':
            # Try to use http-method: GET
            return self.dorequest(timeout = timeout, HttpMethod = 'GET', parsexsams = parsexsams)

        return None

    def doheadrequest(self, timeout = TIMEOUT):
        """
        Sends a HEAD request to the database node. The header returned by the database node contains some
        information on statistics. This information is stored in the headers object of the request instance.

        If the node does not answer with status 200, all 'vamdc-*' statistics are stored with value 0.
        Raises TimeOutError if the socket timed out while waiting for the response.
        """
        self.headers = {}

        conn, target = self._openconnection(timeout)
        try:
            conn.putrequest("HEAD", target)
            conn.endheaders()
            res = self._getresponse(conn)
            status = res.status
            headers = res.getheaders() if status == 200 else self._EMPTY_STATISTICS
        finally:
            conn.close()

        # Status 204 (no content) is an expected answer and therefore not reported.
        if status == 408:
            print("TIMEOUT")
        elif status not in (200, 204):
            print("STATUS: %d" % status)

        self.headers.update(headers)

    def getlastmodified(self):
        """
        Returns the 'last-modified' date which has been specified in the
        Header of the requested document.

        A HEAD request is sent first if the last request was not successful or if no headers have been
        retrieved yet. Returns None if the header is missing or its value could not be parsed.
        Raises NoContentError if the header is missing and the node answered with status 204.
        """
        if self.status != 200 or not hasattr(self, 'headers'):
            self.doheadrequest()

        # Reset first, so that neither a stale nor an undefined value is returned below.
        self.lastmodified = None

        if 'last-modified' in self.headers:
            try:
                self.lastmodified = parse(self.headers['last-modified'])
            except Exception as e:
                print("Could not parse date %s" % self.headers['last-modified'])
                print(e)
        elif self.status == 204:
            raise NoContentError('requets.getlastmodified')

        return self.lastmodified

    def getspecies(self):
        """
        Requests all species of the database node and returns a results.Result instance which contains the
        information in the format specified by the model (specmodel.py).

        The query which is sent selects all species whose InchiKey differs from 'UGFAIRIUMAVXCW'.
        """
        querystring = "SELECT SPECIES WHERE ((InchiKey!='UGFAIRIUMAVXCW'))"
        self.setquery(querystring)
        return self.dorequest()