-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcvrfparse.py
executable file
·291 lines (245 loc) · 11.9 KB
/
cvrfparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#!/usr/bin/env python
"""Parse/Validate a CVRF file and emit user-specified fields. Requires lxml.
"""
__author__ = "Mike Schiffman"
__email__ = "[email protected]"
__credits__ = "William McVey"
__date__ = "November 2013"
__revision__ = "1.0"
__maintainer__ = "Mike Schiffman"
import os
import sys
import copy
import codecs
import urllib2
import argparse
from lxml import etree
class CVRF_Syntax(object):
    """
    Central registry of the CVRF 1.1 vocabulary used by this tool. As CVRF evolves, make appropriate changes here.

    NAMESPACES maps an upper-cased schema name ("CVRF", "VULN", "PROD") to that schema's namespace URI wrapped
    in curly braces, i.e. the prefix found on fully qualified ElementTree tags.
    CVRF_ARGS / VULN_ARGS / PROD_ARGS list the element names accepted on the command line for each namespace;
    "all" is the glob keyword.
    CVRF_SCHEMA is the URL of the remote schema; CVRF_CATALOG is the default path of the local XML catalog.
    """
    NAMESPACES = dict(
        (schema.upper(), "{http://www.icasi.org/CVRF/schema/%s/1.1}" % schema)
        for schema in ("cvrf", "vuln", "prod")
    )
    CVRF_ARGS = [
        "all",
        "DocumentTitle",
        "DocumentType",
        "DocumentPublisher",
        "DocumentTracking",
        "DocumentNotes",
        "DocumentDistribution",
        "AggregateSeverity",
        "DocumentReferences",
        "Acknowledgments",
    ]
    VULN_ARGS = [
        "all",
        "Title",
        "ID",
        "Notes",
        "DiscoveryDate",
        "ReleaseDate",
        "Involvements",
        "CVE",
        "CWE",
        "ProductStatuses",
        "Threats",
        "CVSSScoreSets",
        "Remediations",
        "References",
        "Acknowledgments",
    ]
    PROD_ARGS = [
        "all",
        "Branch",
        "FullProductName",
        "Relationship",
        "ProductGroups",
    ]
    CVRF_SCHEMA = "http://www.icasi.org/CVRF/schema/cvrf/1.1/cvrf.xsd"
    CVRF_CATALOG = "./cvrfparse/schemata/catalog.xml"
class PrependerAction(argparse.Action):
    """
    Customization for argparse. Accumulates option values into a list, prefixing each value with the
    class attribute prepend_text (empty in this base class; subclasses override it).
    """
    prepend_text = ""

    def __call__(self, parser, namespace, values, option_string=None):
        # Work on a copy so a list already stored on the namespace is never mutated in place
        existing = getattr(namespace, self.dest, None)
        accumulated = copy.copy(existing) if existing is not None else []
        accumulated.extend(self.prepend_text + value for value in values)
        setattr(namespace, self.dest, accumulated)
class NonDupBracketFormatter(argparse.HelpFormatter):
    """
    Customization for argparse. A help formatter that renders repeatable arguments tersely:
    "[X ...]" for zero-or-more and "X [...]" for one-or-more, without repeating the metavar.
    """

    def _format_args(self, action, default_metavar):
        nargs = action.nargs
        # Anything other than "*" / "+" keeps the stock argparse rendering
        if nargs not in (argparse.ZERO_OR_MORE, argparse.ONE_OR_MORE):
            return super(NonDupBracketFormatter, self)._format_args(action, default_metavar)
        # The metavar formatter returns a 1-tuple here, which feeds the single %s below
        metavar = self._metavar_formatter(action, default_metavar)(1)
        if nargs == argparse.ZERO_OR_MORE:
            return "[%s ...]" % metavar
        return "%s [...]" % metavar
def namespace_prepend(namespace):
    """
    Build a PrependerAction subclass bound to one CVRF namespace
    namespace: a key of CVRF_Syntax.NAMESPACES
    returns: a dynamic class (not instance) named "Prepend_<namespace>" whose prepend_text is the
             namespace prefix looked up in CVRF_Syntax.NAMESPACES
    """
    class_name = "Prepend_%s" % namespace
    class_attrs = {"prepend_text": CVRF_Syntax.NAMESPACES[namespace]}
    return type(class_name, (PrependerAction,), class_attrs)
def chop_ns_prefix(element):
    """
    Return the local name of a (possibly) namespace-qualified element tag
    element: an ET element tag string, e.g. "{http://example.org/ns}Title"
    returns: the text following the last "}"; a tag carrying no namespace is returned unchanged
    """
    # rpartition() yields the whole string as its last item when "}" is absent, whereas a
    # rindex() lookup raises ValueError for un-namespaced tags.
    return element.rpartition("}")[2]
def print_node(node, strip_ns, f=sys.stdout):
    """
    Print a single XML node: its tag in square brackets, then its stripped text, then one
    "(key: value)" line per attribute
    node: the ElementTree node to be printed
    strip_ns: boolean that when true indicates the namespace prefix will be chomped
    f: the file to print to (default is stdout)
    """
    # NOTE(review): the block nesting below was reconstructed from a copy of this file that had lost
    # its indentation. In particular the placement of the final bare "print >> f" (inside the
    # attribute branch vs. unconditional) should be confirmed against the upstream source.
    if node.tag:
        # Trailing comma: no newline yet, so the text (if any) follows the tag on the same line
        print >> f, "[%s]" %(chop_ns_prefix(node.tag) if strip_ns else node.tag),
    if node.text:
        print >> f, node.text.strip()
    if node.attrib:
        for key in node.attrib:
            print >> f, "(%s: %s)" %(key, node.attrib[key])
        # Blank separator line after the attribute list
        print >> f
def cvrf_validate(f, cvrf_doc):
    """
    Validates a CVRF document against an XML Schema
    f: file-like object containing the schema (a local file or a URL response)
    cvrf_doc: the serialized CVRF ElementTree object
    returns: a (code, reason) tuple. code is True for valid / False for invalid; reason is "Valid",
             an error message string, or the schema validator's error log
    """
    # URL responses have no "name" attribute; fall back to "url" (or a placeholder) so that
    # reporting a schema problem cannot itself fail with AttributeError
    schema_name = getattr(f, "name", None) or getattr(f, "url", None) or "<schema>"
    try:
        xmlschema_doc = etree.parse(f)
    except etree.XMLSyntaxError as e:
        log = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
        return False, "Parsing error, schema document \"{0}\" is not well-formed: {1}".format(schema_name, log)
    try:
        xmlschema = etree.XMLSchema(xmlschema_doc)
    except etree.XMLSchemaParseError as e:
        # Well-formed XML that is not a usable XML Schema: report it through the normal
        # (code, reason) channel instead of letting the exception escape
        return False, "Parsing error, schema document \"{0}\" is not a valid XML Schema: {1}".format(schema_name, e)
    try:
        xmlschema.assertValid(cvrf_doc)
        return True, "Valid"
    except etree.DocumentInvalid:
        return False, xmlschema.error_log
def cvrf_dump(results, strip_ns):
    """
    Iterates over results and dumps each list of nodes to the destination named by its dictionary key
    results: a dictionary of the format: {filename: [ElementTree node, ...], ...}; the special key
             "stdout" writes to standard output instead of a file
    strip_ns: boolean that when true indicates the namespace prefix will be chomped
    returns: N/A; exits the program if an output file cannot be opened
    """
    for key in results:
        to_stdout = key == "stdout"
        if to_stdout:
            f = codecs.EncodedFile(sys.stdout, data_encoding="UTF-8")
        else:
            try:
                f = codecs.open(key, "w", encoding="UTF-8")
            except IOError as e:
                # Derive the program name locally: a module-level "progname" is only bound when this
                # file runs as a script, so relying on it breaks callers that import the module
                progname = os.path.basename(sys.argv[0])
                sys.exit("{0}: I/O error({1}) \"{2}\": {3}".format(progname, e.errno, key, e.strerror))
        try:
            for item in results[key]:
                print_node(item, strip_ns, f)
        finally:
            if to_stdout:
                # Closing the wrapper would close sys.stdout itself; flushing is enough
                f.flush()
            else:
                f.close()
def cvrf_dispatch(cvrf_doc, parsables, collate_vuln, strip_ns):
    """
    Run the user-requested actions against a CVRF document and dump whatever they produce
    cvrf_doc: the serialized CVRF ElementTree object
    parsables: list of elements to parse from the CVRF doc; nothing is parsed when it is empty
    collate_vuln: boolean indicating whether or not to collate the vulnerabilities
    strip_ns: boolean that when true indicates the namespace prefix will be chomped
    returns: N/A
    """
    # The two actions are independent; each one dumps its own result set
    if parsables:
        cvrf_dump(cvrf_parse(cvrf_doc, parsables), strip_ns)
    if collate_vuln:
        cvrf_dump(cvrf_collate_vuln(cvrf_doc), strip_ns)
def cvrf_parse(cvrf_doc, parsables):
    """
    Collect, for every requested tag, each matching element together with all of its descendants
    cvrf_doc: the serialized CVRF ElementTree object
    parsables: list of (fully qualified) element tags to pull from the CVRF doc
    returns: a dictionary of the format {filename:[item, ...]}; the only key is currently "stdout"
    """
    collected = []
    for tag in parsables:
        for match in cvrf_doc.iter(tag):
            # iter() on the match yields the match itself first, then everything beneath it
            collected.extend(match.iter())
    # Hardcoded output for now, eventually make this user-tunable
    return {"stdout": collected}
def cvrf_collate_vuln(cvrf_doc):
    """
    Zip through a cvrf_doc and return all vulnerability elements collated by ordinal
    cvrf_doc: the serialized CVRF ElementTree object
    returns: a dictionary of the format {filename:[item, ...], filename:[item, ...]} with one entry per
             Vulnerability element; filenames look like "cvrfparse-<title>-ordinal-<n>.txt"
    raises: KeyError if a Vulnerability element has no "Ordinal" attribute
    """
    results = {}
    # The namespaces mapping wants the bare URI, so tiptoe around the curly braces in our NS definition
    cvrf_uri = CVRF_Syntax.NAMESPACES["CVRF"].replace("{", "").replace("}", "")
    # findtext() returns None when the document has no DocumentTitle; treat that as an empty title
    # rather than crashing on None.strip()
    document_title = cvrf_doc.findtext("cvrf:DocumentTitle", namespaces={"cvrf": cvrf_uri}) or ""
    document_title = document_title.strip().replace(" ", "_")
    # A path separator inside the title would turn the filename into a path through a directory
    # that most likely does not exist, so neutralise it the same way as spaces
    for separator in (os.sep, os.altsep):
        if separator:
            document_title = document_title.replace(separator, "_")
    # Constrain Xpath search to the Vulnerability container
    for node in cvrf_doc.findall(".//" + CVRF_Syntax.NAMESPACES["VULN"] + "Vulnerability"):
        # Create filename based on ordinal number to use as a key for results dictionary
        filename = "cvrfparse-" + document_title + "-ordinal-" + node.attrib["Ordinal"] + ".txt"
        # Materialise the element and its descendants so the value is a real (re-iterable) list,
        # as the documented return format promises
        results[filename] = list(node.iter())
    return results
def post_process_arglist(arg, namespace, valid_args):
    """
    Expand the "all" glob in a namespace-prefixed argument list
    arg: list of namespace-prefixed element names collected from the command line
    namespace: key of CVRF_Syntax.NAMESPACES identifying which prefix to use
    valid_args: list of every un-prefixed name valid for that namespace, including "all"
    returns: a new list; every valid name (prefixed, minus the "all" keyword itself) when the glob is
             present in arg, otherwise a copy of arg
    """
    prefix = CVRF_Syntax.NAMESPACES[namespace]
    glob_keyword = prefix + "all"
    if glob_keyword not in arg:
        return list(arg)
    expanded = [prefix + name for name in valid_args]
    expanded.remove(glob_keyword)
    return expanded
def main(progname=None):
    """
    Command line entry point: parse the arguments, load the CVRF document, optionally validate it
    against the CVRF schema, then emit the requested elements
    progname: name used to prefix error messages (default is the basename of sys.argv[0])
    returns: N/A; exits via sys.exit() with a message on I/O, parsing or validation failure
    """
    progname = progname if progname else os.path.basename(sys.argv[0])
    parser = argparse.ArgumentParser(formatter_class=NonDupBracketFormatter,
                                     description="Validate/parse a CVRF 1.1 document and emit user-specified bits.")
    # "required" takes a boolean, not the string "True"
    parser.add_argument("-f", "--file", required=True, action="store",
                        help="candidate CVRF 1.1 XML file")
    parser.add_argument('--cvrf', nargs="*", choices=CVRF_Syntax.CVRF_ARGS,
                        action=namespace_prepend("CVRF"),
                        help="emit CVRF elements, use \"all\" to glob all CVRF elements.")
    parser.add_argument("--vuln", nargs="*", choices=CVRF_Syntax.VULN_ARGS,
                        action=namespace_prepend("VULN"),
                        help="emit Vulnerability elements, use \"all\" to glob all Vulnerability elements.")
    parser.add_argument("--prod", nargs="*", choices=CVRF_Syntax.PROD_ARGS,
                        action=namespace_prepend("PROD"),
                        help="emit ProductTree elements, use \"all\" to glob all ProductTree elements.")
    parser.add_argument("-c", "--collate", dest="collate_vuln", default=False,
                        action="store_true",
                        help="collate all of the Vulnerability elements by ordinal into separate files")
    parser.add_argument("-s", "--strip-ns", dest="strip_ns", default=False, action="store_true",
                        help="strip namespace header from element tags before printing")
    parser.add_argument("-V", "--validate", default=False, action="store_true",
                        help="validate the CVRF document")
    parser.add_argument("-S", "--schema", action="store",
                        help="specify local alternative for cvrf.xsd")
    parser.add_argument("-C", "--catalog", action="store",
                        help="specify location for catalog.xml (default is {0})".format(CVRF_Syntax.CVRF_CATALOG))
    parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __revision__)
    args = parser.parse_args()

    # Post process argument lists into a single list, handling 'all' globs if present
    # this block should probably eventually be folded into argparse
    parsables = []
    if args.cvrf:
        parsables.extend(post_process_arglist(args.cvrf, "CVRF", CVRF_Syntax.CVRF_ARGS))
    if args.vuln:
        parsables.extend(post_process_arglist(args.vuln, "VULN", CVRF_Syntax.VULN_ARGS))
    if args.prod:
        parsables.extend(post_process_arglist(args.prod, "PROD", CVRF_Syntax.PROD_ARGS))

    # First things first: parse the document (to ensure it is well-formed XML) to obtain an ElementTree object
    # to pass to the CVRF validator/parser
    try:
        cvrf_doc = etree.parse(args.file, etree.XMLParser(encoding="utf-8"))
    except IOError:
        sys.exit("{0}: I/O error: \"{1}\" does not exist".format(progname, args.file))
    except etree.XMLSyntaxError as e:
        sys.exit("{0}: Parsing error, document \"{1}\" is not well-formed: {2}".format(
            progname, args.file, e.error_log.filter_from_level(etree.ErrorLevels.FATAL)))

    if args.validate is True:
        # Remember which schema we are loading so an I/O error names the real source; args.schema
        # is None when the remote schema is being fetched
        schema_source = args.schema if args.schema else CVRF_Syntax.CVRF_SCHEMA
        try:
            if args.schema:
                # Try to use local schema files
                f = open(args.schema, 'r')
                # If the supplied file is not a valid catalog.xml or doesn't exist lxml will fall back to
                # using remote validation
                catalog = args.catalog if args.catalog else CVRF_Syntax.CVRF_CATALOG
                os.environ.update(XML_CATALOG_FILES=catalog)
            else:
                print >> sys.stderr, "Fetching schemata..."
                f = urllib2.urlopen(CVRF_Syntax.CVRF_SCHEMA)
        except IOError as e:
            # Errors from the remote fetch may carry no strerror; fall back to the exception text
            detail = e.strerror if e.strerror else e
            sys.exit("{0}: I/O error({1}) \"{2}\": {3}".format(progname, e.errno, schema_source, detail))
        # Release the schema handle even if validation raises unexpectedly
        try:
            (code, result) = cvrf_validate(f, cvrf_doc)
        finally:
            f.close()
        if code is False:
            sys.exit("{0}: {1}".format(progname, result))
        else:
            print >> sys.stderr, result

    cvrf_dispatch(cvrf_doc, parsables, collate_vuln=args.collate_vuln, strip_ns=args.strip_ns)
if __name__ == "__main__":
    # Bound at module level (not only passed to main) so that code looking up the global
    # "progname" can find it when the file runs as a script
    progname = os.path.basename(sys.argv[0])
    try:
        main(progname)
    except Exception:
        # Last-resort handler: show the traceback, then exit with a one-line summary.
        # sys.exit() calls made inside main() raise SystemExit, which is not caught here.
        (exc_type, exc_value, exc_tb) = sys.exc_info()
        sys.excepthook(exc_type, exc_value, exc_tb)     # if debugging
        sys.exit("%s: %s: %s" % (progname, exc_type.__name__, exc_value))
    sys.exit(0)