From 3a96f29fc0642d0fe73babcc39dff6a01268a8c4 Mon Sep 17 00:00:00 2001 From: Neeraj Edwards Date: Wed, 21 Nov 2018 12:20:42 +0530 Subject: [PATCH] Decoding error resolved --- SubDomainizer.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/SubDomainizer.py b/SubDomainizer.py index 82895d8..8cb7de9 100644 --- a/SubDomainizer.py +++ b/SubDomainizer.py @@ -71,8 +71,16 @@ def IntJsExtract(self, url, heads): req = requests.get(url, headers=heads) else: req = requests.get('http://' + url, headers=heads) + decoding = req.encoding + + if decoding: + decoding = decoding + else: + decoding = 'utf-8' + print(termcolor.colored("Searching for Inline Javascripts.....", color='yellow', attrs=['bold'])) + try: html = req.content.decode(decoding) minhtml = htmlmin.minify(html, remove_empty_space=True) @@ -92,7 +100,13 @@ def ExtJsExtract(self, url, heads): req = requests.get(url, headers=heads) else: req = requests.get('http://' + url, headers=heads) + decoding = req.encoding + if decoding: + decoding = decoding + else: + decoding = 'utf-8' + try: html = req.content.decode(decoding) soup = BeautifulSoup(html, features='html.parser')