Skip to content

Commit

Permalink
Update noisy.py
Browse files · Browse the repository at this point in the history
  • Loading branch information
ail1020 authored and Arduous committed Aug 31, 2018
1 parent dc80bbd commit 49ef8c1
Showing 1 changed file with 16 additions and 16 deletions.
32 changes: 16 additions & 16 deletions noisy.py
Original file line number Diff line number Diff line change
def crawl(self):
    """Run the crawler's main loop.

    Repeatedly picks a random root URL from the configuration, fetches it,
    extracts the links found in its body, and browses onward from those
    links. Recoverable per-URL failures are logged and the loop continues
    with a fresh random root.

    Returns:
        None. Exits only when ``self.CrawlerTimedOut`` is raised by a
        downstream call, meaning the configured crawl timeout elapsed.
    """
    # Record when crawling began so the timeout check (elsewhere in the
    # class) can measure elapsed time against it.
    self._start_time = datetime.datetime.now()

    while True:
        # Choose a random root each iteration so traffic is spread across
        # all configured root URLs rather than cycling them in order.
        url = random.choice(self._config["root_urls"])
        try:
            body = self._request(url).content
            self._links = self._extract_urls(body, url)
            logging.debug("found {} links".format(len(self._links)))
            self._browse_from_links()

        # logging.warn() is a deprecated alias; logging.warning() is the
        # supported API — message strings are unchanged.
        except requests.exceptions.RequestException:
            logging.warning("Error connecting to root url: {}".format(url))

        except MemoryError:
            # Oversized responses can exhaust memory during parsing;
            # log and move on to another random root.
            logging.warning("Error: content at url: {} is exhausting the memory".format(url))

        except LocationParseError:
            # urllib3 raises this for malformed URLs extracted from pages.
            logging.warning("Error encountered during parsing of: {}".format(url))

        except self.CrawlerTimedOut:
            # Configured crawl duration exceeded — stop cleanly.
            logging.info("Timeout has exceeded, exiting")
            return


def main():
Expand Down

0 comments on commit 49ef8c1

Please sign in to comment.