Skip to content

Commit

Permalink
Update noisy.py
Browse files · Browse the repository at this point in the history
  • Loading branch information
ail1020 authored and Arduous committed Aug 31, 2018
1 parent dc80bbd commit 49ef8c1
Showing 1 changed file with 16 additions and 16 deletions.
32 changes: 16 additions & 16 deletions noisy.py
Original file line number Diff line number Diff line change
def crawl(self):
    """Run the crawler's main loop.

    Repeatedly picks a random root URL from the configuration, fetches it,
    extracts the links found in its body, and browses onward from those
    links. Recoverable per-URL failures are logged and the loop continues
    with a fresh random root.

    Returns:
        None. Exits only when ``self.CrawlerTimedOut`` is raised by a
        downstream call, meaning the configured crawl timeout elapsed.
    """
    # Record when crawling began so the timeout check (elsewhere in the
    # class) can measure elapsed time against it.
    self._start_time = datetime.datetime.now()

    while True:
        # Choose a random root each iteration so traffic is spread across
        # all configured root URLs rather than cycling them in order.
        url = random.choice(self._config["root_urls"])
        try:
            body = self._request(url).content
            self._links = self._extract_urls(body, url)
            logging.debug("found {} links".format(len(self._links)))
            self._browse_from_links()

        # logging.warn() is a deprecated alias; logging.warning() is the
        # supported API — message strings are unchanged.
        except requests.exceptions.RequestException:
            logging.warning("Error connecting to root url: {}".format(url))

        except MemoryError:
            # Oversized responses can exhaust memory during parsing;
            # log and move on to another random root.
            logging.warning("Error: content at url: {} is exhausting the memory".format(url))

        except LocationParseError:
            # urllib3 raises this for malformed URLs extracted from pages.
            logging.warning("Error encountered during parsing of: {}".format(url))

        except self.CrawlerTimedOut:
            # Configured crawl duration exceeded — stop cleanly.
            logging.info("Timeout has exceeded, exiting")
            return


def main():
Expand Down

0 comments on commit 49ef8c1

Please sign in to comment.