diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py index 4156678..0c40437 100644 --- a/bing_image_downloader/bing.py +++ b/bing_image_downloader/bing.py @@ -12,15 +12,19 @@ class Bing: - def __init__(self, query, limit, output_dir, adult, timeout, filter='', verbose=True): + def __init__(self, query, limit, output_dir, adult, timeout, filter='', verbose=True,badsites=[]): self.download_count = 0 self.query = query self.output_dir = output_dir self.adult = adult self.filter = filter self.verbose = verbose + self.badsites = badsites self.seen = set() + if self.badsites: + print("Download links will not include: {}".format(*self.badsites),sep=', ') + assert type(limit) == int, "limit must be integer" self.limit = limit assert type(timeout) == int, "timeout must be integer" @@ -106,7 +110,21 @@ def run(self): print("[%] Indexed {} Images on Page {}.".format(len(links), self.page_counter + 1)) print("\n===============================================\n") - for link in links: + + for link in links: + + isbadsite = False + + for badsite in self.badsites: + isbadsite = badsite in link + if isbadsite: + if self.verbose: + print("[!] Link included in badsites {}".format(badsite,link)) + break + + if isbadsite: + continue + if self.download_count < self.limit and link not in self.seen: self.seen.add(link) self.download_image(link) diff --git a/bing_image_downloader/downloader.py b/bing_image_downloader/downloader.py index 41789dd..0098969 100644 --- a/bing_image_downloader/downloader.py +++ b/bing_image_downloader/downloader.py @@ -9,7 +9,7 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, -force_replace=False, timeout=60, filter="", verbose=True): +force_replace=False, timeout=60, filter="", verbose=True,bad_sites=[]): # engine = 'bing' if adult_filter_off: @@ -34,7 +34,7 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, sys.exit(1) print("[%] Downloading Images to {}".format(str(image_dir.absolute()))) - bing = Bing(query, limit, image_dir, adult, timeout, filter, verbose) + bing = Bing(query, limit, image_dir, adult, timeout, filter, verbose,bad_sites) bing.run()