From e17e90130fcba04e6d4f456995ea7918a952857b Mon Sep 17 00:00:00 2001 From: Nick Todts Date: Sat, 5 Sep 2020 12:52:14 +0200 Subject: [PATCH] Add no_directory option when saving image to filesystem --- bing_image_downloader/bing.py | 21 +++++++++++++-------- bing_image_downloader/downloader.py | 15 ++++++++++----- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py index 0ff4e2b..c308657 100644 --- a/bing_image_downloader/bing.py +++ b/bing_image_downloader/bing.py @@ -14,11 +14,12 @@ class Bing: - def __init__(self, query, limit, output_dir, adult, timeout, filters=''): + def __init__(self, query, limit, output_dir, adult, timeout, no_directory=False, filters=''): self.download_count = 0 self.query = query self.output_dir = output_dir self.adult = adult + self.no_directory = no_directory self.filters = filters assert type(limit) == int, "limit must be integer" @@ -52,16 +53,20 @@ def download_image(self, link): # Download the image print("[%] Downloading Image #{} from {}".format(self.download_count, link)) - self.save_image(link, "{}/{}/{}/".format(os.getcwd(), self.output_dir, self.query) + "Image_{}.{}".format( - str(self.download_count), file_type)) - print("[%] File Downloaded !\n") + if self.no_directory: + self.save_image(link, "{}/{}/".format(os.getcwd(), self.output_dir) + "Image_{}.{}".format( + str(self.download_count), file_type)) + else: + self.save_image(link, "{}/{}/{}/".format(os.getcwd(), self.output_dir, self.query) + "Image_{}.{}".format( + str(self.download_count), file_type)) + # print("[%] File Downloaded !\n") except Exception as e: self.download_count -= 1 print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e)) def run(self): while self.download_count < self.limit: - print('\n\n[!!]Indexing page: {}\n'.format(self.page_counter + 1)) + print('\n[!!]Indexing page: {}'.format(self.page_counter + 1)) # Parse the page source and download pics request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \ + '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \ @@ -72,14 +77,14 @@ def run(self): links = re.findall('murl":"(.*?)"', html) print("[%] Indexed {} Images on Page {}.".format(len(links), self.page_counter + 1)) - print("\n===============================================\n") + print("===============================================") for link in links: if self.download_count < self.limit: self.download_image(link) else: - print("\n\n[%] Done. Downloaded {} images.".format(self.download_count)) - print("\n===============================================\n") + print("[%] Done. Downloaded {} images.".format(self.download_count)) + print("===============================================") break self.page_counter += 1 diff --git a/bing_image_downloader/downloader.py b/bing_image_downloader/downloader.py index d0dedb3..78b8801 100644 --- a/bing_image_downloader/downloader.py +++ b/bing_image_downloader/downloader.py @@ -7,7 +7,7 @@ from .bing import Bing -def download(query, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60): +def download(query, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, no_directory=False): # engine = 'bing' if adult_filter_off: @@ -16,22 +16,27 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, forc adult = 'on' cwd = os.getcwd() + image_dir = os.path.join(cwd, output_dir, query) if force_replace: if os.path.isdir(image_dir): shutil.rmtree(image_dir) - # check directory and create if necessary + # check output directory and create if necessary try: if not os.path.isdir("{}/{}/".format(cwd, output_dir)): os.makedirs("{}/{}/".format(cwd, output_dir)) except: pass - if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)): - os.makedirs("{}/{}/{}".format(cwd, output_dir, query)) - bing = Bing(query, limit, output_dir, adult, timeout) + # create extra directories if they don't exist and if no_directory parameter is false + if not no_directory: + if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)): + # print("making dirs") + os.makedirs("{}/{}/{}".format(cwd, output_dir, query)) + + bing = Bing(query, limit, output_dir, adult, timeout, no_directory) bing.run()