Skip to content

Commit

Permalink
Add no_directory option when saving image to filesystem
Browse files: browse the repository at this point in the history
  • Loading branch information
NickT5 committed Sep 5, 2020
1 parent 0cad2bd commit e17e901
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 13 deletions.
21 changes: 13 additions & 8 deletions bing_image_downloader/bing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@


class Bing:
def __init__(self, query, limit, output_dir, adult, timeout, filters=''):
def __init__(self, query, limit, output_dir, adult, timeout, no_directory=False, filters=''):
self.download_count = 0
self.query = query
self.output_dir = output_dir
self.adult = adult
self.no_directory = no_directory
self.filters = filters

assert type(limit) == int, "limit must be integer"
Expand Down Expand Up @@ -52,16 +53,20 @@ def download_image(self, link):
# Download the image
print("[%] Downloading Image #{} from {}".format(self.download_count, link))

self.save_image(link, "{}/{}/{}/".format(os.getcwd(), self.output_dir, self.query) + "Image_{}.{}".format(
str(self.download_count), file_type))
print("[%] File Downloaded !\n")
if self.no_directory:
self.save_image(link, "{}/{}/".format(os.getcwd(), self.output_dir) + "Image_{}.{}".format(
str(self.download_count), file_type))
else:
self.save_image(link, "{}/{}/{}/".format(os.getcwd(), self.output_dir, self.query) + "Image_{}.{}".format(
str(self.download_count), file_type))
# print("[%] File Downloaded !\n")
except Exception as e:
self.download_count -= 1
print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))

def run(self):
while self.download_count < self.limit:
print('\n\n[!!]Indexing page: {}\n'.format(self.page_counter + 1))
print('\n[!!]Indexing page: {}'.format(self.page_counter + 1))
# Parse the page source and download pics
request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \
+ '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \
Expand All @@ -72,14 +77,14 @@ def run(self):
links = re.findall('murl&quot;:&quot;(.*?)&quot;', html)

print("[%] Indexed {} Images on Page {}.".format(len(links), self.page_counter + 1))
print("\n===============================================\n")
print("===============================================")

for link in links:
if self.download_count < self.limit:
self.download_image(link)
else:
print("\n\n[%] Done. Downloaded {} images.".format(self.download_count))
print("\n===============================================\n")
print("[%] Done. Downloaded {} images.".format(self.download_count))
print("===============================================")
break

self.page_counter += 1
15 changes: 10 additions & 5 deletions bing_image_downloader/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .bing import Bing


def download(query, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60):
def download(query, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, no_directory=False):

# engine = 'bing'
if adult_filter_off:
Expand All @@ -16,22 +16,27 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, forc
adult = 'on'

cwd = os.getcwd()

image_dir = os.path.join(cwd, output_dir, query)

if force_replace:
if os.path.isdir(image_dir):
shutil.rmtree(image_dir)

# check directory and create if necessary
# check output directory and create if necessary
try:
if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
os.makedirs("{}/{}/".format(cwd, output_dir))
except:
pass
if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

bing = Bing(query, limit, output_dir, adult, timeout)
# create extra directories if they don't exist and if no_directory parameter is false
if not no_directory:
if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
# print("making dirs")
os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

bing = Bing(query, limit, output_dir, adult, timeout, no_directory)
bing.run()


Expand Down

0 comments on commit e17e901

Please sign in to comment.