Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add no_directory option when saving image to filesystem #7

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions bing_image_downloader/bing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@


class Bing:
def __init__(self, query, limit, output_dir, adult, timeout, filters=''):
def __init__(self, query, limit, output_dir, adult, timeout, no_directory=False, filters=''):
self.download_count = 0
self.query = query
self.output_dir = output_dir
self.adult = adult
self.no_directory = no_directory
self.filters = filters

assert type(limit) == int, "limit must be integer"
Expand Down Expand Up @@ -52,16 +53,20 @@ def download_image(self, link):
# Download the image
print("[%] Downloading Image #{} from {}".format(self.download_count, link))

self.save_image(link, "{}/{}/{}/".format(os.getcwd(), self.output_dir, self.query) + "Image_{}.{}".format(
str(self.download_count), file_type))
print("[%] File Downloaded !\n")
if self.no_directory:
self.save_image(link, "{}/{}/".format(os.getcwd(), self.output_dir) + "Image_{}.{}".format(
str(self.download_count), file_type))
else:
self.save_image(link, "{}/{}/{}/".format(os.getcwd(), self.output_dir, self.query) + "Image_{}.{}".format(
str(self.download_count), file_type))
# print("[%] File Downloaded !\n")
except Exception as e:
self.download_count -= 1
print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))

def run(self):
while self.download_count < self.limit:
print('\n\n[!!]Indexing page: {}\n'.format(self.page_counter + 1))
print('\n[!!]Indexing page: {}'.format(self.page_counter + 1))
# Parse the page source and download pics
request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \
+ '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \
Expand All @@ -72,14 +77,14 @@ def run(self):
links = re.findall('murl&quot;:&quot;(.*?)&quot;', html)

print("[%] Indexed {} Images on Page {}.".format(len(links), self.page_counter + 1))
print("\n===============================================\n")
print("===============================================")

for link in links:
if self.download_count < self.limit:
self.download_image(link)
else:
print("\n\n[%] Done. Downloaded {} images.".format(self.download_count))
print("\n===============================================\n")
print("[%] Done. Downloaded {} images.".format(self.download_count))
print("===============================================")
break

self.page_counter += 1
15 changes: 10 additions & 5 deletions bing_image_downloader/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .bing import Bing


def download(query, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60):
def download(query, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, no_directory=False):

# engine = 'bing'
if adult_filter_off:
Expand All @@ -16,22 +16,27 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, forc
adult = 'on'

cwd = os.getcwd()

image_dir = os.path.join(cwd, output_dir, query)

if force_replace:
if os.path.isdir(image_dir):
shutil.rmtree(image_dir)

# check directory and create if necessary
# check output directory and create if necessary
try:
if not os.path.isdir("{}/{}/".format(cwd, output_dir)):
os.makedirs("{}/{}/".format(cwd, output_dir))
except:
pass
if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

bing = Bing(query, limit, output_dir, adult, timeout)
# create extra directories if they don't exist and if no_directory parameter is false
if not no_directory:
if not os.path.isdir("{}/{}/{}".format(cwd, output_dir, query)):
# print("making dirs")
os.makedirs("{}/{}/{}".format(cwd, output_dir, query))

bing = Bing(query, limit, output_dir, adult, timeout, no_directory)
bing.run()


Expand Down