diff --git a/README.md b/README.md index ae7c5c2..7d251db 100644 --- a/README.md +++ b/README.md @@ -39,10 +39,11 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter `adult_filter_off` : (optional, default is True) Enable of disable adult filteration.
`force_replace` : (optional, default is False) Delete folder if present and start a fresh download.
`timeout` : (optional, default is 60) timeout for connection in seconds.
+`filter` : (optional, default is "") Image type filter; choose one of [line, photo, clipart, gif, transparent]. Any other value applies no filter.
`verbose` : (optional, default is True) Enable downloaded message.
- +You can also test the program by running `test.py keyword` ### PyPi
diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py index 1a1a28b..4156678 100644 --- a/bing_image_downloader/bing.py +++ b/bing_image_downloader/bing.py @@ -12,12 +12,12 @@ class Bing: - def __init__(self, query, limit, output_dir, adult, timeout, filters='', verbose=True): + def __init__(self, query, limit, output_dir, adult, timeout, filter='', verbose=True): self.download_count = 0 self.query = query self.output_dir = output_dir self.adult = adult - self.filters = filters + self.filter = filter self.verbose = verbose self.seen = set() @@ -37,6 +37,22 @@ def __init__(self, query, limit, output_dir, adult, timeout, filters='', verbos 'Accept-Language': 'en-US,en;q=0.8', 'Connection': 'keep-alive'} + + def get_filter(self, shorthand): + if shorthand == "line" or shorthand == "linedrawing": + return "+filterui:photo-linedrawing" + elif shorthand == "photo": + return "+filterui:photo-photo" + elif shorthand == "clipart": + return "+filterui:photo-clipart" + elif shorthand == "gif" or shorthand == "animatedgif": + return "+filterui:photo-animatedgif" + elif shorthand == "transparent": + return "+filterui:photo-transparent" + else: + return "" + + def save_image(self, link, file_path): request = urllib.request.Request(link, None, self.headers) image = urllib.request.urlopen(request, timeout=self.timeout).read() @@ -78,7 +94,7 @@ def run(self): # Parse the page source and download pics request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \ + '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \ - + '&adlt=' + self.adult + '&qft=' + ('' if self.filters is None else str(self.filters)) + + '&adlt=' + self.adult + '&qft=' + ('' if self.filter is None else self.get_filter(self.filter)) request = urllib.request.Request(request_url, None, headers=self.headers) response = urllib.request.urlopen(request) html = response.read().decode('utf8') @@ -97,7 +113,3 @@ def run(self): self.page_counter += 1 
print("\n\n[%] Done. Downloaded {} images.".format(self.download_count)) - print("===============================================\n") - print("Please show your support here") - print("https://www.buymeacoffee.com/gurugaurav") - print("\n===============================================\n") diff --git a/bing_image_downloader/downloader.py b/bing_image_downloader/downloader.py index abca45c..41789dd 100644 --- a/bing_image_downloader/downloader.py +++ b/bing_image_downloader/downloader.py @@ -9,7 +9,7 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, -force_replace=False, timeout=60, verbose=True): +force_replace=False, timeout=60, filter="", verbose=True): # engine = 'bing' if adult_filter_off: @@ -34,7 +34,7 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, sys.exit(1) print("[%] Downloading Images to {}".format(str(image_dir.absolute()))) - bing = Bing(query, limit, image_dir, adult, timeout, verbose) + bing = Bing(query, limit, image_dir, adult, timeout, filter, verbose) bing.run() diff --git a/test.py b/test.py new file mode 100644 index 0000000..e7badfc --- /dev/null +++ b/test.py @@ -0,0 +1,22 @@ +import sys +from bing_image_downloader import downloader + +query=sys.argv[1] + +if len(sys.argv) == 3: + filter=sys.argv[2] +else: + filter="" + + +downloader.download( + query, + limit=10, + output_dir="dataset", + adult_filter_off=True, + force_replace=False, + timeout=60, + filter=filter, + verbose=True, +) +