From 905833d2d95cda5a809d2abc41371bbfc8cee769 Mon Sep 17 00:00:00 2001 From: bljoriss Date: Tue, 26 Oct 2021 22:26:38 +0200 Subject: [PATCH 1/5] Add support for using filters --- README.md | 1 + bing_image_downloader/bing.py | 23 ++++++++++++++++++++--- bing_image_downloader/downloader.py | 4 ++-- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ae7c5c2..feb20ee 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter `adult_filter_off` : (optional, default is True) Enable of disable adult filteration.
`force_replace` : (optional, default is False) Delete folder if present and start a fresh download.
`timeout` : (optional, default is 60) timeout for connection in seconds.
+`filter` : (optional, default is "") Image type filter; choose from [line, photo, clipart, gif, transparent].
`verbose` : (optional, default is True) Enable downloaded message.
diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py index 6756d19..6576f63 100644 --- a/bing_image_downloader/bing.py +++ b/bing_image_downloader/bing.py @@ -12,12 +12,12 @@ class Bing: - def __init__(self, query, limit, output_dir, adult, timeout, filters='', verbose=True): + def __init__(self, query, limit, output_dir, adult, timeout, filter='', verbose=True): self.download_count = 0 self.query = query self.output_dir = output_dir self.adult = adult - self.filters = filters + self.filter = filter self.verbose = verbose assert type(limit) == int, "limit must be integer" @@ -36,6 +36,23 @@ def __init__(self, query, limit, output_dir, adult, timeout, filters='', verbos 'Accept-Language': 'en-US,en;q=0.8', 'Connection': 'keep-alive'} + + def get_filter(self, shorthand): + match shorthand: + case ("line" | "linedrawing"): + return "+filterui:photo-linedrawing" + case "photo": + return "+filterui:photo-photo" + case "clipart": + return "+filterui:photo-clipart" + case ("gif" | "animatedgif"): + return "+filterui:photo-animatedgif" + case "transparent": + return "+filterui:photo-transparent" + case _: + return "" + + def save_image(self, link, file_path): request = urllib.request.Request(link, None, self.headers) image = urllib.request.urlopen(request, timeout=self.timeout).read() @@ -77,7 +94,7 @@ def run(self): # Parse the page source and download pics request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \ + '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \ - + '&adlt=' + self.adult + '&qft=' + ('' if self.filters is None else str(self.filters)) + + '&adlt=' + self.adult + '&qft=' + ('' if self.filter is None else self.get_filter(self.filter)) request = urllib.request.Request(request_url, None, headers=self.headers) response = urllib.request.urlopen(request) html = response.read().decode('utf8') diff --git a/bing_image_downloader/downloader.py b/bing_image_downloader/downloader.py index 
abca45c..41789dd 100644 --- a/bing_image_downloader/downloader.py +++ b/bing_image_downloader/downloader.py @@ -9,7 +9,7 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, -force_replace=False, timeout=60, verbose=True): +force_replace=False, timeout=60, filter="", verbose=True): # engine = 'bing' if adult_filter_off: @@ -34,7 +34,7 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True, sys.exit(1) print("[%] Downloading Images to {}".format(str(image_dir.absolute()))) - bing = Bing(query, limit, image_dir, adult, timeout, verbose) + bing = Bing(query, limit, image_dir, adult, timeout, filter, verbose) bing.run() From 17730d5939f22675ae78930f0574da36b47eacf4 Mon Sep 17 00:00:00 2001 From: bljoriss Date: Tue, 26 Oct 2021 22:38:30 +0200 Subject: [PATCH 2/5] Add test script --- README.md | 2 +- test.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 test.py diff --git a/README.md b/README.md index feb20ee..7d251db 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter `verbose` : (optional, default is True) Enable downloaded message.
- +You can also test the program by running `test.py keyword` ### PyPi
diff --git a/test.py b/test.py new file mode 100644 index 0000000..1e04de5 --- /dev/null +++ b/test.py @@ -0,0 +1,22 @@ +import sys +from bing_image_downloader import downloader + +query=sys.argv[1] + +if len(sys.argv) == 3: + filter=sys.argv[2] +else: + filter="" + + +downloader.download( + query, + limit=1, + output_dir="dataset", + adult_filter_off=True, + force_replace=False, + timeout=60, + filter=filter, + verbose=True, +) + From e5c4df3fb059218126c5b263988cef1736b92101 Mon Sep 17 00:00:00 2001 From: bljoriss Date: Tue, 26 Oct 2021 22:41:52 +0200 Subject: [PATCH 3/5] Remove ads --- bing_image_downloader/bing.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py index 6576f63..e606fc4 100644 --- a/bing_image_downloader/bing.py +++ b/bing_image_downloader/bing.py @@ -112,7 +112,3 @@ def run(self): self.page_counter += 1 print("\n\n[%] Done. Downloaded {} images.".format(self.download_count)) - print("===============================================\n") - print("Please show your support here") - print("https://www.buymeacoffee.com/gurugaurav") - print("\n===============================================\n") From 746795f3ad247045eddd9d963d722b5688e204b2 Mon Sep 17 00:00:00 2001 From: bljoriss Date: Mon, 1 Nov 2021 22:57:43 +0100 Subject: [PATCH 4/5] change python requirement --- bing_image_downloader/bing.py | 13 ++++++------- test.py | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py index e606fc4..51fb55a 100644 --- a/bing_image_downloader/bing.py +++ b/bing_image_downloader/bing.py @@ -38,18 +38,17 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='', verbose def get_filter(self, shorthand): - match shorthand: - case ("line" | "linedrawing"): + if shorthand == "line" | shorthand == "linedrawing": return "+filterui:photo-linedrawing" - case "photo": + elif shorthand == "photo": return 
"+filterui:photo-photo" - case "clipart": + elif shorthand == "clipart": return "+filterui:photo-clipart" - case ("gif" | "animatedgif"): + elif shorthand == "gif" | shorthand == "animatedgif": return "+filterui:photo-animatedgif" - case "transparent": + elif shorthand == "transparent": return "+filterui:photo-transparent" - case _: + else: return "" diff --git a/test.py b/test.py index 1e04de5..e7badfc 100644 --- a/test.py +++ b/test.py @@ -11,7 +11,7 @@ downloader.download( query, - limit=1, + limit=10, output_dir="dataset", adult_filter_off=True, force_replace=False, From 8b32f465b8fdf4e46d5eeea9361a81301052bc4e Mon Sep 17 00:00:00 2001 From: bljoriss Date: Mon, 1 Nov 2021 23:42:07 +0100 Subject: [PATCH 5/5] no line for or --- bing_image_downloader/bing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py index 51fb55a..cd59544 100644 --- a/bing_image_downloader/bing.py +++ b/bing_image_downloader/bing.py @@ -38,13 +38,13 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='', verbose def get_filter(self, shorthand): - if shorthand == "line" | shorthand == "linedrawing": + if shorthand == "line" or shorthand == "linedrawing": return "+filterui:photo-linedrawing" elif shorthand == "photo": return "+filterui:photo-photo" elif shorthand == "clipart": return "+filterui:photo-clipart" - elif shorthand == "gif" | shorthand == "animatedgif": + elif shorthand == "gif" or shorthand == "animatedgif": return "+filterui:photo-animatedgif" elif shorthand == "transparent": return "+filterui:photo-transparent"