From bc100d99bc0f25c3e4088e3ea9646fcb9fea6382 Mon Sep 17 00:00:00 2001 From: yi-chia-chen Date: Fri, 9 Aug 2024 16:40:56 -0700 Subject: [PATCH 1/3] add all filter options --- README.md | 18 +++-- bing_image_downloader/bing.py | 124 +++++++++++++++++++++++++++------- 2 files changed, 112 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index cafd119..05b141d 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,14 @@ This package uses async url, which makes it very fast while downloading.
### Disclaimer
This program lets you download tons of images from Bing. -Please do not download or use any image that violates its copyright terms. +Please do not download or use any image that violates its copyright terms. ### Installation
```sh pip install bing-image-downloader ``` -or +or ```bash git clone https://github.com/gurugaurav/bing_image_downloader cd bing_image_downloader @@ -39,9 +39,17 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter `adult_filter_off` : (optional, default is True) Enable of disable adult filteration.
`force_replace` : (optional, default is False) Delete folder if present and start a fresh download.
`timeout` : (optional, default is 60) timeout for connection in seconds.
-`filter` : (optional, default is "") filter, choose from [line, photo, clipart, gif, transparent]
+`filter` : (optional, default is "") filter, take a dictionary (e.g., {'type':'photo'}), see options below
`verbose` : (optional, default is True) Enable downloaded message.
+Filter options: +"size": choose from ["small", "medium", "large", "extra large", or a specific size "480x480"] +"color": choose from ["color", "grayscale", "red", "orange", "yellow", "green", "teal", "blue", "purple", "pink", "brown", "black", "gray", "white"] +"type": choose from ["line", "photo", "clipart", "gif", "transparent"] +"layout": choose from ["square", "wide", "tall"] +"people": choose from ["faces", "head&shoulders"] +"date": choose from ["day", "week", "month", "year"] +"license": choose from ["cc", "public", "share", "modify,share", "commercial share", "commercial modify,share"] You can also test the programm by runnning `test.py keyword` @@ -58,7 +66,3 @@ https://pypi.org/project/bing-image-downloader/ You can buy me a coffee if this project was helpful to you.
[Show your support](https://www.buymeacoffee.com/gurugaurav) - - - - diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py index 39f5633..38d7d6f 100644 --- a/bing_image_downloader/bing.py +++ b/bing_image_downloader/bing.py @@ -19,7 +19,7 @@ def image_to_byte_array(image: Image) -> bytes: imgByteArr = imgByteArr.getvalue() return imgByteArr - + def resize(url,size: tuple): response = urllib.request.urlopen(url) @@ -31,7 +31,7 @@ def resize(url,size: tuple): return img class Bing: - def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=None, verbose=True): + def __init__(self, query, limit, output_dir, adult, timeout, filter={}, resize=None, verbose=True): self.download_count = 0 self.query = query self.output_dir = output_dir @@ -39,7 +39,7 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=No self.filter = filter self.verbose = verbose self.seen = set() - + assert type(limit) == int, "limit must be integer" self.limit = limit @@ -50,7 +50,7 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=No # self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'} self.page_counter = 0 - self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) ' + self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) ' 'AppleWebKit/537.11 (KHTML, like Gecko) ' 'Chrome/23.0.1271.64 Safari/537.11', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', @@ -60,20 +60,98 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=No 'Connection': 'keep-alive'} - def get_filter(self, shorthand): - if shorthand == "line" or shorthand == "linedrawing": - return "+filterui:photo-linedrawing" - elif shorthand == "photo": - return "+filterui:photo-photo" - elif shorthand == "clipart": - return "+filterui:photo-clipart" - elif shorthand == "gif" or shorthand == "animatedgif": - return 
"+filterui:photo-animatedgif" - elif shorthand == "transparent": - return "+filterui:photo-transparent" - else: - return "" - + def get_filter(self): + filter_string = "" + for k, v in self.filter.items(): + k = k.lower() + filter_string+= eval(f"self.get_{k}(v)") + return filter_string + + def get_size(self, shorthand): + if shorthand == "small": + return "+filterui:imagesize-small" + elif shorthand == "medium": + return "+filterui:imagesize-medium" + elif shorthand == "large": + return "+filterui:imagesize-large" + elif shorthand == "extra large": + return "+filterui:imagesize-wallpaper" + elif "x" in shorthand: + w, h = shorthand.split('x') + return f"+filterui:imagesize-custom_{w}_{h}" + else: + return "" + + def get_color(self, shorthand): + shorthand = shorthand.lower() + if shorthand in ["color", "color only"]: + return "+filterui:color2-color" + elif shorthand in ["grayscale", "black & white"]: + return "+filterui:color2-bw" + elif shorthand in ["red", "orange", "yellow", "green", "teal", "blue", "purple", "pink", "brown", "black", "gray", "white"]: + return f"+filterui:color2-FGcls_{shorthand.upper()}" + else: + return "" + + def get_type(self, shorthand): + if shorthand == "line" or shorthand == "linedrawing": + return "+filterui:photo-linedrawing" + elif shorthand == "photo": + return "+filterui:photo-photo" + elif shorthand == "clipart": + return "+filterui:photo-clipart" + elif shorthand == "gif" or shorthand == "animatedgif": + return "+filterui:photo-animatedgif" + elif shorthand == "transparent": + return "+filterui:photo-transparent" + else: + return "" + + def get_layout(self, shorthand): + shorthand = shorthand.lower() + if shorthand in ["square", "wide", "tall"]: + return f"+filterui:aspect-{shorthand}" + else: + return "" + + def get_people(self, shorthand): + shorthand = shorthand.lower() + if shorthand in ["faces", "just faces"]: + return "+filterui:face-face" + elif shorthand in ["head&shoulders", "head & shoulders", "portrait"]: + return 
"+filterui:face-portrait" + else: + return "" + + def get_date(self, shorthand): + shorthand = shorthand.lower() + if shorthand in ["day", "past 24 hours"]: + return "+filterui:age-lt1440" + elif shorthand in ["week", "past week"]: + return "+filterui:age-lt10080" + elif shorthand in ["month", "past month"]: + return "+filterui:age-lt43200" + elif shorthand in ["year", "past year"]: + return "+filterui:age-lt525600" + else: + return "" + + def get_license(self, shorthand): + shorthand = shorthand.lower() + if shorthand in ["cc", "creative commons", "all creative commons"]: + return "+filterui:licenseType-Any" + elif shorthand in ["public", "public domain"]: + return "+filterui:license-L1" + elif shorthand in ["share", "free to share and use"]: + return "+filterui:license-L2_L3_L4_L5_L6_L7" + elif shorthand in ["modify,share", "free to modify, share, and use"]: + return "+filterui:license-L2_L3_L5_L6" + elif shorthand in ["commercial share", "free to share and use commercially"]: + return "+filterui:license-L2_L3_L4" + elif shorthand in ["commercial modify,share", "free to modify, share, and use commercially"]: + return "+filterui:license-L2_L3" + else: + return "" def save_image(self, link, file_path): if not self.resize: @@ -97,7 +175,7 @@ def save_image(self, link, file_path): f.write(image) - + def download_image(self, link): self.download_count += 1 @@ -108,11 +186,11 @@ def download_image(self, link): file_type = filename.split(".")[-1] if file_type.lower() not in ["jpe", "jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]: file_type = "jpg" - + if self.verbose: # Download the image print("[%] Downloading Image #{} from {}".format(self.download_count, link)) - + self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format( str(self.download_count), file_type))) if self.verbose: @@ -122,7 +200,7 @@ def download_image(self, link): self.download_count -= 1 print("[!] Issue getting: {}\n[!] 
Error:: {}".format(link, e)) - + def run(self): while self.download_count < self.limit: if self.verbose: @@ -130,7 +208,7 @@ def run(self): # Parse the page source and download pics request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \ + '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \ - + '&adlt=' + self.adult + '&qft=' + ('' if self.filter is None else self.get_filter(self.filter)) + + '&adlt=' + self.adult + '&qft=' + self.get_filter() request = urllib.request.Request(request_url, None, headers=self.headers) response = urllib.request.urlopen(request) html = response.read().decode('utf8') From b0e300c4745704d5b6c4d518bbcf02bcf89b93ad Mon Sep 17 00:00:00 2001 From: Yi-Chia Chen <24441731+Yi-Chia-Chen@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:00:08 -0700 Subject: [PATCH 2/3] Update README.md --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 05b141d..65ff4da 100644 --- a/README.md +++ b/README.md @@ -43,13 +43,13 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter `verbose` : (optional, default is True) Enable downloaded message.
Filter options: -"size": choose from ["small", "medium", "large", "extra large", or a specific size "480x480"] -"color": choose from ["color", "grayscale", "red", "orange", "yellow", "green", "teal", "blue", "purple", "pink", "brown", "black", "gray", "white"] -"type": choose from ["line", "photo", "clipart", "gif", "transparent"] -"layout": choose from ["square", "wide", "tall"] -"people": choose from ["faces", "head&shoulders"] -"date": choose from ["day", "week", "month", "year"] -"license": choose from ["cc", "public", "share", "modify,share", "commercial share", "commercial modify,share"] +"size": choose from ["small", "medium", "large", "extra large", or a specific size "480x480"]
+"color": choose from ["color", "grayscale", "red", "orange", "yellow", "green", "teal", "blue", "purple", "pink", "brown", "black", "gray", "white"]
+"type": choose from ["line", "photo", "clipart", "gif", "transparent"]
+"layout": choose from ["square", "wide", "tall"]
+"people": choose from ["faces", "head&shoulders"]
+"date": choose from ["day", "week", "month", "year"]
+"license": choose from ["cc", "public", "share", "modify,share", "commercial share", "commercial modify,share"]
You can also test the programm by runnning `test.py keyword` From 83aac7ff6010bc05094f74693f62d7aae07cc47b Mon Sep 17 00:00:00 2001 From: Yi-Chia Chen <24441731+Yi-Chia-Chen@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:01:00 -0700 Subject: [PATCH 3/3] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 65ff4da..aab1dfd 100644 --- a/README.md +++ b/README.md @@ -39,10 +39,10 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter `adult_filter_off` : (optional, default is True) Enable of disable adult filteration.
`force_replace` : (optional, default is False) Delete folder if present and start a fresh download.
`timeout` : (optional, default is 60) timeout for connection in seconds.
-`filter` : (optional, default is "") filter, take a dictionary (e.g., {'type':'photo'}), see options below
+`filter` : (optional, default is "") filter; takes a dictionary (e.g., {'type':'photo'}), see options below*
`verbose` : (optional, default is True) Enable downloaded message.
-Filter options: +*Filter options:
"size": choose from ["small", "medium", "large", "extra large", or a specific size "480x480"]
"color": choose from ["color", "grayscale", "red", "orange", "yellow", "green", "teal", "blue", "purple", "pink", "brown", "black", "gray", "white"]
"type": choose from ["line", "photo", "clipart", "gif", "transparent"]