diff --git a/README.md b/README.md
index cafd119..aab1dfd 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,14 @@ This package uses async url, which makes it very fast while downloading.
### Disclaimer
This program lets you download tons of images from Bing.
-Please do not download or use any image that violates its copyright terms.
+Please do not download or use any image that violates its copyright terms.
### Installation
```sh
pip install bing-image-downloader
```
-or
+or
```bash
git clone https://github.com/gurugaurav/bing_image_downloader
cd bing_image_downloader
@@ -39,9 +39,17 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter
`adult_filter_off` : (optional, default is True) Enable of disable adult filteration.
`force_replace` : (optional, default is False) Delete folder if present and start a fresh download.
`timeout` : (optional, default is 60) timeout for connection in seconds.
-`filter` : (optional, default is "") filter, choose from [line, photo, clipart, gif, transparent]
+`filter` : (optional, default is "") filter; takes a dictionary (e.g., {'type':'photo'}), see the options below*
`verbose` : (optional, default is True) Enable downloaded message.
+*Filter options:
+"size": choose from ["small", "medium", "large", "extra large", or a specific size "480x480"]
+"color": choose from ["color", "grayscale", "red", "orange", "yellow", "green", "teal", "blue", "purple", "pink", "brown", "black", "gray", "white"]
+"type": choose from ["line", "photo", "clipart", "gif", "transparent"]
+"layout": choose from ["square", "wide", "tall"]
+"people": choose from ["faces", "head&shoulders"]
+"date": choose from ["day", "week", "month", "year"]
+"license": choose from ["cc", "public", "share", "modify,share", "commercial share", "commercial modify,share"]
You can also test the programm by runnning `test.py keyword`
@@ -58,7 +66,3 @@ https://pypi.org/project/bing-image-downloader/
You can buy me a coffee if this project was helpful to you.
[](https://www.buymeacoffee.com/gurugaurav)
-
-
-
-
diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py
index 39f5633..38d7d6f 100644
--- a/bing_image_downloader/bing.py
+++ b/bing_image_downloader/bing.py
@@ -19,7 +19,7 @@ def image_to_byte_array(image: Image) -> bytes:
imgByteArr = imgByteArr.getvalue()
return imgByteArr
-
+
def resize(url,size: tuple):
response = urllib.request.urlopen(url)
@@ -31,7 +31,7 @@ def resize(url,size: tuple):
return img
class Bing:
- def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=None, verbose=True):
+ def __init__(self, query, limit, output_dir, adult, timeout, filter={}, resize=None, verbose=True):
self.download_count = 0
self.query = query
self.output_dir = output_dir
@@ -39,7 +39,7 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=No
self.filter = filter
self.verbose = verbose
self.seen = set()
-
+
assert type(limit) == int, "limit must be integer"
self.limit = limit
@@ -50,7 +50,7 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=No
# self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'}
self.page_counter = 0
- self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
+ self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
'AppleWebKit/537.11 (KHTML, like Gecko) '
'Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
@@ -60,20 +60,98 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=No
'Connection': 'keep-alive'}
- def get_filter(self, shorthand):
- if shorthand == "line" or shorthand == "linedrawing":
- return "+filterui:photo-linedrawing"
- elif shorthand == "photo":
- return "+filterui:photo-photo"
- elif shorthand == "clipart":
- return "+filterui:photo-clipart"
- elif shorthand == "gif" or shorthand == "animatedgif":
- return "+filterui:photo-animatedgif"
- elif shorthand == "transparent":
- return "+filterui:photo-transparent"
- else:
- return ""
-
+    def get_filter(self):
+        """Build the value appended to the `qft` query parameter from ``self.filter``.
+
+        ``self.filter`` is expected to map a filter name to its option, e.g.
+        ``{'type': 'photo', 'size': 'large'}``. Each name is lower-cased and
+        dispatched to the matching ``get_<name>`` method; the returned fragments
+        are concatenated in the dictionary's iteration order.
+
+        Returns:
+            The concatenated fragments, or "" when no filter is set.
+
+        Raises:
+            AttributeError: if a key has no matching ``get_<name>`` method.
+        """
+        # A falsy filter (None, '' or {}) means there is nothing to add.
+        if not self.filter:
+            return ""
+        # The previous API took a bare image-type string such as "photo";
+        # keep accepting it by routing it to the type handler.
+        if isinstance(self.filter, str):
+            return self.get_type(self.filter)
+        fragments = []
+        for name, option in self.filter.items():
+            # Resolve the handler with getattr instead of eval(): a filter key is
+            # caller-supplied text and must never be executed as Python source.
+            handler = getattr(self, f"get_{name.lower()}")
+            fragments.append(handler(option))
+        return "".join(fragments)
+
+    def get_size(self, shorthand):
+        """Translate a size keyword into an `imagesize` filter fragment.
+
+        Args:
+            shorthand: "small", "medium", "large", "extra large", or a custom
+                size written as WIDTHxHEIGHT (e.g. "480x480"). Matching ignores
+                case and surrounding whitespace.
+
+        Returns:
+            The "+filterui:imagesize-..." fragment, or "" when the value is
+            neither a known keyword nor a well-formed WIDTHxHEIGHT pair.
+        """
+        named_sizes = {
+            "small": "small",
+            "medium": "medium",
+            "large": "large",
+            "extra large": "wallpaper",
+        }
+        keyword = shorthand.strip().lower()
+        if keyword in named_sizes:
+            return f"+filterui:imagesize-{named_sizes[keyword]}"
+        # Only accept exactly two numeric parts around a single "x"; a plain
+        # substring test would also match words like "max" and would crash on
+        # values with several "x" characters.
+        width, separator, height = keyword.partition("x")
+        width, height = width.strip(), height.strip()
+        if separator and width.isdecimal() and height.isdecimal():
+            return f"+filterui:imagesize-custom_{width}_{height}"
+        return ""
+
+    def get_color(self, shorthand):
+        """Translate a colour keyword into a `color2` filter fragment.
+
+        Matching ignores case. "color"/"color only" map to `color2-color`,
+        "grayscale"/"black & white" map to `color2-bw`, and each of the twelve
+        named colours maps to `color2-FGcls_<NAME>` with the name upper-cased.
+        Any other value yields "".
+        """
+        modes = {
+            "color": "color",
+            "color only": "color",
+            "grayscale": "bw",
+            "black & white": "bw",
+        }
+        named_colors = (
+            "red", "orange", "yellow", "green", "teal", "blue",
+            "purple", "pink", "brown", "black", "gray", "white",
+        )
+        keyword = shorthand.lower()
+        if keyword in modes:
+            return "+filterui:color2-" + modes[keyword]
+        if keyword in named_colors:
+            return "+filterui:color2-FGcls_" + keyword.upper()
+        return ""
+
+    def get_type(self, shorthand):
+        """Translate an image-type keyword into a `photo` filter fragment.
+
+        Args:
+            shorthand: one of "line"/"linedrawing", "photo", "clipart",
+                "gif"/"animatedgif" or "transparent"; matching ignores case.
+
+        Returns:
+            The "+filterui:photo-..." fragment, or "" for any other value.
+        """
+        image_types = {
+            "line": "linedrawing",
+            "linedrawing": "linedrawing",
+            "photo": "photo",
+            "clipart": "clipart",
+            "gif": "animatedgif",
+            "animatedgif": "animatedgif",
+            "transparent": "transparent",
+        }
+        # Lower-case so matching is case-insensitive like the other get_*
+        # helpers; str() keeps non-string input returning "" instead of raising.
+        suffix = image_types.get(str(shorthand).lower())
+        return f"+filterui:photo-{suffix}" if suffix else ""
+
+    def get_layout(self, shorthand):
+        """Translate a layout keyword into an `aspect` filter fragment.
+
+        Accepts "square", "wide" or "tall" in any letter case; every other
+        value yields "".
+        """
+        keyword = shorthand.lower()
+        if keyword not in ("square", "wide", "tall"):
+            return ""
+        return "+filterui:aspect-" + keyword
+
+    def get_people(self, shorthand):
+        """Translate a people keyword into a `face` filter fragment.
+
+        "faces"/"just faces" map to `face-face`; "head&shoulders",
+        "head & shoulders" and "portrait" map to `face-portrait`. Matching
+        ignores case and unknown values yield "".
+        """
+        fragments = {
+            "faces": "+filterui:face-face",
+            "just faces": "+filterui:face-face",
+            "head&shoulders": "+filterui:face-portrait",
+            "head & shoulders": "+filterui:face-portrait",
+            "portrait": "+filterui:face-portrait",
+        }
+        return fragments.get(shorthand.lower(), "")
+
+    def get_date(self, shorthand):
+        """Translate a recency keyword into an `age` filter fragment.
+
+        "day", "week", "month" and "year" (or their "past ..." spellings) map
+        to `age-lt1440`, `age-lt10080`, `age-lt43200` and `age-lt525600`
+        respectively. Matching ignores case and unknown values yield "".
+        """
+        fragments = {
+            "day": "+filterui:age-lt1440",
+            "past 24 hours": "+filterui:age-lt1440",
+            "week": "+filterui:age-lt10080",
+            "past week": "+filterui:age-lt10080",
+            "month": "+filterui:age-lt43200",
+            "past month": "+filterui:age-lt43200",
+            "year": "+filterui:age-lt525600",
+            "past year": "+filterui:age-lt525600",
+        }
+        return fragments.get(shorthand.lower(), "")
+
+    def get_license(self, shorthand):
+        """Translate a licence keyword into a licence filter fragment.
+
+        Each short keyword ("cc", "public", "share", "modify,share",
+        "commercial share", "commercial modify,share") and its longer spelling
+        map to the same fragment. Matching ignores case and unknown values
+        yield "".
+        """
+        fragments = {
+            "cc": "+filterui:licenseType-Any",
+            "creative commons": "+filterui:licenseType-Any",
+            "all creative commons": "+filterui:licenseType-Any",
+            "public": "+filterui:license-L1",
+            "public domain": "+filterui:license-L1",
+            "share": "+filterui:license-L2_L3_L4_L5_L6_L7",
+            "free to share and use": "+filterui:license-L2_L3_L4_L5_L6_L7",
+            "modify,share": "+filterui:license-L2_L3_L5_L6",
+            "free to modify, share, and use": "+filterui:license-L2_L3_L5_L6",
+            "commercial share": "+filterui:license-L2_L3_L4",
+            "free to share and use commercially": "+filterui:license-L2_L3_L4",
+            "commercial modify,share": "+filterui:license-L2_L3",
+            "free to modify, share, and use commercially": "+filterui:license-L2_L3",
+        }
+        return fragments.get(shorthand.lower(), "")
def save_image(self, link, file_path):
if not self.resize:
@@ -97,7 +175,7 @@ def save_image(self, link, file_path):
f.write(image)
-
+
def download_image(self, link):
self.download_count += 1
@@ -108,11 +186,11 @@ def download_image(self, link):
file_type = filename.split(".")[-1]
if file_type.lower() not in ["jpe", "jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]:
file_type = "jpg"
-
+
if self.verbose:
# Download the image
print("[%] Downloading Image #{} from {}".format(self.download_count, link))
-
+
self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format(
str(self.download_count), file_type)))
if self.verbose:
@@ -122,7 +200,7 @@ def download_image(self, link):
self.download_count -= 1
print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))
-
+
def run(self):
while self.download_count < self.limit:
if self.verbose:
@@ -130,7 +208,7 @@ def run(self):
# Parse the page source and download pics
request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \
+ '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \
- + '&adlt=' + self.adult + '&qft=' + ('' if self.filter is None else self.get_filter(self.filter))
+ + '&adlt=' + self.adult + '&qft=' + self.get_filter()
request = urllib.request.Request(request_url, None, headers=self.headers)
response = urllib.request.urlopen(request)
html = response.read().decode('utf8')