Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add all filter options #59

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ This package uses async url, which makes it very fast while downloading.<br/>
### Disclaimer<br />

This program lets you download tons of images from Bing.
Please do not download or use any image that violates its copyright terms.
Please do not download or use any image that violates its copyright terms.

### Installation <br />
```sh
pip install bing-image-downloader
```

or
or
```bash
git clone https://github.com/gurugaurav/bing_image_downloader
cd bing_image_downloader
Expand All @@ -39,9 +39,17 @@ downloader.download(query_string, limit=100, output_dir='dataset', adult_filter
`adult_filter_off` : (optional, default is True) Enable or disable adult filtering.<br />
`force_replace` : (optional, default is False) Delete folder if present and start a fresh download.<br />
`timeout` : (optional, default is 60) timeout for connection in seconds.<br />
`filter` : (optional, default is "") filter, choose from [line, photo, clipart, gif, transparent]<br />
`filter` : (optional, default is "") filter; takes a dictionary (e.g., {'type':'photo'}), see options below*<br />
`verbose` : (optional, default is True) Enable downloaded message.<br />

*Filter options:<br />
"size": choose from ["small", "medium", "large", "extra large", or a specific size "480x480"]<br />
"color": choose from ["color", "grayscale", "red", "orange", "yellow", "green", "teal", "blue", "purple", "pink", "brown", "black", "gray", "white"]<br />
"type": choose from ["line", "photo", "clipart", "gif", "transparent"]<br />
"layout": choose from ["square", "wide", "tall"]<br />
"people": choose from ["faces", "head&shoulders"]<br />
"date": choose from ["day", "week", "month", "year"]<br />
"license": choose from ["cc", "public", "share", "modify,share", "commercial share", "commercial modify,share"]<br />

You can also test the program by running `test.py keyword`

Expand All @@ -58,7 +66,3 @@ https://pypi.org/project/bing-image-downloader/
You can buy me a coffee if this project was helpful to you.</br>

[<img src="https://www.buymeacoffee.com/assets/img/guidelines/download-assets-sm-1.svg" alt="Show your support" width="180"/>](https://www.buymeacoffee.com/gurugaurav)




124 changes: 101 additions & 23 deletions bing_image_downloader/bing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def image_to_byte_array(image: Image) -> bytes:
imgByteArr = imgByteArr.getvalue()
return imgByteArr


def resize(url,size: tuple):

response = urllib.request.urlopen(url)
Expand All @@ -31,15 +31,15 @@ def resize(url,size: tuple):
return img

class Bing:
def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=None, verbose=True):
def __init__(self, query, limit, output_dir, adult, timeout, filter={}, resize=None, verbose=True):
self.download_count = 0
self.query = query
self.output_dir = output_dir
self.adult = adult
self.filter = filter
self.verbose = verbose
self.seen = set()


assert type(limit) == int, "limit must be integer"
self.limit = limit
Expand All @@ -50,7 +50,7 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=No

# self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'}
self.page_counter = 0
self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
'AppleWebKit/537.11 (KHTML, like Gecko) '
'Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
Expand All @@ -60,20 +60,98 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='',resize=No
'Connection': 'keep-alive'}


def get_filter(self, shorthand):
    """Translate an image-type shorthand into a ``filterui`` query fragment.

    Recognized shorthands are "line"/"linedrawing", "photo", "clipart",
    "gif"/"animatedgif" and "transparent". Matching is exact (case-sensitive).

    Returns:
        The matching "+filterui:photo-..." fragment, or "" when the shorthand
        is not recognized.
    """
    # Each entry pairs the accepted spellings with the fragment they map to.
    alias_table = (
        (("line", "linedrawing"), "+filterui:photo-linedrawing"),
        (("photo",), "+filterui:photo-photo"),
        (("clipart",), "+filterui:photo-clipart"),
        (("gif", "animatedgif"), "+filterui:photo-animatedgif"),
        (("transparent",), "+filterui:photo-transparent"),
    )
    for aliases, fragment in alias_table:
        if shorthand in aliases:
            return fragment
    return ""

def get_filter(self):
    """Build the combined ``filterui`` query string from ``self.filter``.

    ``self.filter`` is normally a mapping of filter category to shorthand
    value, e.g. ``{"type": "photo", "size": "large"}``. Category names are
    matched case-insensitively and each one is delegated to the matching
    ``get_<category>`` method; the resulting fragments are concatenated in
    the mapping's iteration order.

    For backward compatibility a plain string is treated as an image-type
    shorthand (the pre-dictionary form of the option), and an empty or
    ``None`` filter yields an empty string.

    Returns:
        The concatenated filter fragments, or "" when no filter is set.

    Raises:
        ValueError: if a key is not one of the supported categories.
    """
    # Explicit whitelist: keys come from the caller, so they must never be
    # evaluated as code or used to reach arbitrary attributes.
    supported = ("size", "color", "type", "layout", "people", "date", "license")

    if not self.filter:
        return ""
    if isinstance(self.filter, str):
        # Legacy form: filter="photo" meant an image-type filter.
        return self.get_type(self.filter)

    fragments = []
    for category, shorthand in self.filter.items():
        name = str(category).lower()
        if name not in supported:
            raise ValueError(
                "unsupported filter option {!r}; choose from {}".format(
                    category, ", ".join(supported)))
        fragments.append(getattr(self, "get_" + name)(shorthand))
    return "".join(fragments)

def get_size(self, shorthand):
    """Translate a size shorthand into a ``filterui`` image-size fragment.

    Accepts "small", "medium", "large", "extra large", or an explicit
    ``<width>x<height>`` in pixels such as "480x480". Matching ignores case
    and surrounding whitespace.

    Returns:
        The matching "+filterui:imagesize-..." fragment, or "" when the
        value is not a string or is not a recognized size.
    """
    import re  # local import: only this method needs it

    if not isinstance(shorthand, str):
        return ""
    key = shorthand.strip().lower()

    named_sizes = {
        "small": "+filterui:imagesize-small",
        "medium": "+filterui:imagesize-medium",
        "large": "+filterui:imagesize-large",
        "extra large": "+filterui:imagesize-wallpaper",
    }
    if key in named_sizes:
        return named_sizes[key]

    # Only a strict "<digits>x<digits>" is a custom size. A bare substring
    # test for "x" would also match words like "max" and would break on
    # values with more than one "x".
    custom = re.fullmatch(r"(\d+)\s*x\s*(\d+)", key)
    if custom:
        w, h = custom.groups()
        return f"+filterui:imagesize-custom_{w}_{h}"
    return ""

def get_color(self, shorthand):
    """Translate a color shorthand into a ``filterui`` color fragment.

    The shorthand is lowercased before matching. "color"/"color only" and
    "grayscale"/"black & white" map to fixed fragments; a named hue (red,
    orange, ... white) maps to an ``FGcls_<HUE>`` fragment with the hue in
    upper case.

    Returns:
        The matching "+filterui:color2-..." fragment, or "" when the
        shorthand is not recognized.
    """
    hue_names = (
        "red", "orange", "yellow", "green", "teal", "blue",
        "purple", "pink", "brown", "black", "gray", "white",
    )
    key = shorthand.lower()
    if key in ("color", "color only"):
        return "+filterui:color2-color"
    if key in ("grayscale", "black & white"):
        return "+filterui:color2-bw"
    if key in hue_names:
        return "+filterui:color2-FGcls_" + key.upper()
    return ""

def get_type(self, shorthand):
    """Translate an image-type shorthand into a ``filterui`` fragment.

    Recognized shorthands are "line"/"linedrawing", "photo", "clipart",
    "gif"/"animatedgif" and "transparent". Matching ignores case, in line
    with the other ``get_*`` filter helpers.

    Returns:
        The matching "+filterui:photo-..." fragment, or "" when the value is
        not a string or is not a recognized type.
    """
    if not isinstance(shorthand, str):
        # Non-string values never matched before; keep returning "".
        return ""
    fragments = {
        "line": "+filterui:photo-linedrawing",
        "linedrawing": "+filterui:photo-linedrawing",
        "photo": "+filterui:photo-photo",
        "clipart": "+filterui:photo-clipart",
        "gif": "+filterui:photo-animatedgif",
        "animatedgif": "+filterui:photo-animatedgif",
        "transparent": "+filterui:photo-transparent",
    }
    return fragments.get(shorthand.lower(), "")

def get_layout(self, shorthand):
    """Translate a layout shorthand into a ``filterui`` aspect fragment.

    The shorthand is lowercased; "square", "wide" and "tall" are accepted.

    Returns:
        "+filterui:aspect-<layout>" for a recognized layout, otherwise "".
    """
    aspect = shorthand.lower()
    if aspect not in ("square", "wide", "tall"):
        return ""
    return "+filterui:aspect-" + aspect

def get_people(self, shorthand):
    """Translate a people shorthand into a ``filterui`` face fragment.

    The shorthand is lowercased. "faces"/"just faces" select the face
    filter; "head&shoulders"/"head & shoulders"/"portrait" select the
    portrait filter.

    Returns:
        The matching "+filterui:face-..." fragment, or "" when the shorthand
        is not recognized.
    """
    face_aliases = ("faces", "just faces")
    portrait_aliases = ("head&shoulders", "head & shoulders", "portrait")

    key = shorthand.lower()
    if key in face_aliases:
        return "+filterui:face-face"
    if key in portrait_aliases:
        return "+filterui:face-portrait"
    return ""

def get_date(self, shorthand):
    """Translate a date shorthand into a ``filterui`` age fragment.

    The shorthand is lowercased. Accepted values are "day"/"past 24 hours",
    "week"/"past week", "month"/"past month" and "year"/"past year".

    Returns:
        The matching "+filterui:age-lt..." fragment, or "" when the
        shorthand is not recognized.
    """
    age_fragments = {
        "day": "+filterui:age-lt1440",
        "past 24 hours": "+filterui:age-lt1440",
        "week": "+filterui:age-lt10080",
        "past week": "+filterui:age-lt10080",
        "month": "+filterui:age-lt43200",
        "past month": "+filterui:age-lt43200",
        "year": "+filterui:age-lt525600",
        "past year": "+filterui:age-lt525600",
    }
    key = shorthand.lower()
    return age_fragments.get(key, "")

def get_license(self, shorthand):
    """Translate a license shorthand into a ``filterui`` license fragment.

    The shorthand is lowercased. Both the short forms ("cc", "public",
    "share", "modify,share", "commercial share", "commercial modify,share")
    and their long descriptive spellings are accepted.

    Returns:
        The matching "+filterui:license..." fragment, or "" when the
        shorthand is not recognized.
    """
    # Each row: accepted spellings -> fragment.
    license_table = (
        (("cc", "creative commons", "all creative commons"),
         "+filterui:licenseType-Any"),
        (("public", "public domain"),
         "+filterui:license-L1"),
        (("share", "free to share and use"),
         "+filterui:license-L2_L3_L4_L5_L6_L7"),
        (("modify,share", "free to modify, share, and use"),
         "+filterui:license-L2_L3_L5_L6"),
        (("commercial share", "free to share and use commercially"),
         "+filterui:license-L2_L3_L4"),
        (("commercial modify,share",
          "free to modify, share, and use commercially"),
         "+filterui:license-L2_L3"),
    )
    key = shorthand.lower()
    for spellings, fragment in license_table:
        if key in spellings:
            return fragment
    return ""

def save_image(self, link, file_path):
if not self.resize:
Expand All @@ -97,7 +175,7 @@ def save_image(self, link, file_path):
f.write(image)



def download_image(self, link):

self.download_count += 1
Expand All @@ -108,11 +186,11 @@ def download_image(self, link):
file_type = filename.split(".")[-1]
if file_type.lower() not in ["jpe", "jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]:
file_type = "jpg"

if self.verbose:
# Download the image
print("[%] Downloading Image #{} from {}".format(self.download_count, link))

self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format(
str(self.download_count), file_type)))
if self.verbose:
Expand All @@ -122,15 +200,15 @@ def download_image(self, link):
self.download_count -= 1
print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))


def run(self):
while self.download_count < self.limit:
if self.verbose:
print('\n\n[!!]Indexing page: {}\n'.format(self.page_counter + 1))
# Parse the page source and download pics
request_url = 'https://www.bing.com/images/async?q=' + urllib.parse.quote_plus(self.query) \
+ '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \
+ '&adlt=' + self.adult + '&qft=' + ('' if self.filter is None else self.get_filter(self.filter))
+ '&adlt=' + self.adult + '&qft=' + self.get_filter()
request = urllib.request.Request(request_url, None, headers=self.headers)
response = urllib.request.urlopen(request)
html = response.read().decode('utf8')
Expand Down