From 7daed7b433222639fe8e6f145e88a9b2d9f4ef3c Mon Sep 17 00:00:00 2001
From: Misi <61704770+MilanVarady@users.noreply.github.com>
Date: Mon, 4 Jul 2022 11:08:21 +0200
Subject: [PATCH] Add page limit option
---
.gitignore | 10 +++++++++-
README.md | 3 ++-
bing_image_downloader/bing.py | 13 +++++++++++--
bing_image_downloader/downloader.py | 10 ++++------
4 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/.gitignore b/.gitignore
index ec358d5..034b77c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,12 @@ dataset/*
dataset
dist
image_search.egg-info
-**/.vscode/*
\ No newline at end of file
+**/.vscode/*
+
+### macOS ###
+.DS_Store
+.AppleDouble
+.LSOverride
+
+### Jetbrains ###
+.idea/**
\ No newline at end of file
diff --git a/README.md b/README.md
index 7d251db..1eaef14 100644
--- a/README.md
+++ b/README.md
@@ -30,12 +30,13 @@ pip install .
### Usage
```python
from bing_image_downloader import downloader
-downloader.download(query_string, limit=100, output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, verbose=True)
+downloader.download(query_string, limit=100, output_dir='dataset', page_limit=100, adult_filter_off=True, force_replace=False, timeout=60, verbose=True)
```
`query_string` : String to be searched.
`limit` : (optional, default is 100) Number of images to download.
`output_dir` : (optional, default is 'dataset') Name of output dir.
+`page_limit` : (optional, default is 100) Maximum number of result pages to scan for images; downloading stops as soon as either `limit` or `page_limit` is reached.
`adult_filter_off` : (optional, default is True) Enable of disable adult filteration.
`force_replace` : (optional, default is False) Delete folder if present and start a fresh download.
`timeout` : (optional, default is 60) timeout for connection in seconds.
diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py
index 4156678..4578a2a 100644
--- a/bing_image_downloader/bing.py
+++ b/bing_image_downloader/bing.py
@@ -12,7 +12,7 @@
class Bing:
- def __init__(self, query, limit, output_dir, adult, timeout, filter='', verbose=True):
+ def __init__(self, query, limit, output_dir, page_limit, adult, timeout, filter='', verbose=True):
self.download_count = 0
self.query = query
self.output_dir = output_dir
@@ -23,6 +23,8 @@ def __init__(self, query, limit, output_dir, adult, timeout, filter='', verbose
assert type(limit) == int, "limit must be integer"
self.limit = limit
+ assert type(page_limit) == int, "page_limit must be integer"
+ self.page_limit = page_limit
assert type(timeout) == int, "timeout must be integer"
self.timeout = timeout
@@ -88,7 +90,7 @@ def download_image(self, link):
def run(self):
- while self.download_count < self.limit:
+ while self.download_count < self.limit and self.page_counter < self.page_limit:
if self.verbose:
print('\n\n[!!]Indexing page: {}\n'.format(self.page_counter + 1))
# Parse the page source and download pics
@@ -112,4 +114,11 @@ def run(self):
self.download_image(link)
self.page_counter += 1
+
+ if self.download_count >= self.limit:
+ print("[!!]Reached download limit")
+
+ if self.page_counter >= self.page_limit:
+ print("[!!]Reached page limit")
+
print("\n\n[%] Done. Downloaded {} images.".format(self.download_count))
diff --git a/bing_image_downloader/downloader.py b/bing_image_downloader/downloader.py
index 41789dd..5610971 100644
--- a/bing_image_downloader/downloader.py
+++ b/bing_image_downloader/downloader.py
@@ -8,16 +8,14 @@
from .bing import Bing
-def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
-force_replace=False, timeout=60, filter="", verbose=True):
-
+def download(query, limit=100, output_dir='dataset', page_limit=100, adult_filter_off=True,
+ force_replace=False, timeout=60, filter="", verbose=True):
# engine = 'bing'
if adult_filter_off:
adult = 'off'
else:
adult = 'on'
-
image_dir = Path(output_dir).joinpath(query).absolute()
if force_replace:
@@ -32,9 +30,9 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
except Exception as e:
print('[Error]Failed to create directory.', e)
sys.exit(1)
-
+
print("[%] Downloading Images to {}".format(str(image_dir.absolute())))
- bing = Bing(query, limit, image_dir, adult, timeout, filter, verbose)
+ bing = Bing(query, limit, image_dir, page_limit, adult, timeout, filter, verbose)
bing.run()