From cfbf4f4eb917a24f080b4b0b5138769aea9358ea Mon Sep 17 00:00:00 2001
From: thomasasia <thomas.harriett@gmail.com>
Date: Mon, 11 Apr 2022 17:55:23 -0400
Subject: [PATCH 1/7] Added error protection functionality

---
 bing_image_downloader/bing.py       | 71 +++++++++++++++++++++++++----
 bing_image_downloader/downloader.py | 10 ++--
 test.py                             |  7 ++-
 3 files changed, 69 insertions(+), 19 deletions(-)

diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py
index 4156678..16908bb 100644
--- a/bing_image_downloader/bing.py
+++ b/bing_image_downloader/bing.py
@@ -4,6 +4,7 @@
 import imghdr
 import posixpath
 import re
+from time import sleep
 
 '''
 Python api to download image form Bing.
@@ -12,7 +13,7 @@
 
 
 class Bing:
-    def __init__(self, query, limit, output_dir, adult, timeout,  filter='', verbose=True):
+    def __init__(self, query, limit, output_dir, adult, timeout,  filter='', verbose=True, error_protection=False):
         self.download_count = 0
         self.query = query
         self.output_dir = output_dir
@@ -20,6 +21,7 @@ def __init__(self, query, limit, output_dir, adult, timeout,  filter='', verbose
         self.filter = filter
         self.verbose = verbose
         self.seen = set()
+        self.error_protection = error_protection
 
         assert type(limit) == int, "limit must be integer"
         self.limit = limit
@@ -28,7 +30,7 @@ def __init__(self, query, limit, output_dir, adult, timeout,  filter='', verbose
 
         # self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0'}
         self.page_counter = 0
-        self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) ' 
+        self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
       'AppleWebKit/537.11 (KHTML, like Gecko) '
       'Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
@@ -62,7 +64,7 @@ def save_image(self, link, file_path):
         with open(str(file_path), 'wb') as f:
             f.write(image)
 
-    
+
     def download_image(self, link):
         self.download_count += 1
         # Get the image link
@@ -72,21 +74,50 @@ def download_image(self, link):
             file_type = filename.split(".")[-1]
             if file_type.lower() not in ["jpe", "jpeg", "jfif", "exif", "tiff", "gif", "bmp", "png", "webp", "jpg"]:
                 file_type = "jpg"
-                
+            downloaded = False
             if self.verbose:
                 # Download the image
                 print("[%] Downloading Image #{} from {}".format(self.download_count, link))
-                
-            self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format(
-                str(self.download_count), file_type)))
-            if self.verbose:
+            delay = 1
+            while self.error_protection:
+                try:
+                    self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format(
+                        str(self.download_count), file_type)))
+                    downloaded = True
+                    break
+                except urllib.error.URLError:
+                    if self.verbose:
+                        print("[%] URLError, sleeping for " + str(delay))
+
+                    # sleeping for 1 second at a time makes it easier to escape out
+                    for i in range(delay):
+                        sleep(1)
+                    delay *= 2
+                    if self.doub_sum(delay) > self.timeout : break
+
+            else:
+                self.save_image(link, self.output_dir.joinpath("Image_{}.{}".format(
+                    str(self.download_count), file_type)))
+                downloaded = True
+            if self.verbose and downloaded:
                 print("[%] File Downloaded !\n")
+            elif self.verbose:
+                print("[%] Timeout exceeded : Persistent Connection Error, File not Downloaded !\n")
+
 
         except Exception as e:
             self.download_count -= 1
             print("[!] Issue getting: {}\n[!] Error:: {}".format(link, e))
 
-    
+    # for calculating the error_protection delay
+    def doub_sum(self, val):
+        sum = 0
+        val = int(val)
+        while val > 0:
+            sum += val
+            val //= 2
+        return sum
+
     def run(self):
         while self.download_count < self.limit:
             if self.verbose:
@@ -96,7 +127,27 @@ def run(self):
                           + '&first=' + str(self.page_counter) + '&count=' + str(self.limit) \
                           + '&adlt=' + self.adult + '&qft=' + ('' if self.filter is None else self.get_filter(self.filter))
             request = urllib.request.Request(request_url, None, headers=self.headers)
-            response = urllib.request.urlopen(request)
+
+            delay = 1
+            while self.error_protection:
+                try:
+                    response = urllib.request.urlopen(request)
+                    break
+                except urllib.error.URLError:
+                    if self.verbose:
+                        print("URLError on page, sleeping for " + str(delay))
+
+                    # sleeping for 1 second at a time makes it easier to escape out
+                    for i in range(delay):
+                        sleep(1.0)
+                    delay *= 2
+                    if self.verbose:
+                        print('\n\n[!!]Retrying page: {}\n'.format(self.page_counter + 1))
+                    if self.doub_sum(delay) > self.timeout :
+                        break
+            else:
+                response = urllib.request.urlopen(request)
+
             html = response.read().decode('utf8')
             if html ==  "":
                 print("[%] No more images are available")
diff --git a/bing_image_downloader/downloader.py b/bing_image_downloader/downloader.py
index 41789dd..eaf9961 100644
--- a/bing_image_downloader/downloader.py
+++ b/bing_image_downloader/downloader.py
@@ -8,8 +8,8 @@
     from .bing import Bing
 
 
-def download(query, limit=100, output_dir='dataset', adult_filter_off=True, 
-force_replace=False, timeout=60, filter="", verbose=True):
+def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
+force_replace=False, timeout=60, filter="", verbose=True, error_protection=False):
 
     # engine = 'bing'
     if adult_filter_off:
@@ -17,7 +17,7 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
     else:
         adult = 'on'
 
-    
+
     image_dir = Path(output_dir).joinpath(query).absolute()
 
     if force_replace:
@@ -32,9 +32,9 @@ def download(query, limit=100, output_dir='dataset', adult_filter_off=True,
     except Exception as e:
         print('[Error]Failed to create directory.', e)
         sys.exit(1)
-        
+
     print("[%] Downloading Images to {}".format(str(image_dir.absolute())))
-    bing = Bing(query, limit, image_dir, adult, timeout, filter, verbose)
+    bing = Bing(query, limit, image_dir, adult, timeout, filter, verbose, error_protection)
     bing.run()
 
 
diff --git a/test.py b/test.py
index e7badfc..14a03a2 100644
--- a/test.py
+++ b/test.py
@@ -7,16 +7,15 @@
     filter=sys.argv[2]
 else:
     filter=""
-    
-            
+
 downloader.download(
     query,
     limit=10,
     output_dir="dataset",
     adult_filter_off=True,
     force_replace=False,
-    timeout=60,
+    timeout=4,
     filter=filter,
     verbose=True,
+    error_protection=True
 )
-

From 41d23ae608c255e164596e595d61bce0e20a1214 Mon Sep 17 00:00:00 2001
From: thomasasia <thomas.harriett@gmail.com>
Date: Mon, 11 Apr 2022 17:59:30 -0400
Subject: [PATCH 2/7] updated README for error protection

---
 README.md | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 7d251db..1f10945 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,14 @@ This package uses async url, which makes it very fast while downloading.<br/>
 ### Disclaimer<br />
 
 This program lets you download tons of images from Bing.
-Please do not download or use any image that violates its copyright terms. 
+Please do not download or use any image that violates its copyright terms.
 
 ### Installation <br />
 ```sh
 pip install bing-image-downloader
 ```
 
-or 
+or
 ```bash
 git clone https://github.com/gurugaurav/bing_image_downloader
 cd bing_image_downloader
@@ -30,7 +30,7 @@ pip install .
 ### Usage <br />
 ```python
 from bing_image_downloader import downloader
-downloader.download(query_string, limit=100,  output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, verbose=True)
+downloader.download(query_string, limit=100,  output_dir='dataset', adult_filter_off=True, force_replace=False, timeout=60, verbose=True, error_protection=False)
 ```
 
 `query_string` : String to be searched.<br />
@@ -41,6 +41,7 @@ downloader.download(query_string, limit=100,  output_dir='dataset', adult_filter
 `timeout` : (optional, default is 60) timeout for connection in seconds.<br />
 `filter` : (optional, default is "") filter, choose from [line, photo, clipart, gif, transparent]<br />
 `verbose` : (optional, default is True) Enable downloaded message.<br />
+`error_protection` : (optional, default is False) Enable protections from url errors, like disconnects.<br />
 
 
 You can also test the programm by runnning `test.py keyword`
@@ -58,7 +59,3 @@ https://pypi.org/project/bing-image-downloader/
 You can buy me a coffee if this project was helpful to you.</br>
 
 [<img src="https://www.buymeacoffee.com/assets/img/guidelines/download-assets-sm-1.svg" alt="Show your support" width="180"/>](https://www.buymeacoffee.com/gurugaurav)
-  
-
-
-

From c953b5009bdfef8e4f1c6fca4fc996a3b93c7954 Mon Sep 17 00:00:00 2001
From: thomasasia <thomas.harriett@gmail.com>
Date: Mon, 11 Apr 2022 17:59:49 -0400
Subject: [PATCH 3/7] spelling correction

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1f10945..ffd4bf2 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ downloader.download(query_string, limit=100,  output_dir='dataset', adult_filter
 `error_protection` : (optional, default is False) Enable protections from url errors, like disconnects.<br />
 
 
-You can also test the programm by runnning `test.py keyword`
+You can also test the program by running `test.py keyword`
 
 
 ### PyPi <br />

From f1c8f9dcc2af767bb176324e6e2f65f71227f9b5 Mon Sep 17 00:00:00 2001
From: thomasasia <thomas.harriett@gmail.com>
Date: Mon, 11 Apr 2022 18:01:07 -0400
Subject: [PATCH 4/7] URLError print formatting

---
 bing_image_downloader/bing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py
index 16908bb..a886e9c 100644
--- a/bing_image_downloader/bing.py
+++ b/bing_image_downloader/bing.py
@@ -135,7 +135,7 @@ def run(self):
                     break
                 except urllib.error.URLError:
                     if self.verbose:
-                        print("URLError on page, sleeping for " + str(delay))
+                        print("[%] URLError on page, sleeping for " + str(delay))
 
                     # sleeping for 1 second at a time makes it easier to escape out
                     for i in range(delay):

From 4d9e1031efcf2e3afab4ab89b1f399a4d1f9e4d1 Mon Sep 17 00:00:00 2001
From: thomasasia <thomas.harriett@gmail.com>
Date: Mon, 11 Apr 2022 19:20:57 -0400
Subject: [PATCH 5/7] added additional error checking

---
 bing_image_downloader/bing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py
index a886e9c..a144d1f 100644
--- a/bing_image_downloader/bing.py
+++ b/bing_image_downloader/bing.py
@@ -147,8 +147,8 @@ def run(self):
                         break
             else:
                 response = urllib.request.urlopen(request)
-
-            html = response.read().decode('utf8')
+            if response:
+                html = response.read().decode('utf8')
             if html ==  "":
                 print("[%] No more images are available")
                 break

From 8f7a5ad2d003604be6b60131f8281767434e8562 Mon Sep 17 00:00:00 2001
From: thomasasia <thomas.harriett@gmail.com>
Date: Mon, 11 Apr 2022 19:23:52 -0400
Subject: [PATCH 6/7] check page response against None explicitly

---
 bing_image_downloader/bing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py
index a144d1f..a8aaa71 100644
--- a/bing_image_downloader/bing.py
+++ b/bing_image_downloader/bing.py
@@ -147,7 +147,7 @@ def run(self):
                         break
             else:
                 response = urllib.request.urlopen(request)
-            if response:
+            if response is not None:
                 html = response.read().decode('utf8')
             if html ==  "":
                 print("[%] No more images are available")

From 5b91575fbf579bac2632df9f7d15cb5dbd204575 Mon Sep 17 00:00:00 2001
From: thomasasia <thomas.harriett@gmail.com>
Date: Mon, 11 Apr 2022 19:28:00 -0400
Subject: [PATCH 7/7] extended timeout for page queries

---
 bing_image_downloader/bing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bing_image_downloader/bing.py b/bing_image_downloader/bing.py
index a8aaa71..59c5569 100644
--- a/bing_image_downloader/bing.py
+++ b/bing_image_downloader/bing.py
@@ -143,7 +143,7 @@ def run(self):
                     delay *= 2
                     if self.verbose:
                         print('\n\n[!!]Retrying page: {}\n'.format(self.page_counter + 1))
-                    if self.doub_sum(delay) > self.timeout :
+                    if self.doub_sum(delay) > max(self.timeout * 4, 30): # pages are very important, so extend the timeout for those
                         break
             else:
                 response = urllib.request.urlopen(request)