Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rotate through all given proxies before letting yagoogle handle the waiting #70

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 123 additions & 37 deletions pagodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,75 @@
ROOT_LOGGER.addHandler(console_handler)


def set_next_non_timeout_proxy(proxy_timeouts: dict, proxy_rotation_index: int, proxy: str,
                               proxies: list,
                               proxy_index: int, client: yagooglesearch.SearchClient):
    """Advance to the next proxy that is not currently in a timeout window.

    Rotates through `proxies` until one is found whose entry in `proxy_timeouts`
    is falsy (i.e. not blocked), updating `client.proxy` along the way.  If every
    proxy is found to be blocked, falls back to letting yagooglesearch manage
    HTTP 429 responses itself.

    Args:
        proxy_timeouts: Maps each proxy string to either None (usable) or a
            datetime before which the proxy should not be used.
            NOTE(review): caller appears to set these to now + 1 hour on a
            429 — confirm against the call site.
        proxy_rotation_index: Monotonically increasing rotation counter; taken
            modulo len(proxies) to pick the next proxy.
        proxy: The proxy currently selected (possibly blocked).
        proxies: All configured proxy strings.
        proxy_index: Index of `proxy` within `proxies`.
        client: The yagooglesearch client whose `proxy` attribute is mutated
            in place as rotation proceeds.

    Returns:
        The (possibly updated) tuple
        (proxy_timeouts, proxy_rotation_index, proxy, proxies, proxy_index, client).
    """
    # Counts rotation attempts; once it reaches the number of tracked proxies,
    # every proxy has been visited and all are considered blocked.
    timeout_counter = 0
    # If all proxies have been blocked run yagoogle with automated 429 handling
    if proxy_timeouts[proxy]:
        if proxy_timeouts[proxy] > datetime.datetime.now():
            # Skipping proxy because it was bad less than an hour ago
            while proxy_timeouts[proxy] > datetime.datetime.now():
                timeout_counter += 1
                if timeout_counter == len(proxy_timeouts):
                    # Every proxy is in timeout: stop rotating and delegate
                    # 429 back-off handling to yagooglesearch itself.
                    ROOT_LOGGER.warning("All proxies are blocked by Google.")
                    client.yagooglesearch_manages_http_429s = True
                    break
                elif proxy_index == len(proxies) - 1:
                    # Wrapped past the end of the list; restart the rotation.
                    proxy_rotation_index = 0
                # Select the next proxy in round-robin order and point the
                # client at it immediately.
                proxy_index = proxy_rotation_index % len(proxies)
                proxy = proxies[proxy_index]
                proxy_rotation_index += 1
                client.proxy = proxy
                if not proxy_timeouts[proxy]:
                    # Found a proxy with no recorded timeout — use it.
                    # (This break also prevents the `while` condition from
                    # comparing None against a datetime.)
                    break
    return proxy_timeouts, proxy_rotation_index, proxy, proxies, proxy_index, client


def calculate_random_waittimes(minimum_delay_between_dork_searches_in_seconds,
                               maximum_delay_between_dork_searches_in_seconds,
                               number_of_values=20):
    """Generate a sorted list of random inter-search delay values.

    A random value from this list is selected between each different Google
    dork search.

    1) Generate `number_of_values` random values between
       minimum_delay_between_dork_searches_in_seconds and
       maximum_delay_between_dork_searches_in_seconds
    2) Round those values to the tenths place
    3) Sort the list ascending

    Args:
        minimum_delay_between_dork_searches_in_seconds: Lower bound for each
            random delay.
        maximum_delay_between_dork_searches_in_seconds: Upper bound for each
            random delay.
        number_of_values: How many delay values to generate (default 20,
            matching the original behavior).

    Returns:
        A sorted list of `number_of_values` floats, each rounded to one
        decimal place.
    """
    # A comprehension replaces the original list(map(lambda ...)) chain —
    # same values, same rounding, same ordering.
    return sorted(
        round(
            random.uniform(
                minimum_delay_between_dork_searches_in_seconds,
                maximum_delay_between_dork_searches_in_seconds,
            ),
            1,
        )
        for _ in range(number_of_values)
    )


class Pagodo:
"""Pagodo class object"""

def __init__(
self,
google_dorks_file,
domain="",
max_search_result_urls_to_return_per_dork=100,
save_pagodo_results_to_json_file=None, # None = Auto-generate file name, otherwise pass a string for path and filename.
proxies="",
save_urls_to_file=None, # None = Auto-generate file name, otherwise pass a string for path and filename.
minimum_delay_between_dork_searches_in_seconds=37,
maximum_delay_between_dork_searches_in_seconds=60,
disable_verify_ssl=False,
verbosity=4,
self,
google_dorks_file,
domain="",
max_search_result_urls_to_return_per_dork=100,
save_pagodo_results_to_json_file=None, # None = Auto-generate file name, otherwise pass a string for path and filename.
proxies="",
save_urls_to_file=None, # None = Auto-generate file name, otherwise pass a string for path and filename.
minimum_delay_between_dork_searches_in_seconds=37,
maximum_delay_between_dork_searches_in_seconds=60,
disable_verify_ssl=False,
verbosity=4,
divide_waittime_by_proxies=False,
):
"""Initialize Pagodo class object."""

Expand Down Expand Up @@ -88,35 +142,23 @@ def __init__(
self.save_pagodo_results_to_json_file = save_pagodo_results_to_json_file
self.proxies = proxies.strip().strip(",").split(",")
self.save_urls_to_file = save_urls_to_file
self.minimum_delay_between_dork_searches_in_seconds = minimum_delay_between_dork_searches_in_seconds
self.maximum_delay_between_dork_searches_in_seconds = maximum_delay_between_dork_searches_in_seconds
self.divide_waittime_by_proxies = divide_waittime_by_proxies
if self.divide_waittime_by_proxies and proxies:
self.original_minimum_delay_between_dork_searches_in_seconds = minimum_delay_between_dork_searches_in_seconds
self.original_maximum_delay_between_dork_searches_in_seconds = maximum_delay_between_dork_searches_in_seconds
self.minimum_delay_between_dork_searches_in_seconds = minimum_delay_between_dork_searches_in_seconds / len(
self.proxies)
self.maximum_delay_between_dork_searches_in_seconds = maximum_delay_between_dork_searches_in_seconds / len(
self.proxies)
else:
self.minimum_delay_between_dork_searches_in_seconds = minimum_delay_between_dork_searches_in_seconds
self.maximum_delay_between_dork_searches_in_seconds = maximum_delay_between_dork_searches_in_seconds
self.disable_verify_ssl = disable_verify_ssl
self.verbosity = verbosity

# Fancy way of generating a list of 20 random values between minimum_delay_between_dork_searches_in_seconds and
# maximum_delay_between_dork_searches_in_seconds. A random value is selected between each different Google
# dork search.
"""
1) Generate a random list of values between minimum_delay_between_dork_searches_in_seconds and
maximum_delay_between_dork_searches_in_seconds
2) Round those values to the tenths place
3) Re-cast as a list
4) Sort the list
"""
self.delay_between_dork_searches_list = sorted(
list(
map(
lambda x: round(x, 1),
[
random.uniform(
minimum_delay_between_dork_searches_in_seconds,
maximum_delay_between_dork_searches_in_seconds,
)
for _ in range(20)
],
)
)
)
self.delay_between_dork_searches_list = calculate_random_waittimes(
self.minimum_delay_between_dork_searches_in_seconds,
self.maximum_delay_between_dork_searches_in_seconds)

self.base_file_name = f'pagodo_results_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}'
self.total_urls_found = 0
Expand Down Expand Up @@ -152,6 +194,8 @@ def go(self):
"completion_timestamp": "",
}

proxy_timeouts = {self.proxies[i]: None for i in range(len(self.proxies))}

for dork in self.google_dorks:

self.pagodo_results_dict["dorks"][dork] = {
Expand Down Expand Up @@ -206,8 +250,16 @@ def go(self):
proxy=proxy,
verify_ssl=not self.disable_verify_ssl,
verbosity=self.verbosity,
yagooglesearch_manages_http_429s=False,
)

if proxy_timeouts[proxy]:
if proxy_timeouts[proxy] > datetime.datetime.now():
proxy_timeouts, self.proxy_rotation_index, proxy, self.proxies, proxy_index, client = \
set_next_non_timeout_proxy(proxy_timeouts, self.proxy_rotation_index,
proxy, self.proxies,
proxy_index, client)

# Randomize the user agent for best results.
client.assign_random_user_agent()

Expand All @@ -218,6 +270,31 @@ def go(self):

dork_urls_list = client.search()

if self.proxies and "HTTP_429_DETECTED" in dork_urls_list:
# Check if there are proxies and an error was detected by yagooglesearch

while "HTTP_429_DETECTED" in dork_urls_list:
proxy_timeouts[proxy] = datetime.datetime.now() + datetime.timedelta(
hours=1)

good_proxies = sum(1 for value in proxy_timeouts.values() if value is None)
if self.divide_waittime_by_proxies and self.proxies and good_proxies > 0:
self.minimum_delay_between_dork_searches_in_seconds = \
self.original_minimum_delay_between_dork_searches_in_seconds / good_proxies
self.maximum_delay_between_dork_searches_in_seconds = \
self.original_maximum_delay_between_dork_searches_in_seconds / good_proxies
self.delay_between_dork_searches_list = calculate_random_waittimes(
self.minimum_delay_between_dork_searches_in_seconds,
self.maximum_delay_between_dork_searches_in_seconds)

ROOT_LOGGER.warning(
f"Proxy {proxy} is blocked by Google, trying the next one")
proxy_timeouts, self.proxy_rotation_index, proxy, self.proxies, proxy_index, client = \
set_next_non_timeout_proxy(proxy_timeouts, self.proxy_rotation_index,
proxy,
self.proxies, proxy_index, client)
dork_urls_list = client.search()

# Remove any exploit-db.com URLs.
for url in dork_urls_list:
# Ignore results from specific URLs like exploit-db.com, cert.org, and GHDB's Twitter account that
Expand Down Expand Up @@ -405,6 +482,15 @@ def _split_lines(self, text, width):
default=4,
help="Verbosity level (0=NOTSET, 1=CRITICAL, 2=ERROR, 3=WARNING, 4=INFO, 5=DEBUG). Default: 4",
)
parser.add_argument(
"-w",
nargs="?",
metavar="",
dest="divide_waittime_by_proxies",
action="store",
default=False,
help="Divide the wait time between requests by the number of proxies that have not been blocked."
)

args = parser.parse_args()

Expand Down