From 58ee4f8fdc75cbedd3df80add484b4597955889a Mon Sep 17 00:00:00 2001 From: Arthur Taylor Date: Tue, 26 May 2020 19:50:49 +0200 Subject: [PATCH] Add reference user agent for ebay crawler --- flathunter/crawl_ebaykleinanzeigen.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flathunter/crawl_ebaykleinanzeigen.py b/flathunter/crawl_ebaykleinanzeigen.py index f62da741..acaeb593 100644 --- a/flathunter/crawl_ebaykleinanzeigen.py +++ b/flathunter/crawl_ebaykleinanzeigen.py @@ -6,6 +6,7 @@ class CrawlEbayKleinanzeigen: __log__ = logging.getLogger(__name__) + USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0' URL_PATTERN = re.compile(r'https://www\.ebay-kleinanzeigen\.de') def __init__(self): @@ -23,7 +24,7 @@ def get_results(self, search_url): return entries def get_page(self, search_url): - resp = requests.get(search_url) # TODO add page_no in url + resp = requests.get(search_url, headers={'User-Agent': self.USER_AGENT}) # TODO add page_no in url if resp.status_code != 200: self.__log__.error("Got response (%i): %s" % (resp.status_code, resp.content)) return BeautifulSoup(resp.content, 'html5lib') @@ -80,7 +81,7 @@ def extract_data(self, soup): @staticmethod def load_address(url): # extract address from expose itself - expose_html = requests.get(url).content + expose_html = requests.get(url, headers={'User-Agent': CrawlEbayKleinanzeigen.USER_AGENT}).content expose_soup = BeautifulSoup(expose_html, 'html.parser') try: street_raw = expose_soup.find(id="street-address").text