Skip to content

Commit

Permalink
Merge pull request NodyHub#8 from codders/feat/improve-user-agent-ebay
Browse files Browse the repository at this point in the history
Add reference user agent for ebay crawler
  • Loading branch information
codders authored May 27, 2020
2 parents 5438b81 + 58ee4f8 commit 77f163f
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions flathunter/crawl_ebaykleinanzeigen.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

class CrawlEbayKleinanzeigen:
__log__ = logging.getLogger(__name__)
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'
URL_PATTERN = re.compile(r'https://www\.ebay-kleinanzeigen\.de')

def __init__(self):
Expand All @@ -23,7 +24,7 @@ def get_results(self, search_url):
return entries

def get_page(self, search_url):
    """Fetch the search-results page at *search_url* and return it parsed.

    Sends the class's reference USER_AGENT header (per this commit,
    presumably because the default requests user agent is rejected by
    ebay-kleinanzeigen — TODO confirm).

    :param search_url: full URL of the listings search page to fetch
    :return: a BeautifulSoup tree of the response body (html5lib parser)
    """
    # NOTE: the scraped diff contained both the pre- and post-change request
    # line; only the updated one (with the User-Agent header) is kept here.
    # TODO add page_no in url
    resp = requests.get(search_url, headers={'User-Agent': self.USER_AGENT})
    if resp.status_code != 200:
        # Lazy %-style args so formatting is skipped when the level is disabled.
        self.__log__.error("Got response (%i): %s", resp.status_code, resp.content)
    return BeautifulSoup(resp.content, 'html5lib')
Expand Down Expand Up @@ -80,7 +81,7 @@ def extract_data(self, soup):
@staticmethod
def load_address(url):
# extract address from expose itself
expose_html = requests.get(url).content
expose_html = requests.get(url, headers={'User-Agent': CrawlEbayKleinanzeigen.USER_AGENT}).content
expose_soup = BeautifulSoup(expose_html, 'html.parser')
try:
street_raw = expose_soup.find(id="street-address").text
Expand Down

0 comments on commit 77f163f

Please sign in to comment.