Skip to content

Commit

Permalink
Merge branch 'release/0.0.7'
Browse files Browse the repository at this point in the history
  • Loading branch information
clemfromspace committed Jan 24, 2019
2 parents 3045ad0 + 4914155 commit 8229161
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 6 deletions.
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ from scrapy_selenium import SeleniumRequest

yield SeleniumRequest(url, self.parse_result)
```
The request will be handled by selenium, and the response will have an additional `meta` key, named `driver` containing the selenium driver with the request processed.
The request will be handled by selenium, and its `meta` will have an additional key, named `driver`, containing the selenium driver with the request processed.
```python
def parse_result(self, response):
print(response.meta['driver'].title)
print(response.request.meta['driver'].title)
```
For more information about the available driver methods and attributes, refer to the [selenium python documentation](http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webdriver)

Expand All @@ -52,7 +52,7 @@ def parse_result(self, response):
```

### Additional arguments
The `scrapy_selenium.SeleniumRequest` accept 3 additional arguments:
The `scrapy_selenium.SeleniumRequest` accepts 4 additional arguments:

#### `wait_time` / `wait_until`

Expand Down Expand Up @@ -80,6 +80,15 @@ yield SeleniumRequest(

def parse_result(self, response):
with open('image.png', 'wb') as image_file:
image_file.write(response.meta['screenshot])
image_file.write(response.meta['screenshot'])
```

#### `script`
When set, selenium will execute the given JavaScript code on the loaded page before the page source is captured for the response.
```python
yield SeleniumRequest(
url,
self.parse_result,
script='window.scrollTo(0, document.body.scrollHeight);',
)
```
5 changes: 4 additions & 1 deletion scrapy_selenium/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class SeleniumRequest(Request):
"""Scrapy ``Request`` subclass providing additional arguments"""

def __init__(self, wait_time=None, wait_until=None, screenshot=False, *args, **kwargs):
def __init__(self, wait_time=None, wait_until=None, screenshot=False, script=None, *args, **kwargs):
"""Initialize a new selenium request
Parameters
Expand All @@ -19,11 +19,14 @@ def __init__(self, wait_time=None, wait_until=None, screenshot=False, *args, **k
screenshot: bool
If True, a screenshot of the page will be taken and the data of the screenshot
will be returned in the response "meta" attribute.
script: str
JavaScript code to execute.
"""

self.wait_time = wait_time
self.wait_until = wait_until
self.screenshot = screenshot
self.script = script

super().__init__(*args, **kwargs)
3 changes: 3 additions & 0 deletions scrapy_selenium/middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ def process_request(self, request, spider):
if request.screenshot:
request.meta['screenshot'] = self.driver.get_screenshot_as_png()

if request.script:
self.driver.execute_script(request.script)

body = str.encode(self.driver.page_source)

# Expose the driver via the "meta" attribute
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = scrapy-selenium
version = 0.0.6
version = 0.0.7
url = https://github.com/clemfromspace/scrapy-selenium
licence = DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
description = Scrapy with selenium
Expand Down
18 changes: 18 additions & 0 deletions tests/test_middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,21 @@ def test_process_request_should_return_a_screenshot_if_screenshot_option(self):
)

self.assertIsNotNone(html_response.meta['screenshot'])

def test_process_request_should_execute_script_if_script_option(self):
    """Check that ``process_request`` runs the request's script and returns a response.

    The script rewrites the document title, so the ``<title>`` text found in
    the returned response must be the value set by the script.
    """
    request = SeleniumRequest(
        url='http://www.python.org',
        script='document.title = "scrapy_selenium";',
    )

    response = self.selenium_middleware.process_request(request=request, spider=None)

    # The title is read from the response body, not from the live driver.
    title = response.selector.xpath('//title/text()').extract_first()
    self.assertEqual(title, 'scrapy_selenium')

0 comments on commit 8229161

Please sign in to comment.