Merge branch 'release/0.0.7'

Samstix636 · Jan 24, 2019 · 8229161 · 8229161
2 parents 3045ad0 + 4914155
commit 8229161
Show file tree

Hide file tree

Showing 5 changed files with 39 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -38,10 +38,10 @@ from scrapy_selenium import SeleniumRequest
 
 yield SeleniumRequest(url, self.parse_result)
 ```
-The request will be handled by selenium, and the response will have an additional `meta` key, named `driver` containing the selenium driver with the request processed.
+The request will be handled by selenium, and the request will have an additional `meta` key named `driver`, containing the selenium driver with the request processed.
 ```python
 def parse_result(self, response):
-    print(response.meta['driver'].title)
+    print(response.request.meta['driver'].title)
 ```
 For more information about the available driver methods and attributes, refer to the [selenium python documentation](http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webdriver)
 
@@ -52,7 +52,7 @@ def parse_result(self, response):
 ```
 
 ### Additional arguments
-The `scrapy_selenium.SeleniumRequest` accept 3 additional arguments:
+The `scrapy_selenium.SeleniumRequest` accepts 4 additional arguments:
 
 #### `wait_time` / `wait_until`
 
@@ -80,6 +80,15 @@ yield SeleniumRequest(
 
 def parse_result(self, response):
     with open('image.png', 'wb') as image_file:
-        image_file.write(response.meta['screenshot])
+        image_file.write(response.meta['screenshot'])
 ```
 
+#### `script`
+When used, selenium will execute custom JavaScript code.
+```python
+yield SeleniumRequest(
+    url,
+    self.parse_result,
+    script='window.scrollTo(0, document.body.scrollHeight);',
+)
+```
diff --git a/scrapy_selenium/http.py b/scrapy_selenium/http.py
@@ -6,7 +6,7 @@
 class SeleniumRequest(Request):
     """Scrapy ``Request`` subclass providing additional arguments"""
 
-    def __init__(self, wait_time=None, wait_until=None, screenshot=False, *args, **kwargs):
+    def __init__(self, wait_time=None, wait_until=None, screenshot=False, script=None, *args, **kwargs):
         """Initialize a new selenium request
 
         Parameters
@@ -19,11 +19,14 @@ def __init__(self, wait_time=None, wait_until=None, screenshot=False, *args, **k
         screenshot: bool
             If True, a screenshot of the page will be taken and the data of the screenshot
             will be returned in the response "meta" attribute.
+        script: str
+            JavaScript code to execute.
 
         """
 
         self.wait_time = wait_time
         self.wait_until = wait_until
         self.screenshot = screenshot
+        self.script = script
 
         super().__init__(*args, **kwargs)
diff --git a/scrapy_selenium/middlewares.py b/scrapy_selenium/middlewares.py
@@ -99,6 +99,9 @@ def process_request(self, request, spider):
         if request.screenshot:
             request.meta['screenshot'] = self.driver.get_screenshot_as_png()
 
+        if request.script:
+            self.driver.execute_script(request.script)
+
         body = str.encode(self.driver.page_source)
 
         # Expose the driver via the "meta" attribute

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = scrapy-selenium
-version = 0.0.6
+version = 0.0.7
 url = https://github.com/clemfromspace/scrapy-selenium
 licence = DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
 description = Scrapy with selenium

diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py
@@ -117,3 +117,21 @@ def test_process_request_should_return_a_screenshot_if_screenshot_option(self):
         )
 
         self.assertIsNotNone(html_response.meta['screenshot'])
+
+    def test_process_request_should_execute_script_if_script_option(self):
+        """Test that the ``process_request`` should execute the script and return a response"""
+
+        selenium_request = SeleniumRequest(
+            url='http://www.python.org',
+            script='document.title = "scrapy_selenium";'
+        )
+
+        html_response = self.selenium_middleware.process_request(
+            request=selenium_request,
+            spider=None
+        )
+
+        self.assertEqual(
+            html_response.selector.xpath('//title/text()').extract_first(),
+            'scrapy_selenium'
+        )