Skip to content

Commit

Permalink
Merge pull request #23 from ClericPy/dev
Browse files Browse the repository at this point in the history
2.1.2
  • Loading branch information
ClericPy authored Jun 5, 2020
2 parents 2d55db2 + 1834f36 commit 3cd5a91
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 40 deletions.
26 changes: 15 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[ichrome](https://github.com/ClericPy/ichrome) [![PyPI](https://img.shields.io/pypi/v/ichrome?style=plastic)](https://pypi.org/project/ichrome/)![PyPI - Wheel](https://img.shields.io/pypi/wheel/ichrome?style=plastic)![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ichrome?style=plastic)![PyPI - Downloads](https://img.shields.io/pypi/dm/ichrome?style=plastic)![PyPI - License](https://img.shields.io/pypi/l/ichrome?style=plastic)
==============================================

> A connector to control Chrome browser ([Chrome Devtools Protocol(CDP)](https://chromedevtools.github.io/devtools-protocol/)), for python3.7+.
> Chrome controller for Humans, based on the [Chrome Devtools Protocol(CDP)](https://chromedevtools.github.io/devtools-protocol/) and python3.7+.
# Install

Expand All @@ -17,8 +17,9 @@
# Why?

- pyppeteer / selenium is awesome, but I don't need so much
- spelling of pyppeteer is confused.
- selenium is slow.
- the spelling of pyppeteer is confusing, and event-driven programming is not always advisable.
- selenium is slow
- webdrivers often come with memory leaks.
- async communication with Chrome remote debug port, stable choice. [Recommended]
- sync way to test CDP, which is not recommended for complex production environments. [Deprecated]
- **ichrome.debugger** is a sync tool and depends on the `ichrome.async_utils`, which may be a better choice.
Expand All @@ -27,32 +28,35 @@
# Features

- Chrome process daemon
- Connect to existing chrome debug port
- Operations on Tabs
- auto restart
- command-line usage
- async environment compatible
- Connect to an existing Chrome
- Some magic operations on Tabs


<details>
<summary><b>AsyncChrome feature list</b></summary>

1. server
> return `f"http://{self.host}:{self.port}"`, such as `http://127.0.0.1:9222`
1. version
2. version
> version info from `/json/version` format like:
```
{'Browser': 'Chrome/77.0.3865.90', 'Protocol-Version': '1.3', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36', 'V8-Version': '7.7.299.11', 'WebKit-Version': '537.36 (@58c425ba843df2918d9d4b409331972646c393dd)', 'webSocketDebuggerUrl': 'ws://127.0.0.1:9222/devtools/browser/b5fbd149-959b-4603-b209-cfd26d66bdc1'}
```
1. `connect` / `check` / `ok`
3. `connect` / `check` / `ok`
> check alive
1. `get_tabs` / `tabs` / `get_tab` / `get_tabs`
4. `get_tabs` / `tabs` / `get_tab` / `get_tabs`
> get the `AsyncTab` instance from `/json`.
1. `new_tab` / `activate_tab` / `close_tab` / `close_tabs`
5. `new_tab` / `activate_tab` / `close_tab` / `close_tabs`
> operating tabs.
1. `close_browser`
6. `close_browser`
> find the activated tab and send `Browser.close` message, close the connected chrome browser gracefully.
```python
await chrome.close_browser()
```
1. `kill`
7. `kill`
> force kill the chrome process with self.port.
```python
await chrome.kill()
Expand Down
5 changes: 5 additions & 0 deletions examples_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ async def test_tab_js(tab: Tab):
assert await tab.reload()
await tab.wait_loading(2)
assert len(await tab.current_html) > 1000
# test wait tags
result = await tab.wait_tags('.python-logo1', max_wait_time=1)
assert result == []
result = await tab.wait_tags('.python-logo', max_wait_time=3)
assert result


async def test_wait_response(tab: Tab):
Expand Down
2 changes: 1 addition & 1 deletion ichrome/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .logs import logger
from .sync_utils import Chrome, Tab

__version__ = "2.1.1"
__version__ = "2.1.2"
__tips__ = "[github]: https://github.com/ClericPy/ichrome\n[cdp]: https://chromedevtools.github.io/devtools-protocol/\n[cmd args]: https://peter.sh/experiments/chromium-command-line-switches/"
__all__ = [
'Chrome', 'ChromeDaemon', 'Tab', 'Tag', 'AsyncChrome', 'AsyncTab', 'logger',
Expand Down
66 changes: 53 additions & 13 deletions ichrome/async_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ async def _recv_daemon(self):
if self._log_all_recv:
logger.debug(f'[recv] {self!r} {msg}')
if msg.type in (WSMsgType.CLOSED, WSMsgType.ERROR):
# Message size xxxx exceeds limit 4194304: reset the max_msg_size(default=4*1024*1024) in Tab.ws_kwargs
logger.error(
f'Receive the {msg.type!r} message which break the recv daemon: "{msg.data}"'
)
break
if msg.type != WSMsgType.TEXT:
continue
Expand Down Expand Up @@ -587,16 +591,24 @@ async def stop_loading_page(self, timeout=0):
'''Page.stopLoading'''
return await self.send("Page.stopLoading", timeout=timeout)

async def wait_loading(self,
timeout: Union[int, float] = None,
callback_function: Optional[Callable] = None,
timeout_stop_loading=False) -> Union[dict, None]:
'''Page.loadEventFired event for page loaded.'''
async def wait_loading(
        self,
        timeout: Union[int, float] = None,
        callback_function: Optional[Callable] = None,
        timeout_stop_loading=False) -> Union[dict, None, bool]:
    '''Wait for the Page.loadEventFired event of the current page.

    timeout: forwarded to self.wait_event.
    callback_function: forwarded to self.wait_event.
    timeout_stop_loading: when no event was received, stop the page
        loading and report False instead of None.

    Returns the value from self.wait_event when it is not None.
    When it is None: stop loading and return False if
    timeout_stop_loading is truthy, otherwise return None.'''
    event = await self.wait_event("Page.loadEventFired",
                                  timeout=timeout,
                                  callback_function=callback_function)
    # Guard clause: either the event arrived, or the caller did not ask
    # for the page load to be aborted.
    if event is not None or not timeout_stop_loading:
        return event
    await self.stop_loading_page()
    return False

async def wait_page_loading(self,
Expand Down Expand Up @@ -796,7 +808,7 @@ async def set_ua(self,
timeout=timeout)
return data

async def goto_history(self, entryId: int = 0, timeout=None):
async def goto_history(self, entryId: int = 0, timeout=None) -> bool:
result = await self.send('Page.navigateToHistoryEntry',
entryId=entryId,
timeout=timeout)
Expand Down Expand Up @@ -841,7 +853,7 @@ async def get_history_list(self, timeout=None) -> dict:
result = await self.send('Page.getNavigationHistory', timeout=timeout)
return self.get_data_value(result, path='result', default={})

async def reset_history(self, timeout=None):
async def reset_history(self, timeout=None) -> bool:
    '''Send Page.resetNavigationHistory and return the verdict of
    self.check_error for the response.'''
    response = await self.send('Page.resetNavigationHistory',
                               timeout=timeout)
    return self.check_error('reset_history', response)

Expand Down Expand Up @@ -891,7 +903,10 @@ async def js(self,
timeout=timeout,
expression=javascript)

async def handle_dialog(self, accept=True, promptText=None, timeout=None):
async def handle_dialog(self,
accept=True,
promptText=None,
timeout=None) -> bool:
kwargs = {'timeout': timeout, 'accept': accept}
if promptText is not None:
kwargs['promptText'] = promptText
Expand All @@ -901,6 +916,33 @@ async def handle_dialog(self, accept=True, promptText=None, timeout=None):
accept=accept,
promptText=promptText)

async def wait_tags(self,
                    cssselector: str,
                    interval=1,
                    max_wait_time=None,
                    timeout=None) -> Union[None, List['Tag']]:
    '''Poll querySelectorAll until it yields tags or the time budget is spent.

    Sometimes this is more useful than waiting for the page load event.

    cssselector: css selector used to query the Tags.
    interval: seconds to sleep between two polls.
    max_wait_time: total seconds to keep polling; None (default) means
        poll until the selector matches.
    timeout: timeout seconds for sending one message, defaults to
        self.timeout.

    Returns the first truthy result of self.querySelectorAll. If the
    budget is used up first, returns the result of the last poll (a
    falsy value such as [] or None), or [] when no poll was made.
    '''
    # Only compute a deadline when a budget was given: adding None to
    # time.time() raises TypeError, which used to break the default call.
    deadline = None if max_wait_time is None else time.time() + max_wait_time
    timeout = self.timeout if timeout is None else timeout
    tags = []
    while deadline is None or time.time() < deadline:
        tags = await self.querySelectorAll(cssselector=cssselector,
                                           timeout=timeout)
        if tags:
            break
        await asyncio.sleep(interval)
    return tags

async def querySelector(self,
cssselector: str,
action: Union[None, str] = None,
Expand Down Expand Up @@ -1000,10 +1042,8 @@ async def querySelectorAll(self,
else:
return result
except Exception as e:
logger.error(f"querySelectorAll error: {e}, response: {response}")
if isinstance(index, int):
return TagNotFound()
return []
logger.error(f"querySelectorAll error: {e!r}, response: {response}")
return None

async def inject_js(self, *args, **kwargs):
# for compatible
Expand Down Expand Up @@ -1113,7 +1153,7 @@ async def add_js_onload(self, source: str, **kwargs) -> str:
**kwargs)
return self.get_data_value(data, path='result.identifier') or ''

async def remove_js_onload(self, identifier: str, timeout=None):
async def remove_js_onload(self, identifier: str, timeout=None) -> bool:
'''Page.removeScriptToEvaluateOnNewDocument, return whether the identifier exist.'''
result = await self.send('Page.removeScriptToEvaluateOnNewDocument',
identifier=identifier,
Expand Down
48 changes: 35 additions & 13 deletions ichrome/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,19 +258,38 @@ def check_chrome_ready(self):
logger.error(f"launch_chrome failed: {self}, args: {self.cmd}")
return False

def _ensure_port_free(self):
for _ in range(3):
try:
sock = socket.socket()
sock.settimeout(self._timeout)
sock.connect((self.host, self.port))
logger.info(f"shutting down chrome using port {self.port}")
self.kill(True)
continue
except (ConnectionRefusedError, socket.timeout):
@classmethod
def get_free_port(cls,
                  host="127.0.0.1",
                  start=9222,
                  max_tries=100,
                  timeout=1):
    '''Return the first port in [start, start + max_tries) that looks free.

    host: host to probe.
    start: first port number to try.
    max_tries: how many consecutive ports to probe.
    timeout: seconds allowed for each probe.

    Raises RuntimeError when none of the probed ports is free.
    '''
    for offset in range(max_tries):
        port = start + offset
        # Despite its name, _check_host_port_in_use returns True when the
        # connection is refused or times out, i.e. when the port is free.
        if cls._check_host_port_in_use(host, port, timeout):
            return port
    raise RuntimeError(
        f'No free port between {start} and {start + max_tries - 1}')

@staticmethod
def _check_host_port_in_use(host="127.0.0.1", port=9222, timeout=1):
    '''Probe host:port with a TCP connect.

    NOTE: the return value is inverted relative to the name. It is
    True when the connection is refused or times out (nothing is
    listening, so the port can be used) and False when the connection
    succeeds (the port is occupied).

    host: host to connect to.
    port: port to connect to.
    timeout: socket timeout in seconds for the probe.
    '''
    try:
        # The context manager closes the socket on every path, and a
        # failure to create the socket is no longer masked by an
        # UnboundLocalError from a `finally: sock.close()`.
        with socket.socket() as sock:
            sock.settimeout(timeout)
            sock.connect((host, port))
        return False
    except (ConnectionRefusedError, socket.timeout):
        return True

def _ensure_port_free(self, max_tries=3):
for _ in range(max_tries):
ok = self._check_host_port_in_use(self.host, self.port,
self._timeout)
if ok:
return True
finally:
sock.close()
logger.info(f"shutting down chrome using port {self.port}")
self.kill(True)
else:
raise ValueError("port in used")

Expand All @@ -290,7 +309,10 @@ def _get_default_path():
return path
else:
if current_platform == 'Linux':
paths = ["google-chrome", "google-chrome-stable"]
paths = [
"google-chrome", "google-chrome-stable",
"google-chrome-beta", "google-chrome-dev"
]
elif current_platform == 'Darwin':
paths = [
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
install_requires = f.read().strip().splitlines()
with open("README.md", encoding="utf-8") as f:
README = f.read()
desc = "A toolkit for using chrome browser with the [Chrome Devtools Protocol(CDP)](https://chromedevtools.github.io/devtools-protocol/), support python3.7+. Read more: https://github.com/ClericPy/ichrome."
desc = "Chrome controller for Humans, base on Chrome Devtools Protocol(CDP) and python3.7+. Read more: https://github.com/ClericPy/ichrome."
setup(
name="ichrome",
version=version,
keywords=("chrome"),
keywords=['chrome', 'Chrome Devtools Protocol', 'daemon', 'CDP', 'browser'],
description=desc,
license="MIT License",
install_requires=install_requires,
Expand Down

0 comments on commit 3cd5a91

Please sign in to comment.