Skip to content

Commit

Permalink
find(text) and find_all(text) also search through iframes. NOTE: issue…
Browse files Browse the repository at this point in the history
… tracker is off as long as this is an RC
  • Loading branch information
ultrafunkamsterdam committed Feb 21, 2024
1 parent 4343483 commit 54560b4
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 18 deletions.
10 changes: 10 additions & 0 deletions nodriver/core/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,8 @@ async def get_all(
continue
connection = tab
break
else:
connection = self._browser.connection
cookies = await connection.send(cdp.storage.get_cookies())
if requests_cookie_format:
import requests.cookies
Expand Down Expand Up @@ -670,6 +672,8 @@ async def set_all(self, cookies: List[cdp.network.CookieParam]):
continue
connection = tab
break
else:
connection = self._browser.connection
cookies = await connection.send(cdp.storage.get_cookies())
await connection.send(cdp.storage.set_cookies(cookies))

Expand Down Expand Up @@ -701,6 +705,8 @@ async def save(self, file: PathLike = ".session.dat", pattern: str = ".*"):
continue
connection = tab
break
else:
connection = self._browser.connection
cookies = await connection.send(cdp.storage.get_cookies())
# if not connection:
# return
Expand Down Expand Up @@ -752,6 +758,8 @@ async def load(self, file: PathLike = ".session.dat", pattern: str = ".*"):
continue
connection = tab
break
else:
connection = self._browser.connection
cookies = await connection.send(cdp.storage.get_cookies())
for cookie in cookies:
for match in pattern.finditer(str(cookie.__dict__)):
Expand Down Expand Up @@ -780,6 +788,8 @@ async def clear(self):
continue
connection = tab
break
else:
connection = self._browser.connection
cookies = await connection.send(cdp.storage.get_cookies())
await connection.send(cdp.storage.clear_cookies())

Expand Down
41 changes: 24 additions & 17 deletions nodriver/core/tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,11 +355,11 @@ async def query_selector_all(
if _node.node_name == "IFRAME":
doc = _node.content_document
node_ids = []

try:
node_ids = await self.send(
cdp.dom.query_selector_all(doc.node_id, selector)
)
# await self.send(cdp.dom.disable())

except ProtocolException as e:
if _node is not None:
Expand All @@ -375,6 +375,7 @@ async def query_selector_all(
)
return await self.query_selector_all(selector, _node)
else:
await self.send(cdp.dom.disable())
raise
if not node_ids:
return []
Expand Down Expand Up @@ -412,9 +413,10 @@ async def query_selector(
if _node.node_name == "IFRAME":
doc = _node.content_document
node_id = None

try:
node_id = await self.send(cdp.dom.query_selector(doc.node_id, selector))
# await self.send(cdp.dom.disable())

except ProtocolException as e:
if _node is not None:
if "could not find node" in e.message.lower():
Expand All @@ -429,6 +431,7 @@ async def query_selector(
)
return await self.query_selector(selector, _node)
else:
await self.send(cdp.dom.disable())
raise
if not node_id:
return
Expand Down Expand Up @@ -463,7 +466,7 @@ async def find_elements_by_text(
node_ids = []

await self.send(cdp.dom.discard_search_results(search_id))
await self.send(cdp.dom.disable())

results = []
for nid in node_ids:
node = util.filter_recurse(doc, lambda n: n.node_id == nid)
Expand Down Expand Up @@ -509,6 +512,7 @@ async def find_elements_by_text(
iframe_text_elems = [element.create(text_node, self, iframe_elem.tree) for text_node in
iframe_text_nodes]
results.extend(text_node.parent for text_node in iframe_text_elems)
await self.send(cdp.dom.disable())
return results or []

async def find_element_by_text(
Expand Down Expand Up @@ -539,7 +543,7 @@ async def find_element_by_text(
# return
node_ids = await self.send(cdp.dom.get_search_results(search_id, 0, nresult))
await self.send(cdp.dom.discard_search_results(search_id))
await self.send(cdp.dom.disable())

if not node_ids:
node_ids = []
results = []
Expand Down Expand Up @@ -580,20 +584,23 @@ async def find_element_by_text(
if iframe_text_nodes:
iframe_text_elems = [element.create(text_node, self, iframe_elem.tree) for text_node in iframe_text_nodes]
results.extend(text_node.parent for text_node in iframe_text_elems)
if not results:
return
if best_match:
closest_by_length = min(
results, key=lambda el: abs(len(text) - len(el.text_all))
)
elem = closest_by_length or results[0]
try:
if not results:
return
if best_match:
closest_by_length = min(
results, key=lambda el: abs(len(text) - len(el.text_all))
)
elem = closest_by_length or results[0]

return elem
else:
# naively just return the first result
for elem in results:
if elem:
return elem
return elem
else:
# naively just return the first result
for elem in results:
if elem:
return elem
finally:
await self.send(cdp.dom.disable())

async def back(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion nodriver/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def deconstruct_browser():
break
time.sleep(0.15)
continue
print("successfully removed temp profile %s", _.config.user_data_dir)
print("successfully removed temp profile %s" % _.config.user_data_dir)


def filter_recurse_all(
Expand Down

0 comments on commit 54560b4

Please sign in to comment.