Skip to content

Commit

Permalink
Fix compatibility with newer lxml versions
Browse files Browse the repository at this point in the history
  • Loading branch information
ttys0dev committed Jan 8, 2024
1 parent ee46665 commit 73c2c3c
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 3 deletions.
3 changes: 2 additions & 1 deletion juriscraper/lib/html_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def get_html5_parsed_text(text: str) -> HtmlElement:
:param text: The html of the document
:return: an lxml.HtmlElement object
"""
- parsed = html5parser.document_fromstring(text)
+ parser = html5parser.HTMLParser(namespaceHTMLElements=False)
+ parsed = html5parser.document_fromstring(text, parser=parser)
return fromstring(tostring(parsed, encoding="unicode"))


Expand Down
4 changes: 3 additions & 1 deletion juriscraper/pacer/claims_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ def metadata(self):

# Each cell in the header table has a value like:
# <td><b>Judge:</b> Barbara J. Houser</td>
- cells = self.tree.xpath("//center/p/table[1]//td[text()]")
+ cells = self.tree.xpath(
+ "//center/p/following-sibling::table[1]//td[text()]"
+ )
for cell in cells:
label_node = cell.xpath("./b")[0]
data.update(
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dateparser>=1.2.0
feedparser>=6.0.11
geonamescache>=2.0.0
html5lib
- lxml~=4.9
+ lxml>=4.9
python-dateutil>=2.8.2
requests>=2.20.0
selenium>=4.9.1
Expand Down

0 comments on commit 73c2c3c

Please sign in to comment.