Skip to content

Commit

Permalink
Merge pull request #935 from marwoodandrew/bang-fixes
Browse files Browse the repository at this point in the history
fixes to BANG to tolerate badly formed XML
  • Loading branch information
marwoodandrew authored Sep 23, 2024
2 parents 865466d + afc9021 commit 416f873
Showing 1 changed file with 20 additions and 6 deletions.
26 changes: 20 additions & 6 deletions server/aap/io/feeding_services/bang.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# For the full copyright and license information, please see the
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license
import logging

import lxml.etree
import requests
Expand All @@ -19,6 +20,8 @@
MOVIES_ID = "movies_url"
SHOWBIZ_ID = "showbiz_url"

logger = logging.getLogger(__name__)


class BangFeedingService(HTTPFeedingServiceBase):
NAME = "Bang"
Expand Down Expand Up @@ -62,13 +65,24 @@ def _update(self, provider, update):
for src in self.fields:
current_url = provider.get("config").get(src.get("id"))
if current_url:
feed_items = None
provider["current_id"] = src.get("id")
r = self.session.get(current_url)
r.raise_for_status()
xml = lxml.etree.fromstring(r.content)
item = parser.parse(xml, provider=provider)

items.append(item)
try:
r = self.session.get(current_url)
r.raise_for_status()

# Use a more tolerant (recovering) parser, because the feed sometimes contains stray quotes in attributes
xml_parser = lxml.etree.XMLParser(recover=True)
xml = lxml.etree.fromstring(r.content, xml_parser)
feed_items = parser.parse(xml, provider=provider)
except lxml.etree.XMLSyntaxError:
logger.exception(f"Syntax error parsing {current_url}")
# If anything else goes wrong, log it and swallow it so that one bad feed doesn't kill them all!
except Exception:
logger.exception(f"Processing url {current_url}")

if feed_items:
items.append(feed_items)

if self.session:
self.session.close()
Expand Down

0 comments on commit 416f873

Please sign in to comment.