Skip to content

Commit

Permalink
Merge pull request #928 from marwoodandrew/SDAAP-113
Browse files Browse the repository at this point in the history
SDAAP-113 Create a feeding service and parser for Bang Showbiz
  • Loading branch information
marwoodandrew authored Apr 29, 2024
2 parents 4b5a163 + 10672e7 commit a62beaf
Show file tree
Hide file tree
Showing 7 changed files with 308 additions and 1 deletion.
3 changes: 2 additions & 1 deletion server/aap/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@

import aap.io.iptc_extension # noqa
from .feeding_services.ap_media_relay import APMediaRelayFeedingService # noqa
from .feeding_services.cision import CisionFeedingService # noqa
from .feeding_services.cision import CisionFeedingService # noqa
from .feeding_services.bang import BangFeedingService # noqa
1 change: 1 addition & 0 deletions server/aap/io/feed_parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@
import aap.io.feed_parsers.abs_calendar_csv # NOQA
import aap.io.feed_parsers.three_sixty_ninjs # NOQA
import aap.io.feed_parsers.globenewswire # NOQA
import aap.io.feed_parsers.bang_parser # NOQA
94 changes: 94 additions & 0 deletions server/aap/io/feed_parsers/bang_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from datetime import datetime
from superdesk.utc import utc
from superdesk.io.feed_parsers.newsml_1_2 import NewsMLOneFeedParser
from superdesk.io.registry import register_feed_parser
from superdesk.errors import ParserError
from apps.archive.common import format_dateline_to_locmmmddsrc
from superdesk.io.iptc import subject_codes
from aap.io.feeding_services.bang import MUSIC_ID, MOVIES_ID, SHOWBIZ_ID
from flask import current_app as app


class BangShowbizParser(NewsMLOneFeedParser):
    """Feed parser for the NewsML documents delivered by Bang Showbiz.

    The provider dict handed to :meth:`parse` is expected to carry a
    ``current_id`` entry (one of ``MUSIC_ID``, ``MOVIES_ID``, ``SHOWBIZ_ID``)
    naming the feed the document was fetched from; it selects the subject
    code and becomes part of the item guid.
    """

    NAME = "Bang Showbiz"

    label = "Bang Showbiz Feed Parser"

    # Place used to look up the dateline location for every item.
    CITY = "London"
    COUNTRY_CODE = "GB"
    STATE_CODE = "GB.ENG"

    # Provider of the document currently being parsed; set by parse().
    provider = None

    # Subject qcode to apply for each feed id (looked up in subject_codes).
    subject_map = {
        MUSIC_ID: "01011000",
        MOVIES_ID: "01005001",
        SHOWBIZ_ID: "01021000",
    }

    def datetime(self, string):
        """Parse a ``YYYY-MM-DD HH:MM:SS`` string into a datetime tagged as UTC."""
        parsed = datetime.strptime(string, "%Y-%m-%d %H:%M:%S")
        return parsed.replace(tzinfo=utc)

    def parse(self, xml, provider=None):
        """Build one item per ``NewsItem`` element found directly under ``xml``.

        :param xml: root element of the fetched document
        :param provider: provider dict; ``current_id`` is read from it
        :return: list of the items returned by ``populate_fields``
        :raises ParserError: wrapping any exception raised while handling an item
        """
        self.provider = provider
        self.root = xml
        items = []
        for news_item in xml.findall("NewsItem"):
            item = {}
            try:
                self.parse_news_identifier(item, news_item)
                self.parse_newslines(item, news_item)
                self.parse_news_management(item, news_item)

                qcode = self.subject_map[provider.get("current_id")]
                item["subject"] = [{"qcode": qcode, "name": subject_codes[qcode]}]

                content = news_item.find(
                    "NewsComponent/ContentItem/DataContent/body/body.content"
                )
                # The body text separates paragraphs with literal <BR> markers.
                paragraphs = content.text.replace("<BR>", "</p><p>")
                item["body_html"] = "<p>{}</p>".format(paragraphs).replace("\n", "")

                items.append(self.populate_fields(item))
            except Exception as ex:
                raise ParserError.newsmlOneParserError(ex, provider)
        return items

    def parse_newslines(self, item, tree):
        """Fill in headline, abstract and dateline of ``item``.

        Reads ``item["versioncreated"]``, so ``parse_news_identifier`` has to
        run first. The dateline stays an empty dict when no city named
        ``CITY`` is returned by the locator lookup.

        :return: always ``True``
        """
        newslines = self.parse_elements(tree.find("NewsComponent/NewsLines"))
        item["headline"] = newslines.get("HeadLine", "").strip()

        summary = tree.find("NewsComponent/NewsLines/NewsLine/NewsLineText")
        item["abstract"] = summary.text.strip()

        item.setdefault("dateline", {})
        candidates = app.locators.find_cities(
            country_code=self.COUNTRY_CODE, state_code=self.STATE_CODE
        )
        matches = [city for city in candidates if city["city"] == self.CITY]
        if matches:
            place = matches[0]
            item["dateline"]["located"] = place
            item["dateline"]["text"] = format_dateline_to_locmmmddsrc(
                place, item["versioncreated"], self.provider.get("source")
            )
        return True

    def parse_news_identifier(self, item, tree):
        """Set ``uri``/``guid`` and the created timestamps from the NewsIdentifier.

        The guid has the form ``urn:newsml:<current_id>:<DateId iso>:<NewsItemId>``.
        """
        identifier = self.parse_elements(tree.find("Identification/NewsIdentifier"))
        feed_id = self.provider.get("current_id", "")
        date_iso = self.datetime(identifier["DateId"]).isoformat()
        news_item_id = identifier["NewsItemId"]

        guid = "urn:newsml:{}:{}:{}".format(feed_id, date_iso, news_item_id)
        item["uri"] = guid
        item["guid"] = guid

        item["versioncreated"] = self.datetime(identifier["DateId"])
        item["firstcreated"] = self.datetime(identifier["DateId"])

    def parse_news_management(self, item, tree):
        """Assign the category; every item is filed under entertainment ("e")."""
        item["anpa_category"] = [{"qcode": "e"}]


# Register a parser instance with the feed parser registry under its NAME.
register_feed_parser(BangShowbizParser.NAME, BangShowbizParser())
82 changes: 82 additions & 0 deletions server/aap/io/feeding_services/bang.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# -*- coding: utf-8; -*-
#
# This file is part of Superdesk.
#
# Copyright 2013, 2014 Sourcefabric z.u. and contributors.
#
# For the full copyright and license information, please see the
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license

import lxml.etree
import requests

from superdesk.errors import AlreadyExistsError
from superdesk.io.feeding_services.http_base_service import HTTPFeedingServiceBase
from superdesk.io.registry import register_feeding_service

# Ids of the provider configuration fields that hold the URL of each feed.
MUSIC_ID = "music_url"
MOVIES_ID = "movies_url"
SHOWBIZ_ID = "showbiz_url"


class BangFeedingService(HTTPFeedingServiceBase):
    """Feeding service that fetches the Bang Showbiz feeds over HTTP.

    Each entry in ``fields`` names a provider configuration value holding a
    feed URL. Every URL that is set is downloaded, parsed as XML and passed
    to the provider's feed parser.
    """

    NAME = "Bang"

    label = "Bang Showbiz"

    HTTP_AUTH = False

    # Seconds to wait on a feed request, so an unresponsive server cannot
    # block the update indefinitely.
    FETCH_TIMEOUT = 30

    # requests session, created on the first update.
    session = None

    # configuration fields for the source URLs
    fields = [
        {
            "id": MUSIC_ID,
            "type": "text",
            "label": "Music URL",
        },
        {
            "id": MOVIES_ID,
            "type": "text",
            "label": "Movies URL",
        },
        {
            "id": SHOWBIZ_ID,
            "type": "text",
            "label": "Showbiz URL",
        },
    ]

    @staticmethod
    def _config_test(provider=None):
        """Report the configuration as valid; no check is performed."""
        return True

    def _update(self, provider, update):
        """Fetch and parse every configured feed.

        ``provider["current_id"]`` is set to the id of the feed being
        processed before the parser is called, which is how the parser learns
        which feed a document came from.

        :param provider: ingest provider dict; URLs are read from its ``config``
        :param update: not used by this service
        :return: list with one entry per fetched feed, each entry being the
            value returned by the parser for that feed
        :raises requests.HTTPError: when a feed responds with an error status
        """
        if not self.session:
            self.session = requests.Session()

        parser = self.get_feed_parser(provider)
        # A provider saved without any configuration must not crash the update.
        config = provider.get("config") or {}

        items = []
        try:
            for src in self.fields:
                field_id = src.get("id")
                current_url = config.get(field_id)
                if not current_url:
                    continue

                provider["current_id"] = field_id
                response = self.session.get(current_url, timeout=self.FETCH_TIMEOUT)
                response.raise_for_status()
                xml = lxml.etree.fromstring(response.content)
                items.append(parser.parse(xml, provider=provider))
        finally:
            # Release the pooled connections even when a fetch or parse fails.
            self.session.close()

        return items


# Register the service; an AlreadyExistsError raised by the registration is
# deliberately ignored.
try:
    register_feeding_service(BangFeedingService)
except AlreadyExistsError:
    pass
28 changes: 28 additions & 0 deletions server/aap/tests/io/feed_parsers/bang_parser_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
from superdesk.tests import TestCase
from superdesk.etree import etree
from aap.io.feed_parsers.bang_parser import BangShowbizParser


class BangShowbizParserTestCase(TestCase):
    """Parse the sample Bang Showbiz document and check the resulting item."""

    filename = "ABC3058248.xml"

    def setUp(self):
        """Load the fixture and run it through the parser as the showbiz feed."""
        here = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.normpath(os.path.join(here, "../fixtures", self.filename))
        with open(fixture) as fixture_file:
            self.xml = fixture_file.read()

        provider = {
            "name": "Test",
            "current_id": "showbiz_url",
            "config": {"showbiz_url": "https://url.com/111/aa"},
        }
        root = etree.fromstring(self.xml.encode("UTF-8"))
        self.item = BangShowbizParser().parse(root, provider)

    def test_item(self):
        """The first parsed item carries headline, abstract and body, but no byline."""
        parsed = self.item[0]
        self.assertEqual(parsed["headline"], "Headline text here")
        self.assertNotIn("byline", parsed)
        self.assertEqual(parsed["abstract"], "Summary text here")
        self.assertEqual(parsed["body_html"], "<p>Body here</p><p>more here</p>")
61 changes: 61 additions & 0 deletions server/aap/tests/io/feed_services/bang_feeding_service_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import os
from httmock import urlmatch, HTTMock
from unittests import AAPTestCase
from aap.io.feeding_services.bang import BangFeedingService


# Ingest provider record used by the test; its three URLs are the ones the
# mocked HTTP handlers respond to.
PROVIDER = {
    "_id": "test_provider",
    "config": {
        "showbiz_url": "https://url.com/111/aa",
        "music_url": "https://url.com/222/bb",
        "movies_url": "https://url.com/333/cc",
    },
    "feed_parser": "Bang Showbiz",
}

# Vocabulary records inserted for the test: a "locators" vocabulary with no items.
VOCABULARIES = [
    {
        "_id": "locators",
        "items": [],
    },
]


class bangTestCase(AAPTestCase):
    """Run the Bang feeding service against mocked HTTP endpoints."""

    _calls = None
    filename = "ABC3058248.xml"

    def setUp(self):
        """Install the HTTP mock, seed the database and load the XML fixture."""
        super().setUp()
        self.setupMock(self)
        self.app.data.insert('vocabularies', VOCABULARIES)
        self.app.data.insert('ingest_providers', [PROVIDER])
        self._calls = 0
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.normpath(os.path.join(dirname, "../fixtures", self.filename))
        # The fixture declares UTF-8; do not depend on the platform default.
        with open(fixture, encoding="utf-8") as f:
            self.xml = f.read()

    def setupMock(self, context):
        """Activate an HTTMock serving the three feed URLs and store it on ``context``.

        The mock is entered manually, so a cleanup is registered to leave it
        again once the test finishes; otherwise the mock would remain active
        for tests that run afterwards.
        """
        context.mock = HTTMock(self.showbiz_request, self.music_request, self.movies_request)
        context.mock.__enter__()
        self.addCleanup(context.mock.__exit__, None, None, None)

    @urlmatch(scheme='https', netloc='url.com', path='/111/aa')
    def showbiz_request(self, url, request):
        """Serve the fixture document for the showbiz feed URL."""
        return {'status_code': 200, 'content': self.xml}

    @urlmatch(scheme='https', netloc='url.com', path='/222/bb')
    def music_request(self, url, request):
        """Serve the fixture document for the music feed URL."""
        return {'status_code': 200, 'content': self.xml}

    @urlmatch(scheme='https', netloc='url.com', path='/333/cc')
    def movies_request(self, url, request):
        """Serve the fixture document for the movies feed URL."""
        return {'status_code': 200, 'content': self.xml}

    def test_request(self):
        """One result is returned for each of the three configured feeds."""
        with self.app.app_context():
            provider = PROVIDER.copy()
            service = BangFeedingService()
            service.provider = provider
            items = service._update(provider, {})
            self.assertEqual(len(items), 3)
40 changes: 40 additions & 0 deletions server/aap/tests/io/fixtures/ABC3058248.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<NewsML>
<NewsItem>
<Identification>
<NewsIdentifier>
<ProviderId>abc.com</ProviderId>
<NewsItemId>ABC3058248</NewsItemId>
<DateId>2024-04-17 13:00:00</DateId>
</NewsIdentifier>
</Identification>
<NewsComponent>
<NewsLines>
<HeadLine>Headline text here
</HeadLine>
<NewsLine>
<NewsLineType FormalName="Summary"/>
<NewsLineText>
<![CDATA[Summary text here]]>
</NewsLineText>
</NewsLine>
</NewsLines>
<ContentItem>
<MediaType FormalName="Text"/>
<MimeType FormalName="text/vnd.IPTC.NITF"/>
<DataContent>
<body>
<body.content><![CDATA[Body here<BR>more here]]></body.content>
</body>
</DataContent>
</ContentItem>
<ContentItem
Href="/http://somepic.jp">
<MediaType FormalName="Picture"/>
<MimeType FormalName="image/jpg"/>
<Property FormalName="caption"
value="Caption"/>
</ContentItem>
</NewsComponent>
</NewsItem>
</NewsML>

0 comments on commit a62beaf

Please sign in to comment.