-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SDAAP-113 Create a feeding service and parser for Bang Showbiz
- Loading branch information
1 parent
1bf3885
commit 16b1812
Showing
6 changed files
with
261 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
from datetime import datetime | ||
from superdesk.utc import utc | ||
from superdesk.io.feed_parsers.newsml_1_2 import NewsMLOneFeedParser | ||
from superdesk.io.registry import register_feed_parser | ||
from superdesk.errors import ParserError | ||
from apps.archive.common import format_dateline_to_locmmmddsrc | ||
from flask import current_app as app | ||
|
||
|
||
class BangShowbizParser(NewsMLOneFeedParser):
    """Feed parser for Bang Showbiz NewsML documents.

    Builds one item per ``NewsItem`` element: guid and timestamps from
    ``Identification/NewsIdentifier``, headline and abstract from
    ``NewsComponent/NewsLines``, a dateline when the locator lookup finds
    ``CITY``, a fixed category, and an HTML body.
    """

    NAME = "Bang Showbiz"

    label = "Bang Showbiz Feed Parser"

    # Location used for the dateline locator lookup of every item.
    CITY = "London"
    COUNTRY_CODE = "GB"
    STATE_CODE = "GB.ENG"

    # Element paths, relative to a NewsItem element.
    _ABSTRACT_PATH = "NewsComponent/NewsLines/NewsLine/NewsLineText"
    _BODY_PATH = "NewsComponent/ContentItem/DataContent/body/body.content"

    # Provider given to the most recent parse() call; read by the helpers.
    provider = None

    def datetime(self, string):
        """Parse a ``YYYY-MM-DD HH:MM:SS`` string and mark it as UTC."""
        return datetime.strptime(string, "%Y-%m-%d %H:%M:%S").replace(tzinfo=utc)

    def parse(self, xml, provider=None):
        """Convert every ``NewsItem`` under ``xml`` into an item dict.

        :param xml: root element of the feed document
        :param provider: ingest provider dict; ``current_url`` and ``source``
            are read from it. ``None`` is treated as an empty provider.
        :return: list of items, each passed through ``populate_fields``
        :raises ParserError: wraps any exception raised while parsing a
            NewsItem; the first failure aborts the whole document.
        """
        # The signature allows provider=None, but the helpers call
        # self.provider.get(...), so fall back to an empty mapping.
        self.provider = provider or {}
        self.root = xml
        items = []
        for news_item in xml.findall("NewsItem"):
            item = {}
            try:
                # The identifier must be parsed first: parse_newslines needs
                # item["versioncreated"] to format the dateline text.
                self.parse_news_identifier(item, news_item)
                self.parse_newslines(item, news_item)
                self.parse_news_management(item, news_item)
                item["body_html"] = self._body_html(news_item)
                items.append(self.populate_fields(item))
            except Exception as ex:
                raise ParserError.newsmlOneParserError(ex, provider) from ex
        return items

    def _body_html(self, news_item):
        """Return the body text of ``news_item`` wrapped in ``<p>`` tags.

        Literal ``<BR>`` markers in the text become paragraph breaks and
        newlines are removed.

        :raises ValueError: if the body element is missing or has no text
        """
        content = news_item.find(self._BODY_PATH)
        text = None if content is None else content.text
        if text is None:
            # Still a failure (parse() wraps it in ParserError), but with a
            # message that names the problem instead of an AttributeError.
            raise ValueError("NewsItem has no body content")
        return ("<p>" + text.replace("<BR>", "</p><p>") + "</p>").replace("\n", "")

    def parse_newslines(self, item, tree):
        """Fill in headline, abstract and dateline for ``item``.

        :param item: item dict being built; ``versioncreated`` must be set
        :param tree: the ``NewsItem`` element
        :return: always ``True``
        """
        parsed_el = self.parse_elements(tree.find("NewsComponent/NewsLines"))
        item["headline"] = (parsed_el.get("HeadLine") or "").strip()

        # A missing or empty summary line yields an empty abstract, matching
        # the headline's empty-string default.
        summary = tree.find(self._ABSTRACT_PATH)
        summary_text = None if summary is None else summary.text
        item["abstract"] = (summary_text or "").strip()

        item.setdefault("dateline", {})
        cities = app.locators.find_cities(
            country_code=self.COUNTRY_CODE, state_code=self.STATE_CODE
        )
        located = next((c for c in cities if c["city"] == self.CITY), None)
        if located is not None:
            item["dateline"]["located"] = located
            item["dateline"]["text"] = format_dateline_to_locmmmddsrc(
                located, item["versioncreated"], self.provider.get("source")
            )
        return True

    def parse_news_identifier(self, item, tree):
        """Set ``uri``/``guid`` and the created timestamps on ``item``.

        The guid is ``<provider current_url>:<NewsItemId>``; both timestamps
        come from the ``DateId`` element.
        """
        parsed_el = self.parse_elements(tree.find("Identification/NewsIdentifier"))
        item["uri"] = item["guid"] = "{}:{}".format(
            self.provider.get("current_url"), parsed_el["NewsItemId"]
        )
        item["versioncreated"] = self.datetime(parsed_el["DateId"])
        item["firstcreated"] = self.datetime(parsed_el["DateId"])

    def parse_news_management(self, item, tree):
        """Assign the fixed category; ``tree`` is not inspected."""
        # It's always entertainment
        item["anpa_category"] = [{"qcode": "e"}]
|
||
|
||
# Register a parser instance under its NAME when this module is imported.
register_feed_parser(
    BangShowbizParser.NAME,
    BangShowbizParser(),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# -*- coding: utf-8; -*- | ||
# | ||
# This file is part of Superdesk. | ||
# | ||
# Copyright 2013, 2014 Sourcefabric z.u. and contributors. | ||
# | ||
# For the full copyright and license information, please see the | ||
# AUTHORS and LICENSE files distributed with this source code, or | ||
# at https://www.sourcefabric.org/superdesk/license | ||
|
||
import lxml.etree | ||
import requests | ||
|
||
from superdesk.errors import AlreadyExistsError | ||
from superdesk.io.feeding_services.http_base_service import HTTPFeedingServiceBase | ||
from superdesk.io.iptc import subject_codes | ||
from superdesk.io.registry import register_feeding_service | ||
|
||
|
||
class BangFeedingService(HTTPFeedingServiceBase):
    """Feeding service that fetches the Bang Showbiz feeds over HTTP.

    Up to three source URLs (music, movies, showbiz) are configured on the
    provider. Each configured URL is fetched, parsed with the provider's
    feed parser, and its items are tagged with the subject mapped to that
    source.
    """

    NAME = "Bang"

    label = "Bang Showbiz"

    HTTP_AUTH = False

    MUSIC_ID = "music_url"
    MOVIES_ID = "movies_url"
    SHOWBIZ_ID = "showbiz_url"

    # requests session; created per update run and released afterwards.
    session = None

    # configuration fields for the source url's
    fields = [
        {
            "id": MUSIC_ID,
            "type": "text",
            "label": "Music URL",
        },
        {
            "id": MOVIES_ID,
            "type": "text",
            "label": "Movies URL",
        },
        {
            "id": SHOWBIZ_ID,
            "type": "text",
            "label": "Showbiz URL",
        },
    ]

    # Map the field/sources entries to appropriate IPTC codes
    subject_map = {MUSIC_ID: "01011000", MOVIES_ID: "01005001", SHOWBIZ_ID: "01021000"}

    @staticmethod
    def _config_test(provider=None):
        """Report the configuration as valid; no check is performed."""
        return True

    def _update(self, provider, update):
        """Fetch and parse every configured source URL.

        :param provider: ingest provider dict; source URLs are read from its
            ``config``. ``provider["current_url"]`` is set to the URL being
            processed before the parser is called.
        :param update: not used by this service
        :return: list with one entry per fetched URL, each entry being the
            list of items the parser produced for that URL
        :raises requests.HTTPError: if a source responds with an error status
        """
        if not self.session:
            self.session = requests.Session()

        parser = self.get_feed_parser(provider)
        # A provider without a config simply has no sources to fetch.
        config = provider.get("config") or {}
        # NOTE(review): assumes the base class may define HTTP_TIMEOUT in
        # seconds; 30 is used when it does not. Without a timeout a stalled
        # server would block the request indefinitely.
        timeout = getattr(self, "HTTP_TIMEOUT", 30)

        items = []
        try:
            for field in self.fields:
                field_id = field["id"]
                current_url = config.get(field_id)
                if not current_url:
                    continue

                provider["current_url"] = current_url
                response = self.session.get(current_url, timeout=timeout)
                response.raise_for_status()
                xml = lxml.etree.fromstring(response.content)
                parsed_items = parser.parse(xml, provider=provider)

                qcode = self.subject_map[field_id]
                for parsed in parsed_items:
                    parsed["subject"] = [
                        {"qcode": qcode, "name": subject_codes[qcode]}
                    ]

                items.append(parsed_items)
        finally:
            # Release the connections even when a request or parse fails,
            # and drop the closed session so the next run starts fresh.
            self.session.close()
            self.session = None

        return items
|
||
|
||
# Register the service on import. AlreadyExistsError is ignored on purpose;
# presumably it guards against this module being imported more than once.
try:
    register_feeding_service(BangFeedingService)
except AlreadyExistsError:
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import os | ||
from superdesk.tests import TestCase | ||
from superdesk.etree import etree | ||
from aap.io.feed_parsers.bang_parser import BangShowbizParser | ||
|
||
|
||
class BangShowbizParserTestCase(TestCase):
    """Checks the fields BangShowbizParser extracts from a fixture file."""

    filename = "ABC3058248.xml"

    def setUp(self):
        """Parse the fixture once; the resulting item list is ``self.item``."""
        super().setUp()
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.normpath(os.path.join(dirname, "../fixtures", self.filename))
        provider = {"name": "Test"}
        # Read raw bytes so the XML parser sees the document as stored,
        # independent of the platform's default text encoding.
        with open(fixture, "rb") as f:
            self.xml = f.read()
        self.item = BangShowbizParser().parse(etree.fromstring(self.xml), provider)

    def test_item(self):
        """The first parsed item carries the fixture's headline, abstract and body."""
        self.assertEqual(self.item[0]["headline"], "Headline text here")
        self.assertNotIn("byline", self.item[0])
        self.assertEqual(self.item[0]["abstract"], "Summary text here")
        self.assertEqual(self.item[0]["body_html"], "<p>Body here</p><p>more here</p>")
61 changes: 61 additions & 0 deletions
61
server/aap/tests/io/feed_services/bang_feeding_service_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import os | ||
from httmock import urlmatch, HTTMock | ||
from unittests import AAPTestCase | ||
from aap.io.feeding_services.bang import BangFeedingService | ||
|
||
|
||
# Ingest provider document used by the test: one URL per Bang source.
PROVIDER = {
    "_id": "test_provider",
    "config": {
        "showbiz_url": "https://url.com/111/aa",
        "music_url": "https://url.com/222/bb",
        "movies_url": "https://url.com/333/cc",
    },
    "feed_parser": "Bang Showbiz",
}

# Vocabulary documents inserted before the test: an empty "locators" list.
VOCABULARIES = [
    {
        "_id": "locators",
        "items": [],
    },
]
|
||
|
||
class bangTestCase(AAPTestCase):
    """Runs BangFeedingService._update against mocked HTTP responses."""

    _calls = None
    filename = "ABC3058248.xml"

    def setUp(self):
        """Install the HTTP mocks, seed the database and load the fixture."""
        super().setUp()
        self.setupMock(self)
        self.app.data.insert('vocabularies', VOCABULARIES)
        self.app.data.insert('ingest_providers', [PROVIDER])
        self._calls = 0
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.normpath(os.path.join(dirname, "../fixtures", self.filename))
        # Read raw bytes so the mocked response body is the document as
        # stored, independent of the platform's default text encoding.
        with open(fixture, "rb") as f:
            self.xml = f.read()

    def setupMock(self, context):
        """Activate the three URL handlers on ``context`` until its cleanup."""
        context.mock = HTTMock(self.showbiz_request, self.music_request, self.movies_request)
        context.mock.__enter__()
        # Undo the patch when the test finishes so it cannot leak into
        # other tests.
        context.addCleanup(context.mock.__exit__, None, None, None)

    @urlmatch(scheme='https', netloc='url.com', path='/111/aa')
    def showbiz_request(self, url, request):
        """Serve the fixture for the showbiz URL."""
        return {'status_code': 200, 'content': self.xml}

    @urlmatch(scheme='https', netloc='url.com', path='/222/bb')
    def music_request(self, url, request):
        """Serve the fixture for the music URL."""
        return {'status_code': 200, 'content': self.xml}

    @urlmatch(scheme='https', netloc='url.com', path='/333/cc')
    def movies_request(self, url, request):
        """Serve the fixture for the movies URL."""
        return {'status_code': 200, 'content': self.xml}

    def test_request(self):
        """One batch is returned per configured URL, tagged with its subject."""
        with self.app.app_context():
            provider = PROVIDER.copy()
            service = BangFeedingService()
            service.provider = provider
            items = service._update(provider, {})
            self.assertEqual(len(items), 3)

            # All three URLs are configured, so batches follow the order of
            # the service's fields.
            expected = [
                BangFeedingService.subject_map[field["id"]]
                for field in BangFeedingService.fields
            ]
            for qcode, batch in zip(expected, items):
                for item in batch:
                    self.assertEqual(item["subject"][0]["qcode"], qcode)