Skip to content

Commit

Permalink
Fix the mapping of the City field in the Kyodo parser [SDBELGA-693] (#413)
Browse files Browse the repository at this point in the history

* Fix the mapping of the City field in the Kyodo parser [SDBELGA-693]

* Update testcases

* bump eve-elastic version

* refactored the code

* Undo changes

* reformat code using black
  • Loading branch information
devketanpro authored and petrjasek committed Dec 1, 2022
1 parent 886714b commit e454fa4
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 31 deletions.
22 changes: 18 additions & 4 deletions server/belga/io/feed_parsers/belga_kyodo_newsml_1_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,25 @@
class BelgaKyodoNewsMLOneFeedParser(BaseBelgaNewsMLOneFeedParser):
"""Feed Parser for Belga specific Kyodo NewsML."""

NAME = 'belga_kyodo_newsml12'
label = 'Belga specific Kyodo News ML 1.2 Parser'
NAME = "belga_kyodo_newsml12"
label = "Belga specific Kyodo News ML 1.2 Parser"

def can_parse(self, xml):
return xml.tag == 'NewsML'
return xml.tag == "NewsML"

# SDBELGA-693: copy the NITF dateline location into each item's extra["city"]
def parse(self, xml, provider=None):
items = super().parse(xml, provider)
location_el = xml.find(
"NewsItem/NewsComponent/ContentItem/DataContent/nitf/body/body.head/dateline/location"
)
if location_el is not None:
for item in items:
item.setdefault("extra", {})["city"] = location_el.text

register_feed_parser(BelgaKyodoNewsMLOneFeedParser.NAME, BelgaKyodoNewsMLOneFeedParser())
return items


register_feed_parser(
BelgaKyodoNewsMLOneFeedParser.NAME, BelgaKyodoNewsMLOneFeedParser()
)
78 changes: 51 additions & 27 deletions server/tests/io/feed_parsers/belga_kyodo_newsml_1_2_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,66 @@


class BelgaKyodoNewsMLTestCase(TestCase):
filename = 'kyodo_newsml_1_2_belga.xml'
filename = "kyodo_newsml_1_2_belga.xml"

def setUp(self):
super().setUp()
dirname = os.path.dirname(os.path.realpath(__file__))
fixture = os.path.normpath(os.path.join(dirname, '../fixtures', self.filename))
provider = {'name': 'test'}
with open(fixture, 'rb') as f:
fixture = os.path.normpath(os.path.join(dirname, "../fixtures", self.filename))
provider = {"name": "test"}
with open(fixture, "rb") as f:
parser = BelgaKyodoNewsMLOneFeedParser()
self.xml_root = etree.parse(f).getroot()
self.item = parser.parse(self.xml_root, provider)

def test_content(self):
item = self.item[0]

self.assertEqual(item['headline'], 'Ex-Chinese Premier Li Peng dies aged 90: Xinhua')
self.assertEqual(
item["headline"], "Ex-Chinese Premier Li Peng dies aged 90: Xinhua"
)
self.assertEqual(item["slugline"], None)
self.assertEqual(item["extra"]["city"], "BEIJING")
self.assertEqual(item["keywords"], [])
self.assertEqual(item['date_id'], '20190723')
self.assertEqual(item['format'], 'Nitf_v3.0')
self.assertEqual(item['guid'], 'urn:newsml:kyodonews.jp:20190723:20161021KW___0003800010:1')
self.assertEqual(item['item_id'], '20161021KW___0003800010')
self.assertEqual(item['priority'], 9)
self.assertEqual(item['provider_id'], 'kyodonews.jp')
self.assertEqual(item['type'], 'text')
self.assertEqual(item['firstcreated'].isoformat(), '2019-07-23T20:21:19+09:00')
self.assertEqual(item['versioncreated'].isoformat(), '2019-07-23T20:21:19+09:00')
self.assertEqual(item['subject'], [
{'name': 'NEWS/GENERAL', 'parent': 'NEWS', 'qcode': 'NEWS/GENERAL', 'scheme': 'services-products'},
{'name': 'France', 'qcode': 'country_fra', 'scheme': 'country',
'translations': {'name': {'fr': 'FRANCE', 'nl': 'FRANKRIJK'}}},
{'name': 'default', 'qcode': 'default', 'scheme': 'distribution'},
{'name': 'France', 'qcode': 'fra', 'translations': {
'name': {'nl': 'Frankrijk', 'fr': 'France'}}, 'scheme': 'countries'},
{'name': 'no', 'qcode': 'no', 'scheme': 'essential'},
{'name': 'no', 'qcode': 'no', 'scheme': 'equivalents_list'},
])
self.assertEqual(item["date_id"], "20190723")
self.assertEqual(item["format"], "Nitf_v3.0")
self.assertEqual(
item["guid"], "urn:newsml:kyodonews.jp:20190723:20161021KW___0003800010:1"
)
self.assertEqual(item["item_id"], "20161021KW___0003800010")
self.assertEqual(item["priority"], 9)
self.assertEqual(item["provider_id"], "kyodonews.jp")
self.assertEqual(item["type"], "text")
self.assertEqual(item["firstcreated"].isoformat(), "2019-07-23T20:21:19+09:00")
self.assertEqual(
item["versioncreated"].isoformat(), "2019-07-23T20:21:19+09:00"
)
self.assertEqual(
item["subject"],
[
{
"name": "NEWS/GENERAL",
"parent": "NEWS",
"qcode": "NEWS/GENERAL",
"scheme": "services-products",
},
{
"name": "France",
"qcode": "country_fra",
"scheme": "country",
"translations": {"name": {"fr": "FRANCE", "nl": "FRANKRIJK"}},
},
{"name": "default", "qcode": "default", "scheme": "distribution"},
{
"name": "France",
"qcode": "fra",
"translations": {"name": {"nl": "Frankrijk", "fr": "France"}},
"scheme": "countries",
},
{"name": "no", "qcode": "no", "scheme": "essential"},
{"name": "no", "qcode": "no", "scheme": "equivalents_list"},
],
)
body_html = (
"<p> Former Chinese Premier Li Peng, who led a military crackdown on the pro-democracy movement at"
" Beijing's Tiananmen Square in 1989, died Monday of illness in the capital, the official Xinhua"
Expand All @@ -60,6 +83,7 @@ def test_content(self):
"<p> Li traveled to Japan in 1989 and 1997 while he was premier. He also visited the country in"
" 2002, which marked the 30th anniversary of normalized relations between Beijing and Tokyo, and held"
" talks with then Japanese Prime Minister Junichiro Koizumi.</p>"
"<p>==Kyodo</p>")
item['body_html'] = item['body_html'].replace('\n', '').replace('\t', '')
self.assertEqual(item['body_html'], body_html)
"<p>==Kyodo</p>"
)
item["body_html"] = item["body_html"].replace("\n", "").replace("\t", "")
self.assertEqual(item["body_html"], body_html)

0 comments on commit e454fa4

Please sign in to comment.