Skip to content

Commit

Permalink
Import Belga biographies into Superdesk [SDBELGA-623] (#386)
Browse files Browse the repository at this point in the history
* Import Belga biographies into Superdesk [SDBELGA-623]

* Minor change

* Added test case
  • Loading branch information
GyanP authored and petrjasek committed Mar 22, 2022
1 parent 2200d17 commit 6a25cd0
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 7 deletions.
12 changes: 8 additions & 4 deletions server/belga/io/feed_parsers/belga_newsml_1_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,11 +625,11 @@ def parse_administrativemetadata(self, item, admin_el):
signoff_list = []
for element in admin_el.findall('Creator/Party'):
if element is not None and element.get('FormalName'):
_sign_off = author_name = element.get('FormalName', '').replace(' ', '')
_sign_off = author_name = element.get('FormalName', '').replace(' ', '').strip('()')
_topic = element.get('Topic', '')
author = {
'_id': [_topic], 'name': _topic, 'role': _topic,
'sub_label': author_name
'_id': [author_name, _topic], 'role': _topic,
'name': _topic, 'sub_label': author_name
}
# try to find an author in DB
user = get_resource_service('users').find_one(req=None, username=author_name)
Expand All @@ -647,8 +647,12 @@ def parse_administrativemetadata(self, item, admin_el):
signoff_list.append(_sign_off)
item.setdefault('authors', []).append(author)

# Check for and remove duplicate authors, if any
if item.get('authors'):
item['authors'] = [dict(i) for i, _ in itertools.groupby(sorted(item['authors'], key=lambda k: k['_id']))]

if signoff_list:
item["sign_off"] = "/".join(signoff_list)
item['sign_off'] = "/".join(signoff_list)

element = admin_el.find('Contributor/Party')
if element is not None and element.get('FormalName'):
Expand Down
12 changes: 9 additions & 3 deletions server/tests/io/feed_parsers/belga_newsml_1_2_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def test_can_parse(self):

def test_content(self):
item = self.item[0]

self.assertEqual(
item['administrative']['foreign_id'],
'BIN118'
Expand All @@ -67,7 +66,14 @@ def test_content(self):
'role': 'AUTHOR',
'sub_label': 'DWM',
},
])
{
'_id': ['TIP', 'EDITOR'],
'name': 'EDITOR',
'role': 'EDITOR',
'sub_label': 'TIP',
}
]
)
self.assertEqual(
item['body_html'],
'<p>Steven &lt;b&gt;Van Geel&lt;/b&gt; gaf zich op 31 mei 2014 aan bij de politie van zijn '
Expand Down Expand Up @@ -133,7 +139,7 @@ def test_content(self):
self.assertEqual(item['subject'], expected_subjects)

self.assertEqual(item['type'], 'text')
self.assertEqual(item['sign_off'], 'DWM')
self.assertEqual(item['sign_off'], 'DWM/TIP')
self.assertEqual(item['version'], 4)
self.assertEqual(item['versioncreated'], datetime.datetime(2019, 1, 29, 12, 34, tzinfo=pytz.utc))
self.assertEqual(item["genre"], [{'name': 'CURRENT', 'qcode': 'CURRENT'}])
Expand Down
1 change: 1 addition & 0 deletions server/tests/io/fixtures/belga_newsml_1_2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
</Provider>
<Creator>
<Party FormalName="DWM" Topic="AUTHOR"/>
<Party FormalName="(TIP)" Topic="EDITOR"/>
</Creator>
<Source>
<Party FormalName="BELGA"/>
Expand Down

0 comments on commit 6a25cd0

Please sign in to comment.