diff --git a/server/aap/publish/formatters/aap_ipnews_formatter.py b/server/aap/publish/formatters/aap_ipnews_formatter.py
index 3d2a31438..d4a4cd035 100644
--- a/server/aap/publish/formatters/aap_ipnews_formatter.py
+++ b/server/aap/publish/formatters/aap_ipnews_formatter.py
@@ -79,7 +79,7 @@ def format_for_source(self, article, subscriber, source, codes=None):
odbc_item['article_text'] += ' ' + sign_off
odbc_item['service_level'] = 'a' # @service_level
- odbc_item['wordcount'] = article.get('word_count', None) # @wordcount
+ odbc_item['wordcount'] = article.get('word_count') or 0 # @wordcount
odbc_item['priority'] = map_priority(article.get('priority')) # @priority
docs.append((pub_seq_num, json.dumps(odbc_item)))
@@ -93,6 +93,7 @@ def get_wrapped_text_content(self, content):
:param content:
:return:
"""
+ content = content.replace('<br>', '<br/>').replace('</br>', '')
soup = BeautifulSoup(content, 'html.parser')
for top_level_tag in soup.find_all(recursive=False):
diff --git a/server/aap/publish/formatters/aap_ipnews_formatter_test.py b/server/aap/publish/formatters/aap_ipnews_formatter_test.py
index fc185843e..28ce62044 100644
--- a/server/aap/publish/formatters/aap_ipnews_formatter_test.py
+++ b/server/aap/publish/formatters/aap_ipnews_formatter_test.py
@@ -685,6 +685,67 @@ def test_aap_ipnews_formatter_with_body_formatted(self):
'selector_codes': 'Axx',
'genre': 'Current', 'keyword': 'slugline', 'author': 'joe'})
+ def testAdvisoryWithBreaksContent(self):
+ article = {
+ '_id': '3',
+ 'source': 'AAP',
+ 'anpa_category': [{'qcode': 'a'}],
+ 'headline': 'This is a test headline',
+ 'byline': 'joe',
+ 'slugline': 'slugline',
+ 'subject': [{'qcode': '02011001'}],
+ 'anpa_take_key': 'take_key',
+ 'unique_id': '1',
+ 'type': 'text',
+ 'body_html': '
Economy
The latest national accounts.
Farm
If you ask Treasurer' + '
Turnbull Howard
Former prime minister John Howard believes
', + 'word_count': '1', + 'priority': 1, + "linked_in_packages": [ + { + "package": "package", + "package_type": "takes" + } + ], + } + subscriber = self.app.data.find('subscribers', None, None)[0] + + f = AAPIpNewsFormatter() + seq, item = f.format(article, subscriber)[0] + item = json.loads(item) + expected = ' Economy\r\n The latest national accounts.\r\n Farm\r\n If you ask Treasurer\r\n ' \ + 'Turnbull Howard\r\n Former prime minister John Howard believes\r\n\r\nAAP' + self.assertEqual(item['article_text'], expected) + + def testNullWordCount(self): + article = { + '_id': '3', + 'source': 'AAP', + 'anpa_category': [{'qcode': 'a'}], + 'headline': 'This is a test headline', + 'byline': 'joe', + 'slugline': 'slugline', + 'subject': [{'qcode': '02011001'}], + 'anpa_take_key': 'take_key', + 'unique_id': '1', + 'type': 'text', + 'body_html': 'Test
', + 'word_count': None, + 'priority': 1, + "linked_in_packages": [ + { + "package": "package", + "package_type": "takes" + } + ], + } + subscriber = self.app.data.find('subscribers', None, None)[0] + + f = AAPIpNewsFormatter() + seq, item = f.format(article, subscriber)[0] + item = json.loads(item) + self.assertEqual(item['wordcount'], 0) + class DefaultSubjectTest(SuperdeskTestCase): diff --git a/server/aap/publish/formatters/aap_newscentre_formatter.py b/server/aap/publish/formatters/aap_newscentre_formatter.py index 8e3c60aa7..76969baf7 100644 --- a/server/aap/publish/formatters/aap_newscentre_formatter.py +++ b/server/aap/publish/formatters/aap_newscentre_formatter.py @@ -77,6 +77,7 @@ def _get_category_list(self, category_list): return get_aap_category_list(category_list) def get_text_content(self, content): + content = content.replace('