From 1cd9a672bc527519d4f712682880dd5e06ba1075 Mon Sep 17 00:00:00 2001 From: marwoodandrew Date: Mon, 30 Sep 2024 11:42:06 +1000 Subject: [PATCH] SDAAP-123 Escaped Markup being included in headlines --- .../aap/publish/formatters/reuters_newsml_1_2_formatter.py | 4 +++- .../publish/formatters/reuters_newsml_1_2_formatter_test.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/server/aap/publish/formatters/reuters_newsml_1_2_formatter.py b/server/aap/publish/formatters/reuters_newsml_1_2_formatter.py index 9a08e32ff..1060bfa36 100644 --- a/server/aap/publish/formatters/reuters_newsml_1_2_formatter.py +++ b/server/aap/publish/formatters/reuters_newsml_1_2_formatter.py @@ -20,6 +20,7 @@ from flask import current_app as app from apps.prepopulate.app_initialize import get_filepath from superdesk import etree as sd_etree +from superdesk.text_utils import get_text from copy import deepcopy from superdesk.utc import utcnow from superdesk.editor_utils import remove_all_embeds @@ -277,7 +278,8 @@ def _format_news_lines(self, formatted_article, main_news_component): """ news_lines = SubElement(main_news_component, "NewsLines") if formatted_article.get('headline'): - SubElement(news_lines, 'HeadLine').text = formatted_article.get('headline') + SubElement(news_lines, 'HeadLine').text = ( + get_text(formatted_article.get('headline'), 'html')) if formatted_article.get('byline'): SubElement(news_lines, 'ByLine').text = formatted_article.get('byline') or '' if formatted_article.get('dateline', {}).get('text', ''): diff --git a/server/aap/publish/formatters/reuters_newsml_1_2_formatter_test.py b/server/aap/publish/formatters/reuters_newsml_1_2_formatter_test.py index ecc87a9da..6e351a0b6 100644 --- a/server/aap/publish/formatters/reuters_newsml_1_2_formatter_test.py +++ b/server/aap/publish/formatters/reuters_newsml_1_2_formatter_test.py @@ -213,6 +213,7 @@ def test_embeded(self): self.article['versioncreated'] = now item = self.article.copy() item.update({ + 'headline': 'headline is here', 'body_html': '

pre amble

' '' @@ -226,6 +227,8 @@ def test_embeded(self): 'format': 'HTML', "fields_meta": { "body_html": { + }, + "headline": { } } }) @@ -237,3 +240,5 @@ def test_embeded(self): etree.tostring(newsml.find('./NewsItem/NewsComponent/NewsComponent/ContentItem/DataContent/' '{http://www.w3.org/1999/xhtml}html/' '{http://www.w3.org/1999/xhtml}body')).decode('utf-8')) + self.assertEqual('headline is here', + newsml.find('./NewsItem/NewsComponent/NewsComponent/NewsLines/HeadLine').text)