Skip to content

Commit

Permalink
SDAAP-122 Handle fancy quotes in ANPA
Browse files Browse the repository at this point in the history
  • Loading branch information
marwoodandrew committed Sep 30, 2024
1 parent 416f873 commit 66f45d9
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
12 changes: 7 additions & 5 deletions server/aap/publish/formatters/anpa_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .field_mappers.locator_mapper import LocatorMapper
from .field_mappers.slugline_mapper import SluglineMapper
from eve.utils import config
from .unicodetoascii import to_ascii
from .unicodetoascii import to_ascii, clean_string
from .category_list_map import get_aap_category_list
import re
from superdesk.etree import parse_html, to_string, etree
Expand Down Expand Up @@ -104,7 +104,7 @@ def format(self, article, subscriber, codes=None):
anpa.append(b'\x0D\x0A')

if formatted_article.get('ednote', '') != '':
ednote = '{}\r\n'.format(to_ascii(formatted_article.get('ednote')))
ednote = '{}\r\n'.format(to_ascii(clean_string(formatted_article.get('ednote'))))
anpa.append(ednote.encode('ascii', 'replace'))

if formatted_article.get(BYLINE):
Expand All @@ -115,7 +115,7 @@ def format(self, article, subscriber, codes=None):
anpa.append(get_text(self.append_body_footer(formatted_article),
content='html').encode('ascii', 'replace'))
else:
body = to_ascii(formatted_article.get('body_html', ''))
body = to_ascii(clean_string(formatted_article.get('body_html', '')))
# we need to inject the dateline
if formatted_article.get('dateline', {}).get('text') and not article.get('auto_publish', False):
body_html_elem = parse_html(formatted_article.get('body_html'))
Expand All @@ -125,7 +125,8 @@ def format(self, article, subscriber, codes=None):
body = to_string(body_html_elem)
anpa.append(self.get_text_content(body))
if formatted_article.get('body_footer'):
anpa.append(self.get_text_content(to_ascii(formatted_article.get('body_footer', ''))))
anpa.append(
self.get_text_content(to_ascii(clean_string(formatted_article.get('body_footer', '')))))

anpa.append(b'\x0D\x0A')
anpa.append(mapped_source.encode('ascii'))
Expand Down Expand Up @@ -171,7 +172,8 @@ def get_text_content(self, content):
def _process_headline(self, anpa, article, category):
# prepend the locator to the headline if required
article['headline'] = get_text(article.get('headline', ''))
headline = to_ascii(LocatorMapper().get_formatted_headline(article, category.decode('UTF-8').upper()))
headline = to_ascii(
clean_string(LocatorMapper().get_formatted_headline(article, category.decode('UTF-8').upper())))

# Set the maximum size to 64 including the sequence number if any
if len(headline) > 64:
Expand Down
18 changes: 18 additions & 0 deletions server/aap/publish/formatters/anpa_formatter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,24 @@ def test_preformated(self):
self.assertTrue(lines.getvalue().split('\r')[3].lstrip(), 'Test line 1')
self.assertTrue(lines.getvalue().split('\r')[4], 'Test line 2')

def test_fancy_quotes(self):
# Formats an article whose body mixes straight quotes with typographic
# (curly) double and single quotes, then inspects individual lines of the
# encoded ANPA output.
f = AAPAnpaFormatter()
subscriber = self.app.data.find('subscribers', None, None)[0][0]
# Work on a copy so the shared fixture article is not rebound by update().
item = self.article.copy()
# Four paragraphs: mismatched straight/curly double quotes, curly quotes
# with a curly apostrophe, a soft hyphen (U+00AD) between "hyphen" and
# "not", and nested curly single quotes closed by a straight double quote.
item.update({
'body_html': "<p>\"quoted”</p>"
"<p>“In“ IBAC’s</p>"
"<p>Short hyphen­not handled fix one day!</p>"
"<p>“Then ‘You can’t have it’,\" </p>",
'format': 'html'})
# format() returns a sequence; only the first entry's encoded bytes are checked.
resp = f.format(item, subscriber)[0]
out = resp['encoded_item']

lines = io.StringIO(out.decode())
# NOTE(review): assertTrue(expr, msg) treats its second argument as the
# failure message, not as an expected value. As written, each check below
# only verifies that the selected '\r'-separated chunk is non-empty after
# lstrip(); the quoted strings are never compared. assertEqual was
# presumably intended -- confirm the chunk indices (3, 4, 6) and the exact
# expected text against real formatter output before switching.
self.assertTrue(lines.getvalue().split('\r')[3].lstrip(), '"quoted"')
self.assertTrue(lines.getvalue().split('\r')[4].lstrip(), '"In" IBAC\'s')
# Index 5 (presumably the soft-hyphen paragraph) is deliberately not checked.
self.assertTrue(lines.getvalue().split('\r')[6].lstrip(), '"Then \'You can\'t have it\'," ')

def test_embed_in_body_body(self):
f = AAPAnpaFormatter()
subscriber = self.app.data.find('subscribers', None, None)[0][0]
Expand Down

0 comments on commit 66f45d9

Please sign in to comment.