From f7bbad9b95b6533dd8e4dd0970fe436ab03e2e28 Mon Sep 17 00:00:00 2001 From: marwoodandrew Date: Tue, 28 Jun 2016 17:13:31 +1000 Subject: [PATCH] [SD-4788] add signoff, more etc. to ipnews, newscentre and anpa outputs --- .../formatters/aap_ipnews_formatter.py | 28 +++++++-- .../formatters/aap_ipnews_formatter_test.py | 59 +++++++++++++++++-- .../formatters/aap_newscentre_formatter.py | 23 +++++++- .../aap_newscentre_formatter_test.py | 15 ++--- .../publish/formatters/aap_odbc_formatter.py | 21 ++++++- .../aap/publish/formatters/anpa_formatter.py | 9 ++- 6 files changed, 132 insertions(+), 23 deletions(-) diff --git a/server/aap/publish/formatters/aap_ipnews_formatter.py b/server/aap/publish/formatters/aap_ipnews_formatter.py index 721e9a0b0..e9763b513 100644 --- a/server/aap/publish/formatters/aap_ipnews_formatter.py +++ b/server/aap/publish/formatters/aap_ipnews_formatter.py @@ -17,7 +17,6 @@ from superdesk.publish.formatters import Formatter from superdesk.errors import FormatterError from superdesk.metadata.item import ITEM_TYPE, CONTENT_TYPE, FORMAT, FORMATS - import json @@ -32,8 +31,10 @@ def format(self, article, subscriber, codes=None): docs = [] for category in article.get('anpa_category'): pub_seq_num, odbc_item = self.get_odbc_item(article, subscriber, category, codes) - - soup = BeautifulSoup(self.append_body_footer(article), "html.parser") + # determine if this is the last take + is_last_take = self.is_last_take(article) + soup = BeautifulSoup(self.append_body_footer(article) if is_last_take else article.get('body_html', ''), + "html.parser") if article.get(FORMAT) == FORMATS.PRESERVED: # @article_text odbc_item['article_text'] = soup.get_text().replace('\'', '\'\'') odbc_item['texttab'] = 't' @@ -47,10 +48,27 @@ def format(self, article, subscriber, codes=None): text.write(textwrap.fill(l, 80).replace('\n', ' \r\n')) else: text.write(l + ' \r\n') - odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'') + body = 
text.getvalue().replace('\'', '\'\'') + # if this is the first take and we have a dateline inject it + if self.is_first_part(article) and 'dateline' in article and 'text' in article.get('dateline', {}): + if body.startswith('\x19\r\n'): + body = '\x19\r\n{} {}'.format(article.get('dateline').get('text').replace('\'', '\'\''), + body[3:]) + + odbc_item['article_text'] = body odbc_item['texttab'] = 'x' - self.add_embargo(odbc_item, article) + if self.is_first_part(article): + self.add_ednote(odbc_item, article) + self.add_embargo(odbc_item, article) + + if not is_last_take: + odbc_item['article_text'] += '\r\nMORE' + else: + odbc_item['article_text'] += '\r\n' + article.get('source', '') + sign_off = article.get('sign_off', '') + if len(sign_off) > 0: + odbc_item['article_text'] += ' ' + sign_off odbc_item['service_level'] = 'a' # @service_level odbc_item['wordcount'] = article.get('word_count', None) # @wordcount diff --git a/server/aap/publish/formatters/aap_ipnews_formatter_test.py b/server/aap/publish/formatters/aap_ipnews_formatter_test.py index e8fbac0d5..94dc51bc9 100644 --- a/server/aap/publish/formatters/aap_ipnews_formatter_test.py +++ b/server/aap/publish/formatters/aap_ipnews_formatter_test.py @@ -47,6 +47,11 @@ class AapIpNewsFormatterTest(SuperdeskTestCase): 'genre': [] } + pkg = [{'_id': 'package', + 'type': 'composite', + 'package_type': 'takes', + 'last_take': '3'}] + vocab = [{'_id': 'categories', 'items': [ {'is_active': True, 'name': 'Overseas Sport', 'qcode': 'S', 'subject': '15000000'}, {'is_active': True, 'name': 'Finance', 'qcode': 'F', 'subject': '04000000'} @@ -58,6 +63,7 @@ def setUp(self): self.app.data.insert('subscribers', self.subscribers) self.app.data.insert('vocabularies', self.vocab) self.app.data.insert('desks', self.desks) + self.app.data.insert('archive', self.pkg) init_app(self.app) def testIPNewsFormatterWithNoSelector(self): @@ -73,7 +79,8 @@ def testIPNewsFormatterWithNoSelector(self): self.assertDictEqual(item, {'category': 
'a', 'texttab': 't', 'fullStory': 1, 'ident': '0', 'headline': 'VIC:This is a test headline', 'service_level': 'a', 'originator': 'AAP', - 'take_key': 'take_key', 'article_text': 'The story body', 'priority': 'f', 'usn': '1', + 'take_key': 'take_key', 'article_text': 'The story body\r\nAAP', 'priority': 'f', + 'usn': '1', 'subject_matter': 'international law', 'news_item_type': 'News', 'subject_reference': '02011001', 'subject': 'crime, law and justice', 'wordcount': '1', 'subject_detail': 'international court or tribunal', @@ -82,6 +89,41 @@ def testIPNewsFormatterWithNoSelector(self): def testIPNewsHtmlToText(self): article = { + '_id': '1', + 'source': 'AAP', + 'anpa_category': [{'qcode': 'a'}], + 'headline': 'This is a test headline', + 'byline': 'joe', + 'slugline': 'slugline', + 'subject': [{'qcode': '02011001'}], + 'anpa_take_key': 'take_key', + 'unique_id': '1', + 'type': 'text', + 'body_html': '

The story body line 1
Line 2

\ +

abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi

', + 'word_count': '1', + 'priority': 1, + "linked_in_packages": [ + { + "package": "package", + "package_type": "takes" + } + ], + } + + subscriber = self.app.data.find('subscribers', None, None)[0] + + f = AAPIpNewsFormatter() + seq, item = f.format(article, subscriber)[0] + item = json.loads(item) + + expected = '\r\nThe story body line 1 \r\nLine 2 \r\n\r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \ + 'abcdefghi abcdefghi abcdefghi abcdefghi \r\n\r\nMORE' + self.assertEqual(item['article_text'], expected) + + def testLastTake(self): + article = { + '_id': '3', 'source': 'AAP', 'anpa_category': [{'qcode': 'a'}], 'headline': 'This is a test headline', @@ -92,9 +134,15 @@ def testIPNewsHtmlToText(self): 'unique_id': '1', 'type': 'text', 'body_html': '

The story body line 1
Line 2

\ -

abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi more

', +

abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi

', 'word_count': '1', - 'priority': 1 + 'priority': 1, + "linked_in_packages": [ + { + "package": "package", + "package_type": "takes" + } + ], } subscriber = self.app.data.find('subscribers', None, None)[0] @@ -104,7 +152,7 @@ def testIPNewsHtmlToText(self): item = json.loads(item) expected = '\r\nThe story body line 1 \r\nLine 2 \r\n\r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \ - 'abcdefghi abcdefghi abcdefghi abcdefghi \r\nmore' + 'abcdefghi abcdefghi abcdefghi abcdefghi \r\n\r\nAAP' self.assertEqual(item['article_text'], expected) def testMultipleCategories(self): @@ -240,7 +288,8 @@ def test_aap_ipnews_formatter_with_body_footer(self): {'category': 'a', 'texttab': 't', 'fullStory': 1, 'ident': '0', 'headline': 'VIC:This is a test headline', 'service_level': 'a', 'originator': 'AAP', 'take_key': 'take_key', - 'article_text': 'The story body\r\ncall helpline 999 if you are planning to quit smoking', + 'article_text': 'The story body\r\ncall helpline 999 if you are planning to ' + 'quit smoking\r\nAAP', 'priority': 'f', 'usn': '1', 'subject_matter': 'international law', 'news_item_type': 'News', 'subject_reference': '02011001', 'subject': 'crime, law and justice', diff --git a/server/aap/publish/formatters/aap_newscentre_formatter.py b/server/aap/publish/formatters/aap_newscentre_formatter.py index bbf97083d..c5221019f 100644 --- a/server/aap/publish/formatters/aap_newscentre_formatter.py +++ b/server/aap/publish/formatters/aap_newscentre_formatter.py @@ -28,8 +28,10 @@ def format(self, article, subscriber, codes=None): docs = [] for category in article.get('anpa_category'): pub_seq_num, odbc_item = self.get_odbc_item(article, subscriber, category, codes) + is_last_take = self.is_last_take(article) + soup = BeautifulSoup(self.append_body_footer(article) if is_last_take else article.get('body_html', ''), + "html.parser") - soup = BeautifulSoup(self.append_body_footer(article), "html.parser") if article.get(FORMAT) == FORMATS.PRESERVED: # @article_text 
odbc_item['article_text'] = soup.get_text().replace('\'', '\'\'') else: @@ -39,9 +41,24 @@ def format(self, article, subscriber, codes=None): ptext = p.get_text('\n') for l in ptext.split('\n'): text.write(l + ' \r\n') - odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'') + body = text.getvalue().replace('\'', '\'\'') + if self.is_first_part(article) and 'dateline' in article and 'text' in article.get('dateline', {}): + if body.startswith(' \r\n'): + body = ' \r\n{} {}'.format(article.get('dateline').get('text').replace('\'', '\'\''), + body[5:]) + odbc_item['article_text'] = body - self.add_embargo(odbc_item, article) + if self.is_first_part(article): + self.add_ednote(odbc_item, article) + self.add_embargo(odbc_item, article) + + if not is_last_take: + odbc_item['article_text'] += '\r\nMORE' + else: + odbc_item['article_text'] += '\r\n' + article.get('source', '') + sign_off = article.get('sign_off', '') + if len(sign_off) > 0: + odbc_item['article_text'] += ' ' + sign_off odbc_item['category'] = odbc_item.get('category', '').upper() odbc_item['selector_codes'] = odbc_item.get('selector_codes', '').upper() diff --git a/server/aap/publish/formatters/aap_newscentre_formatter_test.py b/server/aap/publish/formatters/aap_newscentre_formatter_test.py index a407cf81e..5ecdab95e 100644 --- a/server/aap/publish/formatters/aap_newscentre_formatter_test.py +++ b/server/aap/publish/formatters/aap_newscentre_formatter_test.py @@ -39,7 +39,7 @@ class AapNewscentreFormatterTest(SuperdeskTestCase): 'unique_id': '1', 'format': 'preserved', 'type': 'text', - 'body_html': 'The story body', + 'body_html': '

The story body

', 'word_count': '1', 'priority': 1, 'place': [{'qcode': 'VIC', 'name': 'VIC'}], @@ -72,7 +72,7 @@ def testNewscentreFormatterWithNoSelector(self): self.assertDictEqual(item, {'category': 'A', 'fullStory': 1, 'ident': '0', 'headline': 'VIC:This is a test headline', 'originator': 'AAP', - 'take_key': 'take_key', 'article_text': 'The story body', 'usn': '1', + 'take_key': 'take_key', 'article_text': 'The story body\r\nAAP', 'usn': '1', 'subject_matter': 'international law', 'news_item_type': 'News', 'subject_reference': '02011001', 'subject': 'crime, law and justice', 'subject_detail': 'international court or tribunal', @@ -103,7 +103,7 @@ def testNewscentreHtmlToText(self): item = json.loads(item) expected = ' \r\nThe story body line 1 \r\nLine 2 \r\n \r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \ - 'abcdefghi abcdefghi abcdefghi abcdefghi more \r\n' + 'abcdefghi abcdefghi abcdefghi abcdefghi more \r\n\r\nAAP' self.assertEqual(item['article_text'], expected) def testMultipleCategories(self): @@ -118,7 +118,7 @@ def testMultipleCategories(self): 'anpa_take_key': 'take_key', 'unique_id': '1', 'type': 'text', - 'body_html': 'body', + 'body_html': '

body

', 'word_count': '1', 'priority': 1, 'task': {'desk': 1}, @@ -155,7 +155,7 @@ def testNewscentreFormatterNoSubject(self): 'anpa_take_key': 'take_key', 'unique_id': '1', 'type': 'text', - 'body_html': 'body', + 'body_html': '

body

', 'word_count': '1', 'priority': 1, 'task': {'desk': 1}, @@ -180,7 +180,7 @@ def testNewscentreFormatterNoSubject(self): 'anpa_take_key': 'take_key', 'unique_id': '1', 'type': 'text', - 'body_html': 'body', + 'body_html': '

body

', 'word_count': '1', 'priority': 1, 'task': {'desk': 1}, @@ -210,7 +210,8 @@ def test_aap_newscentre_formatter_with_body_footer(self): {'category': 'A', 'fullStory': 1, 'ident': '0', 'headline': 'VIC:This is a test headline', 'originator': 'AAP', 'take_key': 'take_key', - 'article_text': 'The story body\r\ncall helpline 999 if you are planning to quit smoking', + 'article_text': 'The story body\r\ncall helpline 999 if you are planning ' + 'to quit smoking\r\nAAP', 'usn': '1', 'subject_matter': 'international law', 'news_item_type': 'News', 'subject_reference': '02011001', 'subject': 'crime, law and justice', diff --git a/server/aap/publish/formatters/aap_odbc_formatter.py b/server/aap/publish/formatters/aap_odbc_formatter.py index 5b0d610c3..fd6648ef1 100644 --- a/server/aap/publish/formatters/aap_odbc_formatter.py +++ b/server/aap/publish/formatters/aap_odbc_formatter.py @@ -14,6 +14,7 @@ from apps.archive.common import get_utc_schedule from .field_mappers.locator_mapper import LocatorMapper from superdesk.metadata.item import EMBARGO +from eve.utils import config import superdesk @@ -62,9 +63,20 @@ def add_embargo(self, odbc_item, article): :return: """ if article.get(EMBARGO): - embargo = '{}{}'.format('Embargo Content. Timestamp: ', get_utc_schedule(article, EMBARGO).isoformat()) + embargo = '{}{}\r\n'.format('Embargo Content. 
Timestamp: ', get_utc_schedule(article, EMBARGO).isoformat()) odbc_item['article_text'] = embargo + odbc_item['article_text'] + def add_ednote(self, odbc_item, article): + """ + Add the editorial note if required + :param odbc_item: + :param article: + :return: + """ + if article.get('ednote'): + ednote = 'EDS:{}\r\n'.format(article.get('ednote').replace('\'', '\'\'')) + odbc_item['article_text'] = ednote + odbc_item['article_text'] + def expand_subject_codes(self, odbc_item): """ Expands the subject reference to the subject matter and subject detail @@ -98,3 +110,12 @@ def set_usn(self, odbc_item, article): odbc_item['usn'] = pkg.get('unique_id', None) # @usn else: odbc_item['usn'] = article.get('unique_id', None) # @usn + + def is_last_take(self, article): + """Ask TakesPackageService if the article is the last take (id field is set from 'item_id' if present).""" + article[config.ID_FIELD] = article.get('item_id', article.get(config.ID_FIELD)) + return TakesPackageService().is_last_takes_package_item(article) + + def is_first_part(self, article): + """Return True if the article 'sequence' is 1 or not set.""" + return article.get('sequence', 1) == 1 diff --git a/server/aap/publish/formatters/anpa_formatter.py b/server/aap/publish/formatters/anpa_formatter.py index 14170a6a3..4c7426094 100644 --- a/server/aap/publish/formatters/anpa_formatter.py +++ b/server/aap/publish/formatters/anpa_formatter.py @@ -18,6 +18,8 @@ from superdesk.metadata.item import ITEM_TYPE, CONTENT_TYPE, BYLINE, EMBARGO, FORMAT, FORMATS from .field_mappers.locator_mapper import LocatorMapper from io import StringIO +from apps.packages import TakesPackageService +from eve.utils import config class AAPAnpaFormatter(Formatter): @@ -25,6 +27,9 @@ def format(self, article, subscriber, codes=None): try: docs = [] for category in article.get('anpa_category'): + article[config.ID_FIELD] = article.get('item_id', article.get(config.ID_FIELD)) + is_last_take = TakesPackageService().is_last_takes_package_item(article) + is_first_part = article.get('sequence', 1) == 1 pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber) anpa = [] @@ 
-111,7 +116,7 @@ def format(self, article, subscriber, codes=None): soup = BeautifulSoup(self.append_body_footer(article), "html.parser") anpa.append(soup.get_text().encode('ascii', 'replace')) else: - if article.get('dateline', {}).get('text'): + if is_first_part and article.get('dateline', {}).get('text'): soup = BeautifulSoup(article.get('body_html', ''), "html.parser") ptag = soup.find('p') if ptag is not None: @@ -123,7 +128,7 @@ def format(self, article, subscriber, codes=None): anpa.append(self.to_ascii(article.get('body_footer', ''))) anpa.append(b'\x0D\x0A') - if article.get('more_coming', False): + if not is_last_take: anpa.append('MORE'.encode('ascii')) else: anpa.append(article.get('source', '').encode('ascii'))