Merge pull request #25 from marwoodandrew/sd-4788

[SD-4788] add signoff, more etc. to ipnews, newscentre and anpa outputs
superdesk · Jun 29, 2016 · 94efa1f · 94efa1f
2 parents ba26c69 + f7bbad9
commit 94efa1f
Show file tree

Hide file tree

Showing 6 changed files with 132 additions and 23 deletions.
diff --git a/server/aap/publish/formatters/aap_ipnews_formatter.py b/server/aap/publish/formatters/aap_ipnews_formatter.py
@@ -17,7 +17,6 @@
 from superdesk.publish.formatters import Formatter
 from superdesk.errors import FormatterError
 from superdesk.metadata.item import ITEM_TYPE, CONTENT_TYPE, FORMAT, FORMATS
-
 import json
 
 
@@ -32,8 +31,10 @@ def format(self, article, subscriber, codes=None):
             docs = []
             for category in article.get('anpa_category'):
                 pub_seq_num, odbc_item = self.get_odbc_item(article, subscriber, category, codes)
-
-                soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
+                # determine if this is the last take
+                is_last_take = self.is_last_take(article)
+                soup = BeautifulSoup(self.append_body_footer(article) if is_last_take else article.get('body_html', ''),
+                                     "html.parser")
                 if article.get(FORMAT) == FORMATS.PRESERVED:  # @article_text
                     odbc_item['article_text'] = soup.get_text().replace('\'', '\'\'')
                     odbc_item['texttab'] = 't'
@@ -47,10 +48,27 @@ def format(self, article, subscriber, codes=None):
                                 text.write(textwrap.fill(l, 80).replace('\n', ' \r\n'))
                             else:
                                 text.write(l + ' \r\n')
-                    odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'')
+                    body = text.getvalue().replace('\'', '\'\'')
+                    # if this is the first take and we have a dateline inject it
+                    if self.is_first_part(article) and 'dateline' in article and 'text' in article.get('dateline', {}):
+                        if body.startswith('\x19\r\n'):
+                            body = '\x19\r\n{} {}'.format(article.get('dateline').get('text').replace('\'', '\'\''),
+                                                          body[3:])
+
+                    odbc_item['article_text'] = body
                     odbc_item['texttab'] = 'x'
 
-                self.add_embargo(odbc_item, article)
+                if self.is_first_part(article):
+                    self.add_ednote(odbc_item, article)
+                    self.add_embargo(odbc_item, article)
+
+                if not is_last_take:
+                    odbc_item['article_text'] += '\r\nMORE'
+                else:
+                    odbc_item['article_text'] += '\r\n' + article.get('source', '')
+                sign_off = article.get('sign_off', '')
+                if len(sign_off) > 0:
+                    odbc_item['article_text'] += ' ' + sign_off
 
                 odbc_item['service_level'] = 'a'  # @service_level
                 odbc_item['wordcount'] = article.get('word_count', None)  # @wordcount

diff --git a/server/aap/publish/formatters/aap_ipnews_formatter_test.py b/server/aap/publish/formatters/aap_ipnews_formatter_test.py
@@ -47,6 +47,11 @@ class AapIpNewsFormatterTest(SuperdeskTestCase):
         'genre': []
     }
 
+    pkg = [{'_id': 'package',
+            'type': 'composite',
+            'package_type': 'takes',
+            'last_take': '3'}]
+
     vocab = [{'_id': 'categories', 'items': [
         {'is_active': True, 'name': 'Overseas Sport', 'qcode': 'S', 'subject': '15000000'},
         {'is_active': True, 'name': 'Finance', 'qcode': 'F', 'subject': '04000000'}
@@ -58,6 +63,7 @@ def setUp(self):
         self.app.data.insert('subscribers', self.subscribers)
         self.app.data.insert('vocabularies', self.vocab)
         self.app.data.insert('desks', self.desks)
+        self.app.data.insert('archive', self.pkg)
         init_app(self.app)
 
     def testIPNewsFormatterWithNoSelector(self):
@@ -73,7 +79,8 @@ def testIPNewsFormatterWithNoSelector(self):
         self.assertDictEqual(item,
                              {'category': 'a', 'texttab': 't', 'fullStory': 1, 'ident': '0',
                               'headline': 'VIC:This is a test headline', 'service_level': 'a', 'originator': 'AAP',
-                              'take_key': 'take_key', 'article_text': 'The story body', 'priority': 'f', 'usn': '1',
+                              'take_key': 'take_key', 'article_text': 'The story body\r\nAAP', 'priority': 'f',
+                              'usn': '1',
                               'subject_matter': 'international law', 'news_item_type': 'News',
                               'subject_reference': '02011001', 'subject': 'crime, law and justice',
                               'wordcount': '1', 'subject_detail': 'international court or tribunal',
@@ -82,6 +89,41 @@ def testIPNewsFormatterWithNoSelector(self):
 
     def testIPNewsHtmlToText(self):
+        # Formats an HTML article that is linked into a takes package and checks
+        # the plain text produced in the item's 'article_text'.
         article = {
+            '_id': '1',
+            'source': 'AAP',
+            'anpa_category': [{'qcode': 'a'}],
+            'headline': 'This is a test headline',
+            'byline': 'joe',
+            'slugline': 'slugline',
+            'subject': [{'qcode': '02011001'}],
+            'anpa_take_key': 'take_key',
+            'unique_id': '1',
+            'type': 'text',
+            'body_html': '<p>The story body line 1<br>Line 2</p>\
+                         <p>abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi</p>',
+            'word_count': '1',
+            'priority': 1,
+            # link the item to the package whose id is 'package'; a document with
+            # that id is inserted into 'archive' by setUp (the `pkg` fixture)
+            "linked_in_packages": [
+                {
+                    "package": "package",
+                    "package_type": "takes"
+                }
+            ],
+        }
+
+        subscriber = self.app.data.find('subscribers', None, None)[0]
+
+        f = AAPIpNewsFormatter()
+        # format() returns a list of (sequence number, JSON string) pairs; only the first is checked
+        seq, item = f.format(article, subscriber)[0]
+        item = json.loads(item)
+
+        # NOTE(review): the text is expected to end with 'MORE' rather than the source,
+        # presumably because this item (_id '1') is not the package's last_take ('3')
+        # - confirm against TakesPackageService.is_last_takes_package_item
+        expected = '\r\nThe story body line 1 \r\nLine 2 \r\n\r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \
+                   'abcdefghi abcdefghi abcdefghi abcdefghi \r\n\r\nMORE'
+        self.assertEqual(item['article_text'], expected)
+
+    def testLastTake(self):
+        article = {
+            '_id': '3',
             'source': 'AAP',
             'anpa_category': [{'qcode': 'a'}],
             'headline': 'This is a test headline',
@@ -92,9 +134,15 @@ def testIPNewsHtmlToText(self):
             'unique_id': '1',
             'type': 'text',
             'body_html': '<p>The story body line 1<br>Line 2</p>\
-                         <p>abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi more</p>',
+                         <p>abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi</p>',
             'word_count': '1',
-            'priority': 1
+            'priority': 1,
+            "linked_in_packages": [
+                {
+                    "package": "package",
+                    "package_type": "takes"
+                }
+            ],
         }
 
         subscriber = self.app.data.find('subscribers', None, None)[0]
@@ -104,7 +152,7 @@ def testIPNewsHtmlToText(self):
         item = json.loads(item)
 
         expected = '\r\nThe story body line 1 \r\nLine 2 \r\n\r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \
-                   'abcdefghi abcdefghi abcdefghi abcdefghi \r\nmore'
+                   'abcdefghi abcdefghi abcdefghi abcdefghi \r\n\r\nAAP'
         self.assertEqual(item['article_text'], expected)
 
     def testMultipleCategories(self):
@@ -240,7 +288,8 @@ def test_aap_ipnews_formatter_with_body_footer(self):
                              {'category': 'a', 'texttab': 't', 'fullStory': 1, 'ident': '0',
                               'headline': 'VIC:This is a test headline', 'service_level': 'a', 'originator': 'AAP',
                               'take_key': 'take_key',
-                              'article_text': 'The story body\r\ncall helpline 999 if you are planning to quit smoking',
+                              'article_text': 'The story body\r\ncall helpline 999 if you are planning to '
+                              'quit smoking\r\nAAP',
                               'priority': 'f', 'usn': '1',
                               'subject_matter': 'international law', 'news_item_type': 'News',
                               'subject_reference': '02011001', 'subject': 'crime, law and justice',

diff --git a/server/aap/publish/formatters/aap_newscentre_formatter.py b/server/aap/publish/formatters/aap_newscentre_formatter.py
@@ -28,8 +28,10 @@ def format(self, article, subscriber, codes=None):
             docs = []
             for category in article.get('anpa_category'):
                 pub_seq_num, odbc_item = self.get_odbc_item(article, subscriber, category, codes)
+                is_last_take = self.is_last_take(article)
+                soup = BeautifulSoup(self.append_body_footer(article) if is_last_take else article.get('body_html', ''),
+                                     "html.parser")
 
-                soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
                 if article.get(FORMAT) == FORMATS.PRESERVED:  # @article_text
                     odbc_item['article_text'] = soup.get_text().replace('\'', '\'\'')
                 else:
@@ -39,9 +41,24 @@ def format(self, article, subscriber, codes=None):
                         ptext = p.get_text('\n')
                         for l in ptext.split('\n'):
                             text.write(l + ' \r\n')
-                    odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'')
+                    body = text.getvalue().replace('\'', '\'\'')
+                    if self.is_first_part(article) and 'dateline' in article and 'text' in article.get('dateline', {}):
+                        if body.startswith('   \r\n'):
+                            body = '   \r\n{} {}'.format(article.get('dateline').get('text').replace('\'', '\'\''),
+                                                         body[5:])
+                    odbc_item['article_text'] = body
 
-                self.add_embargo(odbc_item, article)
+                if self.is_first_part(article):
+                    self.add_ednote(odbc_item, article)
+                    self.add_embargo(odbc_item, article)
+
+                if not is_last_take:
+                    odbc_item['article_text'] += '\r\nMORE'
+                else:
+                    odbc_item['article_text'] += '\r\n' + article.get('source', '')
+                sign_off = article.get('sign_off', '')
+                if len(sign_off) > 0:
+                    odbc_item['article_text'] += ' ' + sign_off
 
                 odbc_item['category'] = odbc_item.get('category', '').upper()
                 odbc_item['selector_codes'] = odbc_item.get('selector_codes', '').upper()

diff --git a/server/aap/publish/formatters/aap_newscentre_formatter_test.py b/server/aap/publish/formatters/aap_newscentre_formatter_test.py
@@ -39,7 +39,7 @@ class AapNewscentreFormatterTest(SuperdeskTestCase):
         'unique_id': '1',
         'format': 'preserved',
         'type': 'text',
-        'body_html': 'The story body',
+        'body_html': '<p>The story body</p>',
         'word_count': '1',
         'priority': 1,
         'place': [{'qcode': 'VIC', 'name': 'VIC'}],
@@ -72,7 +72,7 @@ def testNewscentreFormatterWithNoSelector(self):
         self.assertDictEqual(item,
                              {'category': 'A', 'fullStory': 1, 'ident': '0',
                               'headline': 'VIC:This is a test headline', 'originator': 'AAP',
-                              'take_key': 'take_key', 'article_text': 'The story body', 'usn': '1',
+                              'take_key': 'take_key', 'article_text': 'The story body\r\nAAP', 'usn': '1',
                               'subject_matter': 'international law', 'news_item_type': 'News',
                               'subject_reference': '02011001', 'subject': 'crime, law and justice',
                               'subject_detail': 'international court or tribunal',
@@ -103,7 +103,7 @@ def testNewscentreHtmlToText(self):
         item = json.loads(item)
 
         expected = '   \r\nThe story body line 1 \r\nLine 2 \r\n   \r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \
-                   'abcdefghi abcdefghi abcdefghi abcdefghi more \r\n'
+                   'abcdefghi abcdefghi abcdefghi abcdefghi more \r\n\r\nAAP'
         self.assertEqual(item['article_text'], expected)
 
     def testMultipleCategories(self):
@@ -118,7 +118,7 @@ def testMultipleCategories(self):
             'anpa_take_key': 'take_key',
             'unique_id': '1',
             'type': 'text',
-            'body_html': 'body',
+            'body_html': '<p>body</p>',
             'word_count': '1',
             'priority': 1,
             'task': {'desk': 1},
@@ -155,7 +155,7 @@ def testNewscentreFormatterNoSubject(self):
             'anpa_take_key': 'take_key',
             'unique_id': '1',
             'type': 'text',
-            'body_html': 'body',
+            'body_html': '<p>body</p>',
             'word_count': '1',
             'priority': 1,
             'task': {'desk': 1},
@@ -180,7 +180,7 @@ def testNewscentreFormatterNoSubject(self):
             'anpa_take_key': 'take_key',
             'unique_id': '1',
             'type': 'text',
-            'body_html': 'body',
+            'body_html': '<p>body</p>',
             'word_count': '1',
             'priority': 1,
             'task': {'desk': 1},
@@ -210,7 +210,8 @@ def test_aap_newscentre_formatter_with_body_footer(self):
                              {'category': 'A', 'fullStory': 1, 'ident': '0',
                               'headline': 'VIC:This is a test headline', 'originator': 'AAP',
                               'take_key': 'take_key',
-                              'article_text': 'The story body\r\ncall helpline 999 if you are planning to quit smoking',
+                              'article_text': 'The story body\r\ncall helpline 999 if you are planning '
+                              'to quit smoking\r\nAAP',
                               'usn': '1',
                               'subject_matter': 'international law', 'news_item_type': 'News',
                               'subject_reference': '02011001', 'subject': 'crime, law and justice',

diff --git a/server/aap/publish/formatters/aap_odbc_formatter.py b/server/aap/publish/formatters/aap_odbc_formatter.py
@@ -14,6 +14,7 @@
 from apps.archive.common import get_utc_schedule
 from .field_mappers.locator_mapper import LocatorMapper
 from superdesk.metadata.item import EMBARGO
+from eve.utils import config
 import superdesk
 
 
@@ -62,9 +63,20 @@ def add_embargo(self, odbc_item, article):
         :return:
         """
         if article.get(EMBARGO):
-            embargo = '{}{}'.format('Embargo Content. Timestamp: ', get_utc_schedule(article, EMBARGO).isoformat())
+            embargo = '{}{}\r\n'.format('Embargo Content. Timestamp: ', get_utc_schedule(article, EMBARGO).isoformat())
             odbc_item['article_text'] = embargo + odbc_item['article_text']
 
+    def add_ednote(self, odbc_item, article):
+        """
+        Put the editorial note in front of the article text when the article has one
+        :param odbc_item: item whose 'article_text' value is replaced with the prefixed text
+        :param article: article that may carry an 'ednote' value
+        :return:
+        """
+        note = article.get('ednote')
+        if not note:
+            # no (or empty) editorial note, the article text is left untouched
+            return
+
+        # single quotes in the note are doubled, as is done for the article body
+        escaped_note = note.replace('\'', '\'\'')
+        prefix = 'EDS:{}\r\n'.format(escaped_note)
+        odbc_item['article_text'] = prefix + odbc_item['article_text']
+
     def expand_subject_codes(self, odbc_item):
         """
         Expands the subject reference to the subject matter and subject detail
@@ -98,3 +110,10 @@ def set_usn(self, odbc_item, article):
             odbc_item['usn'] = pkg.get('unique_id', None)  # @usn
         else:
             odbc_item['usn'] = article.get('unique_id', None)  # @usn
+
+    def is_last_take(self, article):
+        """
+        Ask the takes package service if the article is the last item of its takes package
+        Note that the article is modified: its id field is overwritten with the value of
+        'item_id' when that key is present, otherwise it keeps its current id value
+        :param article:
+        :return: the result of TakesPackageService().is_last_takes_package_item for the article
+        """
+        current_id = article.get(config.ID_FIELD)
+        article[config.ID_FIELD] = article.get('item_id', current_id)
+        takes_service = TakesPackageService()
+        return takes_service.is_last_takes_package_item(article)
+
+    def is_first_part(self, article):
+        """
+        Determine if the article is the first part, that is its 'sequence' value is 1
+        An article that has no 'sequence' key is treated as the first part
+        :param article:
+        :return: True if the sequence equals 1 or the key is absent, otherwise False
+        """
+        # the result of the comparison has to be returned; without the return the
+        # method always yields None and callers never treat an article as first part
+        return article.get('sequence', 1) == 1
diff --git a/server/aap/publish/formatters/anpa_formatter.py b/server/aap/publish/formatters/anpa_formatter.py
@@ -18,13 +18,18 @@
 from superdesk.metadata.item import ITEM_TYPE, CONTENT_TYPE, BYLINE, EMBARGO, FORMAT, FORMATS
 from .field_mappers.locator_mapper import LocatorMapper
 from io import StringIO
+from apps.packages import TakesPackageService
+from eve.utils import config
 
 
 class AAPAnpaFormatter(Formatter):
     def format(self, article, subscriber, codes=None):
         try:
             docs = []
             for category in article.get('anpa_category'):
+                article[config.ID_FIELD] = article.get('item_id', article.get(config.ID_FIELD))
+                is_last_take = TakesPackageService().is_last_takes_package_item(article)
+                is_first_part = article.get('sequence', 1) == 1
                 pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
                 anpa = []
 
@@ -111,7 +116,7 @@ def format(self, article, subscriber, codes=None):
                     soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
                     anpa.append(soup.get_text().encode('ascii', 'replace'))
                 else:
-                    if article.get('dateline', {}).get('text'):
+                    if is_first_part and article.get('dateline', {}).get('text'):
                         soup = BeautifulSoup(article.get('body_html', ''), "html.parser")
                         ptag = soup.find('p')
                         if ptag is not None:
@@ -123,7 +128,7 @@ def format(self, article, subscriber, codes=None):
                         anpa.append(self.to_ascii(article.get('body_footer', '')))
 
                 anpa.append(b'\x0D\x0A')
-                if article.get('more_coming', False):
+                if not is_last_take:
                     anpa.append('MORE'.encode('ascii'))
                 else:
                     anpa.append(article.get('source', '').encode('ascii'))