Skip to content

Commit

Permalink
Merge pull request #25 from marwoodandrew/sd-4788
Browse files Browse the repository at this point in the history
[SD-4788] add signoff, more etc. to ipnews, newscentre and anpa outputs
  • Loading branch information
marwoodandrew authored Jun 29, 2016
2 parents ba26c69 + f7bbad9 commit 94efa1f
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 23 deletions.
28 changes: 23 additions & 5 deletions server/aap/publish/formatters/aap_ipnews_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from superdesk.publish.formatters import Formatter
from superdesk.errors import FormatterError
from superdesk.metadata.item import ITEM_TYPE, CONTENT_TYPE, FORMAT, FORMATS

import json


Expand All @@ -32,8 +31,10 @@ def format(self, article, subscriber, codes=None):
docs = []
for category in article.get('anpa_category'):
pub_seq_num, odbc_item = self.get_odbc_item(article, subscriber, category, codes)

soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
# determine if this is the last take
is_last_take = self.is_last_take(article)
soup = BeautifulSoup(self.append_body_footer(article) if is_last_take else article.get('body_html', ''),
"html.parser")
if article.get(FORMAT) == FORMATS.PRESERVED: # @article_text
odbc_item['article_text'] = soup.get_text().replace('\'', '\'\'')
odbc_item['texttab'] = 't'
Expand All @@ -47,10 +48,27 @@ def format(self, article, subscriber, codes=None):
text.write(textwrap.fill(l, 80).replace('\n', ' \r\n'))
else:
text.write(l + ' \r\n')
odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'')
body = text.getvalue().replace('\'', '\'\'')
# if this is the first take and we have a dateline inject it
if self.is_first_part(article) and 'dateline' in article and 'text' in article.get('dateline', {}):
if body.startswith('\x19\r\n'):
body = '\x19\r\n{} {}'.format(article.get('dateline').get('text').replace('\'', '\'\''),
body[3:])

odbc_item['article_text'] = body
odbc_item['texttab'] = 'x'

self.add_embargo(odbc_item, article)
if self.is_first_part(article):
self.add_ednote(odbc_item, article)
self.add_embargo(odbc_item, article)

if not is_last_take:
odbc_item['article_text'] += '\r\nMORE'
else:
odbc_item['article_text'] += '\r\n' + article.get('source', '')
sign_off = article.get('sign_off', '')
if len(sign_off) > 0:
odbc_item['article_text'] += ' ' + sign_off

odbc_item['service_level'] = 'a' # @service_level
odbc_item['wordcount'] = article.get('word_count', None) # @wordcount
Expand Down
59 changes: 54 additions & 5 deletions server/aap/publish/formatters/aap_ipnews_formatter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ class AapIpNewsFormatterTest(SuperdeskTestCase):
'genre': []
}

pkg = [{'_id': 'package',
'type': 'composite',
'package_type': 'takes',
'last_take': '3'}]

vocab = [{'_id': 'categories', 'items': [
{'is_active': True, 'name': 'Overseas Sport', 'qcode': 'S', 'subject': '15000000'},
{'is_active': True, 'name': 'Finance', 'qcode': 'F', 'subject': '04000000'}
Expand All @@ -58,6 +63,7 @@ def setUp(self):
self.app.data.insert('subscribers', self.subscribers)
self.app.data.insert('vocabularies', self.vocab)
self.app.data.insert('desks', self.desks)
self.app.data.insert('archive', self.pkg)
init_app(self.app)

def testIPNewsFormatterWithNoSelector(self):
Expand All @@ -73,7 +79,8 @@ def testIPNewsFormatterWithNoSelector(self):
self.assertDictEqual(item,
{'category': 'a', 'texttab': 't', 'fullStory': 1, 'ident': '0',
'headline': 'VIC:This is a test headline', 'service_level': 'a', 'originator': 'AAP',
'take_key': 'take_key', 'article_text': 'The story body', 'priority': 'f', 'usn': '1',
'take_key': 'take_key', 'article_text': 'The story body\r\nAAP', 'priority': 'f',
'usn': '1',
'subject_matter': 'international law', 'news_item_type': 'News',
'subject_reference': '02011001', 'subject': 'crime, law and justice',
'wordcount': '1', 'subject_detail': 'international court or tribunal',
Expand All @@ -82,6 +89,41 @@ def testIPNewsFormatterWithNoSelector(self):

# Checks the plain-text 'article_text' that AAPIpNewsFormatter produces from an HTML body.
def testIPNewsHtmlToText(self):
article = {
'_id': '1',
'source': 'AAP',
'anpa_category': [{'qcode': 'a'}],
'headline': 'This is a test headline',
'byline': 'joe',
'slugline': 'slugline',
'subject': [{'qcode': '02011001'}],
'anpa_take_key': 'take_key',
'unique_id': '1',
'type': 'text',
'body_html': '<p>The story body line 1<br>Line 2</p>\
<p>abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi</p>',
'word_count': '1',
'priority': 1,
# the article is linked to the package whose id is "package"
"linked_in_packages": [
{
"package": "package",
"package_type": "takes"
}
],
}

# use the first subscriber returned by the 'subscribers' lookup
subscriber = self.app.data.find('subscribers', None, None)[0]

f = AAPIpNewsFormatter()
# format() returns a list of pairs; only the first is checked, and its item is a JSON string
seq, item = f.format(article, subscriber)[0]
item = json.loads(item)

# NOTE(review): the expected text ends with MORE, presumably because this article ('_id': '1')
# is not the last take of the linked package - confirm against the package fixture
expected = '\r\nThe story body line 1 \r\nLine 2 \r\n\r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \
'abcdefghi abcdefghi abcdefghi abcdefghi \r\n\r\nMORE'
self.assertEqual(item['article_text'], expected)

def testLastTake(self):
article = {
'_id': '3',
'source': 'AAP',
'anpa_category': [{'qcode': 'a'}],
'headline': 'This is a test headline',
Expand All @@ -92,9 +134,15 @@ def testIPNewsHtmlToText(self):
'unique_id': '1',
'type': 'text',
'body_html': '<p>The story body line 1<br>Line 2</p>\
<p>abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi more</p>',
<p>abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi</p>',
'word_count': '1',
'priority': 1
'priority': 1,
"linked_in_packages": [
{
"package": "package",
"package_type": "takes"
}
],
}

subscriber = self.app.data.find('subscribers', None, None)[0]
Expand All @@ -104,7 +152,7 @@ def testIPNewsHtmlToText(self):
item = json.loads(item)

expected = '\r\nThe story body line 1 \r\nLine 2 \r\n\r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \
'abcdefghi abcdefghi abcdefghi abcdefghi \r\nmore'
'abcdefghi abcdefghi abcdefghi abcdefghi \r\n\r\nAAP'
self.assertEqual(item['article_text'], expected)

def testMultipleCategories(self):
Expand Down Expand Up @@ -240,7 +288,8 @@ def test_aap_ipnews_formatter_with_body_footer(self):
{'category': 'a', 'texttab': 't', 'fullStory': 1, 'ident': '0',
'headline': 'VIC:This is a test headline', 'service_level': 'a', 'originator': 'AAP',
'take_key': 'take_key',
'article_text': 'The story body\r\ncall helpline 999 if you are planning to quit smoking',
'article_text': 'The story body\r\ncall helpline 999 if you are planning to '
'quit smoking\r\nAAP',
'priority': 'f', 'usn': '1',
'subject_matter': 'international law', 'news_item_type': 'News',
'subject_reference': '02011001', 'subject': 'crime, law and justice',
Expand Down
23 changes: 20 additions & 3 deletions server/aap/publish/formatters/aap_newscentre_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ def format(self, article, subscriber, codes=None):
docs = []
for category in article.get('anpa_category'):
pub_seq_num, odbc_item = self.get_odbc_item(article, subscriber, category, codes)
is_last_take = self.is_last_take(article)
soup = BeautifulSoup(self.append_body_footer(article) if is_last_take else article.get('body_html', ''),
"html.parser")

soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
if article.get(FORMAT) == FORMATS.PRESERVED: # @article_text
odbc_item['article_text'] = soup.get_text().replace('\'', '\'\'')
else:
Expand All @@ -39,9 +41,24 @@ def format(self, article, subscriber, codes=None):
ptext = p.get_text('\n')
for l in ptext.split('\n'):
text.write(l + ' \r\n')
odbc_item['article_text'] = text.getvalue().replace('\'', '\'\'')
body = text.getvalue().replace('\'', '\'\'')
if self.is_first_part(article) and 'dateline' in article and 'text' in article.get('dateline', {}):
if body.startswith(' \r\n'):
body = ' \r\n{} {}'.format(article.get('dateline').get('text').replace('\'', '\'\''),
body[5:])
odbc_item['article_text'] = body

self.add_embargo(odbc_item, article)
if self.is_first_part(article):
self.add_ednote(odbc_item, article)
self.add_embargo(odbc_item, article)

if not is_last_take:
odbc_item['article_text'] += '\r\nMORE'
else:
odbc_item['article_text'] += '\r\n' + article.get('source', '')
sign_off = article.get('sign_off', '')
if len(sign_off) > 0:
odbc_item['article_text'] += ' ' + sign_off

odbc_item['category'] = odbc_item.get('category', '').upper()
odbc_item['selector_codes'] = odbc_item.get('selector_codes', '').upper()
Expand Down
15 changes: 8 additions & 7 deletions server/aap/publish/formatters/aap_newscentre_formatter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class AapNewscentreFormatterTest(SuperdeskTestCase):
'unique_id': '1',
'format': 'preserved',
'type': 'text',
'body_html': 'The story body',
'body_html': '<p>The story body</p>',
'word_count': '1',
'priority': 1,
'place': [{'qcode': 'VIC', 'name': 'VIC'}],
Expand Down Expand Up @@ -72,7 +72,7 @@ def testNewscentreFormatterWithNoSelector(self):
self.assertDictEqual(item,
{'category': 'A', 'fullStory': 1, 'ident': '0',
'headline': 'VIC:This is a test headline', 'originator': 'AAP',
'take_key': 'take_key', 'article_text': 'The story body', 'usn': '1',
'take_key': 'take_key', 'article_text': 'The story body\r\nAAP', 'usn': '1',
'subject_matter': 'international law', 'news_item_type': 'News',
'subject_reference': '02011001', 'subject': 'crime, law and justice',
'subject_detail': 'international court or tribunal',
Expand Down Expand Up @@ -103,7 +103,7 @@ def testNewscentreHtmlToText(self):
item = json.loads(item)

expected = ' \r\nThe story body line 1 \r\nLine 2 \r\n \r\nabcdefghi abcdefghi abcdefghi abcdefghi ' \
'abcdefghi abcdefghi abcdefghi abcdefghi more \r\n'
'abcdefghi abcdefghi abcdefghi abcdefghi more \r\n\r\nAAP'
self.assertEqual(item['article_text'], expected)

def testMultipleCategories(self):
Expand All @@ -118,7 +118,7 @@ def testMultipleCategories(self):
'anpa_take_key': 'take_key',
'unique_id': '1',
'type': 'text',
'body_html': 'body',
'body_html': '<p>body</p>',
'word_count': '1',
'priority': 1,
'task': {'desk': 1},
Expand Down Expand Up @@ -155,7 +155,7 @@ def testNewscentreFormatterNoSubject(self):
'anpa_take_key': 'take_key',
'unique_id': '1',
'type': 'text',
'body_html': 'body',
'body_html': '<p>body</p>',
'word_count': '1',
'priority': 1,
'task': {'desk': 1},
Expand All @@ -180,7 +180,7 @@ def testNewscentreFormatterNoSubject(self):
'anpa_take_key': 'take_key',
'unique_id': '1',
'type': 'text',
'body_html': 'body',
'body_html': '<p>body</p>',
'word_count': '1',
'priority': 1,
'task': {'desk': 1},
Expand Down Expand Up @@ -210,7 +210,8 @@ def test_aap_newscentre_formatter_with_body_footer(self):
{'category': 'A', 'fullStory': 1, 'ident': '0',
'headline': 'VIC:This is a test headline', 'originator': 'AAP',
'take_key': 'take_key',
'article_text': 'The story body\r\ncall helpline 999 if you are planning to quit smoking',
'article_text': 'The story body\r\ncall helpline 999 if you are planning '
'to quit smoking\r\nAAP',
'usn': '1',
'subject_matter': 'international law', 'news_item_type': 'News',
'subject_reference': '02011001', 'subject': 'crime, law and justice',
Expand Down
21 changes: 20 additions & 1 deletion server/aap/publish/formatters/aap_odbc_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from apps.archive.common import get_utc_schedule
from .field_mappers.locator_mapper import LocatorMapper
from superdesk.metadata.item import EMBARGO
from eve.utils import config
import superdesk


Expand Down Expand Up @@ -62,9 +63,20 @@ def add_embargo(self, odbc_item, article):
:return:
"""
if article.get(EMBARGO):
embargo = '{}{}'.format('Embargo Content. Timestamp: ', get_utc_schedule(article, EMBARGO).isoformat())
embargo = '{}{}\r\n'.format('Embargo Content. Timestamp: ', get_utc_schedule(article, EMBARGO).isoformat())
odbc_item['article_text'] = embargo + odbc_item['article_text']

def add_ednote(self, odbc_item, article):
    """
    Prefix the article text with the editorial note, when the article has one
    :param odbc_item: output item whose 'article_text' value is updated in place
    :param article: article that may carry an 'ednote' value
    :return:
    """
    note = article.get('ednote')
    if not note:
        # no (or empty) editorial note: leave the article text untouched
        return

    # every single quote in the note is doubled before it is written out
    escaped_note = note.replace('\'', '\'\'')
    odbc_item['article_text'] = 'EDS:{}\r\n'.format(escaped_note) + odbc_item['article_text']

def expand_subject_codes(self, odbc_item):
"""
Expands the subject reference to the subject matter and subject detail
Expand Down Expand Up @@ -98,3 +110,10 @@ def set_usn(self, odbc_item, article):
odbc_item['usn'] = pkg.get('unique_id', None) # @usn
else:
odbc_item['usn'] = article.get('unique_id', None) # @usn

def is_last_take(self, article):
    """
    Report whether the article is the last take of its takes package
    :param article: article to check; its id field is overwritten with the 'item_id' value
                    when that key is present
    :return: the result of TakesPackageService().is_last_takes_package_item for the article
    """
    id_field = config.ID_FIELD
    current_id = article.get(id_field)
    # prefer 'item_id' when the article has it, otherwise keep the existing id
    article[id_field] = article.get('item_id', current_id)
    takes_service = TakesPackageService()
    return takes_service.is_last_takes_package_item(article)

def is_first_part(self, article):
    """
    Determine whether the article is the first part of a story
    :param article: article being formatted; its optional 'sequence' value is read
    :return: True when 'sequence' is 1 or is absent, otherwise False
    """
    # The comparison has to be returned: without the return the method always
    # yields None, so every "if self.is_first_part(article)" check is falsy.
    return article.get('sequence', 1) == 1
9 changes: 7 additions & 2 deletions server/aap/publish/formatters/anpa_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,18 @@
from superdesk.metadata.item import ITEM_TYPE, CONTENT_TYPE, BYLINE, EMBARGO, FORMAT, FORMATS
from .field_mappers.locator_mapper import LocatorMapper
from io import StringIO
from apps.packages import TakesPackageService
from eve.utils import config


class AAPAnpaFormatter(Formatter):
def format(self, article, subscriber, codes=None):
try:
docs = []
for category in article.get('anpa_category'):
article[config.ID_FIELD] = article.get('item_id', article.get(config.ID_FIELD))
is_last_take = TakesPackageService().is_last_takes_package_item(article)
is_first_part = article.get('sequence', 1) == 1
pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
anpa = []

Expand Down Expand Up @@ -111,7 +116,7 @@ def format(self, article, subscriber, codes=None):
soup = BeautifulSoup(self.append_body_footer(article), "html.parser")
anpa.append(soup.get_text().encode('ascii', 'replace'))
else:
if article.get('dateline', {}).get('text'):
if is_first_part and article.get('dateline', {}).get('text'):
soup = BeautifulSoup(article.get('body_html', ''), "html.parser")
ptag = soup.find('p')
if ptag is not None:
Expand All @@ -123,7 +128,7 @@ def format(self, article, subscriber, codes=None):
anpa.append(self.to_ascii(article.get('body_footer', '')))

anpa.append(b'\x0D\x0A')
if article.get('more_coming', False):
if not is_last_take:
anpa.append('MORE'.encode('ascii'))
else:
anpa.append(article.get('source', '').encode('ascii'))
Expand Down

0 comments on commit 94efa1f

Please sign in to comment.