From 85067038afda35037c4dd10ab00c664a2a1311c9 Mon Sep 17 00:00:00 2001 From: marwoodandrew Date: Mon, 22 Jul 2024 15:12:52 +1000 Subject: [PATCH] SDAAP-117,SDAAP-118 Generate broadcast news conten from Superdesk --- server/aap/macros/broadcast_auto_publish.py | 149 ++++++-- .../aap/macros/broadcast_auto_publish_test.py | 279 +++++++------- server/aap/macros/rolling_broadcast.py | 340 ++++++++++++++++++ server/aap/macros/rolling_broadcast_test.py | 246 +++++++++++++ 4 files changed, 851 insertions(+), 163 deletions(-) create mode 100644 server/aap/macros/rolling_broadcast.py create mode 100644 server/aap/macros/rolling_broadcast_test.py diff --git a/server/aap/macros/broadcast_auto_publish.py b/server/aap/macros/broadcast_auto_publish.py index 55f8449b2..3980ce39d 100644 --- a/server/aap/macros/broadcast_auto_publish.py +++ b/server/aap/macros/broadcast_auto_publish.py @@ -9,11 +9,37 @@ # at https://www.sourcefabric.org/superdesk/license -from superdesk.metadata.item import ITEM_STATE, CONTENT_STATE, ITEM_TYPE, CONTENT_TYPE, FORMAT, FORMATS -from superdesk.macros.internal_destination_auto_publish import internal_destination_auto_publish -from superdesk.text_utils import get_text_word_count -from aap.publish.formatters.aap_bulletinbuilder_formatter import AAPBulletinBuilderFormatter +from superdesk.metadata.item import ( + ITEM_STATE, + CONTENT_STATE, + ITEM_TYPE, + CONTENT_TYPE, + FORMAT, + FORMATS, +) +from superdesk.macros.internal_destination_auto_publish import ( + internal_destination_auto_publish, +) +from apps.archive.common import BROADCAST_GENRE +from superdesk.text_utils import get_text_word_count, get_text +from aap.publish.formatters.field_mappers.locator_mapper import LocatorMapper +from superdesk.errors import StopDuplication, DocumentError +from superdesk.editor_utils import remove_all_embeds from superdesk import config +from superdesk import get_resource_service +import logging + +logger = logging.getLogger(__name__) + + +def _get_profile_id(label): + profile = get_resource_service("content_types").find_one(req=None, label=label) + if profile: + return str(profile["_id"]) + return None + + +BROADCAST_PROFILE = "Broadcast Story" def broadcast_auto_publish(item, **kwargs): @@ -26,15 +52,73 @@ def broadcast_auto_publish(item, **kwargs): if item.get(ITEM_TYPE) != CONTENT_TYPE.TEXT or item.get(FORMAT) != FORMATS.HTML: return - formatter = AAPBulletinBuilderFormatter() - body_text = formatter.get_text_content(formatter.append_body_footer(item)) + if item.get("auto_publish", False): + return + + genre_list = ( + get_resource_service("vocabularies").find_one(req=None, _id="genre") or {} + ) + broadcast_genre = [ + {"qcode": genre.get("qcode"), "name": genre.get("name")} + for genre in genre_list.get("items", []) + if genre.get("qcode") == BROADCAST_GENRE and genre.get("is_active") + ] + + for genre in item.get("genre", []): + if genre.get("qcode", None) == BROADCAST_GENRE: + return + + remove_all_embeds(item) + + body_text = get_text(item.get("body_html"), lf_on_block=True) word_count = get_text_word_count(body_text) max_word_count = config.MIN_BROADCAST_TEXT_WORD_COUNT - item['genre'] = [{'name': 'Broadcast Script', 'qcode': 'Broadcast Script'}] - if item[ITEM_STATE] not in {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED} and \ - not (item.get('flags') or {}).get('marked_for_legal'): - if word_count > max_word_count and \ - not (item.get('flags') or {}).get('marked_for_legal'): + item["genre"] = broadcast_genre + + # prefixing the slugline with “legal: ” if required + if (item.get("flags") or {}).get("marked_for_legal") and "slugline" in item: + item["slugline"] = "Legal: " + item.get("slugline", "") + + # Locator on the headline (FED:, NSW:, Vic: etc. or AFL for Sport etc.) + mapper = LocatorMapper() + if len(item.get("anpa_category", None)): + # set the headline to text + item["headline"] = get_text(item.get("headline", ""), content="html") + item["headline"] = mapper.get_formatted_headline( + item, item.get("anpa_category")[0].get("qcode", "").upper() + ) + + # Signoff is “RTV” in the story from BOB + item["sign_off"] = "RTV" + # Agency content will need to have “RAW RTV” appended to the body. + # The News Value is 5 from BOB + item["urgency"] = 5 + + # BOB strips the byline + item.pop("byline", None) + + # BOB strips pic, Some debate that audio should be preserved + # Any associated audio clips should be kept, any other embedded media should be removed. + item.pop("associations", None) + item.pop("refs", None) + + # BOB sets the takekey to location/city e.g. (SYDNEY) + item["anpa_take_key"] = ( + "(" + + ( + ((item.get("dateline") or {}).get("located") or {}).get("city", "") or "" + ).upper() + + ")" + ) + + item["profile"] = _get_profile_id(BROADCAST_PROFILE) + + if item[ITEM_STATE] not in {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED} and not ( + item.get("flags") or {} + ).get("marked_for_legal"): + if word_count > max_word_count and not (item.get("flags") or {}).get( + "marked_for_legal" + ): lines = body_text.splitlines() new_body_html = [] for line in lines: @@ -42,28 +126,37 @@ def broadcast_auto_publish(item, **kwargs): if not para: continue - new_body_html.append('

{}

'.format(para)) - word_count = get_text_word_count(''.join(new_body_html)) + new_body_html.append("

{}

".format(para)) + word_count = get_text_word_count("".join(new_body_html)) if word_count >= max_word_count: if len(new_body_html): - item['body_html'] = ''.join(new_body_html) - item['word_count'] = word_count + item["body_html"] = "".join(new_body_html) + item["word_count"] = word_count break elif item[ITEM_STATE] in {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED}: - lines = body_text.splitlines() - lines = ['

{}

'.format(line.strip()) for line in lines if line.strip()] - # remove the first line/paragraph of kill message - lines = lines[1:] - item['body_html'] = ''.join(lines) - fields_to_remove = ['embargo', 'dateline', 'slugline', 'genre'] - for field in fields_to_remove: - item.pop(field, None) + # Kill is not required + return - internal_destination_auto_publish(item, **kwargs) + try: + if ("desk" in kwargs and "stage" in kwargs) or ( + "dest_desk_id" in kwargs and "dest_stage_id" in kwargs + ): + internal_destination_auto_publish(item) + except StopDuplication: + logger.info("macro done item=%s", item.get("_id")) + except DocumentError as err: + logger.error( + "validation error when creating brief item=%s error=%s", + item.get("_id"), + err, + ) + except Exception as err: + logger.exception(err) + return item -name = 'broadcast_auto_publish' -label = 'Broadcast Auto Publish' +name = "broadcast_auto_publish" +label = "Broadcast Auto Publish" callback = broadcast_auto_publish -access_type = 'backend' -action_type = 'direct' +access_type = "frontend" +action_type = "direct" diff --git a/server/aap/macros/broadcast_auto_publish_test.py b/server/aap/macros/broadcast_auto_publish_test.py index 8bd13c7b5..de2021d0b 100644 --- a/server/aap/macros/broadcast_auto_publish_test.py +++ b/server/aap/macros/broadcast_auto_publish_test.py @@ -15,177 +15,186 @@ class BroadcastAutoPublishTestCase(AAPTestCase): + def setUp(self): + self.app.data.insert( + "vocabularies", + [ + { + "_id": "genre", + "items": [ + { + "is_active": True, + "name": "Broadcast Script", + "qcode": "Broadcast Script", + } + ], + } + ], + ) article = { - 'guid': 'aapimage-1', '_id': '1', 'type': 'text', - 'keywords': ['Student', 'Crime', 'Police', 'Missing'], - 'body_html': '', - 'format': 'HTML', - 'state': 'published', - 'flags': { - 'marked_for_legal': False - }, - 'genre': [{'qcode': 'foo', 'name': 'bar'}] + "guid": "aapimage-1", + "_id": "1", + "type": "text", + "keywords": ["Student", "Crime", "Police", "Missing"], + "headline": "Hello World!", + "slugline": "", + "body_html": "slugline", + "format": "HTML", + "state": "published", + "flags": {"marked_for_legal": False}, + "place": [ + { + "country": "Australia", + "world_region": "Oceania", + "name": "QLD", + "state": "Queensland", + "group": "Australia", + "qcode": "QLD", + } + ], + "genre": [{"qcode": "foo", "name": "bar"}], + "anpa_category": [{"qcode": "a"}], } def get_item(self, state=None): item = deepcopy(self.article) body_lines = [] - if state not in {'killed', 'recalled'}: + if state not in {"killed", "recalled"}: for count in range(1, 125): - body_lines.append('

line-#{}#

'.format(count)) + body_lines.append("

line-#{}#

".format(count)) else: - body_lines.append('

This is intro line

') - body_lines.append('

pursuant to your information foo bar

') - body_lines.append('

line after that

') + body_lines.append("

This is intro line

") + body_lines.append("

pursuant to your information foo bar

") + body_lines.append("

line after that

") - item['body_html'] = ''.join(body_lines) + item["body_html"] = "".join(body_lines) return item - @patch('aap.macros.broadcast_auto_publish.internal_destination_auto_publish') + @patch("aap.macros.broadcast_auto_publish.internal_destination_auto_publish") def test_broadcast_copy_state_published(self, internal_dest): item = self.get_item() - broadcast_auto_publish(item) - self.assertNotIn('line-#121#', item['body_html']) - self.assertEqual([{'name': 'Broadcast Script', 'qcode': 'Broadcast Script'}], item['genre']) + broadcast_auto_publish(item, desk="a", stage="b") + self.assertNotIn("line-#121#", item["body_html"]) + self.assertEqual( + [{"name": "Broadcast Script", "qcode": "Broadcast Script"}], item["genre"] + ) internal_dest.assert_called_with(item) - @patch('aap.macros.broadcast_auto_publish.internal_destination_auto_publish') + @patch("aap.macros.broadcast_auto_publish.internal_destination_auto_publish") def test_broadcast_copy_state_corrected(self, internal_dest): item = self.get_item() - item['state'] = 'corrected' - - broadcast_auto_publish(item) - self.assertNotIn('line-#121#', item['body_html']) - self.assertEqual([{'name': 'Broadcast Script', 'qcode': 'Broadcast Script'}], item['genre']) - internal_dest.assert_called_with(item) - - @patch('aap.macros.broadcast_auto_publish.internal_destination_auto_publish') - def test_broadcast_copy_state_killed(self, internal_dest): - item = self.get_item(state='killed') - item['state'] = 'killed' - item['slugline'] = 'recalled' - item['embargo'] = 'foo' - item['dateline'] = 'bar' - broadcast_auto_publish(item) - self.assertNotIn('genre', item) - self.assertNotIn('slugline', item) - self.assertNotIn('embargo', item) - self.assertNotIn('dateline', item) - self.assertNotIn('This is intro line', item.get('body_html')) - self.assertIn('pursuant to your information foo bar', item.get('body_html')) - self.assertIn('line after that', item.get('body_html')) - broadcast_auto_publish(item) - self.assertNotIn('genre', item) - internal_dest.assert_called_with(item) + item["state"] = "corrected" - @patch('aap.macros.broadcast_auto_publish.internal_destination_auto_publish') - def test_broadcast_copy_state_recalled(self, internal_dest): - item = self.get_item(state='recalled') - item['state'] = 'recalled' - item['slugline'] = 'recalled' - item['embargo'] = 'foo' - item['dateline'] = 'bar' - broadcast_auto_publish(item) - self.assertNotIn('genre', item) - self.assertNotIn('slugline', item) - self.assertNotIn('embargo', item) - self.assertNotIn('dateline', item) - self.assertNotIn('This is intro line', item.get('body_html')) - self.assertIn('pursuant to your information foo bar', item.get('body_html')) - self.assertIn('line after that', item.get('body_html')) + broadcast_auto_publish(item, desk="a", stage="b") + self.assertNotIn("line-#121#", item["body_html"]) + self.assertEqual( + [{"name": "Broadcast Script", "qcode": "Broadcast Script"}], item["genre"] + ) internal_dest.assert_called_with(item) - @patch('aap.macros.broadcast_auto_publish.internal_destination_auto_publish') + @patch("aap.macros.broadcast_auto_publish.internal_destination_auto_publish") def test_broadcast_copy_legal(self, internal_dest): item = self.get_item() - item['flags']['marked_for_legal'] = True - broadcast_auto_publish(item) - self.assertIn('line-#124#', item['body_html']) - self.assertEqual([{'name': 'Broadcast Script', 'qcode': 'Broadcast Script'}], item['genre']) + item["flags"]["marked_for_legal"] = True + broadcast_auto_publish(item, desk=1, stage=2) + self.assertIn("line-#124#", item["body_html"]) + self.assertEqual( + [{"name": "Broadcast Script", "qcode": "Broadcast Script"}], item["genre"] + ) internal_dest.assert_called_with(item) - @patch('aap.macros.broadcast_auto_publish.internal_destination_auto_publish') + @patch("aap.macros.broadcast_auto_publish.internal_destination_auto_publish") def test_broadcast_copy_not_text_item(self, internal_dest): item = self.get_item() - item['type'] = 'picture' + item["type"] = "picture" broadcast_auto_publish(item) - self.assertIn('line-#124#', item['body_html']) - self.assertEqual([{'qcode': 'foo', 'name': 'bar'}], item['genre']) + self.assertIn("line-#124#", item["body_html"]) + self.assertEqual([{"qcode": "foo", "name": "bar"}], item["genre"]) self.assertFalse(internal_dest.called) - @patch('aap.macros.broadcast_auto_publish.internal_destination_auto_publish') + @patch("aap.macros.broadcast_auto_publish.internal_destination_auto_publish") def test_broadcast_if_body_html_is_long(self, internal_dest): item = self.get_item() - item['type'] = 'text' - item['body_html'] = "

NSW education ministers past and present have savaged Prime Minister " \ - "Scott Morrison's private schools funding deal, while the state's teachers " \ - "union says it's \"corrupt\".

Education Minister Rob Stokes argues " \ - "the federal government's $4.6 billion proposal would spell a return to the" \ - " bad old days of the funding wars.

\"Quite simply, I won't be signing " \ - "any deal that doesn't treat every student and every school with fairness,\" the " \ - "Liberal minister said in a statement.

\"The Gonski principles provide" \ - " that school funding should be needs based and sector blind and these are the" \ - " principles we hold dear.

\"We don't want a return to the school funding" \ - " wars of the past that pitted private schools against public schools and urge" \ - " the federal government to provide equal treatment for all schools, public" \ - " and private.\"

Former NSW education minister Adrian Piccoli, is also" \ - " scathing in his criticism.

\"This is pathetic. There is nothing fair" \ - " about it. There is nothing Christian about it. It’s throwing money at the" \ - " powerful and well connected,\" the ex-Nationals MP tweeted.

Mr Piccoli" \ - " subsequently tweeted Mr Morrison's press release on the deal and " \ - "stated: \"So, tell us more about the $1.2b slush fund you are setting up " \ - "only for Catholic and independent schools.\"

The federal government" \ - " plans to give Catholic and independent schools an additional $3.4 " \ - "billion over 11 years to fund changes to the way parents' wealth is " \ - "measured based on income tax data.

A further $1.2 billion will " \ - "be spent on Catholic and independent schools as the coalition sees" \ - " fit.

NSW Teachers Federation president Maurie Mulheron is " \ - "outraged.

\"This is probably the most corrupt funding deal we've ever" \ - " seen an Australian government deliver,\" he told AAP.

\"It's nothing" \ - " more than an election slush fund. It's not based on need and public schools " \ - "right across Australia don't get one single dollar out of it.\"

Mr Mulheron" \ - " said the states and territories may now refuse to sign funding " \ - "deals.

\"We'll fight this right up to the next election. If Mr Morrison" \ - " thinks there's anything settled he's got another thing coming.\"

The prime " \ - "minister on Friday brushed off Mr Stokes' attack. 

\"I don't think " \ - "Rob's yet had the chance to really look at the full details of this,\" Mr Morrison " \ - "told ABC radio.

\"I'm sure once he sees that he'll see those comments" \ - " don't weigh up with what we've actually announced.\"

" - broadcast_auto_publish(item) - self.assertIn(item["body_html"], - ''.join( - [ - "

NSW education ministers past and present have savaged Prime Minister ", - "Scott Morrison's private schools funding deal, while the state's teachers ", - "union says it's \"corrupt\".

Education Minister Rob Stokes argues ", - "the federal government's $4.6 billion proposal would spell a return to the", - " bad old days of the funding wars.

\"Quite simply, I won't be signing ", - "any deal that doesn't treat every student and every school with fairness,\" the ", - "Liberal minister said in a statement.

\"The Gonski principles provide", - " that school funding should be needs based and sector blind and these are the", - " principles we hold dear.

\"We don't want a return to the school funding", - " wars of the past that pitted private schools against public schools and urge", - " the federal government to provide equal treatment for all schools, public", - " and private.\"

" - ]) - ) - self.assertEqual([{'name': 'Broadcast Script', 'qcode': 'Broadcast Script'}], item['genre']) + item["type"] = "text" + item["body_html"] = ( + "

NSW education ministers past and present have savaged Prime Minister " + "Scott Morrison's private schools funding deal, while the state's teachers " + 'union says it\'s "corrupt".

Education Minister Rob Stokes argues ' + "the federal government's $4.6 billion proposal would spell a return to the" + " bad old days of the funding wars.

\"Quite simply, I won't be signing " + "any deal that doesn't treat every student and every school with fairness,\" the " + 'Liberal minister said in a statement.

"The Gonski principles provide' + " that school funding should be needs based and sector blind and these are the" + " principles we hold dear.

\"We don't want a return to the school funding" + " wars of the past that pitted private schools against public schools and urge" + " the federal government to provide equal treatment for all schools, public" + ' and private."

Former NSW education minister Adrian Piccoli, is also' + ' scathing in his criticism.

"This is pathetic. There is nothing fair' + " about it. There is nothing Christian about it. It’s throwing money at the" + ' powerful and well connected," the ex-Nationals MP tweeted.

Mr Piccoli' + " subsequently tweeted Mr Morrison's press release on the deal and " + 'stated: "So, tell us more about the $1.2b slush fund you are setting up ' + 'only for Catholic and independent schools."

The federal government' + " plans to give Catholic and independent schools an additional $3.4 " + "billion over 11 years to fund changes to the way parents' wealth is " + "measured based on income tax data.

A further $1.2 billion will " + "be spent on Catholic and independent schools as the coalition sees" + " fit.

NSW Teachers Federation president Maurie Mulheron is " + "outraged.

\"This is probably the most corrupt funding deal we've ever" + ' seen an Australian government deliver," he told AAP.

"It\'s nothing' + " more than an election slush fund. It's not based on need and public schools " + "right across Australia don't get one single dollar out of it.\"

Mr Mulheron" + " said the states and territories may now refuse to sign funding " + "deals.

\"We'll fight this right up to the next election. If Mr Morrison" + " thinks there's anything settled he's got another thing coming.\"

The prime " + "minister on Friday brushed off Mr Stokes' attack. 

\"I don't think " + "Rob's yet had the chance to really look at the full details of this,\" Mr Morrison " + "told ABC radio.

\"I'm sure once he sees that he'll see those comments" + " don't weigh up with what we've actually announced.\"

" + ) + broadcast_auto_publish(item, desk="a", stage="b") + self.assertIn( + item["body_html"], + "".join( + [ + "

NSW education ministers past and present have savaged Prime Minister ", + "Scott Morrison's private schools funding deal, while the state's teachers ", + 'union says it\'s "corrupt".

Education Minister Rob Stokes argues ', + "the federal government's $4.6 billion proposal would spell a return to the", + " bad old days of the funding wars.

\"Quite simply, I won't be signing ", + "any deal that doesn't treat every student and every school with fairness,\" the ", + 'Liberal minister said in a statement.

"The Gonski principles provide', + " that school funding should be needs based and sector blind and these are the", + " principles we hold dear.

\"We don't want a return to the school funding", + " wars of the past that pitted private schools against public schools and urge", + " the federal government to provide equal treatment for all schools, public", + ' and private."

', + ] + ), + ) + self.assertEqual( + [{"name": "Broadcast Script", "qcode": "Broadcast Script"}], item["genre"] + ) internal_dest.assert_called_with(item) - @patch('aap.macros.broadcast_auto_publish.internal_destination_auto_publish') + @patch("aap.macros.broadcast_auto_publish.internal_destination_auto_publish") def test_broadcast_if_body_html_is_short(self, internal_dest): item = self.get_item() - item['type'] = 'text' - item['body_html'] = "

The first case of a contaminated strawberry has been found in the " \ - "Northern Territory.

Police and health officials are yet to say " \ - "if the strawberry contained a needle in it, as has been occurring across " \ - "the country.

Police are holding a media conference in " \ - "Darwin at 1pm (CST).

The original contamination was reported in " \ - "Queensland, prompting dozens of suspected copycat incidents " \ - "involving strawberries and other fruits.

" + item["type"] = "text" + item["body_html"] = ( + "

The first case of a contaminated strawberry has been found in the " + "Northern Territory.

Police and health officials are yet to say " + "if the strawberry contained a needle in it, as has been occurring across " + "the country.

Police are holding a media conference in " + "Darwin at 1pm (CST).

The original contamination was reported in " + "Queensland, prompting dozens of suspected copycat incidents " + "involving strawberries and other fruits.

" + ) original = deepcopy(item) - broadcast_auto_publish(item) + broadcast_auto_publish(item, desk="a", stage="b") self.assertIn(item["body_html"], original["body_html"]) - self.assertEqual([{'name': 'Broadcast Script', 'qcode': 'Broadcast Script'}], item['genre']) + self.assertEqual( + [{"name": "Broadcast Script", "qcode": "Broadcast Script"}], item["genre"] + ) internal_dest.assert_called_with(item) diff --git a/server/aap/macros/rolling_broadcast.py b/server/aap/macros/rolling_broadcast.py new file mode 100644 index 000000000..a39c6f97c --- /dev/null +++ b/server/aap/macros/rolling_broadcast.py @@ -0,0 +1,340 @@ +import logging +from typing import List, Dict, Set +from io import StringIO +from datetime import datetime, timedelta +import calendar +from eve.utils import ParsedRequest +from superdesk import get_resource_service +import json +from superdesk.utils import config +from superdesk.utc import utcnow +from superdesk.metadata.item import ITEM_STATE, CONTENT_STATE +from superdesk.text_utils import get_text_word_count, get_text, get_word_count +from superdesk.editor_utils import Editor3Content, remove_all_embeds +from aap.macros.broadcast_auto_publish import _get_profile_id, BROADCAST_PROFILE + +logger = logging.getLogger(__name__) + + +def abbreviate_item_body(item: dict): + lines = get_text(item.get("body_html", "

"), lf_on_block=True).splitlines() + new_body_html = [] + for line in lines: + para = line.strip() + if not para: + continue + + new_body_html.append("

{}

".format(para)) + word_count = get_text_word_count("".join(new_body_html)) + if word_count >= config.MIN_BROADCAST_TEXT_WORD_COUNT: + if len(new_body_html): + item["body_html"] = "".join(new_body_html) + item["word_count"] = word_count + break + + +def time_rounder(time: datetime): + """ + Round the passed datetime to the next hour or half hour + :param time: + :return: + """ + # Get the current time + current_minute = time.minute + # Calculate minutes to next half hour or hour + minutes_to_next = ( + (30 - current_minute % 30) if current_minute < 30 else (60 - current_minute) + ) + # Calculate the delta time to add + delta = timedelta(minutes=minutes_to_next) + # Return the adjusted time + return time + delta + + +def write_body(body: StringIO, articles: List[dict]): + """ + Writes to the body string an entry for each article in the past list of articles + :param body: + :param articles: + :return: + """ + for article in articles: + abbreviate_item_body(article) + body.write("



") + slugline = article.get("slugline", "") + if (article.get("flags") or {}).get( + "marked_for_legal" + ) and "slugline" in article: + slugline = "Legal: " + article.get("slugline", "") + city = article.get("dateline", {}).get("located", {}).get("city", "").upper() + if city: + body.write(f"

{slugline} ({city})

") + else: + body.write(f"

{slugline}

") + body.write(article.get("body_html", "")) + + +def get_article_category_codes(article: Dict) -> Set: + return set([c.get("qcode", "").lower() for c in article.get("anpa_category", [])]) + + +def get_broadcast_story_parent_ids() -> List: + """ + Get a list of the id's of the stories the broadcast stories have been derived from. + :return: List of story id's + """ + query = { + "query": { + "bool": { + "must": [ + {"terms": {"state": ["published", "corrected"]}}, + {"match": {"pubstatus": "usable"}}, + {"match": {"last_published_version": True}}, + {"match": {"genre.qcode": "Broadcast Script"}}, + {"exists": {"field": "processed_from"}}, + ] + } + } + } + service = get_resource_service("published") + req = ParsedRequest() + req.sort = '[("versioncreated", -1)]' + req.args = {"source": json.dumps(query)} + req.projection = json.dumps({"processed_from": 1}) + req.max_results = 100 + return [bs.get("processed_from") for bs in list(service.get(req=req, lookup=None))] + + +def get_killed_families() -> Set: + """ + Return a set of families with a killed member, so we can exclude any stories with a hint of death + :return: + """ + query = { + "query": { + "bool": { + "must": [ + {"match": {"state": "killed"}}, + {"match": {"pubstatus": "canceled"}}, + ] + } + } + } + service = get_resource_service("published") + req = ParsedRequest() + req.sort = '[("versioncreated", -1)]' + req.args = {"source": json.dumps(query)} + req.projection = json.dumps({"family_id": 1}) + req.max_results = 100 + return set(bs.get("family_id") for bs in list(service.get(req=req, lookup=None))) + + +def rolling_broadcast(item, **kwargs): + """ + Rolls up the most recently published broadcast articles into a single article. + :param item: + :param kwargs: + :return: + """ + now = datetime.now() + # Get the time for the bulletin in general the articles will be generated at quarter to the hour and quarter past + # the hour for the hour and half hour respectively + bulletin_time = time_rounder(now) + + # Sort them as follows + # updated in the last three hours, + # news value, + # the difference between the creation and the last update > 6 minutes, WEIRD DONT UNDERSTAND + # then by update time. + runtime = utcnow() + + def sort_weight(article): + updated = 0 + # if article.get("rewrite_created"): + # age = runtime - article["rewrite_created"] + # if age.seconds < 60 * 60 * 3 and article.get('rewrite_created'): + # updated = 1 + age = runtime - article["versioncreated"] + if age.seconds < 60 * 60 * 3 and article.get("rewrite_sequence"): + updated = 1 + + urgency = article.get("urgency", 10) or 10 + version_created_timestamp = calendar.timegm( + article.get("versioncreated").timetuple() + ) + + # Calculate the weight as a concatenated string + weight = str(updated) + str(10 - int(urgency)) + str(version_created_timestamp) + article["weight"] = weight + return weight + + # Determine if we should apply a 12Hour time limit on potential stories, default True + time_limit = False if "NOTIMELIMIT" in item.get("slugline", "").upper() else True + # Determine if we should verify the existence of an associated Broadcast story, Default True + id_check = False if "NOIDCHECK" in item.get("slugline", "").upper() else True + # Determin if we should publish + publish = False if "NOPUB" in item.get("slugline", "").upper() else True + + updates = {} + updates[ + "headline" + ] = f'AAP Rolling News Bulletin {bulletin_time.strftime(" %B %-d, %H%M")}' + # Fixed slugline to identify the rolling bulletins + slugline = "Rolling News Bulletin" + updates["slugline"] = slugline + updates["anpa_take_key"] = bulletin_time.strftime("%H%M") + updates["genre"] = [{"name": "Broadcast Script", "qcode": "Broadcast Script"}] + updates["priority"] = 6 + updates["urgency"] = 5 + + updates["profile"] = _get_profile_id(BROADCAST_PROFILE) + + body = StringIO() + # AAP Rolling News Bulletin for Jun 07 at 1400 + body.write( + "

AAP Rolling News Bulletin for " + bulletin_time.strftime("%B %-d at %H%M") + ) + body.write("

") + body.write("

\u000e

\u000e

") + + try: + ids = get_broadcast_story_parent_ids() + service = get_resource_service("published") + req = ParsedRequest() + query = { + "query": { + "bool": { + "must": [ + {"terms": {"state": ["published", "corrected"]}}, + {"match": {"pubstatus": "usable"}}, + {"match": {"last_published_version": True}}, + {"match": {"type": "text"}}, + ], + "must_not": [ + {"match": {"slugline": slugline}}, + {"match": {"genre.qcode": "Broadcast Script"}}, + {"wildcard": {"genre.qcode": "*FactCheck*"}}, + {"match": {"keywords": "marketplace"}}, + {"match": {"auto_publish": "true"}}, + ], + } + } + } + if time_limit: + query["query"]["bool"]["must"].append( + {"range": {"versioncreated": {"gte": "now-12H"}}} + ) + logger.warning("Applying 12 Hour limit") + else: + logger.warning("NOT Applying 12 Hour limit") + if id_check: + query["query"]["bool"]["must"].append({"terms": {"item_id": ids}}) + logger.warning("Applying Broadcast Story ID Check") + else: + logger.warning("NOT applying Broadcast Story ID check") + + req.sort = '[("versioncreated", -1),("urgency", 1)]' + req.args = {"source": json.dumps(query)} + req.max_results = 100 + articles = list(service.get(req=req, lookup=None)) + + articles.sort(key=sort_weight, reverse=True) + + logger.warning(f"Articles Found {len(articles)}") + # Need 10 stories ordered by News Value (urgency) updated in the last 3 hours + news_stories = [] + sport_stories = [] + entertainment_stories = [] + finance_stories = [] + + # Create a set of item_ids that have been rewritten + rewritten_ids = { + article.get("rewritten_by") + for article in articles + if "rewritten_by" in article + } + + killed_families = get_killed_families() + + for article in articles: + # check if the article is in the list of rewritten ones, if it is then ignore the article + if article.get("item_id") in rewritten_ids: + continue + # ignore those killed and their families + if ( + article.get("state") == "killed" + or article.get("family_id") in killed_families + ): + continue + + remove_all_embeds(article) + category_codes = get_article_category_codes(article) + logger.warning( + f"{article.get('weight')} {article.get('slugline')}:{article.get('headline')} cat{category_codes}" + ) + if len(news_stories) < 8 and category_codes & {"a", "i"}: + news_stories.append(article) + elif len(sport_stories) < 2 and category_codes & {"s", "t"}: + sport_stories.append(article) + elif len(finance_stories) < 2 and category_codes & {"f"}: + finance_stories.append(article) + elif len(entertainment_stories) < 2 and category_codes & {"e"}: + entertainment_stories.append(article) + + updates["body_html"] = "

" + write_body(body, news_stories) + + if len(finance_stories): + body.write("



In finance ...

") + write_body(body, finance_stories) + if len(entertainment_stories): + body.write("



In entertainment ...

") + write_body(body, entertainment_stories) + if len(sport_stories): + body.write("



In sport ...

") + write_body(body, sport_stories) + + body.write( + "

Ends Bulletin

Rolling News Desk inquiries : 02 9322 8611

" + ) + + updates["body_html"] = body.getvalue() + updates["word_count"] = get_word_count(updates.get("body_html", "")) + ed = Editor3Content(updates, reload=True) + ed.update_item() + + body.close() + + except Exception as e: + logger.exception("Retrieving broadcast articles raised exception: {}".format(e)) + pass + + # If the macro is being executed by a scheduled template then publish the item as well + if "desk" in kwargs and "stage" in kwargs and publish: + logger.warning(f"Auto publishing {item.get('_id')} {updates.get('headline')}") + updates["state"] = "submitted" + get_resource_service("archive").system_update( + item[config.ID_FIELD], updates, item + ) + + get_resource_service("archive_publish").patch( + id=item[config.ID_FIELD], + updates={ITEM_STATE: CONTENT_STATE.PUBLISHED, "auto_publish": True}, + ) + return get_resource_service("archive").find_one( + req=None, _id=item[config.ID_FIELD] + ) + else: + logger.warning( + f"NOT Auto publishing {item.get('_id')} {updates.get('headline')}" + ) + + item.update(updates) + return item + + +name = "Rolling Broadcast Bulletin" +label = "Rolling Broadcast Bulletin" +callback = rolling_broadcast +access_type = "frontend" +action_type = "direct" diff --git a/server/aap/macros/rolling_broadcast_test.py b/server/aap/macros/rolling_broadcast_test.py new file mode 100644 index 000000000..4420b246d --- /dev/null +++ b/server/aap/macros/rolling_broadcast_test.py @@ -0,0 +1,246 @@ +import datetime +from unittests import AAPTestCase +from .rolling_broadcast import rolling_broadcast +from superdesk.utc import utcnow +from copy import deepcopy + + +class TestRollingBroadcast(AAPTestCase): + published = [ + { + "_id": "1111", + "item_id": "urn:newsml:aap.com.au:2024-05-27T12:05:39.890501:b4ba91e7-1272-4d9d-887a-9c43df5934d9", + "family_id": "urn:newsml:aap.com.au:2024-05-27T12:05:39.890501:b4ba91e7-1272-4d9d-887a-9c43df5934d9", + "state": "published", + "type": "text", + "last_published_version": True, + "genre": {"code": "Article"}, + "slugline": "Markets Aust", + "anpa_category": [{"qcode": "f"}], + "urgency": None, + "pubstatus": "usable", + "firstcreated": utcnow() - datetime.timedelta(hours=1), + "headline": "Australian shares rebound from losses at midday", + "versioncreated": utcnow() - datetime.timedelta(hours=1), + "body_html": "

Finance Story one hour old

" + + "

1

1

1

1

1

1

1

1

1

1

1

" + * 200, + }, + { + "_id": "2222", + "item_id": "urn:newsml:aap.com.au:2024-05-27T10:01:41.293453:410954b3-3612-49a5-b11f-40b81c2e4a82", + "rewrite_of": "urn:newsml:aap.com.au:2024-07-09T08:21:22.215191:ca76857b-199b-41c8-9b91-6b6e210e40d7", + "rewrite_sequence": 1, + "state": "published", + "type": "text", + "last_published_version": True, + "genre": {"code": "Article"}, + "slugline": "Oly24 Row Aust", + "keywords": ["OLY", "Women"], + "anpa_category": [{"qcode": "s"}, {"qcode": "a"}], + "urgency": 2, + "pubstatus": "usable", + "firstcreated": utcnow() - datetime.timedelta(hours=1), + "headline": "Aussie rowers bag four World Cup silver medals", + "versioncreated": utcnow() - datetime.timedelta(hours=1), + "body_html": "

Sport and News Story one hour old and has been rewritten adds weight

" + + "

2

2

2

2

2

2

2

2

2

2

2

" + * 200, + }, + { + "_id": "3333", + "item_id": "urn:newsml:aap.com.au:2024-05-27T11:26:51.232837:72fda625-011d-41bd-9336-c7e05ac7934e", + "state": "published", + "last_published_version": True, + "type": "text", + "genre": {"code": "Article"}, + "slugline": "PNG Aust", + "anpa_category": [{"qcode": "a"}], + "urgency": 1, + "pubstatus": "usable", + "firstcreated": utcnow() - datetime.timedelta(hours=1), + "headline": "Time 'not on side' for people trapped in PNG landslide", + "versioncreated": utcnow() - datetime.timedelta(hours=1), + "body_html": "

News one hour old

" + + "

3

3

3

3

3

3

3

3

3

3

3

" + * 200, + }, + { + "_id": "4444", + "item_id": "urn:newsml:aap.com.au:2024-05-27T11:08:09.739087:6cdefd94-e4ac-4ede-8f21-bc6b1703c96c", + "state": "published", + "type": "text", + "genre": {"code": "Article"}, + "slugline": "Palestine Aust", + "last_published_version": True, + "anpa_category": [{"qcode": "a"}], + "urgency": 1, + "pubstatus": "usable", + "firstcreated": utcnow() - datetime.timedelta(hours=4), + "headline": "Cops arrive at Gaza protest as students refuse to budge", + "versioncreated": utcnow() - datetime.timedelta(hours=4), + "body_html": "

Finance Story four hours old

" + + "

4

4

4

4

4

4

4

4

4

4

4

" + * 200, + }, + { + "_id": "5555", + "item_id": "urn:newsml:aap.com.au:2024-07-09T08:21:22.215191:ca76857b-199b-41c8-9b91-6b6e210e40d7", + "rewritten_by": "urn:newsml:aap.com.au:2024-05-27T10:01:41.293453:410954b3-3612-49a5-b11f-40b81c2e4a82", + "state": "published", + "type": "text", + "genre": {"code": "Article"}, + "slugline": "Palestine Aust", + "last_published_version": True, + "anpa_category": [{"qcode": "a"}], + "urgency": 1, + "pubstatus": "usable", + "firstcreated": utcnow() - datetime.timedelta(hours=2), + "headline": "Cops arrive at Gaza protest as students refuse to budge", + "versioncreated": utcnow() - datetime.timedelta(hours=2), + "body_html": "

Updated Story 2 hours old

" + + "

4

4

4

4

4

4

4

4

4

4

4

" + * 200, + }, + { + "processed_from": "urn:newsml:aap.com.au:2024-05-27T12:05:39.890501:b4ba91e7-1272-4d9d-887a-9c43df5934d9", + "state": "published", + "type": "text", + "pubstatus": "usable", + "last_published_version": True, + "genre": {"qcode": "Broadcast Script"}, + "versioncreated": utcnow() - datetime.timedelta(hours=1), + }, + { + "processed_from": "urn:newsml:aap.com.au:2024-05-27T10:01:41.293453:410954b3-3612-49a5-b11f-40b81c2e4a82", + "state": "published", + "type": "text", + "pubstatus": "usable", + "last_published_version": True, + "genre": {"qcode": "Broadcast Script"}, + "versioncreated": utcnow() - datetime.timedelta(hours=1), + }, + { + "processed_from": "urn:newsml:aap.com.au:2024-05-27T11:26:51.232837:72fda625-011d-41bd-9336-c7e05ac7934e", + "state": "published", + "type": "text", + "pubstatus": "usable", + "last_published_version": True, + "genre": {"qcode": "Broadcast Script"}, + "versioncreated": utcnow() - datetime.timedelta(hours=1), + }, + { + "processed_from": "urn:newsml:aap.com.au:2024-05-27T11:08:09.739087:6cdefd94-e4ac-4ede-8f21-bc6b1703c96c", + "state": "published", + "type": "text", + "pubstatus": "usable", + "last_published_version": True, + "genre": {"qcode": "Broadcast Script"}, + "versioncreated": utcnow() - datetime.timedelta(hours=1), + }, + { + "processed_from": "urn:newsml:aap.com.au:2024-07-09T08:21:22.215191:ca76857b-199b-41c8-9b91-6b6e210e40d7", + "state": "published", + "type": "text", + "pubstatus": "usable", + "last_published_version": True, + "genre": {"qcode": "Broadcast Script"}, + "versioncreated": utcnow() - datetime.timedelta(hours=1), + }, + ] + + def test_sort_boost(self): + self.app.data.insert("published", self.published) + item = rolling_broadcast({"slugline": "fish"}) + self.assertEquals(item.get("slugline"), "Rolling News Bulletin") + first = item.get("body_html").find("Oly24 Row Aust") + second = item.get("body_html").find("PNG Aust") + third = item.get("body_html").find("Palestine Aust") + fourth = item.get("body_html").find("Markets Aust") + order = [first, second, third, fourth] + self.assertTrue(all(order[i] < order[i + 1] for i in range(len(order) - 1))) + self.assertNotIn("Old Story", item.get("body_html")) + + def test_remove_time_limits(self): + with_old_item = deepcopy(self.published) + with_old_item.append( + { + "_id": "6666", + "item_id": "urn:newsml:aap.com.au:2024-07-09T08:21:22.215191:ca76857b-199b-41c8-9b91-6b6e210e40d8", + "state": "published", + "type": "text", + "genre": {"code": "Article"}, + "slugline": "Old Story", + "last_published_version": True, + "anpa_category": [{"qcode": "a"}], + "urgency": 1, + "pubstatus": "usable", + "firstcreated": utcnow() - datetime.timedelta(hours=13), + "headline": "Somthing from Some time ago", + "versioncreated": utcnow() - datetime.timedelta(hours=13), + "body_html": "

Somthing from Some time ago

" + + "

6

6

6

6

6

6

6

6

6

6

6

" + * 200, + } + ) + self.app.data.insert("published", with_old_item) + item = rolling_broadcast({"slugline": "NOIDCHECK NOTIMELIMIT"}) + self.assertIn("Old Story", item.get("body_html")) + item = rolling_broadcast({"slugline": "NOTIMELIMIT"}) + self.assertNotIn("Old Story", item.get("body_html")) + + def test_id_check(self): + with_old_item = deepcopy(self.published) + with_old_item.append( + { + "_id": "6666", + "item_id": "urn:newsml:aap.com.au:2024-07-09T08:21:22.215191:ca76857b-199b-41c8-9b91-6b6e210e40d8", + "state": "published", + "type": "text", + "genre": {"code": "Article"}, + "slugline": "Story", + "last_published_version": True, + "anpa_category": [{"qcode": "a"}], + "urgency": 1, + "pubstatus": "usable", + "firstcreated": utcnow() - datetime.timedelta(hours=1), + "headline": "Recent story with no Broadcast version", + "versioncreated": utcnow() - datetime.timedelta(hours=1), + "body_html": "

Recent story with no Broadcast version

" + + "

6

6

6

6

6

6

6

6

6

6

6

" + * 200, + } + ) + self.app.data.insert("published", with_old_item) + item = rolling_broadcast({"slugline": "test"}) + self.assertNotIn( + "Recent story with no Broadcast version", item.get("body_html") + ) + + item = rolling_broadcast({"slugline": "NOIDCHECK"}) + self.assertIn("Recent story with no Broadcast version", item.get("body_html")) + + def test_remove_killed_family(self): + with_old_item = deepcopy(self.published) + with_old_item.append( + { + "_id": "6666", + "item_id": "urn:newsml:aap.com.au:2024-07-09T08:21:22.215191:ca76857b-199b-41c8-9b91-6b6e210e40d8", + "family_id": "urn:newsml:aap.com.au:2024-05-27T12:05:39.890501:b4ba91e7-1272-4d9d-887a-9c43df5934d9", + "state": "killed", + "type": "text", + "genre": {"code": "Article"}, + "slugline": "Old Story", + "last_published_version": True, + "anpa_category": [{"qcode": "a"}], + "urgency": 1, + "pubstatus": "canceled", + "firstcreated": utcnow() - datetime.timedelta(hours=13), + "headline": "Somthing from Some time ago", + "versioncreated": utcnow() - datetime.timedelta(hours=13), + "body_html": "Killed", + } + ) + self.app.data.insert("published", with_old_item) + item = rolling_broadcast({}) + self.assertNotIn("Finance Story one hour old", item.get("body_html"))