From 5822f182326edbe6f28d3916476a417bd2fedc45 Mon Sep 17 00:00:00 2001
From: marwoodandrew <amarwood@aap.com.au>
Date: Wed, 7 Oct 2020 12:29:06 +1100
Subject: [PATCH] Golf and BOB formatter patches

---
 server/aap/macros/golf_collation_v2.py        | 194 ++++++++++++++++++
 .../aap_bulletinbuilder_formatter.py          |   5 +-
 .../aap_bulletinbuilder_formatter_tests.py    |  10 +-
 server/data/golf_links.json                   |   1 +
 4 files changed, 204 insertions(+), 6 deletions(-)
 create mode 100644 server/aap/macros/golf_collation_v2.py

diff --git a/server/aap/macros/golf_collation_v2.py b/server/aap/macros/golf_collation_v2.py
new file mode 100644
index 000000000..734fb9995
--- /dev/null
+++ b/server/aap/macros/golf_collation_v2.py
@@ -0,0 +1,194 @@
+# -*- coding: utf-8; -*-
+#
+# This file is part of Superdesk.
+#
+# Copyright 2013, 2014 Sourcefabric z.u. and contributors.
+#
+# For the full copyright and license information, please see the
+# AUTHORS and LICENSE files distributed with this source code, or
+# at https://www.sourcefabric.org/superdesk/license
+
+from superdesk import get_resource_service
+import logging
+from eve.utils import ParsedRequest
+import json
+from datetime import datetime
+import pytz
+from flask import current_app as app
+from apps.prepopulate.app_initialize import get_filepath
+
+logger = logging.getLogger(__name__)
+
+
+def golf_collation(item, **kwargs):
+    """
+    Collates a number of Golf results into a single story.
+    It uses the location of the input item to filter the included stories.
+    It expects the name of the golf course (links) to be in the slugline.
+    Stories will be included based on the order of the slugline.
+    If grouping results into regions it expects the region name to be in the anpa_take_key of the input item.
+    :param item: the story the macro is run against, it must have exactly one place and is modified in place
+    :param kwargs: not used
+    :return: the passed item with body_html and slugline (and possibly anpa_take_key) replaced
+    """
+
+    def get_desk():
+        """
+        Search for a desk on the system with the name "Copytakers"
+        :return: the desk document, an Exception is raised if no such desk is found
+        """
+        logger.info('Fetching the ObjectID for the desk "Copytakers".')
+        query = {'name': 'Copytakers'}
+        req = ParsedRequest()
+        req.where = json.dumps(query)
+
+        desk_service = get_resource_service('desks')
+        desk_item = list(desk_service.get_from_mongo(req=req, lookup=None))
+        if not desk_item:
+            raise Exception('Failed to find a desk called "Copytakers".')
+
+        desk_id = desk_item[0]['_id']
+        logger.info('ObjectID for the desk Copytakers is {}.'.format(desk_id))
+        return desk_item[0]
+
+    def get_hold_stages(desk_id):
+        """
+        Get any stages on the passed desk that have the word Hold in their name
+        :param desk_id: id of the desk whose stages are searched
+        :return: the stages whose name matches "Hold", ignoring case
+        """
+        lookup = {'$and': [{'name': {'$regex': 'Hold', '$options': 'i'}}, {'desk': str(desk_id)}]}
+        stages = get_resource_service('stages').get(req=None, lookup=lookup)
+        return stages
+
+    def get_result_items(location, desk_id, stage_ids, midnight_utc):
+        """
+        Need to find all stories that need to be collated
+        The subject should be golf
+        The place should match that of the story the macro is being run against
+        The slugline should not start with 'Golf Results' (output story will have this slugline)
+        The story should be updated/created since midnight
+        Should be on one of the passed stages of the copy takers desk
+        Not spiked
+        Not already a collated story
+        :param location: the place of the input item, its qcode is matched against place.qcode
+        :param desk_id: id of the desk the stories must be on
+        :param stage_ids: list of ids of the stages the stories may be on
+        :param midnight_utc: timestamp string, only stories with a versioncreated at or after it are returned
+        :return: the result of the search service for the archive repo, at most 200 stories
+        """
+        query = {
+            "query": {
+                "filtered": {
+                    "filter": {
+                        "bool": {
+                            "must": [
+                                {"term": {"place.qcode": location.get("qcode")}},
+                                {"term": {"subject.qcode": "15027000"}},
+                                {"term": {"task.desk": str(desk_id)}},
+                                {"terms": {"task.stage": stage_ids}},
+                                {
+                                    "range": {
+                                        "versioncreated": {
+                                            "gte": midnight_utc
+                                        }
+                                    }
+                                }
+                            ],
+                            "must_not": [
+                                {"term": {"state": "spiked"}},
+                                {"query": {
+                                    "match_phrase_prefix": {
+                                        "slugline": "Golf Results"
+                                    }
+                                }}
+                            ]
+                        }
+                    }
+                }
+            },
+            "sort": [{"slugline": "asc"}],
+            "size": 200
+        }
+
+        req = ParsedRequest()
+        req.args = {'source': json.dumps(query), 'repo': 'archive'}
+        return get_resource_service('search').get(req=req, lookup=None)
+
+    def in_links(story, names):
+        """
+        Check whether any of the passed golf course names starts with the slugline of the story
+        """
+        slugline = (story.get('slugline') or '').lower()
+        return any(name.lower().startswith(slugline) for name in names or [])
+
+    if len(item.get('place') or []) != 1:
+        raise Exception("The story you're running the macro on must have a single place defined")
+    location = item.get('place')[0]
+
+    # Read the file that groups golf courses into regions
+    path = get_filepath('golf_links.json')
+    try:
+        with path.open('r') as f:
+            regions = json.load(f)
+    except Exception as ex:
+        logger.error('Exception loading golf_links.json : {}'.format(ex))
+        regions = {}  # carry on without region grouping rather than failing on an unbound name
+
+    copytakers_desk = get_desk()
+
+    # Attempt to get the hold stages for the Copytakers desk
+    stages = get_hold_stages(copytakers_desk.get('_id'))
+    stage_ids = [str(s.get('_id')) for s in stages]
+    if not stage_ids:
+        raise Exception('No hold stages found on desk "{}"'.format(copytakers_desk.get('name')))
+
+    # Get the local midnight in UTC
+    midnight_utc = datetime.now(pytz.timezone(app.config['DEFAULT_TIMEZONE']))\
+        .replace(hour=0, minute=0, second=0, microsecond=0).astimezone(pytz.utc).isoformat()[:19] + 'z'
+
+    # List of golf courses to include, if grouping by region
+    links = None
+    # A flag that indicates if all regions are to be included
+    collated_grouped = False
+
+    # Get any entry from the golf links file for the state defined in the location of the item story
+    state_regions = [s for s in (regions.get('states') or []) if s.get('state') == location.get('qcode')]
+    if state_regions:
+        state_region = state_regions[0]
+        # Match the value in the take key to any region in the links file
+        region = [r for r in (state_region.get('regions') or []) if
+                  item.get('anpa_take_key', '') and r.get('name', '').lower() == item.get('anpa_take_key', '').lower()]
+        if region:
+            links = region[0].get('links', [])
+        else:
+            # If no match is found then it is assumed that a collated story of all regions is to be produced.
+            collated_grouped = True
+
+    items = sorted(get_result_items(location, copytakers_desk.get('_id'), stage_ids, midnight_utc),
+                   key=lambda s: (s.get('slugline') or '').lower())
+    body = ''
+    if collated_grouped:
+        # Every region of the state gets a heading followed by the results of its courses
+        for group in state_region.get('regions') or []:
+            body += '<p>' + group.get('name', '') + '</p>'
+            body += ''.join(i.get('body_html') or '' for i in items if in_links(i, group.get('links')))
+    else:
+        body = ''.join(i.get('body_html') or '' for i in items if not links or in_links(i, links))
+
+    if not links:
+        dayname = datetime.now(pytz.timezone(app.config['DEFAULT_TIMEZONE'])).strftime('%A')
+        item['anpa_take_key'] = location.get('state', '') + ' ' + dayname
+
+    item['body_html'] = body
+    item['slugline'] = 'Golf Results'
+
+    return item
+
+
+name = 'Golf collation V2'  # NOTE(review): presumably the name the macro is registered under - confirm
+label = 'Golf collation V2'  # NOTE(review): presumably the text shown to users for this macro - confirm
+callback = golf_collation  # the function defined above is exposed as the callback of this module
+access_type = 'frontend'  # NOTE(review): meaning of this value is defined by the macro framework - confirm
+action_type = 'direct'  # NOTE(review): meaning of this value is defined by the macro framework - confirm
+group = 'Copytakers'  # same name as the desk that golf_collation collects the results from
diff --git a/server/aap/publish/formatters/aap_bulletinbuilder_formatter.py b/server/aap/publish/formatters/aap_bulletinbuilder_formatter.py
index 14bc28c97..d50327aa1 100644
--- a/server/aap/publish/formatters/aap_bulletinbuilder_formatter.py
+++ b/server/aap/publish/formatters/aap_bulletinbuilder_formatter.py
@@ -23,6 +23,7 @@
 from copy import deepcopy
 import json
 from superdesk.etree import parse_html, etree
+from superdesk.macros.extract_html import extract_html_macro
 
 
 class AAPBulletinBuilderFormatter(Formatter):
@@ -41,6 +42,8 @@ def format(self, article, subscriber, codes=None):
         try:
             formatted_article = deepcopy(article)
 
+            formatted_article = extract_html_macro(formatted_article)
+
             pub_seq_num = superdesk.get_resource_service('subscribers').generate_sequence_number(subscriber)
             body_html = to_ascii(self.append_body_footer(formatted_article)).strip('\r\n')
 
@@ -179,7 +182,7 @@ def format_associated_item(self, item):
         if not item.get(ASSOCIATIONS):
             return
 
-        for assoc, value in item.get(ASSOCIATIONS).items():
+        for _assoc, value in item.get(ASSOCIATIONS).items():
             if not value or value.get(ITEM_TYPE) not in {CONTENT_TYPE.AUDIO, CONTENT_TYPE.VIDEO,
                                                          CONTENT_TYPE.GRAPHIC, CONTENT_TYPE.PICTURE}:
                 continue
diff --git a/server/aap/publish/formatters/aap_bulletinbuilder_formatter_tests.py b/server/aap/publish/formatters/aap_bulletinbuilder_formatter_tests.py
index 452b1a9d9..a1c1bceaf 100644
--- a/server/aap/publish/formatters/aap_bulletinbuilder_formatter_tests.py
+++ b/server/aap/publish/formatters/aap_bulletinbuilder_formatter_tests.py
@@ -92,7 +92,7 @@ def test_strip_html(self):
                           '<table><tr><td>test</td></tr></table>')
         }
 
-        body_text = ('The story body line 1 Line 2\r\n\r\n'
+        body_text = ('The story body line 1\r\n\r\nLine 2\r\n\r\n'
                      'abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi'
                      ' abcdefghi abcdefghi abcdefghi more\r\n\r\n'
                      'test\r\n\r\n')
@@ -120,7 +120,7 @@ def test_strip_html_case1(self):
                           '<table><tr><td>test</td></tr></table>')
         }
 
-        body_text = ('The story body line 1 Line 2\r\n\r\n'
+        body_text = ('The story body line 1\r\n\r\nLine 2\r\n\r\n'
                      'abcdefghi abcdefghi abcdefghi abcdefghi abcdefghi'
                      ' abcdefghi abcdefghi abcdefghi more\r\n\r\n'
                      'test\r\n\r\n')
@@ -143,7 +143,7 @@ def test_strip_html_case2(self):
                           '<p>This is test.</p><br/><p><br/></p>')
         }
 
-        body_text = ('This is third take.\r\n\r\n'
+        body_text = ('This is third\r\n\r\ntake.\r\n\r\n'
                      'Correction in the third take.\r\n\r\n'
                      'This is test.\r\n\r\n')
 
@@ -165,8 +165,8 @@ def test_strip_html_with_linebreak(self):
                           '<p>This is test.</p><br/><p><br/></p>')
         }
 
-        body_text = ('This is third take.\r\n\r\n'
-                     'Correction in the third take.\r\n\r\n'
+        body_text = ('This is\r\n\r\nthird\r\n\r\ntake.\r\n\r\n'
+                     'Correction\r\n\r\nin the third take.\r\n\r\n'
                      'This is test.\r\n\r\n')
 
         subscriber = self.app.data.find('subscribers', None, None)[0]
diff --git a/server/data/golf_links.json b/server/data/golf_links.json
index 67dd79c2d..3dfa97c2c 100644
--- a/server/data/golf_links.json
+++ b/server/data/golf_links.json
@@ -105,6 +105,7 @@
                         "Sa Taxis",
                         "SA Taxi",
                         "Salisbury",
+                        "Sandy Creek",
                         "Shell",
                         "Shell Golf",
                         "South Lakes",