Skip to content

Commit

Permalink
Additional options on NINJS item endpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
marwoodandrew committed Nov 13, 2023
1 parent 9b0e7ef commit cc0cd8a
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 13 deletions.
29 changes: 29 additions & 0 deletions features/news_api_item.feature
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,35 @@ Feature: News API Item
}
]
"""
When we get "/news/item/111?format=NINJSFormatter&no_embeds=true&no_media=1"
Then we get existing resource
"""
{
"guid": "111",
"headline": "headline 1",
"body_html": "<p>Once upon a time there was </p><p> who could swim</p><p></p>"
}
"""
When we get "/news/item/111?format=NINJSFormatter2&no_embeds=true"
Then we get existing resource
"""
{
"guid": "111",
"headline": "headline 1",
"body_html": "<p>Once upon a time there was </p><p> who could swim</p><p><!-- EMBED START Image {id: \"editor_19\"} --><figure><img src=\"somthing\" alt=\"alt text\" id=\"editor_19\">Some caption</figure><!-- EMBED END Image {id: \"editor_19\"} --></p>",
"associations": {"editor_19": {"renditions": {"original": {}}}}
}
"""
When we get "/news/item/111?format=NINJSFormatter2&no_media=true"
Then we get existing resource
"""
{
"guid": "111",
"headline": "headline 1",
"body_html": "<p>Once upon a time there was </p><div class=\"embed-block\">a fish</div><p> who could swim</p><p></p>",
"associations": {}
}
"""
When we get "/news/item/111?format=NINJSFormatter3"
Then we get existing resource
"""
Expand Down
29 changes: 16 additions & 13 deletions newsroom/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,30 +463,33 @@ def update_embeds_in_body(item, update_image=None, update_audio=None, update_vid
item['body_html'] = to_string(root_elem, method="html")


def remove_all_embeds(item):
def remove_all_embeds(item, remove_by_class=True, remove_media_embeds=True):
"""
Remove all the embeds from the body of the article, including any divs with the embed-block class
:param item:
:param remove_by_class: If true removes any divs that have the embed-block class, should remove such things as
embedded tweets
:param remove_media_embeds: Remove any figure tags if the passed value is true
:return:
"""

if not item.get("body_html", ""):
return

# clean all the embedded figures from the html
blacklist = ["figure"]
root_elem = lxml_html.fromstring(item.get("body_html", ""))

cleaner = clean.Cleaner(
add_nofollow=False,
kill_tags=blacklist
)
cleaned_xhtml = cleaner.clean_html(root_elem)
if remove_by_class:
# all embedded tweets etc should be in a div with the class embed-block, these are removed
embeds = root_elem.xpath('//div[@class=\'embed-block\']')
for embed in embeds:
embed.getparent().remove(embed)

# all embedded tweets etc should be in a div with the class embed-block, these are removed
embeds = cleaned_xhtml.xpath('//div[@class=\'embed-block\']')
for embed in embeds:
cleaned_xhtml.remove(embed)
if not remove_media_embeds:
item["body_html"] = to_string(root_elem, encoding="unicode", method='html')
return

# clean all the embedded figures from the html, it will remove the comments as well
cleaner = clean.Cleaner(add_nofollow=False, kill_tags=["figure"])
cleaned_xhtml = cleaner.clean_html(root_elem)

# remove the associations relating to the embeds
kill_keys = [key for key in item.get("associations", {}) if key.startswith("editor_")]
Expand Down
16 changes: 16 additions & 0 deletions newsroom/wire/formatters/ninjs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import flask
import json
from .base import BaseFormatter
from superdesk.utils import json_serialize_datetime_objectId
from newsroom.utils import remove_all_embeds


class NINJSFormatter(BaseFormatter):
Expand All @@ -20,7 +22,21 @@ def format_item(self, item, item_type='items'):

return json.dumps(ninjs, default=json_serialize_datetime_objectId)

@staticmethod
def test_for_true(value):
"""
Test if the value indicates true
:param value: string to test, e.g. the raw text of a request query argument
:return: True if the value is 'true' (compared case-insensitively) or exactly '1', otherwise False
"""
return value.lower() == 'true' or value == '1'

def _transform_to_ninjs(self, item):
no_embeds = flask.request.args.get('no_embeds', default=False, type=self.test_for_true)
no_media = flask.request.args.get('no_media', default=False, type=self.test_for_true)
if no_media or no_embeds:
remove_all_embeds(item, remove_media_embeds=no_media, remove_by_class=no_embeds)

ninjs = {
'guid': item.get('_id'),
'version': str(item.get('version', 1)),
Expand Down

0 comments on commit cc0cd8a

Please sign in to comment.