generated from superdesk/newsroom-app
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
handle transcripts on publish (#159)
- set source based on station subject
- set 90 days expiry for tv/radio transcripts
- populate mediaformat subject if missing
- add script to populate mediaformat for old items
- add sidebar filter for mediaformat

CPCN-49 CPCN-504 CPCN-520
- Loading branch information
Showing
7 changed files
with
198 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from . import fix_language # noqa | ||
from . import fix_mediaformat # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import time | ||
|
||
from superdesk import get_resource_service | ||
from cp.signals import get_media_type_name, get_media_type_scheme | ||
from newsroom.commands.manager import manager | ||
|
||
|
||
@manager.command
def fix_mediaformat(resource="items", limit=500, sleep_secs=2):
    """Add a ``wiretext`` media format subject to items that have none.

    Repeatedly searches ``resource`` for items without any subject in the
    media type scheme (100 per search), appends a ``wiretext`` subject whose
    name is resolved from the item's language, and stores the result with
    ``system_update``.

    :param resource: name of the resource service to fix
    :param limit: maximum number of search rounds; passed through ``int()``
    :param sleep_secs: pause after each round; passed through ``int()``
    """
    service = get_resource_service(resource)
    scheme = get_media_type_scheme()
    query = {
        "query": {
            "bool": {"must_not": {"term": {"subject.scheme": scheme}}},
        },
        "size": 100,
    }

    for _ in range(int(limit)):
        batch = service.search(query)
        if not batch.count():
            # nothing left without a media format subject
            break

        for doc in batch:
            # work on a copy so the fetched item stays intact as "original"
            subjects = doc["subject"].copy() if doc.get("subject") else []
            subjects.append(
                {
                    "code": "wiretext",
                    "name": get_media_type_name("wiretext", doc.get("language")),
                    "scheme": scheme,
                }
            )
            service.system_update(doc["_id"], {"subject": subjects}, doc)

        # NOTE(review): progress dot and pause are assumed to be per batch;
        # the source this was recovered from had lost its indentation.
        print(".", end="", flush=True)
        time.sleep(int(sleep_secs))

    print("done.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from cp.commands.fix_mediaformat import fix_mediaformat | ||
|
||
|
||
def test_fix_mediaformat(app):
    """Items without a media format subject get a localized ``wiretext`` one."""
    fixtures = [
        {"_id": lang, "language": lang, "type": "text"} for lang in ("en", "fr")
    ]
    app.data.insert("items", fixtures)

    fix_mediaformat()

    # English item: exactly one subject, the wiretext media format
    en_item = app.data.find_one("items", req=None, _id="en")
    assert "subject" in en_item
    assert len(en_item["subject"]) == 1
    en_subject = en_item["subject"][0]
    assert en_subject["code"] == "wiretext"
    assert en_subject["name"] == "Wire text"
    assert en_subject["scheme"] == "mediaformat"

    # French item: the subject name is translated
    fr_item = app.data.find_one("items", req=None, _id="fr")
    fr_subject = fr_item["subject"][0]
    assert fr_subject["name"] == "Texte fil de presse"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,28 +3,29 @@ | |
import responses | ||
import cp.signals as signals | ||
|
||
from datetime import datetime, timedelta | ||
from responses import matchers | ||
|
||
|
||
def test_on_publish_no_extended_headline(): | ||
def test_on_publish_no_extended_headline(app): | ||
item = {"headline": "foo"} | ||
signals.on_publish_item(None, item) | ||
assert item["headline"] == "foo" | ||
|
||
|
||
def test_on_publish_empty_extended_headline(): | ||
def test_on_publish_empty_extended_headline(app): | ||
item = {"headline": "foo", "extra": {cp.HEADLINE2: ""}} | ||
signals.on_publish_item(None, item) | ||
assert item["headline"] == "foo" | ||
|
||
|
||
def test_on_publish_copy_extended_headline(): | ||
def test_on_publish_copy_extended_headline(app): | ||
item = {"headline": "foo", "extra": {cp.HEADLINE2: "bar"}} | ||
signals.on_publish_item(None, item) | ||
assert item["headline"] == "bar" | ||
|
||
|
||
def test_on_publish_add_correction_to_body_html(): | ||
def test_on_publish_add_correction_to_body_html(app): | ||
item = { | ||
"body_html": "<p>some text</p><p>another one</p>", | ||
"extra": {"correction": "correction info"}, | ||
|
@@ -45,12 +46,17 @@ def test_cem_notification_on_user_changes(app): | |
} | ||
) | ||
company_id = bson.ObjectId() | ||
app.data.insert("companies", [{ | ||
"_id": company_id, | ||
"name": "Example Company", | ||
"is_enabled": True, | ||
"auth_provider": "gip", | ||
}]) | ||
app.data.insert( | ||
"companies", | ||
[ | ||
{ | ||
"_id": company_id, | ||
"name": "Example Company", | ||
"is_enabled": True, | ||
"auth_provider": "gip", | ||
} | ||
], | ||
) | ||
user = {"_id": bson.ObjectId(), "email": "[email protected]", "company": company_id} | ||
|
||
with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps: | ||
|
@@ -140,12 +146,17 @@ def test_cem_notification_for_non_google_auth(app, mocker): | |
} | ||
) | ||
company_id = bson.ObjectId() | ||
app.data.insert("companies", [{ | ||
"_id": company_id, | ||
"name": "Example Company", | ||
"is_enabled": True, | ||
"auth_provider": "azure", | ||
}]) | ||
app.data.insert( | ||
"companies", | ||
[ | ||
{ | ||
"_id": company_id, | ||
"name": "Example Company", | ||
"is_enabled": True, | ||
"auth_provider": "azure", | ||
} | ||
], | ||
) | ||
user = {"_id": bson.ObjectId(), "email": "[email protected]", "company": company_id} | ||
|
||
signals.on_user_created(None, user=user, foo=1) | ||
|
@@ -172,3 +183,38 @@ def test_language_agenda(): | |
item["language"] = "fr-ca" | ||
signals.push.send(None, item=item) | ||
assert "fr" == item["language"] | ||
|
||
|
||
def test_handle_transcripts(app):
    """Publishing fills in media format, and for transcripts source and expiry."""
    # text item without subjects gets the default wiretext media format
    wire_en = {"source": "CP", "subject": []}
    signals.on_publish_item(None, wire_en)
    assert len(wire_en["subject"]) == 1
    media_format = wire_en["subject"][0]
    assert media_format["scheme"] == "mediaformat"
    assert media_format["code"] == "wiretext"
    assert media_format["name"] == "Wire text"

    # same for a French item, with a translated name
    wire_fr = {"source": "CP", "subject": [], "language": "fr_CA"}
    signals.on_publish_item(None, wire_fr)
    assert wire_fr["subject"][0]["name"] == "Texte fil de presse"

    # transcript item carrying its own media format and station subjects
    tv_format = {"code": "tvstation", "name": "TV Station", "scheme": "mediaformat"}
    station = {"code": "CITY24", "name": "CP24 (CITY24)", "scheme": "station"}
    transcript = {"source": "TVEyes", "subject": [tv_format, station]}

    signals.on_publish_item(None, transcript)
    assert transcript["source"] == "CP24 (CITY24)"
    assert transcript["subject"][0]["name"] == "TV Station"
    assert "expiry" in transcript
    assert (
        datetime.now()
        < transcript["expiry"]
        < datetime.now() + timedelta(days=100)
    )

    # republishing as French translates the media format name
    transcript["language"] = "fr-CA"
    signals.on_publish_item(None, transcript)
    assert transcript["subject"][0]["name"] == "Station de télé"