Skip to content

Commit

Permalink
auto tagger (#220)
Browse files Browse the repository at this point in the history
* Update jimi_2.py Added add_parent_manual_tags method.

* Update ninjs_formatter_2.py Added method  _add_parent_manual_tags(self, item)

* Black Formatted

---------

Co-authored-by: tcp-bhargav <[email protected]>
Co-authored-by: Petr Jasek <[email protected]>
Co-authored-by: cmuldur <[email protected]>
  • Loading branch information
4 people authored Mar 21, 2024
1 parent f8e003e commit d280887
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
47 changes: 47 additions & 0 deletions server/cp/output/formatter/jimi_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def is_french(item) -> bool:


class Jimi2Formatter(Formatter):

ENCODING = "utf-8"

type = "jimi_2"
Expand Down Expand Up @@ -142,6 +143,11 @@ def _format_subject_code(self, root, item, elem, scheme) -> None:
etree.SubElement(root, elem).text = subj["qcode"]

def _format_item(self, root, item, pub_seq_num, service, services) -> None:

# Added Fix here to fetch Parents of Manual Tags.

item = self._add_parent_manual_tags(item)

if is_picture(item):
D2P1 = "http://www.w3.org/2001/XMLSchema-instance"
content = etree.SubElement(
Expand Down Expand Up @@ -397,6 +403,41 @@ def _format_dateline(self, content, dateline):
else:
etree.SubElement(content, "Placeline")

# Add the top-level parent of each manually added index code to the item's subjects.
def _add_parent_manual_tags(self, item):
    """Append the top-level ``in_jimi`` parent of each subject to the item.

    Looks up the ``subject_custom`` vocabulary and, for every subject on
    ``item`` whose qcode is present in that vocabulary, walks the ``parent``
    chain. When the walk reaches an entry that is flagged ``in_jimi`` and has
    no parent of its own, that entry is appended to the item's subjects
    unless a subject with the same qcode is already present.

    :param item: article dict; its ``subject`` key is replaced with the
        extended list (set to an empty list when it was missing).
    :return: the same ``item`` object, for call-site convenience.
    """
    cv = superdesk.get_resource_service("vocabularies").find_one(
        req=None, _id="subject_custom"
    )
    # Tolerate a missing vocabulary document and entries without a qcode
    # instead of failing the whole format run.
    vocab_items = (cv or {}).get("items") or []
    vocab_mapping = {v["qcode"]: v for v in vocab_items if "qcode" in v}

    def find_oldest_parent(qcode):
        """Return the root ``in_jimi`` ancestor of ``qcode``, or ``None``."""
        parent_qcode = vocab_mapping[qcode].get("parent")
        visited = set()  # guards against cyclic parent references
        while parent_qcode and parent_qcode not in visited:
            visited.add(parent_qcode)
            parent = vocab_mapping.get(parent_qcode)
            if parent is None:
                # Parent qcode is not part of the vocabulary: chain is broken.
                return None
            if parent.get("in_jimi") and parent.get("parent") is None:
                return parent
            parent_qcode = parent.get("parent")
        return None

    subjects = item.get("subject") or []
    # Work on a copy so the original subject list is not mutated in place.
    updated_subjects = list(subjects)
    # Track qcodes already present so each parent is added at most once.
    seen_qcodes = {s.get("qcode") for s in subjects}

    for subject in subjects:
        if "qcode" not in subject or subject["qcode"] not in vocab_mapping:
            continue
        oldest_parent = find_oldest_parent(subject["qcode"])
        if oldest_parent and oldest_parent["qcode"] not in seen_qcodes:
            # Add the entire oldest parent tag to the item's subjects.
            updated_subjects.append(oldest_parent)
            seen_qcodes.add(oldest_parent["qcode"])

    item["subject"] = updated_subjects
    return item

def _format_category_index(self, content, item):
categories = self._get_categories(item)
indexes = uniq(categories + self._get_indexes(item))
Expand Down Expand Up @@ -750,30 +791,36 @@ def _find_qcode_item(code, items, jimi_only=True):
if not jimi_only:
pass
if item.get("in_jimi"):

return item
elif item.get("parent"):
return _find_qcode_item(item["parent"], items, jimi_only)
break

elif item.get("semaphore_id") == code:

if not jimi_only:
pass
if item.get("in_jimi"):

return item
elif item.get("parent"):
return _find_qcode_item(item["parent"], items, jimi_only)
break


def _get_name(item, language):

lang = language.replace("_", "-")
if "-CA" not in lang:
lang = "{}-CA".format(lang)
try:

return item["translations"]["name"][lang]
except (KeyError,):
pass
try:

return item["translations"]["name"][lang.split("-")[0]]
except (KeyError,):
pass
Expand Down
42 changes: 42 additions & 0 deletions server/cp/output/formatter/ninjs_formatter_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,48 @@ def format(self, article, subscriber, codes=None):
except Exception as ex:
raise FormatterError.ninjsFormatterError(ex, subscriber)

# Adding a method to fetch Parents of Manual Tags

def _add_parent_manual_tags(self, item):
    """Append the top-level ``in_jimi`` parent of each subject to the item.

    Looks up the ``subject_custom`` vocabulary and, for every subject on
    ``item`` whose qcode is present in that vocabulary, walks the ``parent``
    chain. When the walk reaches an entry that is flagged ``in_jimi`` and has
    no parent of its own, that entry is appended to the item's subjects
    unless a subject with the same qcode is already present.

    :param item: article dict; its ``subject`` key is replaced with the
        extended list (set to an empty list when it was missing).
    :return: the same ``item`` object, for call-site convenience.
    """
    cv = superdesk.get_resource_service("vocabularies").find_one(
        req=None, _id="subject_custom"
    )
    # Tolerate a missing vocabulary document and entries without a qcode
    # instead of failing the whole format run.
    vocab_items = (cv or {}).get("items") or []
    vocab_mapping = {v["qcode"]: v for v in vocab_items if "qcode" in v}

    def find_oldest_parent(qcode):
        """Return the root ``in_jimi`` ancestor of ``qcode``, or ``None``."""
        parent_qcode = vocab_mapping[qcode].get("parent")
        visited = set()  # guards against cyclic parent references
        while parent_qcode and parent_qcode not in visited:
            visited.add(parent_qcode)
            parent = vocab_mapping.get(parent_qcode)
            if parent is None:
                # Parent qcode is not part of the vocabulary: chain is broken.
                return None
            if parent.get("in_jimi") and parent.get("parent") is None:
                return parent
            parent_qcode = parent.get("parent")
        return None

    subjects = item.get("subject") or []
    # Work on a copy so the original subject list is not mutated in place.
    updated_subjects = list(subjects)
    # Track qcodes already present so each parent is added at most once.
    seen_qcodes = {s.get("qcode") for s in subjects}

    for subject in subjects:
        if "qcode" not in subject or subject["qcode"] not in vocab_mapping:
            continue
        oldest_parent = find_oldest_parent(subject["qcode"])
        if oldest_parent and oldest_parent["qcode"] not in seen_qcodes:
            # Add the entire oldest parent tag to the item's subjects.
            updated_subjects.append(oldest_parent)
            seen_qcodes.add(oldest_parent["qcode"])

    item["subject"] = updated_subjects
    return item

def _transform_to_ninjs(self, article, subscriber, recursive=True):

# Using the method we created to fetch Parents of all Manual Tags

article = self._add_parent_manual_tags(article)

ninjs = {
"guid": article.get(GUID_FIELD, article.get("uri")),
"version": str(article.get(config.VERSION, 1)),
Expand Down Expand Up @@ -616,6 +657,7 @@ def update_ninjs_subjects(self, ninjs, language="en-CA"):
"subject_custom"
if subject.get("scheme")
== "http://cv.iptc.org/newscodes/mediatopic/"
or subject.get("scheme") is None
else subject.get("scheme")
),
}
Expand Down

0 comments on commit d280887

Please sign in to comment.