Skip to content

Commit

Permalink
Merge pull request #679 from NCATSTranslator/retain_no_trace
Browse files Browse the repository at this point in the history
implemented annotator python package
  • Loading branch information
ShervinAbd92 authored Aug 27, 2024
2 parents d8464fe + 2c5bd4e commit d273b4a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 27 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ opentelemetry-exporter-jaeger
opentelemetry-instrumentation-requests
opentelemetry-instrumentation-celery
brotli

git+https://github.com/biothings/biothings_annotator#egg=biothings_annotator
49 changes: 23 additions & 26 deletions tr_sys/tr_ars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
NodeBinding as vNodeBinding,
Response as vResponse
)
from biothings_annotator import annotator
from pydantic import ValidationError
from opentelemetry import trace
tracer = trace.get_tracer(__name__)
Expand Down Expand Up @@ -1053,7 +1054,8 @@ def annotate_nodes(mesg,data,agent_name):
#we have to scrub input for invalid CURIEs or we'll get a 500 back from the annotator
curie_pattern = re.compile("[\w\.]+:[\w\.]+")
invalid_nodes={}

with open(f'{agent_name}_annotator_curie_list.json', 'w') as json_file:
json.dump(nodes_message, json_file, indent=4)
for key in nodes_message['ids']:
if not curie_pattern.match(str(key)):
invalid_nodes[key]=nodes[key]
Expand All @@ -1063,37 +1065,32 @@ def annotate_nodes(mesg,data,agent_name):

#json_data = json.dumps(nodes_message)
logging.info('posting data to the annotator URL %s' % ANNOTATOR_URL)
logging.info('sending %s curie ides to the annotator'% len(curie_list))
with tracer.start_as_current_span("annotator") as span:
try:
r = requests.post(ANNOTATOR_URL,json=nodes_message,headers=headers)
r.raise_for_status()
rj=r.json()
logging.info('the response status for agent %s node annotator is: %s' % (agent_name,r.status_code))
if r.status_code==200:
notfound_count=0
emptydict_count=0
for key, value in rj.items():
if isinstance(value, list) and 'notfound' in value[0].keys() and value[0]['notfound'] == True:
notfound_count +=1
atr = annotator.Annotator()
rj = atr.annotate_curie_list(curie_list)
# r = requests.post(ANNOTATOR_URL,json=nodes_message,headers=headers)
# r.raise_for_status()
# rj=r.json()
#logging.info('the response status for agent %s node annotator' % (agent_name))
for key, value in rj.items():
if isinstance(value, list) and 'notfound' in value[0].keys() and value[0]['notfound'] == True:
pass
elif isinstance(value, dict) and value == {}:
pass
emptydict_count +=1
else:
attribute={
"attribute_type_id": "biothings_annotations",
"value": value
}
add_attribute(data['message']['knowledge_graph']['nodes'][key],attribute)
#Not sure about adding back clearly borked nodes, but it is in keeping with policy of non-destructiveness
if len(invalid_nodes)>0:
data['message']['knowledge_graph']['nodes'].update(invalid_nodes)
else:
post_processing_error(mesg,data,"Error in annotation of nodes")
elif isinstance(value, dict) and value == {}:
pass
else:
attribute={
"attribute_type_id": "biothings_annotations",
"value": value
}
add_attribute(data['message']['knowledge_graph']['nodes'][key],attribute)
#Not sure about adding back clearly borked nodes, but it is in keeping with policy of non-destructiveness
if len(invalid_nodes)>0:
data['message']['knowledge_graph']['nodes'].update(invalid_nodes)
except Exception as e:
logging.info('node annotation internal error msg is for agent %s with pk: %s is %s' % (agent_name,str(mesg.pk),str(e)))
logging.exception("error in node annotation internal function")
logging.info(f'response error %s'%(r.text))
span.set_attribute("error", True)
span.set_attribute("exception", str(e))
raise e
Expand Down

0 comments on commit d273b4a

Please sign in to comment.