Skip to content

Commit

Permalink
add constraints and empty description tag
Browse files Browse the repository at this point in the history
  • Loading branch information
ofilangi committed Oct 24, 2024
1 parent 8a7829a commit f97c861
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 9 deletions.
2 changes: 1 addition & 1 deletion config/planteome-demo.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"encodeur" : "sentence-transformers/all-MiniLM-L6-v2",
- "threshold_similarity_tag_chunk" : 0.65,
+ "threshold_similarity_tag_chunk" : 0.70,
"threshold_similarity_tag" : 0.80,
"batch_size" : 32,

Expand Down
26 changes: 18 additions & 8 deletions llm_semantic_annotator/tag/owl_tag_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ def get_ontologies(self,list_ontologies):

return list_ontologies

- def remove_prefix_tags(self,prefix_tag,text):
- escaped_prefix = re.escape(prefix_tag.upper())
+ def remove_prefix_tags(self,text):
pattern = r'\([A-Z]+:\d+\)'

v = re.sub(pattern, '', text)
Expand Down Expand Up @@ -108,8 +107,16 @@ def build_corpus(

len_properties = len(ontology_config['properties'])
var_properties = ' '.join([ f"?prop{i}" for i in range(len_properties) ])
query_properties = ' '.join([ f"?term {ontology_config['properties'][i]} ?prop{i} .\n" for i in range(len_properties) ])
filter_properties = ' '.join([ f"FILTER(LANG(?prop{i}) = 'en' || LANG(?prop{i}) = '') .\n" for i in range(len_properties) ])

query_properties = ""
for i in range(len_properties):
query_properties += "OPTIONAL { "+f"""
?term {ontology_config['properties'][i]} ?prop{i} .
FILTER(LANG(?prop{i}) = 'en' || LANG(?prop{i}) = '') .
""" + "}\n"

filter_prefix = f"FILTER(STRSTARTS(STR(?term), '{ontology_config['prefix']}' )) .\n"


constraints_query = ""
if 'constraints' in ontology_config:
Expand All @@ -119,9 +126,9 @@ def build_corpus(
query_base = """
SELECT ?term ?labelLeaf """+var_properties+""" WHERE {
?term """+ontology_config['label']+""" ?labelLeaf .
"""+filter_prefix+"""
FILTER(LANG(?labelLeaf) = "en" || LANG(?labelLeaf) = "") .
"""+query_properties+"""
"""+filter_properties+"""
"""+constraints_query+"""
}
"""
Expand All @@ -133,8 +140,11 @@ def build_corpus(
nb_record=0
print(f"Ontology {ontology} NB RECORDS:{len(results)}")
for row in tqdm(results):

descriptionLeaf = '\n'.join([ row.get(prop.replace('?',''), '') for prop in var_properties.split(' ') ])
#print(row)
descriptionLeaf = '\n'.join([
str(row.get(prop.replace('?',''), '')) for prop in var_properties.split(' ')
])
#print("----")
labelLeaf = row.labelLeaf

descriptionLeaf = descriptionLeaf.strip()
Expand All @@ -149,7 +159,7 @@ def build_corpus(
'ontology' : ontology,
'term': str(row.term),
'rdfs_label': labelLeaf,
- 'description' : self.remove_prefix_tags(ontology,descriptionLeaf),
+ 'description' : self.remove_prefix_tags(descriptionLeaf),
'group': ontology_group_name
})

Expand Down

0 comments on commit f97c861

Please sign in to comment.