Skip to content

Commit

Permalink
fix nb total abstract information
Browse files Browse the repository at this point in the history
  • Loading branch information
ofilangi committed Oct 24, 2024
1 parent f97c861 commit 7488fb3
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 8 deletions.
13 changes: 10 additions & 3 deletions llm_semantic_annotator/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def main_populate_ncbi_taxon_tag_embeddings(config_all):
def main_populate_abstract_embeddings(config_all):
    """Obtain the abstract manager for ``config_all`` and run ``manage_abstracts`` on it.

    Args:
        config_all: Configuration object handed unchanged to
            ``get_abstract_manager``.
    """
    abstract_manager = get_abstract_manager(config_all)
    abstract_manager.manage_abstracts()

def get_doi_file(config_all):
    """Return the path of the ``total_doi.txt`` file inside the retention directory.

    Args:
        config_all: Mapping that provides the ``'retention_dir'`` entry.

    Returns:
        The retention directory joined with ``total_doi.txt``.

    Raises:
        KeyError: If ``config_all`` has no ``'retention_dir'`` entry.
    """
    # os.path.join avoids a doubled separator when the configured directory
    # already ends with one, and also accepts path-like values.
    return os.path.join(config_all['retention_dir'], "total_doi.txt")

def main_compute_tag_chunk_similarities(config_all):
"""Fonction principale pour calculer la similarité entre tous les tags et chunks."""
tags_pth_files = get_owl_tag_manager(config_all).get_files_tags_embeddings()
Expand Down Expand Up @@ -92,7 +95,7 @@ def main_compute_tag_chunk_similarities(config_all):
### -----------------------
keep_tag_embeddings = {}
results_complete_similarities = {}
total_doi = 0
for abstracts_pth_file in abstracts_pth_files:
json_f = str(os.path.splitext(abstracts_pth_file)[0])+"_scores.json"
if os.path.exists(json_f) :
Expand All @@ -101,6 +104,7 @@ def main_compute_tag_chunk_similarities(config_all):
chunk_embeddings = mem.load_filepth(abstracts_pth_file)

for doi,res in mem.compare_tags_with_chunks(tag_embeddings, chunk_embeddings).items():
total_doi+=1
if doi not in results_complete_similarities:
results_complete_similarities[doi] = res
for tag in res.keys():
Expand All @@ -119,6 +123,9 @@ def main_compute_tag_chunk_similarities(config_all):

with open(json_f, "w") as fichier:
json.dump(results_complete_similarities, fichier)

with open(get_doi_file(config_all), "w") as fichier:
fichier.write(str(total_doi))

def get_scores_files(retention_dir):
scores_files = []
Expand Down Expand Up @@ -170,14 +177,14 @@ def get_results_complete_similarities_and_tags_embedding(config_all):
return results_complete_similarities,tag_embeddings

def main_display_summary(config_all):
    """Print the summary reports for the stored similarity results.

    Loads the similarity results and tag embeddings for ``config_all`` and,
    when there is at least one result, runs the three display helpers.
    Otherwise prints ``"No results found"``.

    Args:
        config_all: Configuration mapping; ``'retention_dir'`` is read here
            and the whole mapping is forwarded to the loader helpers.
    """
    # Path of the file that main_compute_tag_chunk_similarities writes the
    # total DOI count to; the distribution display reads it back.
    doi_file = get_doi_file(config_all)
    results_complete_similarities, tag_embeddings = (
        get_results_complete_similarities_and_tags_embedding(config_all)
    )
    retention_dir = config_all['retention_dir']

    if len(results_complete_similarities) == 0:
        print("No results found")
        return

    display_best_similarity_abstract_tag(results_complete_similarities, tag_embeddings, retention_dir)
    display_ontologies_summary(results_complete_similarities, tag_embeddings, retention_dir)
    # Single call using the three-argument signature; the older two-argument
    # call no longer matches display_ontologies_distribution.
    display_ontologies_distribution(results_complete_similarities, tag_embeddings, doi_file)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def create_rdf_graph(results_complete_similarities,
abstracts_processed = len(results_complete_similarities)

for doi, complete_similarities in results_complete_similarities.items():
doi_uri = URIRef(urllib.parse.quote(f"https://doi.org/{doi}"))
doi_uri = URIRef("https://doi.org/"+urllib.parse.quote(doi))
for tag, similarity in complete_similarities.items():
tag_uri = URIRef(tag)
annotation_node = BNode()
Expand Down
11 changes: 7 additions & 4 deletions llm_semantic_annotator/misc/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
from rich.panel import Panel
from rich.text import Text

def display_ontologies_distribution(data, keep_tag_embeddings):
def display_ontologies_distribution(data, keep_tag_embeddings,total_doi_file):
console = Console()

nb_doi = 0
with open(total_doi_file, 'r') as file:
nb_doi = int(file.read())

# Extract key prefixes
ontologies = []
labels = []
Expand All @@ -21,14 +25,13 @@ def display_ontologies_distribution(data, keep_tag_embeddings):
distributionLabels = Counter(labels)

# General statistics
nb_abstracts = len(data)
nb_annotated = sum(1 for item in data.values() if len(item) > 0)
nb_annotated = len(data)
total_labels = sum(distributionOntologies.values())

# Display general statistics
console.print(Panel(
f"[bold cyan]General Statistics[/bold cyan]\n"
f"Total number of abstracts: [green]{nb_abstracts}[/green]\n"
f"Total number of abstracts: [green]{nb_doi}[/green]\n"
f"Number of annotated abstracts: [green]{nb_annotated}[/green]\n"
f"Total number of labels used: [green]{total_labels}[/green]",
title="Summary",
Expand Down

0 comments on commit 7488fb3

Please sign in to comment.