From e905961f94277336293f59917d85f032736dfc49 Mon Sep 17 00:00:00 2001 From: Kaushik Acharya Date: Thu, 24 Aug 2023 12:05:17 +0530 Subject: [PATCH 1/2] Updating nmslib hyperparameters guide url --- scispacy/candidate_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scispacy/candidate_generation.py b/scispacy/candidate_generation.py index a408a6e..f11a721 100644 --- a/scispacy/candidate_generation.py +++ b/scispacy/candidate_generation.py @@ -383,7 +383,7 @@ def create_tfidf_ann_index( kb = kb or UmlsKnowledgeBase() # nmslib hyperparameters (very important) - # guide: https://github.com/nmslib/nmslib/blob/master/python_bindings/parameters.md + # guide: https://github.com/nmslib/nmslib/blob/master/manual/methods.md # Default values resulted in very low recall. # set to the maximum recommended value. Improves recall at the expense of longer indexing time. From 9bdc0f848a2caa4e4e5dd4f598623f098f7bc18d Mon Sep 17 00:00:00 2001 From: Kaushik Acharya Date: Thu, 24 Aug 2023 12:07:04 +0530 Subject: [PATCH 2/2] Updating UMLS concept alias path variable with appropriate name --- scispacy/candidate_generation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scispacy/candidate_generation.py b/scispacy/candidate_generation.py index f11a721..7e9ae89 100644 --- a/scispacy/candidate_generation.py +++ b/scispacy/candidate_generation.py @@ -378,7 +378,7 @@ def create_tfidf_ann_index( tfidf_vectorizer_path = f"{out_path}/tfidf_vectorizer.joblib" ann_index_path = f"{out_path}/nmslib_index.bin" tfidf_vectors_path = f"{out_path}/tfidf_vectors_sparse.npz" - uml_concept_aliases_path = f"{out_path}/concept_aliases.json" + umls_concept_aliases_path = f"{out_path}/concept_aliases.json" kb = kb or UmlsKnowledgeBase() @@ -445,9 +445,9 @@ def create_tfidf_ann_index( assert len(concept_aliases) == numpy.size(concept_alias_tfidfs, 0) print( - f"Saving list of concept ids and tfidfs vectors to {uml_concept_aliases_path} and {tfidf_vectors_path}" + f"Saving list of concept ids and tfidfs vectors to {umls_concept_aliases_path} and {tfidf_vectors_path}" ) - json.dump(concept_aliases, open(uml_concept_aliases_path, "w")) + json.dump(concept_aliases, open(umls_concept_aliases_path, "w")) scipy.sparse.save_npz( tfidf_vectors_path, concept_alias_tfidfs.astype(numpy.float16) )