From ef96a938be668508a2815ac1bd11d9a7fde0c0ff Mon Sep 17 00:00:00 2001 From: jgroehm Date: Tue, 5 Jul 2022 19:22:41 -0500 Subject: [PATCH 1/4] encoding="utf-8" --- scispacy/umls_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scispacy/umls_utils.py b/scispacy/umls_utils.py index 73a448f0..3a3724ad 100644 --- a/scispacy/umls_utils.py +++ b/scispacy/umls_utils.py @@ -24,7 +24,7 @@ def read_umls_file_headers(meta_path: str, filename: str) -> List[str]: a list of column names """ file_descriptors = f"{meta_path}/MRFILES.RRF" # to get column names - with open(file_descriptors) as fin: + with open(file_descriptors, encoding="utf-8") as fin: for line in fin: splits = line.split("|") found_filename = splits[0] @@ -59,7 +59,7 @@ def read_umls_concepts(meta_path: str, concept_details: Dict, source: str = None """ concepts_filename = "MRCONSO.RRF" headers = read_umls_file_headers(meta_path, concepts_filename) - with open(f"{meta_path}/{concepts_filename}") as fin: + with open(f"{meta_path}/{concepts_filename}", encoding="utf-8") as fin: for line in fin: splits = line.strip().split("|") assert len(headers) == len(splits), (headers, splits) @@ -114,7 +114,7 @@ def read_umls_types(meta_path: str, concept_details: Dict): """ types_filename = "MRSTY.RRF" headers = read_umls_file_headers(meta_path, types_filename) - with open(f"{meta_path}/{types_filename}") as fin: + with open(f"{meta_path}/{types_filename}", encoding="utf-8") as fin: for line in fin: splits = line.strip().split("|") assert len(headers) == len(splits) @@ -142,7 +142,7 @@ def read_umls_definitions(meta_path: str, concept_details: Dict): """ definitions_filename = "MRDEF.RRF" headers = read_umls_file_headers(meta_path, definitions_filename) - with open(f"{meta_path}/{definitions_filename}") as fin: + with open(f"{meta_path}/{definitions_filename}", encoding="utf-8") as fin: headers = read_umls_file_headers(meta_path, definitions_filename) for line in fin: splits = line.strip().split("|") From d28f2857d2ce98cdbe3160db29b509bfedd34e3d Mon Sep 17 00:00:00 2001 From: jgroehm Date: Fri, 15 Jul 2022 22:42:51 -0500 Subject: [PATCH 2/4] set ndarray type='object' --- scispacy/candidate_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scispacy/candidate_generation.py b/scispacy/candidate_generation.py index 7bcf934b..00324acd 100644 --- a/scispacy/candidate_generation.py +++ b/scispacy/candidate_generation.py @@ -281,8 +281,8 @@ def nmslib_knn_with_zero_vectors( neighbors.append([]) distances.append([]) # interleave `neighbors` and Nones in `extended_neighbors` - extended_neighbors[empty_vectors_boolean_flags] = numpy.array(neighbors)[:-1] - extended_distances[empty_vectors_boolean_flags] = numpy.array(distances)[:-1] + extended_neighbors[empty_vectors_boolean_flags] = numpy.array(neighbors, dtype='object')[:-1] + extended_distances[empty_vectors_boolean_flags] = numpy.array(distances, dtype='object')[:-1] return extended_neighbors, extended_distances From 560d0e6b719d401978706c5af3f2a9d52ceca961 Mon Sep 17 00:00:00 2001 From: jgroehm Date: Sat, 16 Jul 2022 19:30:06 -0500 Subject: [PATCH 3/4] black --- scispacy/candidate_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scispacy/candidate_generation.py b/scispacy/candidate_generation.py index 00324acd..12767193 100644 --- a/scispacy/candidate_generation.py +++ b/scispacy/candidate_generation.py @@ -16,7 +16,7 @@ UmlsKnowledgeBase, Mesh, GeneOntology, - RxNorm, + RxNorm, HumanPhenotypeOntology, ) From 0d27bb1a4bba05414d48c24bf68c4d6562482993 Mon Sep 17 00:00:00 2001 From: jgroehm Date: Sat, 16 Jul 2022 20:59:25 -0500 Subject: [PATCH 4/4] black cli --- scispacy/candidate_generation.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scispacy/candidate_generation.py b/scispacy/candidate_generation.py index 12767193..9287abdd 100644 --- a/scispacy/candidate_generation.py +++ b/scispacy/candidate_generation.py @@ -16,7 +16,7 @@ UmlsKnowledgeBase, Mesh, GeneOntology, - RxNorm, + RxNorm, HumanPhenotypeOntology, ) @@ -281,8 +281,12 @@ def nmslib_knn_with_zero_vectors( neighbors.append([]) distances.append([]) # interleave `neighbors` and Nones in `extended_neighbors` - extended_neighbors[empty_vectors_boolean_flags] = numpy.array(neighbors, dtype='object')[:-1] - extended_distances[empty_vectors_boolean_flags] = numpy.array(distances, dtype='object')[:-1] + extended_neighbors[empty_vectors_boolean_flags] = numpy.array( + neighbors, dtype="object" + )[:-1] + extended_distances[empty_vectors_boolean_flags] = numpy.array( + distances, dtype="object" + )[:-1] return extended_neighbors, extended_distances