diff --git a/README.md b/README.md
index acd7f47..da13a8f 100644
--- a/README.md
+++ b/README.md
@@ -58,8 +58,10 @@ Implemented in `/bertscore_sentence`
 
 Usage:
 ```python
 import bertscore_sentence.eval as bertscore_sentence
+import dar_env
 metrics = {
-    "bertscore-sentence": functools.partial(bertscore_sentence.compute),
+    "bertscore-sentence-cos-mpnet": functools.partial(bertscore_sentence.compute, embedder=dar_env.sent_embedder_mpnet),
+    "bertscore-sentence-cos-roberta": functools.partial(bertscore_sentence.compute, embedder=dar_env.sent_embedder_roberta),
 }
 ```
@@ -72,8 +74,10 @@ Implemented in `/mnli`
 
 Usage:
 ```python
 import mnli.eval as mnli
+import dar_env
 metrics = {
-    "bertscore-sentence-mnli": functools.partial(mnli.bertscore_sentence_compute),
+    "bertscore-sentence-mnli-roberta": functools.partial(mnli.bertscore_sentence_compute, classifier=dar_env.mnli_classifier_roberta),
+    "bertscore-sentence-mnli-bart": functools.partial(mnli.bertscore_sentence_compute, classifier=dar_env.mnli_classifier_bart),
 }
 ```
diff --git a/bertscore_sentence/eval.py b/bertscore_sentence/eval.py
index a145be9..1a3fef4 100644
--- a/bertscore_sentence/eval.py
+++ b/bertscore_sentence/eval.py
@@ -1,6 +1,5 @@
 import sys
 from os import path
-
 file_path = path.abspath(__file__)
 sys.path.append(path.dirname(path.dirname(file_path)))
 
@@ -8,56 +7,48 @@
 import numpy as np
 import torch
 from tqdm.auto import trange
-from dar_env import nlp
-from dar_env import sent_embedder as embedder
+from dar_env import nlp_spacy
+import functools
+import sentence_transformers
 
 
-def cos_sim_mat_f(cand, ref) -> np.ndarray:
+def cos_sim_mat_f(cand, ref, embedder) -> np.ndarray:
     def bert_encode(piece: str):
         sentence_emb = list()
-        doc = nlp(piece)
+        doc = nlp_spacy(piece)
         doc_sents = [sent.text for sent in doc.sents]
         for sentence in doc_sents:
             with torch.no_grad():
                 sentence_emb.append(embedder.encode(sentence, convert_to_numpy=True))
         return sentence_emb, doc_sents
 
-    cand_sentence_emb, cand_sentences = bert_encode(cand)
-    ref_sentence_emb, ref_sentences = bert_encode(ref)
-    sim_mat = np.zeros((len(ref_sentence_emb), len(cand_sentence_emb)))
-    for i in range(len(ref_sentence_emb)):
-        for j in range(len(cand_sentence_emb)):
-            numerator = np.dot(ref_sentence_emb[i], cand_sentence_emb[j])  # float32
-            denominator = np.dot(np.linalg.norm(ref_sentence_emb[i]),
-                                 np.linalg.norm(cand_sentence_emb[j]))  # float32
-            cos_sim = np.divide(numerator, denominator)  # float32
-            sim_mat[i][j] = cos_sim
-            del numerator, denominator, cos_sim
-    return sim_mat, cand_sentences, ref_sentences
+    ref_sent_emb_list, ref_sents = bert_encode(ref)
+    cand_sent_emb_list, cand_sents = bert_encode(cand)
+    ref_sent_emb = np.stack(ref_sent_emb_list, axis=0)
+    cand_sent_emb = np.stack(cand_sent_emb_list, axis=0)
+    numerators = np.inner(ref_sent_emb, cand_sent_emb)  # pairwise dot products, shape (n_ref, n_cand)
+    ref_sent_emb_norms = np.linalg.norm(ref_sent_emb, axis=1)
+    cand_sent_emb_norms = np.linalg.norm(cand_sent_emb, axis=1)
+    denominators = np.outer(ref_sent_emb_norms, cand_sent_emb_norms)
+    sim_mat = np.divide(numerators, denominators)  # cosine similarity, vectorized over all sentence pairs
+    return sim_mat, cand_sents, ref_sents
 
 
 def score_np(predictions: typing.List[str], references: typing.List[str], sim_mat_f: typing.Callable) -> np.ndarray:
     cands, refs = predictions, references  # simple renaming.
-    all_scores = np.zeros((len(cands), 3))
-
-    for index in trange(len(cands), desc="bertscore-sentence cands {}".format(sim_mat_f.__name__), leave=False):  # all pieces, len(cands) == len(refs)
-        sim_mat, cand_sentences, ref_sentences = sim_mat_f(cand=cands[index], ref=refs[index])
+    all_scores = np.empty((len(cands), 3))  # one (P, R, F) row per candidate-reference pair
+    for index in trange(len(cands), desc="bertscore-sentence {}".format(sim_mat_f.__name__), leave=False):  # all pieces, len(cands) == len(refs)
+        sim_mat, cand_sents, ref_sents = sim_mat_f(cand=cands[index], ref=refs[index])
 
         def sum_max(is_r: bool) -> float:
-            sum_result = 0.0
             if is_r:
-                for i in range(len(ref_sentences)):
-                    sum_result += sim_mat[i].max()
+                return np.sum(np.max(sim_mat, axis=1))
             else:
-                sim_mat_t = sim_mat.transpose()
-                for j in range(len(cand_sentences)):
-                    sum_result += sim_mat_t[j].max()
-                del sim_mat_t
-            return sum_result
+                return np.sum(np.max(sim_mat, axis=0))  # equivalent to np.sum(np.max(sim_mat.T, axis=1))
 
-        R = (1 / len(ref_sentences)) * sum_max(True)
-        P = (1 / len(cand_sentences)) * sum_max(False)
+        R = (1 / len(ref_sents)) * sum_max(True)
+        P = (1 / len(cand_sents)) * sum_max(False)
         F = 2 * ((P * R) / (P + R))
         all_scores[index, :] = np.array([P, R, F])
         del sim_mat
@@ -65,8 +56,11 @@ def sum_max(is_r: bool) -> float:
     return all_scores
 
 
-def compute(predictions: typing.List[str], references: typing.List[str], sim_mat_f: typing.Callable = cos_sim_mat_f) -> typing.Dict:
-    cands, refs = predictions, references  # simple renaming.
+def compute(predictions: typing.List[str], references: typing.List[str], sim_mat_f: typing.Optional[typing.Callable] = None, embedder: typing.Optional[sentence_transformers.SentenceTransformer] = None) -> typing.Dict:
+    cands, refs = predictions, references  # simple renaming
+    if sim_mat_f is None:  # cosine similarity by default
+        sim_mat_f = functools.partial(cos_sim_mat_f, embedder=embedder)
+        sim_mat_f.__name__ = " ".join(["cos", embedder.__name__])
     score_arr = score_np(predictions=cands, references=refs, sim_mat_f=sim_mat_f)
     return {
         "P": score_arr[:, 0].tolist(),
diff --git a/dar_env.py b/dar_env.py
index c5a57de..b6b66de 100644
--- a/dar_env.py
+++ b/dar_env.py
@@ -4,10 +4,14 @@
 import evaluate
 
 
-nlp = spacy.load("en_core_web_lg")
-mnli_classifier = pipeline("text-classification",
-                           model="roberta-large-mnli", top_k=None)
-sent_embedder = sentence_transformers.SentenceTransformer("all-MiniLM-L6-v2")
+nlp_spacy = spacy.load("en_core_web_lg")
+mnli_classifier_roberta = pipeline("text-classification", model="roberta-large-mnli", top_k=None)
+mnli_classifier_roberta.__name__ = "roberta-large-mnli"  # __name__ feeds the progress-bar labels in score_np
+mnli_classifier_bart = pipeline("text-classification", model="facebook/bart-large-mnli", top_k=None)
+mnli_classifier_bart.__name__ = "bart-large-mnli"
+sent_embedder_mpnet = sentence_transformers.SentenceTransformer("all-mpnet-base-v2")
+sent_embedder_mpnet.__name__ = "all-mpnet-base-v2"
+sent_embedder_roberta = sentence_transformers.SentenceTransformer("all-roberta-large-v1")
+sent_embedder_roberta.__name__ = "all-roberta-large-v1"
 bertscore = evaluate.load("bertscore")
 rouge = evaluate.load("rouge")
-bertscore = evaluate.load("bertscore")
diff --git a/mnli/eval.py b/mnli/eval.py
index e52964d..a972ef5 100644
--- a/mnli/eval.py
+++ b/mnli/eval.py
@@ -1,6 +1,5 @@
 import sys
 from os import path
-
 file_path = path.abspath(__file__)
 sys.path.append(path.dirname(path.dirname(file_path)))
 
@@ -8,23 +7,26 @@
 from bertscore_sentence import eval
 import numpy as np
 from mnli.sim import similarity
-from dar_env import nlp
+from dar_env import nlp_spacy
+import functools
+import transformers
 
 
-def mnli_sim_mat(cand, ref) -> np.ndarray:
+def mnli_sim_mat(cand: str, ref: str, classifier: transformers.Pipeline) -> np.ndarray:
     def segmentation(piece: str):
-        doc = nlp(piece)
+        doc = nlp_spacy(piece)
         doc_sents = [sent.text for sent in doc.sents]
         return doc_sents
 
-    cand_sentences = segmentation(cand)
-    ref_sentences = segmentation(ref)
-    sim_mat = np.zeros((len(ref_sentences), len(cand_sentences)))
-    for i in range(len(ref_sentences)):
-        for j in range(len(cand_sentences)):
-            sim_mat[i][j] = similarity(ref_sentences[i], cand_sentences[j])
-    return sim_mat, cand_sentences, ref_sentences
+    cand_sents = segmentation(cand)
+    ref_sents = segmentation(ref)
+    sent_pairs = [" ".join([x, y]) for x in ref_sents for y in cand_sents]  # one premise-hypothesis string per (ref, cand) pair
+    sim_mat = np.empty((len(ref_sents), len(cand_sents)))
+    sim_mat.flat = similarity(sent_pairs, classifier)  # row-major fill matches the order of sent_pairs
+    return sim_mat, cand_sents, ref_sents
 
 
-def bertscore_sentence_compute(predictions: typing.List[str], references: typing.List[str]) -> typing.Dict:
-    return eval.compute(predictions=predictions, references=references, sim_mat_f=mnli_sim_mat)
+def bertscore_sentence_compute(predictions: typing.List[str], references: typing.List[str], classifier: transformers.Pipeline) -> typing.Dict:
+    sim_mat_f = functools.partial(mnli_sim_mat, classifier=classifier)
+    sim_mat_f.__name__ = " ".join(["mnli", classifier.__name__])
+    return eval.compute(predictions=predictions, references=references, sim_mat_f=sim_mat_f)
diff --git a/mnli/sim.py b/mnli/sim.py
index 2caf632..9541b82 100644
--- a/mnli/sim.py
+++ b/mnli/sim.py
@@ -1,21 +1,25 @@
 import sys
 from os import path
-
 file_path = path.abspath(__file__)
 sys.path.append(path.dirname(path.dirname(file_path)))
 
-from dar_env import mnli_classifier
+import typing
+import transformers
 
 
-def similarity(sentence_a: str, sentence_b: str):
-    sequence = " ".join([sentence_a, sentence_b])
-    classes = mnli_classifier(sequence)
-    for c in classes[0]:
-        if c["label"] == "NEUTRAL":
-            return 1 - c["score"]
-    raise Exception("Not found NEUTRAL class")
+def similarity(sent_pairs: typing.List[str], classifier: transformers.Pipeline):
+    classes = classifier(sent_pairs)
+    scores = []
+    for c in classes:
+        for category in c:
+            if category["label"].upper() == "NEUTRAL":  # roberta-large-mnli emits "NEUTRAL", bart-large-mnli "neutral"
+                scores.append(1 - category["score"])
+                break
+    return scores
 
 
 if __name__ == "__main__":
-    print(similarity("Each computer program uses a region of memory called the stack to enable functions to work properly.",
-                     "From the outside, Les 4G, a Lyonnais bouchon (traditional restaurant), looked much like the nondescript cafe-cum-tobacco shops that can be found in most small French towns, but inside the decor was as warm and inviting as a country pub."))
+    from dar_env import mnli_classifier_roberta  # imported lazily so mnli.sim itself stays model-free
+    sample_a = "Each computer program uses a region of memory called the stack to enable functions to work properly."
+    sample_b = "From the outside, Les 4G, a Lyonnais bouchon (traditional restaurant), looked much like the nondescript cafe-cum-tobacco shops that can be found in most small French towns, but inside the decor was as warm and inviting as a country pub."
+    print(similarity([" ".join([sample_a, sample_b])], mnli_classifier_roberta))
diff --git a/topk/eval.py b/topk/eval.py
index 9e1582a..c713091 100644
--- a/topk/eval.py
+++ b/topk/eval.py
@@ -1,15 +1,14 @@
 import sys
 from os import path
-
 file_path = path.abspath(__file__)
 sys.path.append(path.dirname(path.dirname(file_path)))
 
 import typing
-from dar_env import nlp, bertscore, rouge, bleurt
+from dar_env import nlp_spacy, bertscore, rouge, bleurt
 
 
 def extract_topk_doc(ref: str, topk: int) -> str:
-    doc = nlp(ref)
+    doc = nlp_spacy(ref)
     doc_sents = [sent.text for sent in doc.sents]
     topk_sents = doc_sents[0:topk]
     return " ".join(topk_sents)
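
For reviewers who want to exercise the refactored interface end to end, here is a minimal smoke test. It is a sketch under stated assumptions, not part of the patch: it assumes the patched repository root is on `sys.path`, that the spaCy and Hugging Face models loaded in `dar_env.py` are available locally, and that the two toy strings are purely illustrative. Only the `"P"` key of `compute`'s return value is visible in the hunk above; `"R"` and `"F"` are presumed to follow from the remaining columns of `score_arr`.

```python
# Smoke test (sketch): run from the repository root after applying this patch.
import functools

import bertscore_sentence.eval as bertscore_sentence
import mnli.eval as mnli
import dar_env

# Mirrors the metrics dicts from the README hunks above.
metrics = {
    "bertscore-sentence-cos-mpnet": functools.partial(
        bertscore_sentence.compute, embedder=dar_env.sent_embedder_mpnet),
    "bertscore-sentence-mnli-roberta": functools.partial(
        mnli.bertscore_sentence_compute, classifier=dar_env.mnli_classifier_roberta),
}

predictions = ["The patch vectorizes the similarity matrix. It also adds new backbones."]
references = ["This patch replaces the per-pair loops with vectorized NumPy code and adds mpnet, roberta, and bart backbones."]

for name, metric in metrics.items():
    result = metric(predictions=predictions, references=references)
    print(name, result)  # dict of per-pair score lists, e.g. result["P"]
```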