forked from CornellNLP/politeness-paraphrase
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bleu.py
41 lines (36 loc) · 1.42 KB
/
bleu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# script is from https://github.com/agaralabs/transformer-drg-style-transfer/blob/master/evaluation_scripts/bleu.py
# as we follow their practice
# BLEU functions from https://github.com/MaximumEntropy/Seq2Seq-PyTorch
import numpy as np
from collections import Counter
import math
def bleu_stats(hypothesis, reference):
    """Collect the raw counts BLEU needs for one hypothesis/reference pair.

    Args:
        hypothesis: Sliceable token sequence produced by the system.
        reference: Sliceable token sequence to compare against.

    Returns:
        A 10-element list:
        ``[len(hypothesis), len(reference), m1, t1, m2, t2, m3, t3, m4, t4]``
        where, for each n-gram order ``n`` in 1..4, ``m_n`` is the number of
        hypothesis n-grams also found in the reference (counts clipped to the
        reference count) and ``t_n`` is the number of n-grams in the
        hypothesis (0 when the hypothesis is shorter than ``n``).
    """
    hyp_len = len(hypothesis)
    ref_len = len(reference)
    collected = [hyp_len, ref_len]
    for order in (1, 2, 3, 4):
        hyp_counts = Counter(
            tuple(hypothesis[start:start + order])
            for start in range(hyp_len - order + 1)
        )
        ref_counts = Counter(
            tuple(reference[start:start + order])
            for start in range(ref_len - order + 1)
        )
        # Counter intersection keeps the minimum count per n-gram, i.e. the
        # clipped match count.
        clipped = hyp_counts & ref_counts
        collected.extend((
            max(sum(clipped.values()), 0),
            max(hyp_len - order + 1, 0),
        ))
    return collected
def bleu(stats):
    """Compute BLEU given n-gram statistics.

    Args:
        stats: Flat sequence laid out as
            ``[c, r, match_1, total_1, match_2, total_2, ...]`` where ``c``
            and ``r`` are the candidate and reference lengths and each
            following pair is the matched / total n-gram count for one order.

    Returns:
        The score on a 0-1 scale: ``exp(min(0, 1 - r / c))`` (brevity
        penalty) times the geometric mean of the n-gram precisions.
        Returns ``0`` when any entry of ``stats`` is zero; no smoothing is
        applied.
    """
    # A zero anywhere (empty candidate/reference, or an order with no
    # matches or no candidates) would make a log or a division undefined.
    if any(x == 0 for x in stats):
        return 0
    (c, r) = stats[:2]
    precisions = [float(x) / y for x, y in zip(stats[2::2], stats[3::2])]
    if precisions:
        # Average over the number of orders actually supplied rather than a
        # hard-coded 4, so shorter or longer stats vectors are scored
        # correctly; for the usual 4 orders the result is unchanged.
        log_bleu_prec = sum(math.log(p) for p in precisions) / len(precisions)
    else:
        # No precision pairs: only the brevity penalty contributes.
        log_bleu_prec = 0.0
    return math.exp(min([0, 1 - float(r) / c]) + log_bleu_prec)
def get_bleu(hypotheses, reference):
    """Return the corpus-level BLEU score (0-100) for a set of sentence pairs.

    The per-pair statistics from ``bleu_stats`` are summed element-wise over
    all pairs and ``bleu`` is evaluated once on the totals.

    Args:
        hypotheses: Iterable of tokenized system outputs.
        reference: Iterable of tokenized references, aligned with
            ``hypotheses``; pairing uses ``zip``, so extra items in the
            longer iterable are ignored.

    Returns:
        ``100 * bleu(totals)``.
    """
    totals = np.zeros(10)
    for pair in zip(hypotheses, reference):
        totals += np.asarray(bleu_stats(*pair))
    score = bleu(totals)
    return 100 * score