From e5aa2c53599917da4cc4f1cbc472e2eb6cf07e7f Mon Sep 17 00:00:00 2001
From: J-Mourad
Date: Thu, 25 Jun 2020 16:59:13 +0200
Subject: [PATCH] Added parts of a GAP metric for multi-level LTR

---
 tensorflow_ranking/python/metrics.py      | 46 ++++++++++++++++++++++++
 tensorflow_ranking/python/metrics_impl.py | 18 +++++++++
 tensorflow_ranking/python/metrics_test.py | 20 ++++++++++
 3 files changed, 84 insertions(+)

diff --git a/tensorflow_ranking/python/metrics.py b/tensorflow_ranking/python/metrics.py
index 0ae97c2..99fedd7 100644
--- a/tensorflow_ranking/python/metrics.py
+++ b/tensorflow_ranking/python/metrics.py
@@ -57,6 +57,9 @@ class RankingMetricKey(object):
   # Ordered Pair Accuracy.
   ORDERED_PAIR_ACCURACY = 'ordered_pair_accuracy'
 
+  # Global Average Precision (GAP).
+  GAP = 'gap'
+
 
 def compute_mean(metric_key,
                  labels,
@@ -89,6 +92,7 @@ def compute_mean(metric_key,
       RankingMetricKey.PRECISION: metrics_impl.PrecisionMetric(name, topn),
       RankingMetricKey.MAP: metrics_impl.MeanAveragePrecisionMetric(name, topn),
       RankingMetricKey.ORDERED_PAIR_ACCURACY: metrics_impl.OPAMetric(name),
+      RankingMetricKey.GAP: metrics_impl.GlobalAveragePrecisionMetric(name, topn),
   }
   assert metric_key in metric_dict, ('metric_key %s not supported.' %
                                      metric_key)
@@ -199,6 +203,15 @@ def _ordered_pair_accuracy_fn(labels, predictions, features):
     return ordered_pair_accuracy(
         labels, predictions, weights=_get_weights(features), name=name)
 
+  def _global_average_precision_fn(labels, predictions, features):
+    """Returns global average precision as the metric."""
+    return global_average_precision(
+        labels,
+        predictions,
+        weights=_get_weights(features),
+        topn=topn,
+        name=name)
+
   metric_fn_dict = {
       RankingMetricKey.ARP: _average_relevance_position_fn,
       RankingMetricKey.MRR: _mean_reciprocal_rank_fn,
@@ -207,6 +220,7 @@ def _ordered_pair_accuracy_fn(labels, predictions, features):
       RankingMetricKey.PRECISION: _precision_fn,
       RankingMetricKey.MAP: _mean_average_precision_fn,
       RankingMetricKey.ORDERED_PAIR_ACCURACY: _ordered_pair_accuracy_fn,
+      RankingMetricKey.GAP: _global_average_precision_fn,
   }
   assert metric_key in metric_fn_dict, ('metric_key %s not supported.' %
                                         metric_key)
@@ -415,6 +429,38 @@ def ordered_pair_accuracy(labels, predictions, weights=None, name=None):
     correct_pairs, pair_weights = metric.compute(labels, predictions, weights)
     return tf.compat.v1.metrics.mean(correct_pairs, pair_weights)
 
+
+def global_average_precision(labels,
+                             predictions,
+                             weights=None,
+                             topn=None,
+                             name=None):
+  """Computes global average precision (GAP).
+
+  The implementation of GAP is based on Equation (1.7) on the Kaggle
+  evaluation metric page of the YouTube-8M challenge, found at
+  https://www.kaggle.com/c/youtube8m/overview/evaluation
+
+  Args:
+    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
+      relevant example.
+    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
+      the ranking score of the corresponding example.
+    weights: A `Tensor` of the same shape of predictions or [batch_size, 1]. The
+      former case is per-example and the latter case is per-list.
+    topn: A cutoff for how many examples to consider for this metric.
+    name: A string used as the name for this metric.
+
+  Returns:
+    A metric for the global average precision.
+ """ + metric = metrics_impl.GlobalAveragePrecisionMetric(name, topn) + with tf.compat.v1.name_scope(metric.name, 'global_average_precision', + (labels, predictions, weights)): + per_list_map, per_list_weights = metric.compute(labels, predictions, + weights) + return tf.compat.v1.metrics.mean(per_list_map, per_list_weights) + def eval_metric(metric_fn, **kwargs): """A stand-alone method to evaluate metrics on ranked results. diff --git a/tensorflow_ranking/python/metrics_impl.py b/tensorflow_ranking/python/metrics_impl.py index 0f84157..b77ad75 100644 --- a/tensorflow_ranking/python/metrics_impl.py +++ b/tensorflow_ranking/python/metrics_impl.py @@ -428,3 +428,17 @@ def compute(self, labels, predictions, weights): weights, 2) * tf.cast( valid_pair, dtype=tf.float32) return correct_pairs, pair_weights + +class GlobalAveragePrecisionMetric(_RankingMetric): + """Implements Global Average Precesion (GAP).""" + def __init__(self, name): + """Constructor.""" + self._name = name + + @property + def name(self): + """The metric name.""" + return self._name + + def compute(self, labels, predictions, weights): + pass \ No newline at end of file diff --git a/tensorflow_ranking/python/metrics_test.py b/tensorflow_ranking/python/metrics_test.py index 5f5b121..70d130d 100644 --- a/tensorflow_ranking/python/metrics_test.py +++ b/tensorflow_ranking/python/metrics_test.py @@ -817,6 +817,25 @@ def test_compute_mean(self): key, labels, scores, weights, 2, name=key)) self.assertGreater(value, 0.) + def test_global_average_precision(self): + with tf.Graph().as_default(): + scores = [[1., 3., 2.], [1., 2., 3.]] + # Note that scores are ranked in descending order, so the ranks are + # [[3, 1, 2], [3, 2, 1]] + labels = [[0., 0., 1.], [0., 1., 2.]] + rels = [[0, 0, 1], [0, 1, 1]] + m = metrics_lib.global_average_precision + # self._check_metrics([ + # (m([labels[0]], [scores[0]]), _ap(rels[0], scores[0])), + # (m([labels[0]], [scores[0]], topn=1), _ap(rels[0], scores[0], + # topn=1)), + # (m([labels[0]], [scores[0]], topn=2), _ap(rels[0], scores[0], + # topn=2)), + # (m(labels, + # scores), sum(_ap(rels[i], scores[i]) for i in range(2)) / 2.), + # (m(labels, scores, topn=1), + # sum(_ap(rels[i], scores[i], topn=1) for i in range(2)) / 2.), + # ]) if __name__ == '__main__': tf.compat.v1.enable_v2_behavior()