From e5aa2c53599917da4cc4f1cbc472e2eb6cf07e7f Mon Sep 17 00:00:00 2001
From: J-Mourad
Date: Thu, 25 Jun 2020 16:59:13 +0200
Subject: [PATCH] Added parts of a GAP metric for multi-level LTR

---
 tensorflow_ranking/python/metrics.py      | 46 ++++++++++++++++++++++++
 tensorflow_ranking/python/metrics_impl.py | 18 +++++++++
 tensorflow_ranking/python/metrics_test.py | 20 ++++++++++
 3 files changed, 84 insertions(+)

diff --git a/tensorflow_ranking/python/metrics.py b/tensorflow_ranking/python/metrics.py
index 0ae97c2..99fedd7 100644
--- a/tensorflow_ranking/python/metrics.py
+++ b/tensorflow_ranking/python/metrics.py
@@ -57,6 +57,9 @@ class RankingMetricKey(object):
   # Ordered Pair Accuracy.
   ORDERED_PAIR_ACCURACY = 'ordered_pair_accuracy'
 
+  # Global Average Precision (GAP).
+  GAP = 'gap'
+
 
 def compute_mean(metric_key,
                  labels,
@@ -89,6 +92,7 @@ def compute_mean(metric_key,
       RankingMetricKey.PRECISION: metrics_impl.PrecisionMetric(name, topn),
       RankingMetricKey.MAP: metrics_impl.MeanAveragePrecisionMetric(name, topn),
       RankingMetricKey.ORDERED_PAIR_ACCURACY: metrics_impl.OPAMetric(name),
+      RankingMetricKey.GAP: metrics_impl.GlobalAveragePrecisionMetric(name, topn),
   }
   assert metric_key in metric_dict, ('metric_key %s not supported.' %
                                      metric_key)
@@ -199,6 +203,15 @@ def _ordered_pair_accuracy_fn(labels, predictions, features):
     return ordered_pair_accuracy(
         labels, predictions, weights=_get_weights(features), name=name)
 
+  def _global_average_precision_fn(labels, predictions, features):
+    """Returns global average precision as the metric."""
+    return global_average_precision(
+        labels,
+        predictions,
+        weights=_get_weights(features),
+        topn=topn,
+        name=name)
+
   metric_fn_dict = {
       RankingMetricKey.ARP: _average_relevance_position_fn,
       RankingMetricKey.MRR: _mean_reciprocal_rank_fn,
@@ -207,6 +220,7 @@ def _ordered_pair_accuracy_fn(labels, predictions, features):
       RankingMetricKey.PRECISION: _precision_fn,
       RankingMetricKey.MAP: _mean_average_precision_fn,
       RankingMetricKey.ORDERED_PAIR_ACCURACY: _ordered_pair_accuracy_fn,
+      RankingMetricKey.GAP: _global_average_precision_fn,
   }
   assert metric_key in metric_fn_dict, ('metric_key %s not supported.' %
                                         metric_key)
@@ -415,6 +429,38 @@ def ordered_pair_accuracy(labels, predictions, weights=None, name=None):
     correct_pairs, pair_weights = metric.compute(labels, predictions, weights)
     return tf.compat.v1.metrics.mean(correct_pairs, pair_weights)
 
+
+def global_average_precision(labels,
+                             predictions,
+                             weights=None,
+                             topn=None,
+                             name=None):
+  """Computes global average precision (GAP).
+
+  The implementation of GAP is based on Equation (1.7) on the Kaggle
+  evaluation metric page of the YouTube-8M challenge, found at
+  https://www.kaggle.com/c/youtube8m/overview/evaluation
+
+  Args:
+    labels: A `Tensor` of the same shape as `predictions`. A value >= 1 means a
+      relevant example.
+    predictions: A `Tensor` with shape [batch_size, list_size]. Each value is
+      the ranking score of the corresponding example.
+    weights: A `Tensor` of the same shape of predictions or [batch_size, 1]. The
+      former case is per-example and the latter case is per-list.
+    topn: A cutoff for how many examples to consider for this metric.
+    name: A string used as the name for this metric.
+
+  Returns:
+    A metric for the global average precision.
+ """ + metric = metrics_impl.GlobalAveragePrecisionMetric(name, topn) + with tf.compat.v1.name_scope(metric.name, 'global_average_precision', + (labels, predictions, weights)): + per_list_map, per_list_weights = metric.compute(labels, predictions, + weights) + return tf.compat.v1.metrics.mean(per_list_map, per_list_weights) + def eval_metric(metric_fn, **kwargs): """A stand-alone method to evaluate metrics on ranked results. diff --git a/tensorflow_ranking/python/metrics_impl.py b/tensorflow_ranking/python/metrics_impl.py index 0f84157..b77ad75 100644 --- a/tensorflow_ranking/python/metrics_impl.py +++ b/tensorflow_ranking/python/metrics_impl.py @@ -428,3 +428,17 @@ def compute(self, labels, predictions, weights): weights, 2) * tf.cast( valid_pair, dtype=tf.float32) return correct_pairs, pair_weights + +class GlobalAveragePrecisionMetric(_RankingMetric): + """Implements Global Average Precesion (GAP).""" + def __init__(self, name): + """Constructor.""" + self._name = name + + @property + def name(self): + """The metric name.""" + return self._name + + def compute(self, labels, predictions, weights): + pass \ No newline at end of file diff --git a/tensorflow_ranking/python/metrics_test.py b/tensorflow_ranking/python/metrics_test.py index 5f5b121..70d130d 100644 --- a/tensorflow_ranking/python/metrics_test.py +++ b/tensorflow_ranking/python/metrics_test.py @@ -817,6 +817,25 @@ def test_compute_mean(self): key, labels, scores, weights, 2, name=key)) self.assertGreater(value, 0.) + def test_global_average_precision(self): + with tf.Graph().as_default(): + scores = [[1., 3., 2.], [1., 2., 3.]] + # Note that scores are ranked in descending order, so the ranks are + # [[3, 1, 2], [3, 2, 1]] + labels = [[0., 0., 1.], [0., 1., 2.]] + rels = [[0, 0, 1], [0, 1, 1]] + m = metrics_lib.global_average_precision + # self._check_metrics([ + # (m([labels[0]], [scores[0]]), _ap(rels[0], scores[0])), + # (m([labels[0]], [scores[0]], topn=1), _ap(rels[0], scores[0], + # topn=1)), + # (m([labels[0]], [scores[0]], topn=2), _ap(rels[0], scores[0], + # topn=2)), + # (m(labels, + # scores), sum(_ap(rels[i], scores[i]) for i in range(2)) / 2.), + # (m(labels, scores, topn=1), + # sum(_ap(rels[i], scores[i], topn=1) for i in range(2)) / 2.), + # ]) if __name__ == '__main__': tf.compat.v1.enable_v2_behavior()