From 28a5954c3f137470bf80fcc92cc597bf205be4b2 Mon Sep 17 00:00:00 2001
From: Kashif Rasul
Date: Tue, 17 Oct 2023 13:53:57 +0200
Subject: [PATCH 1/7] initial CRPS template

---
 metrics/crps/README.md        | 34 ++++++++++++++++++++++++++++++++++
 metrics/crps/app.py           |  6 ++++++
 metrics/crps/crps.py          |  0
 metrics/crps/requirements.txt |  2 ++
 4 files changed, 42 insertions(+)
 create mode 100644 metrics/crps/README.md
 create mode 100644 metrics/crps/app.py
 create mode 100644 metrics/crps/crps.py
 create mode 100644 metrics/crps/requirements.txt

diff --git a/metrics/crps/README.md b/metrics/crps/README.md
new file mode 100644
index 000000000..2b7c3cd59
--- /dev/null
+++ b/metrics/crps/README.md
@@ -0,0 +1,34 @@
+---
+title: CRPS
+emoji: 🤗
+colorFrom: blue
+colorTo: red
+sdk: gradio
+sdk_version: 3.19.1
+app_file: app.py
+pinned: false
+tags:
+- evaluate
+- metric
+description: >-
+  Continuous Ranked Probability Score (CRPS) is a metric that measures the accuracy of probabilistic predictions.
+---
+
+# Metric Card for CRPS
+
+## Metric description
+
+Continuous Ranked Probability Score (CRPS) is a metric that measures the accuracy of probabilistic predictions. It is commonly used in weather forecasting to measure the accuracy of probabilistic forecasts. For a random variable $X$ and a cumulative distribution function (CDF) $F$ of $X$, the CRPS is defined for a ground-truth observation $x$ and an empirical estimate of $F$ from predicted samples as:
+
+$$
+CRPS(F, x) = \int_{-\infty}^{\infty} (F(z) - \mathbb{1}_{z \geq x})^2 dz,
+$$
+
+where $\mathbb{1}_{z \geq x}$ is the indicator function, equal to one when $z \geq x$ and zero otherwise. The CRPS is expressed in the same unit as the observed variable and generalizes the MAE metric to probabilistic predictions. The lower the CRPS, the better the predictions.
+
+## How to use
+
+```python
+>>> crps_metric = evaluate.load("crps")
+```
+
diff --git a/metrics/crps/app.py b/metrics/crps/app.py
new file mode 100644
index 000000000..0c0179cfc
--- /dev/null
+++ b/metrics/crps/app.py
@@ -0,0 +1,6 @@
+import evaluate
+from evaluate.utils import launch_gradio_widget
+
+
+module = evaluate.load("crps")
+launch_gradio_widget(module)
diff --git a/metrics/crps/crps.py b/metrics/crps/crps.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/metrics/crps/requirements.txt b/metrics/crps/requirements.txt
new file mode 100644
index 000000000..c9c2c1e33
--- /dev/null
+++ b/metrics/crps/requirements.txt
@@ -0,0 +1,2 @@
+git+https://github.com/huggingface/evaluate@{COMMIT_PLACEHOLDER}
+scikit-learn

From 0363fb2f34d1a40007c92210e9e88957edec20aa Mon Sep 17 00:00:00 2001
From: Kashif Rasul
Date: Sat, 21 Oct 2023 11:27:26 +0200
Subject: [PATCH 2/7] initial crps

---
 metrics/crps/crps.py | 96 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/metrics/crps/crps.py b/metrics/crps/crps.py
index e69de29bb..bb4f64201 100644
--- a/metrics/crps/crps.py
+++ b/metrics/crps/crps.py
@@ -0,0 +1,96 @@
+# Copyright 2022 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""CRPS - Continuous Ranked Probability Score Metric"""
+
+import datasets
+import numpy as np
+
+import evaluate
+
+_CITATION = """\
+@article{doi:10.1198/016214506000001437,
+    author = {Tilmann Gneiting and Adrian E Raftery},
+    title = {Strictly Proper Scoring Rules, Prediction, and Estimation},
+    journal = {Journal of the American Statistical Association},
+    volume = {102},
+    number = {477},
+    pages = {359--378},
+    year = {2007},
+    publisher = {Taylor & Francis},
+    doi = {10.1198/016214506000001437},
+    URL = {https://doi.org/10.1198/016214506000001437},
+    eprint = {https://doi.org/10.1198/016214506000001437}
+}
+"""
+
+_DESCRIPTION = """\
+Continuous Ranked Probability Score (CRPS) is the generalization of mean absolute error to the case of probabilistic forecasts used to assess the respective accuracy of probabilistic forecasting methods.
+"""
+
+_KWARGS_DESCRIPTION = """
+Args:
+    predictions: array-like of shape (n_samples, n_data) or (n_samples, n_data, n_timesteps, n_outputs)
+        n_sampels from estimated target distribution.
+    references: array-like of shape (n_data,) or (n_data, n_timesteps, n_outputs)
+        Empirical (correct) target values from ground truth distribution.
+    sum: bool, default=False
+        Defines whether to sum over sum_axis dimension.
+    sum_axis: int, default=-1
+        Defines axis to sum over in case of multioutput input.
+    multioutput: {"raw_values", "uniform_average"}
+        Defines aggregating across the n_outputs dimension.
+        "raw_values" returns full set of scores in case of multioutput input.
+        "uniform_average" returns the average score across all outputs.
+Returns:
+    crps: float
+        Continuous Ranked Probability Score.
+
+Examples:
+"""
+
+
+@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Crps(evaluate.Metric):
+    def _info(self):
+        return evaluate.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            features=datasets.Features(self._get_feature_types()),
+            reference_urls=[
+                "https://www.lokad.com/continuous-ranked-probability-score/"
+            ],
+        )
+
+    def _get_feature_types(self):
+        if self.config_name == "multilist":
+            return {
+                "predictions": datasets.Sequence(datasets.Value("float")),
+                "references": datasets.Sequence(datasets.Value("float")),
+            }
+        else:
+            return {
+                "predictions": datasets.Value("float"),
+                "references": datasets.Value("float"),
+            }
+
+    def _compute(
+        self,
+        predictions,
+        references,
+        sum=False,
+        sum_axis=-1,
+        multioutput="uniform_average",
+    ):
+        pass

From c386da6ff9c2676fb5bd437540074682da8546e3 Mon Sep 17 00:00:00 2001
From: Kashif Rasul
Date: Mon, 23 Oct 2023 11:32:09 +0200
Subject: [PATCH 3/7] initial crps

---
 metrics/crps/crps.py | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/metrics/crps/crps.py b/metrics/crps/crps.py
index bb4f64201..e81938df5 100644
--- a/metrics/crps/crps.py
+++ b/metrics/crps/crps.py
@@ -40,14 +40,16 @@
 
 _KWARGS_DESCRIPTION = """
 Args:
-    predictions: array-like of shape (n_samples, n_data) or (n_samples, n_data, n_timesteps, n_outputs)
-        n_sampels from estimated target distribution.
-    references: array-like of shape (n_data,) or (n_data, n_timesteps, n_outputs)
+    predictions: array-like of shape (n_samples,) or (n_samples, n_timesteps, n_outputs)
+        n_samples from estimated target distribution.
+    references: array-like of shape (1,) or (n_timesteps, n_outputs)
         Empirical (correct) target values from ground truth distribution.
+    quantiles: list of floats, default=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        List of quantiles in the unit interval to compute CRPS over.
     sum: bool, default=False
         Defines whether to sum over sum_axis dimension.
     sum_axis: int, default=-1
-        Defines axis to sum over in case of multioutput input.
+        Defines axis to sum over in case of n_outputs > 1.
     multioutput: {"raw_values", "uniform_average"}
         Defines aggregating across the n_outputs dimension.
         "raw_values" returns full set of scores in case of multioutput input.
@@ -85,12 +87,38 @@ def _get_feature_types(self):
                 "references": datasets.Value("float"),
             }
 
+    @staticmethod
+    def quantile_loss(target: np.ndarray, forecast: np.ndarray, q: float) -> float:
+        return 2.0 * np.sum(np.abs((target - forecast) * ((target <= forecast) - q)))
+
     def _compute(
         self,
         predictions,
         references,
+        quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
         sum=False,
         sum_axis=-1,
         multioutput="uniform_average",
     ):
-        pass
+        # if sum is True and predictions has more than one dimension, sum over the sum_axis dimension
+        if sum and len(predictions.shape) > 1:
+            predictions = np.sum(predictions, axis=sum_axis)
+            references = np.sum(references, axis=sum_axis)
+
+        abs_target_sum = np.sum(np.abs(references))
+        weighted_quantile_loss = []
+        for q in quantiles:
+            forecast_quantile = np.quantile(predictions, q, axis=0)
+            weighted_quantile_loss.append(
+                self.quantile_loss(references, forecast_quantile, q) / abs_target_sum
+            )
+
+        if multioutput == "raw_values":
+            return weighted_quantile_loss
+        elif multioutput == "uniform_average":
+            return np.average(weighted_quantile_loss)
+        else:
+            raise ValueError(
+                "The multioutput parameter should be one of the following: "
+                + "'raw_values', 'uniform_average'"
+            )

From 421166ecf88a8fe71fb744caaf527eb740f134fe Mon Sep 17 00:00:00 2001
From: Kashif Rasul
Date: Thu, 26 Oct 2023 19:00:02 +0200
Subject: [PATCH 4/7] initial example

---
 metrics/crps/crps.py          | 18 +++++++++++++++---
 metrics/crps/requirements.txt |  2 +-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/metrics/crps/crps.py b/metrics/crps/crps.py
index e81938df5..1b2a8f80f 100644
--- a/metrics/crps/crps.py
+++ b/metrics/crps/crps.py
@@ -58,7 +58,17 @@
     crps: float
        Continuous Ranked Probability Score.
 
-Examples:
+Examples:
+
+    >>> crps_metric = evaluate.load("crps")
+    >>> predictions = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
+    >>> references = np.array([0.3])
+    >>> results = crps_metric.compute(predictions=predictions, references=references)
+    >>> print(results)
+    {'mase': 0.18333333333333335}
+
+
+
 """
 
 
@@ -78,12 +88,14 @@ def _info(self):
     def _get_feature_types(self):
         if self.config_name == "multilist":
             return {
-                "predictions": datasets.Sequence(datasets.Value("float")),
+                "predictions": datasets.Sequence(
+                    datasets.Sequence(datasets.Value("float"))
+                ),
                 "references": datasets.Sequence(datasets.Value("float")),
             }
         else:
             return {
-                "predictions": datasets.Value("float"),
+                "predictions": datasets.Sequence(datasets.Value("float")),
                 "references": datasets.Value("float"),
             }
diff --git a/metrics/crps/requirements.txt b/metrics/crps/requirements.txt
index c9c2c1e33..3684f5d71 100644
--- a/metrics/crps/requirements.txt
+++ b/metrics/crps/requirements.txt
@@ -1,2 +1,2 @@
 git+https://github.com/huggingface/evaluate@{COMMIT_PLACEHOLDER}
-scikit-learn
+numpy

From 7f21013d6d2074d3963c8acc78211fb736b3b2c8 Mon Sep 17 00:00:00 2001
From: Kashif Rasul
Date: Fri, 27 Oct 2023 10:34:13 +0200
Subject: [PATCH 5/7] return dict

---
 metrics/crps/crps.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/metrics/crps/crps.py b/metrics/crps/crps.py
index 1b2a8f80f..ccbecffc3 100644
--- a/metrics/crps/crps.py
+++ b/metrics/crps/crps.py
@@ -61,14 +61,17 @@
 Examples:
 
     >>> crps_metric = evaluate.load("crps")
-    >>> predictions = np.array([0.1, 0.2, 0.3, 0.4, 0.5])
+    >>> predictions = np.array([[0.1, 0.2, 0.3, 0.4, 0.5]])
     >>> references = np.array([0.3])
     >>> results = crps_metric.compute(predictions=predictions, references=references)
    >>> print(results)
-    {'mase': 0.18333333333333335}
-
-
+    {'crps': 1.9999999254941967}
 
+    >>> crps_metric = evaluate.load("crps", "multilist")
+    >>> predictions = [[[0, 2, 4]], [[-1, 2, 5]], [[8, -5, 6]]]
+    >>> references = [[0.5], [-1], [7]]
+    >>> print(results)
+    {'crps': 3.0522875816993467}
 """
 
 
@@ -126,9 +129,9 @@ def _compute(
             )
 
         if multioutput == "raw_values":
-            return weighted_quantile_loss
+            return {"crps": weighted_quantile_loss}
         elif multioutput == "uniform_average":
-            return np.average(weighted_quantile_loss)
+            return {"crps": np.average(weighted_quantile_loss)}
         else:
             raise ValueError(
                 "The multioutput parameter should be one of the following: "

From 788c7dc6de0b419c5b209276bf5ccb69a8088a2a Mon Sep 17 00:00:00 2001
From: Kashif Rasul
Date: Fri, 27 Oct 2023 11:04:51 +0200
Subject: [PATCH 6/7] isort

---
 metrics/crps/crps.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/metrics/crps/crps.py b/metrics/crps/crps.py
index ccbecffc3..e87959a1d 100644
--- a/metrics/crps/crps.py
+++ b/metrics/crps/crps.py
@@ -18,6 +18,7 @@
 
 import evaluate
 
+
 _CITATION = """\
 @article{doi:10.1198/016214506000001437,
     author = {Tilmann Gneiting and Adrian E Raftery},

From aa54d13023ef21a3641377c74729084a21df7f35 Mon Sep 17 00:00:00 2001
From: Kashif Rasul
Date: Fri, 27 Oct 2023 11:07:02 +0200
Subject: [PATCH 7/7] reformat

---
 metrics/crps/crps.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/metrics/crps/crps.py b/metrics/crps/crps.py
index e87959a1d..f35f8c178 100644
--- a/metrics/crps/crps.py
+++ b/metrics/crps/crps.py
@@ -84,17 +84,13 @@ def _info(self):
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(self._get_feature_types()),
-            reference_urls=[
"https://www.lokad.com/continuous-ranked-probability-score/" - ], + reference_urls=["https://www.lokad.com/continuous-ranked-probability-score/"], ) def _get_feature_types(self): if self.config_name == "multilist": return { - "predictions": datasets.Sequence( - datasets.Sequence(datasets.Value("float")) - ), + "predictions": datasets.Sequence(datasets.Sequence(datasets.Value("float"))), "references": datasets.Sequence(datasets.Value("float")), } else: @@ -125,9 +121,7 @@ def _compute( weighted_quantile_loss = [] for q in quantiles: forecast_quantile = np.quantile(predictions, q, axis=0) - weighted_quantile_loss.append( - self.quantile_loss(references, forecast_quantile, q) / abs_target_sum - ) + weighted_quantile_loss.append(self.quantile_loss(references, forecast_quantile, q) / abs_target_sum) if multioutput == "raw_values": return {"crps": weighted_quantile_loss} @@ -135,6 +129,5 @@ def _compute( return {"crps": np.average(weighted_quantile_loss)} else: raise ValueError( - "The multioutput parameter should be one of the following: " - + "'raw_values', 'uniform_average'" + "The multioutput parameter should be one of the following: " + "'raw_values', 'uniform_average'" )