From a0c6bb25dd7cdbc3e2682801b6e83a9def99c542 Mon Sep 17 00:00:00 2001 From: Sadra Barikbin Date: Fri, 11 Oct 2024 22:39:37 +0330 Subject: [PATCH 1/4] Implement the feature --- community_tasks/_template.py | 2 +- community_tasks/arabic_evals.py | 6 +- examples/nanotron/custom_evaluation_tasks.py | 16 +- examples/nanotron/custom_task.py | 4 +- src/lighteval/metrics/__init__.py | 10 +- src/lighteval/models/model_output.py | 48 +- src/lighteval/tasks/default_tasks.py | 2464 +++++++++--------- src/lighteval/tasks/lighteval_task.py | 21 +- tests/test_answer_extractor.py | 133 + 9 files changed, 1450 insertions(+), 1254 deletions(-) create mode 100644 tests/test_answer_extractor.py diff --git a/community_tasks/_template.py b/community_tasks/_template.py index 345aebe4..e408e928 100644 --- a/community_tasks/_template.py +++ b/community_tasks/_template.py @@ -99,7 +99,7 @@ def __init__( suite=["community"], generation_size=-1, stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, ) diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py index 323120cd..792e2774 100644 --- a/community_tasks/arabic_evals.py +++ b/community_tasks/arabic_evals.py @@ -95,7 +95,7 @@ def __init__( suite=["community"], generation_size=-1, stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -152,7 +152,7 @@ def __init__( suite=["community"], generation_size=-1, stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -254,7 +254,7 @@ def __init__( suite=["community"], generation_size=-1, stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, version=0, ) diff --git a/examples/nanotron/custom_evaluation_tasks.py b/examples/nanotron/custom_evaluation_tasks.py index 6d4edd62..7d7cbafa 100644 --- a/examples/nanotron/custom_evaluation_tasks.py +++ b/examples/nanotron/custom_evaluation_tasks.py @@ -267,7 +267,7 @@ 
def __init__( generation_size=40, trust_dataset=True, stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, ): super().__init__( @@ -282,7 +282,7 @@ def __init__( few_shots_select=few_shots_select, suite=suite, generation_size=generation_size, - output_regex=output_regex, + answer_extractor=answer_extractor, frozen=frozen, trust_dataset=trust_dataset, stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), @@ -371,7 +371,7 @@ def __init__( generation_size=-1, trust_dataset=True, stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, ): super().__init__( @@ -388,7 +388,7 @@ def __init__( generation_size=generation_size, trust_dataset=trust_dataset, stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), - output_regex=output_regex, + answer_extractor=answer_extractor, frozen=frozen, ) @@ -488,7 +488,7 @@ def __init__( generation_size=4, trust_dataset=True, stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, ): super().__init__( @@ -505,7 +505,7 @@ def __init__( generation_size=generation_size, trust_dataset=trust_dataset, stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), - output_regex=output_regex, + answer_extractor=answer_extractor, frozen=frozen, ) @@ -624,7 +624,7 @@ def __init__( generation_size=-1, trust_dataset=True, stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, ): super().__init__( @@ -641,7 +641,7 @@ def __init__( generation_size=generation_size, trust_dataset=trust_dataset, stop_sequence=(stop_sequence if stop_sequence is not None else ["\n"]), - output_regex=output_regex, + answer_extractor=answer_extractor, frozen=frozen, ) diff --git a/examples/nanotron/custom_task.py b/examples/nanotron/custom_task.py index 49332321..6a580b01 100644 --- a/examples/nanotron/custom_task.py +++ b/examples/nanotron/custom_task.py @@ -82,7 +82,7 @@ def mmlu_anatomy(line): generation_size=5, 
metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, ), LightevalTaskConfig( @@ -98,7 +98,7 @@ def mmlu_anatomy(line): generation_size=5, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, ), ] diff --git a/src/lighteval/metrics/__init__.py b/src/lighteval/metrics/__init__.py index 7b9ada7a..62b88927 100644 --- a/src/lighteval/metrics/__init__.py +++ b/src/lighteval/metrics/__init__.py @@ -20,10 +20,10 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import re +from typing import Optional from lighteval.metrics.metrics import Metric, MetricCategory -from lighteval.models.model_output import ModelResponse +from lighteval.models.model_output import AnswerExtractor, ModelResponse from lighteval.tasks.requests import Doc from lighteval.utils.utils import as_list @@ -89,7 +89,7 @@ def apply_generative_metric( # noqa: C901 responses: list[list[ModelResponse]], formatted_docs: list[Doc], metrics: list[Metric], - output_regex: str = None, + answer_extractor: Optional[AnswerExtractor] = None, max_num_samples: int = 1, ): outputs = [] @@ -106,8 +106,8 @@ def apply_generative_metric( # noqa: C901 preds = [] for pred_raw in preds_raw: - if output_regex is not None: - pred = next(iter(re.findall(output_regex, pred_raw)), "") + if answer_extractor: + pred = answer_extractor(pred_raw, formatted_doc.choices) else: pred = pred_raw preds.append(pred) diff --git a/src/lighteval/models/model_output.py b/src/lighteval/models/model_output.py index 604bac39..f081b775 100644 --- a/src/lighteval/models/model_output.py +++ b/src/lighteval/models/model_output.py @@ -20,8 +20,11 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+import random +import re +from abc import abstractmethod from dataclasses import dataclass, field -from typing import Optional, Union +from typing import Any, Literal, Optional, Union import torch @@ -81,3 +84,46 @@ class Batch: input_lengths: list[int] truncated: list[int] padded: list[int] + + +class AnswerExtractor: + @abstractmethod + def __call__(self, result: str): + ... + + @abstractmethod + def as_dict() -> dict: + ... + + # Bad hack. Thanks to LightevalTaskConfig's becoming dict in the beginning of the evaluation! + # Maybe it's fixed now in main branch. + @classmethod + def from_dict(cls, properties: dict[str, Any]) -> "AnswerExtractor": + return RegexAnswerExtractor(properties["regex_list"], properties["fallback"]) + + +class RegexAnswerExtractor(AnswerExtractor): + def __init__( + self, + regex_list: list[re.Pattern | str], + fallback: int | Literal["random", "keep", "empty_string"] = "empty_string", + ): + self.regex_list: list[re.Pattern] = list(map(re.compile, regex_list)) + self.fallback = fallback + + def __call__(self, result: str, choices: list[str]) -> str: + for pattern in self.regex_list: + choice = next(iter(re.findall(pattern, result)), "") + if choice in choices: + return choice + if self.fallback == "random": + return random.choice(choices) + elif self.fallback == "keep": + return result + elif self.fallback == "empty_string": + return "" + else: + return choices[self.fallback] + + def as_dict(self) -> dict: + return {"regex_list": [p.pattern for p in self.regex_list], "fallback": self.fallback} diff --git a/src/lighteval/tasks/default_tasks.py b/src/lighteval/tasks/default_tasks.py index 198ea9e9..39524010 100644 --- a/src/lighteval/tasks/default_tasks.py +++ b/src/lighteval/tasks/default_tasks.py @@ -21,6 +21,7 @@ # SOFTWARE. 
import lighteval.tasks.default_prompts as prompt from lighteval.metrics.metrics import Metrics +from lighteval.models.model_output import RegexAnswerExtractor from lighteval.tasks.lighteval_task import LightevalTaskConfig @@ -37,7 +38,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -55,7 +56,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -73,7 +74,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -91,7 +92,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -109,7 +110,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -127,7 +128,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -145,7 +146,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -163,7 +164,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -181,7 
+182,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -199,7 +200,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -217,7 +218,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -235,7 +236,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -253,7 +254,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -271,7 +272,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -289,7 +290,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -307,7 +308,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -325,7 +326,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -343,7 +344,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=None, - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -361,7 +362,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -379,7 +380,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -397,7 +398,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -415,7 +416,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -433,7 +434,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -451,7 +452,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -469,7 +470,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -487,7 +488,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -505,7 +506,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -523,7 +524,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -541,7 +542,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -559,7 +560,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -577,7 +578,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -595,7 +596,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -613,7 +614,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -631,7 +632,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -649,7 +650,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -667,7 +668,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -685,7 +686,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -703,7 +704,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -721,7 +722,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -739,7 +740,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -757,7 +758,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -775,7 +776,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -793,7 +794,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -811,7 +812,7 @@ generation_size=1, metric=[Metrics.bleu], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -829,7 +830,7 @@ generation_size=100, metric=[Metrics.perfect_exact_match], stop_sequence=None, - output_regex="[^\\.\\?\\!\\;\\n]+", + answer_extractor=RegexAnswerExtractor([r"[^\\.\\?\\!\\;\\n]+"]), trust_dataset=True, version=0, ) @@ -851,7 +852,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -869,7 +870,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -887,7 +888,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -905,7 +906,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -923,7 +924,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -941,7 +942,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -959,7 +960,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -977,7 +978,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -995,7 +996,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1013,7 +1014,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1031,7 +1032,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, 
@@ -1049,7 +1050,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1067,7 +1068,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1085,7 +1086,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1103,7 +1104,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1121,7 +1122,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1139,7 +1140,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1157,7 +1158,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1175,7 +1176,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1193,7 +1194,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1212,7 
+1213,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1231,7 +1232,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1250,7 +1251,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1269,7 +1270,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1288,7 +1289,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1307,7 +1308,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1326,7 +1327,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1345,7 +1346,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
must_remove_duplicate_docs=True, trust_dataset=True, @@ -1364,7 +1365,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1383,7 +1384,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1402,7 +1403,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1421,7 +1422,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1440,7 +1441,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1459,7 +1460,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1478,7 +1479,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1497,7 +1498,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1516,7 +1517,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, must_remove_duplicate_docs=True, trust_dataset=True, @@ -1541,7 +1542,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1565,7 +1566,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1589,7 +1590,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1613,7 +1614,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1637,7 +1638,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1661,7 +1662,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1685,7 +1686,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1709,7 +1710,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1733,7 +1734,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1757,7 +1758,7 
@@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1781,7 +1782,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1805,7 +1806,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1829,7 +1830,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1853,7 +1854,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1877,7 +1878,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1901,7 +1902,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1925,7 +1926,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1949,7 +1950,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1973,7 +1974,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -1997,7 +1998,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2021,7 +2022,7 @@ 
Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2045,7 +2046,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2069,7 +2070,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2093,7 +2094,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2117,7 +2118,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2141,7 +2142,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2165,7 +2166,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["", "Q=", "\n\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2189,7 +2190,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2213,7 +2214,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2237,7 +2238,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2261,7 +2262,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2285,7 +2286,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2309,7 +2310,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2333,7 +2334,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2357,7 +2358,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2381,7 +2382,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2405,7 +2406,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2429,7 +2430,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2453,7 +2454,7 @@ Metrics.perfect_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2471,7 +2472,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2489,7 +2490,7 @@ generation_size=100, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2512,7 +2513,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2535,7 +2536,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2558,7 
+2559,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2581,7 +2582,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2604,7 +2605,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2627,7 +2628,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2650,7 +2651,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2673,7 +2674,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2696,7 +2697,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2719,7 +2720,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2742,7 +2743,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2765,7 +2766,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2788,7 +2789,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2811,7 +2812,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2834,7 +2835,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2857,7 +2858,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2880,7 +2881,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2903,7 +2904,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2926,7 +2927,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2949,7 +2950,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2972,7 +2973,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -2995,7 +2996,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3018,7 +3019,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3041,7 +3042,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3059,7 +3060,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, 
version=0, @@ -3077,7 +3078,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3095,7 +3096,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3113,7 +3114,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3131,7 +3132,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3149,7 +3150,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3167,7 +3168,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3185,7 +3186,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3203,7 +3204,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3221,7 +3222,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3239,7 +3240,7 @@ generation_size=100, metric=[Metrics.rouge1, 
Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3257,7 +3258,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3275,7 +3276,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3293,7 +3294,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3311,7 +3312,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3329,7 +3330,7 @@ generation_size=100, metric=[Metrics.rouge1, Metrics.rouge2, Metrics.rougeL], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3352,7 +3353,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3375,7 +3376,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3398,7 +3399,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3421,7 +3422,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3444,7 +3445,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3462,7 +3463,7 @@ generation_size=100, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3485,7 +3486,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3508,7 +3509,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3531,7 +3532,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3554,7 +3555,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3577,7 +3578,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3600,7 +3601,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3618,7 +3619,7 @@ generation_size=100, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3636,7 +3637,7 @@ generation_size=100, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3659,7 +3660,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3677,7 +3678,7 @@ generation_size=100, 
metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3700,7 +3701,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3723,7 +3724,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3746,7 +3747,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3769,7 +3770,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3792,7 +3793,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3815,7 +3816,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3838,7 +3839,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3861,7 +3862,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3884,7 +3885,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3907,7 +3908,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3925,7 +3926,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3943,7 +3944,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3961,7 +3962,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3979,7 +3980,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -3997,7 +3998,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4015,7 +4016,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4033,7 +4034,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4051,7 +4052,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4069,7 +4070,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4087,7 +4088,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4105,7 +4106,7 @@ generation_size=1, 
metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4123,7 +4124,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4141,7 +4142,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4159,7 +4160,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4177,7 +4178,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4195,7 +4196,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4213,7 +4214,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4231,7 +4232,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4249,7 +4250,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4267,7 +4268,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -4285,7 +4286,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4303,7 +4304,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4321,7 +4322,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4339,7 +4340,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4357,7 +4358,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4375,7 +4376,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4393,7 +4394,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4411,7 +4412,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4429,7 +4430,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4447,7 +4448,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], 
- output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4465,7 +4466,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4483,7 +4484,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4501,7 +4502,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4519,7 +4520,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4537,7 +4538,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4555,7 +4556,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4573,7 +4574,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4591,7 +4592,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4609,7 +4610,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4627,7 +4628,7 @@ generation_size=1, 
metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4645,7 +4646,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4663,7 +4664,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4681,7 +4682,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4699,7 +4700,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4717,7 +4718,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4735,7 +4736,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4753,7 +4754,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4771,7 +4772,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4789,7 +4790,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -4807,7 +4808,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4825,7 +4826,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4843,7 +4844,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4861,7 +4862,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4879,7 +4880,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4897,7 +4898,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4915,7 +4916,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4933,7 +4934,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4951,7 +4952,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4969,7 +4970,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], 
- output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -4987,7 +4988,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5005,7 +5006,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5023,7 +5024,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5041,7 +5042,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5059,7 +5060,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5077,7 +5078,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5095,7 +5096,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5113,7 +5114,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5131,7 +5132,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5149,7 +5150,7 @@ 
generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5167,7 +5168,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5185,7 +5186,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5203,7 +5204,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5221,7 +5222,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5239,7 +5240,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5257,7 +5258,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5275,7 +5276,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5293,7 +5294,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5311,7 +5312,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, 
frozen=False, trust_dataset=True, version=0, @@ -5329,7 +5330,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5347,7 +5348,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5365,7 +5366,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5383,7 +5384,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5401,7 +5402,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5419,7 +5420,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5437,7 +5438,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5455,7 +5456,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5473,7 +5474,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5491,7 +5492,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5509,7 +5510,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5527,7 +5528,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5545,7 +5546,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5563,7 +5564,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5581,7 +5582,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5599,7 +5600,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5617,7 +5618,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5635,7 +5636,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5653,7 +5654,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5671,7 +5672,7 @@ generation_size=1, 
metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5689,7 +5690,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5707,7 +5708,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5725,7 +5726,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5743,7 +5744,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5761,7 +5762,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5779,7 +5780,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5797,7 +5798,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5815,7 +5816,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5833,7 +5834,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -5851,7 +5852,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5869,7 +5870,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5887,7 +5888,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5905,7 +5906,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5923,7 +5924,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5941,7 +5942,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5959,7 +5960,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5977,7 +5978,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -5995,7 +5996,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6013,7 +6014,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], 
- output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6031,7 +6032,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6049,7 +6050,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6067,7 +6068,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6085,7 +6086,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6103,7 +6104,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6121,7 +6122,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6139,7 +6140,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6157,7 +6158,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6175,7 +6176,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6193,7 +6194,7 @@ 
generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6211,7 +6212,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6229,7 +6230,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6247,7 +6248,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6265,7 +6266,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6283,7 +6284,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6301,7 +6302,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6319,7 +6320,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6337,7 +6338,7 @@ generation_size=100, metric=[Metrics.prediction_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6355,7 +6356,7 @@ generation_size=100, metric=[Metrics.prediction_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -6373,7 +6374,7 @@ generation_size=100, metric=[Metrics.prediction_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6391,7 +6392,7 @@ generation_size=100, metric=[Metrics.prediction_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6409,7 +6410,7 @@ generation_size=100, metric=[Metrics.prediction_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6427,7 +6428,7 @@ generation_size=100, metric=[Metrics.prediction_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6450,7 +6451,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6473,7 +6474,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6491,7 +6492,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6509,7 +6510,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6527,7 +6528,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6545,7 +6546,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6563,7 +6564,7 @@ generation_size=1, 
metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6581,7 +6582,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6599,7 +6600,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6624,7 +6625,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6649,7 +6650,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6674,7 +6675,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6699,7 +6700,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6724,7 +6725,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6749,7 +6750,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6774,7 +6775,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6799,7 +6800,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6824,7 +6825,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ 
-6842,7 +6843,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6860,7 +6861,7 @@ generation_size=1, metric=[Metrics.rouge_t5, Metrics.bleu], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6878,7 +6879,7 @@ generation_size=1, metric=[Metrics.rouge_t5, Metrics.bleu, Metrics.loglikelihood_acc, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6896,7 +6897,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6919,7 +6920,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6937,7 +6938,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6955,7 +6956,7 @@ generation_size=100, metric=[Metrics.rouge_t5, Metrics.bleu, Metrics.perfect_exact_match], stop_sequence=[".", ";", "!", "?"], - output_regex="[^\\.\\?\\!\\;\\n]+", + answer_extractor=RegexAnswerExtractor([r"[^\.\?\!\;\n]+"]), trust_dataset=True, version=0, ) @@ -6972,7 +6973,7 @@ generation_size=1, metric=[Metrics.rouge_t5, Metrics.loglikelihood_acc, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -6990,7 +6991,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7008,7 +7009,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], -
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7026,7 +7027,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7044,7 +7045,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7062,7 +7063,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7080,7 +7081,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7098,7 +7099,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7116,7 +7117,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7134,7 +7135,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7152,7 +7153,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7170,7 +7171,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7188,7 +7189,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7206,7 +7207,7 @@ generation_size=100, metric=[Metrics.copyright], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7224,7 +7225,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7242,7 +7243,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7260,7 +7261,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7278,7 +7279,7 @@ generation_size=100, metric=[Metrics.copyright], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7296,7 +7297,7 @@ generation_size=10, metric=[Metrics.perfect_exact_match, Metrics.f1_score], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7314,7 +7315,7 @@ generation_size=10, metric=[Metrics.perfect_exact_match, Metrics.f1_score], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7332,7 +7333,7 @@ generation_size=128, metric=[Metrics.exact_match, Metrics.quasi_exact_match, Metrics.f1_score, Metrics.rougeL, "bleu_1", "bleu_4"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7350,7 +7351,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7368,7 +7369,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7386,7 +7387,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7404,7 +7405,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7422,7 +7423,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7440,7 +7441,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7458,7 +7459,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7476,7 +7477,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7494,7 +7495,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7512,7 +7513,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7530,7 +7531,7 @@ generation_size=None, metric=[Metrics.drop], stop_sequence=["."], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7548,7 +7549,7 @@ generation_size=5, metric=[Metrics.exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7566,7 +7567,7 @@ generation_size=5, metric=[Metrics.exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ 
-7584,7 +7585,7 @@ generation_size=5, metric=[Metrics.exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7602,7 +7603,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7620,7 +7621,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7638,7 +7639,7 @@ generation_size=1, metric=[Metrics.rouge_t5, Metrics.bleu, Metrics.loglikelihood_acc, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7656,7 +7657,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7674,7 +7675,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7692,7 +7693,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7710,7 +7711,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7728,7 +7729,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7746,7 +7747,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7769,7 +7770,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7792,7 +7793,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7815,7 +7816,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7838,7 +7839,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7861,7 +7862,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7884,7 +7885,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7907,7 +7908,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7930,7 +7931,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7953,7 +7954,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7976,7 +7977,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -7999,7 +8000,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8022,7 +8023,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, 
frozen=False, trust_dataset=True, version=0, @@ -8045,7 +8046,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8068,7 +8069,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8091,7 +8092,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8109,7 +8110,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8127,7 +8128,7 @@ generation_size=5, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8145,7 +8146,7 @@ generation_size=5, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8163,7 +8164,7 @@ generation_size=5, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8181,7 +8182,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8199,7 +8200,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8217,7 +8218,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8235,7 +8236,7 @@ generation_size=1, 
metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8253,7 +8254,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8271,7 +8272,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.bleurt], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8289,7 +8290,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8307,7 +8308,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8325,7 +8326,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8343,7 +8344,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8361,7 +8362,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8379,7 +8380,7 @@ generation_size=1, metric=[Metrics.rouge_t5, Metrics.bleu, Metrics.loglikelihood_acc, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8397,7 +8398,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token, "mcc_single_token"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8415,7 +8416,7 @@ 
generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8433,7 +8434,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8451,7 +8452,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, "loglikelihood_f1"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8469,7 +8470,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8487,7 +8488,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, "loglikelihood_f1"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8505,7 +8506,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8523,7 +8524,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8541,7 +8542,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8559,7 +8560,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8577,7 +8578,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8595,7 
+8596,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8613,7 +8614,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8631,7 +8632,7 @@ generation_size=256, metric=[Metrics.quasi_exact_match_gsm8k], stop_sequence=["Question=", "Question", "="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8649,7 +8650,7 @@ generation_size=256, metric=[Metrics.quasi_exact_match_gsm8k, Metrics.maj_at_8_gsm8k], stop_sequence=["Question="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8667,7 +8668,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8685,7 +8686,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8703,7 +8704,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8726,7 +8727,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8744,7 +8745,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8762,7 +8763,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8780,7 +8781,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8798,7 +8799,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8816,7 +8817,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8834,7 +8835,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8852,7 +8853,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8870,7 +8871,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8895,7 +8896,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8920,7 +8921,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8938,7 +8939,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8956,7 +8957,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8974,7 +8975,7 @@ generation_size=1, 
metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -8997,7 +8998,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9020,7 +9021,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9043,7 +9044,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9066,7 +9067,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9089,7 +9090,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9112,7 +9113,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9130,7 +9131,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9148,7 +9149,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9166,7 +9167,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9184,7 +9185,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9202,7 +9203,7 @@ 
generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9220,7 +9221,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9238,7 +9239,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9256,7 +9257,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9274,7 +9275,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9292,7 +9293,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9310,7 +9311,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9328,7 +9329,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9346,7 +9347,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9364,7 +9365,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9382,7 +9383,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9400,7 +9401,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9418,7 +9419,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9436,7 +9437,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9454,7 +9455,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9472,7 +9473,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9490,7 +9491,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9508,7 +9509,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9526,7 +9527,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9544,7 +9545,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, 
version=0, @@ -9562,7 +9563,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9580,7 +9581,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9598,7 +9599,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9616,7 +9617,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9634,7 +9635,7 @@ generation_size=10, metric=[Metrics.target_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9652,7 +9653,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9670,7 +9671,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9695,7 +9696,7 @@ Metrics.bert_score, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9720,7 +9721,7 @@ Metrics.bert_score, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9745,7 +9746,7 @@ Metrics.bert_score, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9769,7 +9770,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -9793,7 +9794,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9817,7 +9818,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9841,7 +9842,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9865,7 +9866,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9889,7 +9890,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9913,7 +9914,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9937,7 +9938,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9961,7 +9962,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -9985,7 +9986,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10009,7 +10010,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10033,7 +10034,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10057,7 +10058,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10081,7 
+10082,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10099,7 +10100,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match, Metrics.f1_score], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10123,7 +10124,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10147,7 +10148,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10171,7 +10172,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10195,7 +10196,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10219,7 +10220,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10243,7 +10244,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10267,7 +10268,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10291,7 +10292,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10315,7 +10316,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10339,7 +10340,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -10363,7 +10364,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10381,7 +10382,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10399,7 +10400,7 @@ generation_size=100, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match], stop_sequence=None, - output_regex="[^\\.\\?\\!\\;\\n]+", + answer_extractor=RegexAnswerExtractor([r"[^\.\?\!\;\n]+"]), trust_dataset=True, version=0, ) @@ -10416,7 +10417,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10434,7 +10435,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10452,7 +10453,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10470,7 +10471,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10488,7 +10489,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10506,7 +10507,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10529,7 +10530,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False,
trust_dataset=True, version=0, @@ -10552,7 +10553,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10575,7 +10576,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10598,7 +10599,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10621,7 +10622,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10639,7 +10640,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=1, @@ -10657,7 +10658,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=1, @@ -10675,7 +10676,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=1, @@ -10693,7 +10694,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=1, @@ -10711,7 +10712,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=1, @@ -10729,7 +10730,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=1, @@ -10747,7 +10748,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=1, @@ -10765,7 +10766,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10783,7 +10784,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10801,7 +10802,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10819,7 +10820,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10837,7 +10838,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10855,7 +10856,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10873,7 +10874,7 @@ generation_size=2048, metric=[Metrics.quasi_exact_match_math, Metrics.maj_at_4_math], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10891,7 +10892,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, 
frozen=False, trust_dataset=True, version=0, @@ -10909,7 +10910,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10927,7 +10928,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10945,7 +10946,7 @@ generation_size=128, metric=[Metrics.exact_match, Metrics.quasi_exact_match, Metrics.f1_score, Metrics.rougeL, "bleu_1", "bleu_4"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10963,7 +10964,7 @@ generation_size=128, metric=[Metrics.exact_match, Metrics.quasi_exact_match, Metrics.f1_score, Metrics.rougeL, "bleu_1", "bleu_4"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -10981,7 +10982,7 @@ generation_size=128, metric=[Metrics.exact_match, Metrics.quasi_exact_match, Metrics.f1_score, Metrics.rougeL, "bleu_1", "bleu_4"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11005,7 +11006,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11023,7 +11024,7 @@ generation_size=512, metric=[Metrics.exact_match, Metrics.quasi_exact_match, Metrics.f1_score, Metrics.rougeL, "bleu_1", "bleu_4"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11047,7 +11048,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11065,7 +11066,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11083,7 +11084,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11101,7 +11102,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "Question="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11119,7 +11120,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "Pregunta="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11137,7 +11138,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "Question="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11155,7 +11156,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "Frage="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11173,7 +11174,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "\u0417\u0430\u0434\u0430\u0447\u0430="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11191,7 +11192,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "\u95ee\u9898="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11209,7 +11210,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "\u554f\u984c="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11227,7 +11228,7 @@ generation_size=None, 
metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "\u0e42\u0e08\u0e17\u0e22\u0e4c="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11245,7 +11246,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "Swali="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11263,7 +11264,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11281,7 +11282,7 @@ generation_size=None, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n", "=", "\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28="], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11299,7 +11300,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.rouge_t5], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11317,7 +11318,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11335,7 +11336,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11353,7 +11354,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11371,7 +11372,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11394,7 +11395,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11412,7 +11413,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11430,7 +11431,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11453,7 +11454,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11471,7 +11472,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11489,7 +11490,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11512,7 +11513,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11530,7 +11531,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11548,7 +11549,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11571,7 +11572,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11589,7 +11590,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11607,7 +11608,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11630,7 +11631,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11648,7 +11649,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11666,7 +11667,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11689,7 +11690,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11707,7 +11708,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11725,7 +11726,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11748,7 +11749,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11766,7 +11767,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11784,7 +11785,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11807,7 +11808,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11825,7 +11826,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11843,7 +11844,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11866,7 +11867,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11884,7 +11885,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11902,7 +11903,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11925,7 +11926,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11943,7 +11944,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11961,7 +11962,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -11984,7 +11985,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12002,7 +12003,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12020,7 +12021,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12043,7 +12044,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12061,7 +12062,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12079,7 +12080,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12102,7 +12103,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12120,7 +12121,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12138,7 +12139,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12161,7 +12162,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12179,7 +12180,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12197,7 +12198,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12220,7 +12221,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12238,7 +12239,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12256,7 +12257,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12279,7 +12280,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12297,7 +12298,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12315,7 +12316,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12338,7 +12339,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12356,7 +12357,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12374,7 +12375,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12397,7 +12398,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12415,7 +12416,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12433,7 +12434,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12456,7 +12457,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12474,7 +12475,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12492,7 +12493,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12515,7 +12516,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12533,7 +12534,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12551,7 +12552,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12574,7 +12575,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12592,7 +12593,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12610,7 +12611,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12633,7 +12634,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12651,7 +12652,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12669,7 +12670,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12692,7 +12693,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12710,7 +12711,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12728,7 +12729,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12751,7 +12752,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12769,7 +12770,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12787,7 +12788,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12810,7 +12811,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12828,7 +12829,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12846,7 +12847,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12869,7 +12870,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12887,7 +12888,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12905,7 +12906,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12928,7 +12929,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12946,7 +12947,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12964,7 +12965,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -12987,7 +12988,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13005,7 +13006,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13023,7 +13024,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13046,7 +13047,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13064,7 +13065,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13082,7 +13083,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13105,7 +13106,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13123,7 +13124,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13141,7 +13142,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13164,7 +13165,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13182,7 +13183,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13200,7 +13201,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13223,7 +13224,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13241,7 +13242,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13259,7 +13260,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13282,7 +13283,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13300,7 +13301,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13318,7 +13319,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13341,7 +13342,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13359,7 +13360,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13377,7 +13378,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13400,7 +13401,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13418,7 +13419,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13436,7 +13437,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13459,7 +13460,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13477,7 +13478,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13495,7 +13496,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13518,7 +13519,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13536,7 +13537,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13554,7 +13555,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13577,7 +13578,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13595,7 +13596,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13613,7 +13614,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13636,7 +13637,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13654,7 +13655,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13672,7 +13673,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13695,7 +13696,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13713,7 +13714,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13731,7 +13732,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13754,7 +13755,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13772,7 +13773,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13790,7 +13791,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13813,7 +13814,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13831,7 +13832,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13849,7 +13850,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13872,7 +13873,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13890,7 +13891,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13908,7 +13909,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13931,7 +13932,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13949,7 +13950,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13967,7 +13968,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -13990,7 +13991,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14008,7 +14009,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14026,7 +14027,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14049,7 +14050,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14067,7 +14068,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14085,7 +14086,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14108,7 +14109,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14126,7 +14127,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14144,7 +14145,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14167,7 +14168,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14185,7 +14186,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14203,7 +14204,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14226,7 +14227,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14244,7 +14245,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14262,7 +14263,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14285,7 +14286,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14303,7 +14304,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14321,7 +14322,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14344,7 +14345,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14362,7 +14363,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14380,7 +14381,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14403,7 +14404,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14421,7 +14422,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14439,7 +14440,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14462,7 +14463,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14480,7 +14481,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14498,7 +14499,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14521,7 +14522,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14539,7 +14540,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14557,7 +14558,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14580,7 +14581,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14598,7 +14599,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14616,7 +14617,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14639,7 +14640,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14657,7 +14658,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14675,7 +14676,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14698,7 +14699,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14716,7 +14717,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14734,7 +14735,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14752,7 +14753,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14770,7 +14771,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14788,7 +14789,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14806,7 +14807,7 @@ generation_size=200, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14824,7 +14825,7 @@ generation_size=200, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14842,7 +14843,7 @@ generation_size=200, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14860,7 +14861,7 @@ generation_size=200, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14878,7 +14879,7 @@ generation_size=1, 
metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14896,7 +14897,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14914,7 +14915,7 @@ generation_size=-1, metric=[Metrics.recall_at_1, Metrics.recall_at_2, Metrics.mrr], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14932,7 +14933,7 @@ generation_size=-1, metric=[Metrics.recall_at_1, Metrics.recall_at_2, Metrics.mrr], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14950,7 +14951,7 @@ generation_size=100, metric=[Metrics.exact_match, Metrics.quasi_exact_match, Metrics.f1_score, Metrics.rougeL, "bleu_1", "bleu_4"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14968,7 +14969,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -14986,7 +14987,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15004,7 +15005,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15022,7 +15023,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15040,7 +15041,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, 
frozen=False, trust_dataset=True, version=0, @@ -15058,7 +15059,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15076,7 +15077,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15094,7 +15095,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15112,7 +15113,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15130,7 +15131,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15148,7 +15149,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15166,7 +15167,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.quasi_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15184,7 +15185,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15202,7 +15203,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15225,7 +15226,7 @@ Metrics.prefix_quasi_exact_match, ], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15243,7 +15244,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15261,7 +15262,8 @@ generation_size=20, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex="([-+]?\\d+)[.]0,1)$", + # Original regex seems to be erroneous: ([-+]?\\d+)[.]0,1)$ + answer_extractor=RegexAnswerExtractor([r"([-+]?\d+)[.]0,1$"]), trust_dataset=True, version=0, ) @@ -15278,7 +15280,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15296,7 +15298,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15314,7 +15316,7 @@ generation_size=100, metric=[Metrics.perfect_exact_match], stop_sequence=None, - output_regex="[^\\.\\?\\!\\;\\n]+", + answer_extractor=RegexAnswerExtractor([r"[^\.\?\!\;\n]+"]), trust_dataset=True, version=0, ) @@ -15331,7 +15333,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15349,7 +15351,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15367,7 +15369,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15385,7 +15387,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15403,7 +15405,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15421,7 +15423,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15439,7 +15441,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15457,7 +15459,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15480,7 +15482,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15498,7 +15500,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15516,7 +15518,7 @@ generation_size=1, metric=[Metrics.f1_score], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15534,7 +15536,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15552,7 +15554,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15570,7 +15572,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15593,7 +15595,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15611,7 +15613,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15629,7 +15631,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15647,7 +15649,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15665,7 +15667,7 @@ generation_size=1, metric=[Metrics.bleurt, Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15683,7 +15685,7 @@ generation_size=20, metric=[Metrics.f1_score_quasi], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15701,7 +15703,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15719,7 +15721,7 @@ generation_size=100, metric=[Metrics.exact_match, Metrics.quasi_exact_match, Metrics.f1_score], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15737,7 +15739,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, 
frozen=False, trust_dataset=True, version=0, @@ -15755,7 +15757,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15780,7 +15782,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15805,7 +15807,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15830,7 +15832,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15855,7 +15857,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15880,7 +15882,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15905,7 +15907,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15930,7 +15932,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15955,7 +15957,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -15980,7 +15982,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16005,7 +16007,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16030,7 +16032,7 @@ Metrics.f1_score_micro, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, 
frozen=False, trust_dataset=True, version=0, @@ -16048,7 +16050,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16066,7 +16068,7 @@ generation_size=20, metric=[Metrics.prediction_perplexity], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16084,7 +16086,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16102,7 +16104,7 @@ generation_size=100, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16120,7 +16122,7 @@ generation_size=1, metric=[Metrics.rouge_t5, Metrics.bleu, Metrics.loglikelihood_acc, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16138,7 +16140,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16156,7 +16158,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16174,7 +16176,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16192,7 +16194,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16210,7 +16212,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16228,7 +16230,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16246,7 +16248,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16264,7 +16266,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16282,7 +16284,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16300,7 +16302,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16318,7 +16320,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16336,7 +16338,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16354,7 +16356,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16372,7 +16374,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16390,7 +16392,7 @@ generation_size=1, 
metric=[Metrics.bleu, Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16408,7 +16410,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16426,7 +16428,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16449,7 +16451,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16467,7 +16469,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16485,7 +16487,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16503,7 +16505,7 @@ generation_size=1, metric=[Metrics.f1_score_macro], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16521,7 +16523,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16539,7 +16541,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16557,7 +16559,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16575,7 +16577,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16593,7 +16595,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16611,7 +16613,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16629,7 +16631,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16654,7 +16656,7 @@ Metrics.bert_score, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16679,7 +16681,7 @@ Metrics.bert_score, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16704,7 +16706,7 @@ Metrics.bert_score, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16722,7 +16724,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16740,7 +16742,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc_single_token, "multi_f1_numeric"], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16758,7 +16760,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16776,7 +16778,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16794,7 +16796,7 @@ 
generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16812,7 +16814,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16830,7 +16832,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc_single_token], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16848,7 +16850,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16866,7 +16868,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm_nospace], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16884,7 +16886,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16902,7 +16904,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16925,7 +16927,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16943,7 +16945,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.f1_score], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16961,7 +16963,7 @@ generation_size=20, metric=[Metrics.exact_match, Metrics.f1_score], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -16984,7 +16986,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17007,7 +17009,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17025,7 +17027,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17043,7 +17045,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17061,7 +17063,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17079,7 +17081,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17097,7 +17099,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17115,7 +17117,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17133,7 +17135,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17151,7 +17153,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17169,7 +17171,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17187,7 +17189,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17205,7 +17207,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17223,7 +17225,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17241,7 +17243,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17259,7 +17261,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17277,7 +17279,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17295,7 +17297,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -17313,7 +17315,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17331,7 +17333,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17349,7 +17351,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17367,7 +17369,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17385,7 +17387,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17403,7 +17405,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17421,7 +17423,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17439,7 +17441,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17457,7 +17459,7 @@ generation_size=-1, 
metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17475,7 +17477,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17493,7 +17495,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17511,7 +17513,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17529,7 +17531,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17547,7 +17549,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17565,7 +17567,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17583,7 +17585,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17601,7 +17603,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17619,7 +17621,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17637,7 +17639,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17655,7 +17657,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17673,7 +17675,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17691,7 +17693,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17709,7 +17711,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17727,7 +17729,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17745,7 +17747,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -17763,7 +17765,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17781,7 +17783,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17799,7 +17801,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17817,7 +17819,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17835,7 +17837,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc, Metrics.loglikelihood_acc_norm], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17853,7 +17855,7 @@ generation_size=1, metric=[Metrics.bleu, Metrics.rouge_t5, Metrics.loglikelihood_acc, Metrics.bleurt], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17871,7 +17873,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17889,7 +17891,7 @@ generation_size=20, metric=[Metrics.quasi_exact_match_triviaqa], stop_sequence=["\n", ".", ","], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17907,7 +17909,7 @@ generation_size=200, metric=[Metrics.bleu, Metrics.rouge_t5], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, 
version=0, @@ -17925,7 +17927,7 @@ generation_size=-1, metric=[Metrics.truthfulqa_mc_metrics], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17949,7 +17951,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17967,7 +17969,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -17985,7 +17987,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18003,7 +18005,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18021,7 +18023,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18039,7 +18041,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18057,7 +18059,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18075,7 +18077,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18093,7 +18095,7 @@ generation_size=5, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -18111,7 +18113,7 @@ generation_size=5, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18129,7 +18131,7 @@ generation_size=5, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18147,7 +18149,7 @@ generation_size=5, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18165,7 +18167,7 @@ generation_size=5, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18183,7 +18185,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18201,7 +18203,7 @@ generation_size=-1, metric=[Metrics.acc_golds_likelihood], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18219,7 +18221,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18237,7 +18239,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18260,7 +18262,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18283,7 +18285,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18306,7 +18308,7 @@ Metrics.prefix_quasi_exact_match, ], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18329,7 +18331,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18352,7 +18354,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18375,7 +18377,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18398,7 +18400,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18421,7 +18423,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18444,7 +18446,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18467,7 +18469,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18490,7 +18492,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18513,7 +18515,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18536,7 +18538,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18559,7 +18561,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -18582,7 +18584,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18605,7 +18607,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18628,7 +18630,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18651,7 +18653,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18674,7 +18676,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18697,7 +18699,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18720,7 +18722,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18743,7 +18745,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18766,7 +18768,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18789,7 +18791,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18812,7 +18814,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18835,7 +18837,7 @@ 
Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18858,7 +18860,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18881,7 +18883,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18904,7 +18906,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18927,7 +18929,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18950,7 +18952,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18973,7 +18975,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -18996,7 +18998,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19019,7 +19021,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19042,7 +19044,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19065,7 +19067,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19088,7 +19090,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19111,7 +19113,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19134,7 +19136,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19157,7 +19159,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19180,7 +19182,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19203,7 +19205,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19226,7 +19228,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19249,7 +19251,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19272,7 +19274,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19295,7 +19297,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19318,7 +19320,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19341,7 +19343,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19364,7 
+19366,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19387,7 +19389,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19410,7 +19412,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19433,7 +19435,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19456,7 +19458,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19479,7 +19481,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19502,7 +19504,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19525,7 +19527,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19548,7 +19550,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19571,7 +19573,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19594,7 +19596,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19617,7 +19619,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19640,7 +19642,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19663,7 +19665,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19686,7 +19688,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19709,7 +19711,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19732,7 +19734,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19755,7 +19757,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19778,7 +19780,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19801,7 +19803,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19824,7 +19826,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19847,7 +19849,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19870,7 +19872,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, 
version=0, @@ -19893,7 +19895,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19916,7 +19918,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19939,7 +19941,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19962,7 +19964,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -19985,7 +19987,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20008,7 +20010,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20031,7 +20033,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20054,7 +20056,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20077,7 +20079,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20100,7 +20102,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20123,7 +20125,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20146,7 +20148,7 @@ Metrics.prefix_quasi_exact_match, ], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20169,7 +20171,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20192,7 +20194,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20215,7 +20217,7 @@ Metrics.prefix_quasi_exact_match, ], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20233,7 +20235,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20251,7 +20253,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20269,7 +20271,7 @@ generation_size=-1, metric=[Metrics.word_perplexity, Metrics.byte_perplexity, Metrics.bits_per_byte], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20287,7 +20289,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20305,7 +20307,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20323,7 +20325,7 @@ generation_size=1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20341,7 +20343,7 @@ generation_size=None, metric=[Metrics.bleu, 
Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20359,7 +20361,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20377,7 +20379,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20395,7 +20397,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20413,7 +20415,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20431,7 +20433,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20449,7 +20451,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20467,7 +20469,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20485,7 +20487,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20503,7 +20505,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -20521,7 +20523,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20539,7 +20541,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20557,7 +20559,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20575,7 +20577,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20593,7 +20595,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20611,7 +20613,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20629,7 +20631,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20647,7 +20649,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20665,7 +20667,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20683,7 +20685,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, 
Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20701,7 +20703,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20719,7 +20721,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20737,7 +20739,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20755,7 +20757,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20773,7 +20775,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20791,7 +20793,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20809,7 +20811,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20827,7 +20829,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20845,7 +20847,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, 
version=0, @@ -20863,7 +20865,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20881,7 +20883,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20899,7 +20901,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20917,7 +20919,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20935,7 +20937,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20953,7 +20955,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20971,7 +20973,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -20989,7 +20991,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21007,7 +21009,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21025,7 +21027,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21043,7 +21045,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21061,7 +21063,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21079,7 +21081,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21097,7 +21099,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21115,7 +21117,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21133,7 +21135,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21151,7 +21153,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21169,7 +21171,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21187,7 +21189,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ 
-21205,7 +21207,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21223,7 +21225,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21241,7 +21243,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21259,7 +21261,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21277,7 +21279,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21295,7 +21297,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21313,7 +21315,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21331,7 +21333,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21349,7 +21351,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21367,7 +21369,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21385,7 +21387,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21403,7 +21405,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21421,7 +21423,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21439,7 +21441,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21457,7 +21459,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21475,7 +21477,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21493,7 +21495,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21511,7 +21513,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21529,7 +21531,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21547,7 +21549,7 @@ 
generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21565,7 +21567,7 @@ generation_size=100, metric=[Metrics.bleu], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21583,7 +21585,7 @@ generation_size=100, metric=[Metrics.bleu], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21601,7 +21603,7 @@ generation_size=100, metric=[Metrics.bleu], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21619,7 +21621,7 @@ generation_size=100, metric=[Metrics.bleu], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21637,7 +21639,7 @@ generation_size=100, metric=[Metrics.bleu], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21655,7 +21657,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21673,7 +21675,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21691,7 +21693,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21709,7 +21711,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21727,7 +21729,7 @@ generation_size=None, metric=[Metrics.bleu, 
Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21745,7 +21747,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21763,7 +21765,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21781,7 +21783,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21799,7 +21801,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21817,7 +21819,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21835,7 +21837,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21853,7 +21855,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21871,7 +21873,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21889,7 +21891,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, 
trust_dataset=True, version=0, @@ -21907,7 +21909,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21925,7 +21927,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21943,7 +21945,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21961,7 +21963,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21979,7 +21981,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -21997,7 +21999,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22015,7 +22017,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22033,7 +22035,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22051,7 +22053,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22069,7 +22071,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, 
Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22087,7 +22089,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22105,7 +22107,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22123,7 +22125,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22141,7 +22143,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22159,7 +22161,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22177,7 +22179,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22195,7 +22197,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22213,7 +22215,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22231,7 +22233,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, 
version=0, @@ -22249,7 +22251,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22267,7 +22269,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22285,7 +22287,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22303,7 +22305,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22321,7 +22323,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22339,7 +22341,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22357,7 +22359,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22375,7 +22377,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22393,7 +22395,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22411,7 +22413,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22429,7 +22431,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22447,7 +22449,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22465,7 +22467,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22483,7 +22485,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22501,7 +22503,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22519,7 +22521,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22537,7 +22539,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22555,7 +22557,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22573,7 +22575,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ 
-22591,7 +22593,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22609,7 +22611,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22627,7 +22629,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22645,7 +22647,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22663,7 +22665,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22681,7 +22683,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22699,7 +22701,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22717,7 +22719,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22735,7 +22737,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22753,7 +22755,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - 
output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22771,7 +22773,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22789,7 +22791,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22807,7 +22809,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22825,7 +22827,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22843,7 +22845,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22861,7 +22863,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22879,7 +22881,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22897,7 +22899,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22915,7 +22917,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22933,7 +22935,7 @@ 
generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22951,7 +22953,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22969,7 +22971,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -22987,7 +22989,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23005,7 +23007,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23023,7 +23025,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23041,7 +23043,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23059,7 +23061,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23077,7 +23079,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23095,7 +23097,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + 
answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23113,7 +23115,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23131,7 +23133,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23149,7 +23151,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23167,7 +23169,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23185,7 +23187,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23203,7 +23205,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23221,7 +23223,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23239,7 +23241,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23257,7 +23259,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23275,7 +23277,7 @@ generation_size=None, 
metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23293,7 +23295,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23311,7 +23313,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23329,7 +23331,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23347,7 +23349,7 @@ generation_size=None, metric=[Metrics.bleu, Metrics.chrf, Metrics.ter], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23365,7 +23367,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23383,7 +23385,7 @@ generation_size=1, metric=[Metrics.perfect_exact_match], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23401,7 +23403,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23419,7 +23421,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23437,7 +23439,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23455,7 +23457,7 @@ 
generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23473,7 +23475,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23491,7 +23493,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23509,7 +23511,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23527,7 +23529,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23545,7 +23547,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23563,7 +23565,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23581,7 +23583,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23599,7 +23601,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23617,7 +23619,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23635,7 +23637,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], 
stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23653,7 +23655,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23671,7 +23673,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23689,7 +23691,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23707,7 +23709,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23725,7 +23727,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23743,7 +23745,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23761,7 +23763,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23779,7 +23781,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23797,7 +23799,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23815,7 +23817,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, 
frozen=False, trust_dataset=True, version=0, @@ -23833,7 +23835,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23851,7 +23853,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23869,7 +23871,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23887,7 +23889,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23905,7 +23907,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, @@ -23923,7 +23925,7 @@ generation_size=-1, metric=[Metrics.loglikelihood_acc], stop_sequence=["\n"], - output_regex=None, + answer_extractor=None, frozen=False, trust_dataset=True, version=0, diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py index bb33535f..41ff3613 100644 --- a/src/lighteval/tasks/lighteval_task.py +++ b/src/lighteval/tasks/lighteval_task.py @@ -24,6 +24,7 @@ import inspect import random from dataclasses import asdict, dataclass, field +from functools import partial from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple from datasets import DatasetDict @@ -42,6 +43,7 @@ ) from lighteval.metrics.metrics import Metric, MetricCategory, Metrics from lighteval.models.base_model import BaseModel +from lighteval.models.model_output import AnswerExtractor from lighteval.tasks.prompt_manager import PromptManager from lighteval.tasks.requests import ( Doc, @@ -84,7 +86,7 @@ class LightevalTaskConfig: 
truncated_num_docs (bool): Whether less than the total number of documents were used trust_dataset (bool): Whether to trust the dataset at execution or not version (int): The version of the task. Defaults to 0. Can be increased if the underlying dataset or the prompt changes. - output_regex (str) + answer_extractor (AnswerExtractor): The method by which we extract the model answer from its generated output. Defaults to None. frozen (bool) """ @@ -110,7 +112,7 @@ class LightevalTaskConfig: generation_size: Optional[int] = None generation_grammar: Optional[TextGenerationInputGrammarType] = None stop_sequence: Optional[ListLike[str]] = None - output_regex: Optional[str] = None + answer_extractor: Optional[AnswerExtractor] = None num_samples: Optional[list[int]] = None suite: ListLike[str] = field(default_factory=lambda: ["custom"]) @@ -227,6 +229,7 @@ def __init__( # noqa: C901 self.generation_size = cfg.generation_size self.generation_grammar = cfg.generation_grammar self.stop_sequence = cfg.stop_sequence + self.answer_extractor = cfg.answer_extractor self.must_remove_duplicate_docs = cfg.must_remove_duplicate_docs @property @@ -504,7 +507,19 @@ def get_metric_method_from_category(self, metric_category): if not self.has_metric_category[metric_category]: raise ValueError(f"Requested a metric category {metric_category} absent from the task list.") - return LightevalTask._get_metric_method_from_category(metric_category) + metric_method = LightevalTask._get_metric_method_from_category(metric_category) + # Bad hack. I had no other way. 
+ if ( + metric_category + in [ + MetricCategory.GENERATIVE, + MetricCategory.GENERATIVE_SAMPLING, + MetricCategory.GENERATIVE_LOGPROB, + ] + and self.answer_extractor + ): + metric_method = partial(metric_method, answer_extractor=self.answer_extractor) + return metric_method @staticmethod def _get_metric_method_from_category(metric_category): diff --git a/tests/test_answer_extractor.py b/tests/test_answer_extractor.py new file mode 100644 index 00000000..69d53856 --- /dev/null +++ b/tests/test_answer_extractor.py @@ -0,0 +1,133 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import random +import re +from unittest.mock import patch + +import pytest + +from lighteval.logging.evaluation_tracker import EvaluationTracker +from lighteval.metrics.metrics import Metrics +from lighteval.models.base_model import BaseModel +from lighteval.models.model_config import BaseModelConfig, EnvConfig +from lighteval.models.model_output import RegexAnswerExtractor +from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters +from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig, create_requests_from_tasks +from lighteval.tasks.requests import ( + Doc, +) + + +def test_answer_extractor(): + # MMLU-Pro + extractor = RegexAnswerExtractor( + [re.compile(r"answer is \(?\(([A-J])\)?\)"), re.compile(r"\.*\[aA\]nswer:\s*\(([A-J])\)")], fallback="random" + ) + + assert extractor("answer is (C)", ["A", "B", "C", "D"]) == "C" + + random.seed(41) + fallback_choice = random.choice(["A", "B", "C", "D"]) + random.seed(41) + assert extractor("answer is (F)", ["A", "B", "C", "D"]) == fallback_choice + + extractor.fallback = "keep" + assert extractor("I don't know", ["A", "B", "C", "D"]) == "I don't know" + + extractor.fallback = 0 + assert extractor("I don't know", ["A", "B", "C", "D"]) == "A" + + extractor.fallback = "empty_string" + assert extractor("I don't know", ["A", "B", "C", "D"]) == "" + + +@pytest.fixture(scope="module") +def base_model() -> BaseModel: + config = BaseModelConfig("hf-internal-testing/tiny-random-LlamaForCausalLM") + return BaseModel(config, EnvConfig(".")) + + +@pytest.fixture +def task() -> LightevalTask: + eval_docs = [ + Doc( + query="Tell me:\n\nHow many eyes do you have?", + choices=["2", "3"], + instruction="Tell me:\n\n", + gold_index=0, + ), + Doc( + query="Tell me:\n\nHow many hands do we have?", + choices=["2", "3"], + instruction="Tell me:\n\n", + gold_index=0, + ), + ] + task_config = LightevalTaskConfig( + name="test", + prompt_function=lambda _: _, + hf_repo="", + hf_subset="", + 
metric=[Metrics.exact_match], + answer_extractor=RegexAnswerExtractor([r"\w", r"\d"]), + generation_size=1, + stop_sequence=[], + ) + task = LightevalTask("test", task_config) + task._docs = eval_docs + return task + + +def test_integration(task: LightevalTask, base_model: BaseModel): + evaluation_tracker = EvaluationTracker(".") + pipeline_params = PipelineParameters( + env_config=EnvConfig("."), + launcher_type=ParallelismManager.NONE, + override_batch_size=0, + use_chat_template=False, + ) + with patch("lighteval.pipeline.Pipeline._init_tasks_and_requests"): + pipeline = Pipeline( + tasks="custom|test|0|0", + pipeline_parameters=pipeline_params, + evaluation_tracker=evaluation_tracker, + model=base_model, + ) + task_dict = {"custom|test": task} + evaluation_tracker.task_config_logger.log(task_dict) + pipeline.task_dict = task_dict + pipeline.task_names_list = ["custom|test"] + requests_dict, docs = create_requests_from_tasks( + task_dict=task_dict, + fewshot_dict={"custom|test": [(0, False)]}, + num_fewshot_seeds=pipeline_params.num_fewshot_seeds, + lm=base_model, + max_samples=pipeline_params.max_samples, + evaluation_tracker=evaluation_tracker, + use_chat_template=False, + system_prompt=pipeline_params.system_prompt, + ) + pipeline.requests = requests_dict + pipeline.docs = docs + evaluation_tracker.task_config_logger.log(task_dict) + pipeline.evaluate() From c7d2b5589c9751944526b82d7666f3d45fc1888d Mon Sep 17 00:00:00 2001 From: Sadra Barikbin Date: Fri, 11 Oct 2024 22:49:47 +0330 Subject: [PATCH 2/4] Remove redundant code, now that we're on the main --- src/lighteval/models/model_output.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/lighteval/models/model_output.py b/src/lighteval/models/model_output.py index f081b775..719db4f4 100644 --- a/src/lighteval/models/model_output.py +++ b/src/lighteval/models/model_output.py @@ -24,7 +24,7 @@ import re from abc import abstractmethod from dataclasses import dataclass, field 
-from typing import Any, Literal, Optional, Union +from typing import Literal, Optional, Union import torch @@ -91,16 +91,6 @@ class AnswerExtractor: def __call__(self, result: str): ... - @abstractmethod - def as_dict() -> dict: - ... - - # Bad hack. Thanks to LightevalTaskConfig's becoming dict in the beginning of the evaluation! - # Maybe it's fixed now in main branch. - @classmethod - def from_dict(cls, properties: dict[str, Any]) -> "AnswerExtractor": - return RegexAnswerExtractor(properties["regex_list"], properties["fallback"]) - class RegexAnswerExtractor(AnswerExtractor): def __init__( @@ -124,6 +114,3 @@ def __call__(self, result: str, choices: list[str]) -> str: return "" else: return choices[self.fallback] - - def as_dict(self) -> dict: - return {"regex_list": [p.pattern for p in self.regex_list], "fallback": self.fallback} From b802c0485dea00286c1c7ecb12798e4509bb71fd Mon Sep 17 00:00:00 2001 From: Sadra Barikbin Date: Fri, 11 Oct 2024 22:51:00 +0330 Subject: [PATCH 3/4] Fix AnswerExtractor type ann. --- src/lighteval/models/model_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/model_output.py b/src/lighteval/models/model_output.py index 719db4f4..4a1ea35d 100644 --- a/src/lighteval/models/model_output.py +++ b/src/lighteval/models/model_output.py @@ -88,7 +88,7 @@ class Batch: class AnswerExtractor: @abstractmethod - def __call__(self, result: str): + def __call__(self, result: str, choices: list[str]) -> str: ... 
From 3218483299433935ddaca9b5658578220f6cb044 Mon Sep 17 00:00:00 2001 From: Sadra Barikbin Date: Mon, 14 Oct 2024 15:46:46 +0330 Subject: [PATCH 4/4] Update src/lighteval/metrics/__init__.py Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --- src/lighteval/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/metrics/__init__.py b/src/lighteval/metrics/__init__.py index 62b88927..29db9198 100644 --- a/src/lighteval/metrics/__init__.py +++ b/src/lighteval/metrics/__init__.py @@ -106,7 +106,7 @@ def apply_generative_metric( # noqa: C901 preds = [] for pred_raw in preds_raw: - if answer_extractor: + if answer_extractor is not None: pred = answer_extractor(pred_raw, formatted_doc.choices) else: pred = pred_raw