Skip to content

Commit

Permalink
Merge pull request #32 from neulab/evaluate_file_api_call
Browse files Browse the repository at this point in the history
Evaluate system file simplified API
  • Loading branch information
neubig authored Oct 11, 2022
2 parents e7917be + 9d0c94c commit 6ac0f2f
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 136 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ python -m explainaboard_client.cli.evaluate_system \
--email $EB_EMAIL --api_key $EB_API_KEY \
--task [TASK_ID] \
--system_name [MODEL_NAME] \
--system_output [SYSTEM_OUTPUT] --output_file_type [FILE_TYPE] \
--system_output_file [SYSTEM_OUTPUT_FILE] --system_output_file_type [FILE_TYPE] \
--dataset [DATASET] --sub_dataset [SUB_DATASET] --split [SPLIT] \
--source_language [SOURCE] --target_language [TARGET] \
[--public]
Expand All @@ -53,7 +53,7 @@ python -m explainaboard_client.cli.evaluate_system \
You will need to fill in all the settings appropriately, for example:
* `[TASK_ID]` is the ID of the task you want to perform. A full list is [here](https://github.com/neulab/explainaboard_web/blob/main/backend/src/impl/tasks.py).
* `[MODEL_NAME]` is whatever name you want to give to your model.
* `[SYSTEM_OUTPUT]` is the file that you want to evaluate.
* `[SYSTEM_OUTPUT_FILE]` is the file that you want to evaluate.
* `[FILE_TYPE]` is the type of the file, "text", "tsv", "csv", "conll", or "json".
* `[DATASET]`, `[SUB_DATASET]` and `[SPLIT]` indicate which dataset you're evaluating
a system output for.
Expand All @@ -71,8 +71,8 @@ python -m explainaboard_client.cli.evaluate_system \
--email $EB_EMAIL --api_key $EB_API_KEY \
--task [TASK_ID] \
--system_name [MODEL_NAME] \
--system_output [SYSTEM_OUTPUT] --output_file_type [FILE_TYPE] \
--custom_dataset [CUSTOM_DATASET] --custom_dataset_file_type [FILE_TYPE] \
--system_output_file [SYSTEM_OUTPUT_FILE] --system_output_file_type [FILE_TYPE] \
--custom_dataset_file [CUSTOM_DATASET] --custom_dataset_file_type [FILE_TYPE] \
--source_language [SOURCE] --target_language [TARGET]
```

Expand Down
117 changes: 31 additions & 86 deletions explainaboard_client/cli/evaluate_system.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,7 @@
import argparse
import json

from explainaboard_api_client.model.system import System
from explainaboard_api_client.model.system_create_props import SystemCreateProps
from explainaboard_api_client.model.system_metadata import SystemMetadata
from explainaboard_api_client.model.system_output_props import SystemOutputProps
from explainaboard_client import Config, ExplainaboardClient
from explainaboard_client.tasks import (
DEFAULT_METRICS,
FileType,
infer_file_type,
TaskType,
)
from explainaboard_client.utils import generate_dataset_id
from explainaboard_client.tasks import FileType, TaskType


def main():
Expand All @@ -30,14 +19,6 @@ def main():
help="Email address used to sign in to ExplainaBoard",
)
parser.add_argument("--api_key", type=str, required=True, help="Your API key")
parser.add_argument(
"--server",
type=str,
required=False,
default="main",
choices=["main", "staging", "local"],
help='Which server to use, "main" should be sufficient',
)
# ---- System info
parser.add_argument(
"--task",
Expand All @@ -53,13 +34,13 @@ def main():
help="Name of the system that you are evaluating",
)
parser.add_argument(
"--system_output",
"--system_output_file",
type=str,
required=True,
help="Path to the system output file",
)
parser.add_argument(
"--output_file_type",
"--system_output_file_type",
type=str,
choices=FileType.list(),
help="File type of the system output (eg text/json/tsv/conll)",
Expand All @@ -82,7 +63,7 @@ def main():
help="The name of the dataset split to process",
)
dataset_group.add_argument(
"--custom_dataset", type=str, help="The path to a custom dataset file"
"--custom_dataset_file", type=str, help="The path to a custom dataset file"
)
parser.add_argument(
"--custom_dataset_file_type",
Expand All @@ -105,74 +86,23 @@ def main():
"--target_language", type=str, help="The language on the output side"
)
parser.add_argument(
"--system_details", type=str, help="File of system details in JSON format"
"--system_details_file", type=str, help="File of system details in JSON format"
)
parser.add_argument(
"--public", action="store_true", help="Make the evaluation results public"
)
parser.add_argument(
"--shared_users", type=str, nargs="+", help="Emails of users to share with"
)
args = parser.parse_args()

# Sanity checks
if not (args.source_language or args.target_language):
raise ValueError("You must specify source and/or target language")

# Infer missing values
task = TaskType(args.task)
metric_names = args.metric_names or DEFAULT_METRICS[args.task]
source_language = args.source_language or args.target_language
target_language = args.target_language or args.source_language
output_file_type = args.output_file_type or infer_file_type(
args.system_output, task
)
custom_dataset_file_type = args.custom_dataset_file_type or infer_file_type(
args.custom_dataset_file_type, task
)
shared_users = args.shared_users or []

# Read system details file
system_details = {}
if args.system_details:
with open(args.system_details, "r") as fin:
system_details = json.load(fin)

# Do the actual upload
system_output = SystemOutputProps(
data=args.system_output,
file_type=output_file_type,
)
metadata = SystemMetadata(
task=args.task,
is_private=not args.public,
system_name=args.system_name,
metric_names=metric_names,
source_language=source_language,
target_language=target_language,
dataset_split=args.split,
shared_users=shared_users,
system_details=system_details,
)
custom_dataset = None
if args.custom_dataset:
custom_dataset = SystemOutputProps(
data=args.custom_dataset,
file_type=custom_dataset_file_type,
)
else:
metadata.dataset_metadata_id = generate_dataset_id(
args.dataset, args.sub_dataset
)
create_props = (
SystemCreateProps(
metadata=metadata,
system_output=system_output,
custom_dataset=custom_dataset,
)
if custom_dataset is not None
else SystemCreateProps(metadata=metadata, system_output=system_output)
parser.add_argument(
"--server",
type=str,
required=False,
default="main",
choices=["main", "staging", "local"],
help='Which server to use, "main" should be sufficient',
)
args = parser.parse_args()

client_config = Config(
args.email,
Expand All @@ -181,11 +111,26 @@ def main():
)
client = ExplainaboardClient(client_config)

result: System = client.systems_post(create_props)
try:
sys_id = result.system_id
client.systems_get_by_id(sys_id)
evaluation_data = client.evaluate_system_file(
task=args.task,
system_name=args.system_name,
system_output_file=args.system_output_file,
system_output_file_type=args.system_output_file_type,
dataset=args.dataset,
sub_dataset=args.sub_dataset,
split=args.split,
custom_dataset_file=args.custom_dataset_file,
custom_dataset_file_type=args.custom_dataset_file_type,
metric_names=args.metric_names,
source_language=args.source_language,
target_language=args.target_language,
system_details_file=args.system_details_file,
public=args.public,
shared_users=args.shared_users,
)
frontend = client_config.get_env_host_map()[args.server].frontend
sys_id = evaluation_data.system_id
print(
f"successfully evaluated system {args.system_name} with ID {sys_id}\n"
f"view it at {frontend}/systems?system_id={sys_id}\n"
Expand Down
165 changes: 159 additions & 6 deletions explainaboard_client/client.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,184 @@
from __future__ import annotations

import json
import logging
from multiprocessing.pool import ApplyResult
from typing import Union

from explainaboard_api_client import ApiClient
from explainaboard_api_client.api.default_api import DefaultApi
from explainaboard_api_client.model.system_metadata import SystemMetadata
from explainaboard_api_client.models import System, SystemCreateProps, SystemOutputProps
from explainaboard_client.config import Config
from explainaboard_client.utils import encode_file_to_base64
from explainaboard_client.tasks import DEFAULT_METRICS, infer_file_type, TaskType
from explainaboard_client.utils import encode_file_to_base64, generate_dataset_id


class ExplainaboardClient(DefaultApi):
class ExplainaboardClient:
# ---- Initializers, etc.
def __init__(self, config: Config) -> None:
    """Initialize the ExplainaBoard client with a specific configuration.

    The client wraps a ``DefaultApi`` instance instead of inheriting from
    it, so no ``super().__init__`` call is made here.

    Args:
        config (Config): The configuration for the ExplainaBoard client.
    """
    self._config: Config = config
    api_client = ApiClient(self._config.to_client_config())
    # API requests are delegated to this wrapped DefaultApi instance.
    self._default_api: DefaultApi = DefaultApi(api_client)
    # Set to False by close(); _systems_post() refuses to run afterwards.
    self._active: bool = True

def close(self):
    """Close the underlying API client and mark this client as inactive.

    The API client is owned by the wrapped ``DefaultApi`` object, so it is
    reached through ``self._default_api`` (the client itself has no
    ``api_client`` attribute). Once ``_active`` is False, ``_systems_post``
    raises ``RuntimeError``.
    """
    self._default_api.api_client.close()
    self._active = False

def __enter__(self):
    """Enter a ``with`` block and return this client."""
    return self

def __exit__(self, exc_type=None, exc_value=None, traceback=None):
    """Leave a ``with`` block by closing the client.

    The context-manager protocol always calls ``__exit__`` with the
    exception type, value and traceback (all ``None`` on a clean exit), so
    the method must accept them; without these parameters every ``with``
    block fails with ``TypeError`` on exit. The defaults keep a direct
    zero-argument call working.

    Args:
        exc_type: Exception class raised inside the block, if any.
        exc_value: Exception instance raised inside the block, if any.
        traceback: Traceback of that exception, if any.

    Returns:
        None, so an exception raised inside the block is not suppressed.
    """
    self.close()

# ---- Client Functions
def evaluate_system_file(
    self,
    task: str,
    system_name: str,
    system_output_file: str,
    system_output_file_type: str | None = None,
    dataset: str | None = None,
    sub_dataset: str | None = None,
    split: str | None = None,
    custom_dataset_file: str | None = None,
    custom_dataset_file_type: str | None = None,
    metric_names: list[str] | None = None,
    source_language: str | None = None,
    target_language: str | None = None,
    system_details_file: str | None = None,
    public: bool = False,
    shared_users: list[str] | None = None,
) -> dict:
    """Evaluate a system output file and return a dictionary of results.

    Args:
        task: What task you will be analyzing.
        system_name: Name of the system that you are evaluating.
        system_output_file: Path to the system output file.
        system_output_file_type: File type of the system output
            (eg text/json/tsv/conll). Inferred from the file and task when
            omitted.
        dataset: A dataset name from DataLab.
        sub_dataset: A sub-dataset name from DataLab.
        split: The name of the dataset split to process.
        custom_dataset_file: The path to a custom dataset file.
        custom_dataset_file_type: File type of the custom dataset
            (eg text/json/tsv/conll). Inferred from the custom dataset file
            and task when omitted.
        metric_names: The metrics to compute, leave blank for task defaults.
        source_language: The language on the input side.
        target_language: The language on the output side.
        system_details_file: File of system details in JSON format.
        public: Make the evaluation results public.
        shared_users: Emails of users to share with.

    Returns:
        The system returned by the server, converted with ``to_dict()``.

    Raises:
        ValueError: If neither ``source_language`` nor ``target_language``
            is given, or if neither ``dataset`` nor ``custom_dataset_file``
            is given.
        RuntimeError: If the client has already been closed.
    """
    # Sanity checks
    if not (source_language or target_language):
        raise ValueError("You must specify source and/or target language")

    # Infer missing values
    task = TaskType(task)
    metric_names = metric_names or DEFAULT_METRICS[task]
    # If only one language is given, it is used for both sides.
    source_language = source_language or target_language
    target_language = target_language or source_language
    system_output_file_type = system_output_file_type or infer_file_type(
        system_output_file, task
    )
    shared_users = shared_users or []

    # Read system details file (JSON text is UTF-8, so do not rely on the
    # platform default encoding).
    system_details: dict = {}
    if system_details_file is not None:
        with open(system_details_file, "r", encoding="utf-8") as fin:
            system_details = json.load(fin)

    # Do the actual upload
    metadata = SystemMetadata(
        task=task,
        is_private=not public,
        system_name=system_name,
        metric_names=metric_names,
        source_language=source_language,
        target_language=target_language,
        dataset_split=split,
        shared_users=shared_users,
        system_details=system_details,
    )
    create_kwargs = {
        "metadata": metadata,
        "system_output": SystemOutputProps(
            data=system_output_file,
            file_type=system_output_file_type,
        ),
    }
    if custom_dataset_file:
        # The file type is inferred from the custom dataset *path*, and only
        # when a custom dataset was actually supplied.
        create_kwargs["custom_dataset"] = SystemOutputProps(
            data=custom_dataset_file,
            file_type=custom_dataset_file_type
            or infer_file_type(custom_dataset_file, task),
        )
    elif dataset is not None:
        metadata.dataset_metadata_id = generate_dataset_id(dataset, sub_dataset)
    else:
        raise ValueError("Must specify dataset or custom_dataset_file")

    # "custom_dataset" is only passed when one exists, as in the original
    # two-branch construction of SystemCreateProps.
    result: System = self._systems_post(SystemCreateProps(**create_kwargs))
    return result.to_dict()

# --- Pass-through API calls that will be deprecated
def systems_post(
    self, system_create_props: SystemCreateProps, **kwargs
) -> Union[System, ApplyResult]:
    """Post a system using the client.

    The public function is deprecated and will be removed; it logs a
    warning on the ``explainaboard_client`` logger and then delegates to
    ``_systems_post``. Use ``evaluate_system_file()`` instead.

    Args:
        system_create_props: Properties of the system to create.
        **kwargs: Forwarded unchanged to ``_systems_post``.

    Returns:
        Whatever ``_systems_post`` returns.
    """
    logging.getLogger("explainaboard_client").warning(
        "WARNING: systems_post() is deprecated and may be removed in the future."
        " Please use evaluate_system_file() instead."
    )
    return self._systems_post(system_create_props, **kwargs)

def systems_get_by_id(self, system_id: str, **kwargs):
    """Fetch one system by its ID through the wrapped ``DefaultApi``.

    Thin pass-through that is expected to be replaced in the future.

    Args:
        system_id: ID of the system to fetch.
        **kwargs: Forwarded unchanged to the underlying API call.

    Returns:
        The result of ``DefaultApi.systems_get_by_id``.
    """
    api = self._default_api
    return api.systems_get_by_id(system_id, **kwargs)

def systems_delete_by_id(self, system_id: str, **kwargs):
    """Delete one system by its ID through the wrapped ``DefaultApi``.

    Thin pass-through that is expected to be replaced in the future. The
    result of the underlying call is discarded, so this method returns None.

    Args:
        system_id: ID of the system to delete.
        **kwargs: Forwarded unchanged to the underlying API call.
    """
    api = self._default_api
    api.systems_delete_by_id(system_id, **kwargs)

def systems_get(self, **kwargs):
    """List systems through the wrapped ``DefaultApi``.

    Thin pass-through that is expected to be replaced in the future.

    Args:
        **kwargs: Forwarded unchanged to the underlying API call.

    Returns:
        The result of ``DefaultApi.systems_get``.
    """
    api = self._default_api
    return api.systems_get(**kwargs)

def info_get(self, **kwargs):
    """Fetch info through the wrapped ``DefaultApi``.

    Thin pass-through that is expected to be replaced in the future.

    Args:
        **kwargs: Forwarded unchanged to the underlying API call.

    Returns:
        The result of ``DefaultApi.info_get``.
    """
    api = self._default_api
    return api.info_get(**kwargs)

def user_get(self, **kwargs):
    """Fetch a user through the wrapped ``DefaultApi``.

    Thin pass-through that is expected to be replaced in the future.

    Args:
        **kwargs: Forwarded unchanged to the underlying API call.

    Returns:
        The result of ``DefaultApi.user_get``.
    """
    api = self._default_api
    return api.user_get(**kwargs)

# --- Private utility functions
def _systems_post(
self, system_create_props: SystemCreateProps, **kwargs
) -> Union[System, ApplyResult]:
"""Post a system using the client."""
if not self._active:
raise RuntimeError("Client is closed.")
loaded_system_output = SystemOutputProps(
data=encode_file_to_base64(system_create_props.system_output.data),
file_type=system_create_props.system_output.file_type,
Expand All @@ -45,4 +198,4 @@ def systems_post(
metadata=system_create_props.metadata,
system_output=loaded_system_output,
)
return super().systems_post(props_with_loaded_file, **kwargs)
return self._default_api.systems_post(props_with_loaded_file, **kwargs)
Loading

0 comments on commit 6ac0f2f

Please sign in to comment.