Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Evaluate system file simplified API #32

Merged
merged 4 commits into from
Oct 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ python -m explainaboard_client.cli.evaluate_system \
--email $EB_EMAIL --api_key $EB_API_KEY \
--task [TASK_ID] \
--system_name [MODEL_NAME] \
--system_output [SYSTEM_OUTPUT] --output_file_type [FILE_TYPE] \
--system_output_file [SYSTEM_OUTPUT_FILE] --system_output_file_type [FILE_TYPE] \
--dataset [DATASET] --sub_dataset [SUB_DATASET] --split [SPLIT] \
--source_language [SOURCE] --target_language [TARGET] \
[--public]
Expand All @@ -53,7 +53,7 @@ python -m explainaboard_client.cli.evaluate_system \
You will need to fill in all the settings appropriately, for example:
* `[TASK_ID]` is the ID of the task you want to perform. A full list is [here](https://github.com/neulab/explainaboard_web/blob/main/backend/src/impl/tasks.py).
* `[MODEL_NAME]` is whatever name you want to give to your model.
* `[SYSTEM_OUTPUT]` is the file that you want to evaluate.
* `[SYSTEM_OUTPUT_FILE]` is the file that you want to evaluate.
* `[FILE_TYPE]` is the type of the file, "text", "tsv", "csv", "conll", or "json".
* `[DATASET]`, `[SUB_DATASET]` and `[SPLIT]` indicate which dataset you're evaluating
a system output for.
Expand All @@ -71,8 +71,8 @@ python -m explainaboard_client.cli.evaluate_system \
--email $EB_EMAIL --api_key $EB_API_KEY \
--task [TASK_ID] \
--system_name [MODEL_NAME] \
--system_output [SYSTEM_OUTPUT] --output_file_type [FILE_TYPE] \
--custom_dataset [CUSTOM_DATASET] --custom_dataset_file_type [FILE_TYPE] \
--system_output_file [SYSTEM_OUTPUT_FILE] --system_output_file_type [FILE_TYPE] \
--custom_dataset_file [CUSTOM_DATASET] --custom_dataset_file_type [FILE_TYPE] \
--source_language [SOURCE] --target_language [TARGET]
```

Expand Down
117 changes: 31 additions & 86 deletions explainaboard_client/cli/evaluate_system.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,7 @@
import argparse
import json

from explainaboard_api_client.model.system import System
from explainaboard_api_client.model.system_create_props import SystemCreateProps
from explainaboard_api_client.model.system_metadata import SystemMetadata
from explainaboard_api_client.model.system_output_props import SystemOutputProps
from explainaboard_client import Config, ExplainaboardClient
from explainaboard_client.tasks import (
DEFAULT_METRICS,
FileType,
infer_file_type,
TaskType,
)
from explainaboard_client.utils import generate_dataset_id
from explainaboard_client.tasks import FileType, TaskType


def main():
Expand All @@ -30,14 +19,6 @@ def main():
help="Email address used to sign in to ExplainaBoard",
)
parser.add_argument("--api_key", type=str, required=True, help="Your API key")
parser.add_argument(
"--server",
type=str,
required=False,
default="main",
choices=["main", "staging", "local"],
help='Which server to use, "main" should be sufficient',
)
# ---- System info
parser.add_argument(
"--task",
Expand All @@ -53,13 +34,13 @@ def main():
help="Name of the system that you are evaluating",
)
parser.add_argument(
"--system_output",
"--system_output_file",
Copy link
Collaborator

@pfliu-nlp pfliu-nlp Oct 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these changes in this PR will not break the implementation of the one in the teaching class. (but let me know if it will affect so that I can make corresponding revisions)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it will, but I'd actually suggest that you set a specific version that you use for the class to prevent any future changes from causing issues. That's what I did as well: https://github.com/neubig/minbert-assignment/blob/main/setup.sh#L13

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"suggest that you set a specific version that you use for the class to prevent any future changes from causing issues"

yeah, this makes sense

type=str,
required=True,
help="Path to the system output file",
)
parser.add_argument(
"--output_file_type",
"--system_output_file_type",
type=str,
choices=FileType.list(),
help="File type of the system output (eg text/json/tsv/conll)",
Expand All @@ -82,7 +63,7 @@ def main():
help="The name of the dataset split to process",
)
dataset_group.add_argument(
"--custom_dataset", type=str, help="The path to a custom dataset file"
"--custom_dataset_file", type=str, help="The path to a custom dataset file"
)
parser.add_argument(
"--custom_dataset_file_type",
Expand All @@ -105,74 +86,23 @@ def main():
"--target_language", type=str, help="The language on the output side"
)
parser.add_argument(
"--system_details", type=str, help="File of system details in JSON format"
"--system_details_file", type=str, help="File of system details in JSON format"
)
parser.add_argument(
"--public", action="store_true", help="Make the evaluation results public"
)
parser.add_argument(
"--shared_users", type=str, nargs="+", help="Emails of users to share with"
)
args = parser.parse_args()

# Sanity checks
if not (args.source_language or args.target_language):
raise ValueError("You must specify source and/or target language")

# Infer missing values
task = TaskType(args.task)
metric_names = args.metric_names or DEFAULT_METRICS[args.task]
source_language = args.source_language or args.target_language
target_language = args.target_language or args.source_language
output_file_type = args.output_file_type or infer_file_type(
args.system_output, task
)
custom_dataset_file_type = args.custom_dataset_file_type or infer_file_type(
args.custom_dataset_file_type, task
)
shared_users = args.shared_users or []

# Read system details file
system_details = {}
if args.system_details:
with open(args.system_details, "r") as fin:
system_details = json.load(fin)

# Do the actual upload
system_output = SystemOutputProps(
data=args.system_output,
file_type=output_file_type,
)
metadata = SystemMetadata(
task=args.task,
is_private=not args.public,
system_name=args.system_name,
metric_names=metric_names,
source_language=source_language,
target_language=target_language,
dataset_split=args.split,
shared_users=shared_users,
system_details=system_details,
)
custom_dataset = None
if args.custom_dataset:
custom_dataset = SystemOutputProps(
data=args.custom_dataset,
file_type=custom_dataset_file_type,
)
else:
metadata.dataset_metadata_id = generate_dataset_id(
args.dataset, args.sub_dataset
)
create_props = (
SystemCreateProps(
metadata=metadata,
system_output=system_output,
custom_dataset=custom_dataset,
)
if custom_dataset is not None
else SystemCreateProps(metadata=metadata, system_output=system_output)
parser.add_argument(
"--server",
type=str,
required=False,
default="main",
choices=["main", "staging", "local"],
help='Which server to use, "main" should be sufficient',
)
args = parser.parse_args()

client_config = Config(
args.email,
Expand All @@ -181,11 +111,26 @@ def main():
)
client = ExplainaboardClient(client_config)

result: System = client.systems_post(create_props)
try:
sys_id = result.system_id
client.systems_get_by_id(sys_id)
evaluation_data = client.evaluate_system_file(
task=args.task,
system_name=args.system_name,
system_output_file=args.system_output_file,
system_output_file_type=args.system_output_file_type,
dataset=args.dataset,
sub_dataset=args.sub_dataset,
split=args.split,
custom_dataset_file=args.custom_dataset_file,
custom_dataset_file_type=args.custom_dataset_file_type,
metric_names=args.metric_names,
source_language=args.source_language,
target_language=args.target_language,
system_details_file=args.system_details_file,
public=args.public,
shared_users=args.shared_users,
)
frontend = client_config.get_env_host_map()[args.server].frontend
sys_id = evaluation_data.system_id
print(
f"successfully evaluated system {args.system_name} with ID {sys_id}\n"
f"view it at {frontend}/systems?system_id={sys_id}\n"
Expand Down
165 changes: 159 additions & 6 deletions explainaboard_client/client.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,184 @@
from __future__ import annotations

import json
import logging
from multiprocessing.pool import ApplyResult
from typing import Union

from explainaboard_api_client import ApiClient
from explainaboard_api_client.api.default_api import DefaultApi
from explainaboard_api_client.model.system_metadata import SystemMetadata
from explainaboard_api_client.models import System, SystemCreateProps, SystemOutputProps
from explainaboard_client.config import Config
from explainaboard_client.utils import encode_file_to_base64
from explainaboard_client.tasks import DEFAULT_METRICS, infer_file_type, TaskType
from explainaboard_client.utils import encode_file_to_base64, generate_dataset_id


class ExplainaboardClient(DefaultApi):
class ExplainaboardClient:
# ---- Initializers, etc.
def __init__(self, config: Config) -> None:
self._config = config
"""Initialize the ExplainaBoard client with a specific configuration.

Args:
config (Config): The configuration for the ExplainaBoard client.
"""
self._config: Config = config
api_client = ApiClient(self._config.to_client_config())
super().__init__(api_client)
self._default_api: DefaultApi = DefaultApi(api_client)
self._active: bool = True

def close(self):
self.api_client.close()
self._default_api.api_client.close()
self._active = False

def __enter__(self) -> ExplainaboardClient:
    """Enter a ``with`` block; the client itself is the context value."""
    return self

def __exit__(self):
self.close()

# ---- Client Functions
def evaluate_system_file(
    self,
    task: str,
    system_name: str,
    system_output_file: str,
    system_output_file_type: str | None = None,
    dataset: str | None = None,
    sub_dataset: str | None = None,
    split: str | None = None,
    custom_dataset_file: str | None = None,
    custom_dataset_file_type: str | None = None,
    metric_names: list[str] | None = None,
    source_language: str | None = None,
    target_language: str | None = None,
    system_details_file: str | None = None,
    public: bool = False,
    shared_users: list[str] | None = None,
) -> dict:
    """Evaluate a system output file and return a dictionary of results.

    Args:
        task: What task you will be analyzing.
        system_name: Name of the system that you are evaluating.
        system_output_file: Path to the system output file.
        system_output_file_type: File type of the system output
            (eg text/json/tsv/conll). Inferred from the file path and task
            when omitted.
        dataset: A dataset name from DataLab.
        sub_dataset: A sub-dataset name from DataLab.
        split: The name of the dataset split to process.
        custom_dataset_file: The path to a custom dataset file. Takes
            precedence over ``dataset`` when both are given.
        custom_dataset_file_type: File type of the custom dataset
            (eg text/json/tsv/conll). Inferred from ``custom_dataset_file``
            when omitted.
        metric_names: The metrics to compute, leave blank for task defaults.
        source_language: The language on the input side. Defaults to
            ``target_language`` when omitted.
        target_language: The language on the output side. Defaults to
            ``source_language`` when omitted.
        system_details_file: File of system details in JSON format.
        public: Make the evaluation results public.
        shared_users: Emails of users to share with.

    Returns:
        The posted system converted to a dictionary via ``to_dict()``.

    Raises:
        ValueError: If neither language is given, or if neither ``dataset``
            nor ``custom_dataset_file`` is given.
    """
    # Sanity checks: reject unusable argument combinations up front, before
    # any file is read or anything is sent to the server.
    if not (source_language or target_language):
        raise ValueError("You must specify source and/or target language")
    if not custom_dataset_file and dataset is None:
        raise ValueError("Must specify dataset or custom_dataset_file")

    # Infer missing values
    task_type = TaskType(task)
    metric_names = metric_names or DEFAULT_METRICS[task_type]
    source_language = source_language or target_language
    target_language = target_language or source_language
    system_output_file_type = system_output_file_type or infer_file_type(
        system_output_file, task_type
    )
    shared_users = shared_users or []

    # Read system details file
    system_details: dict = {}
    if system_details_file is not None:
        with open(system_details_file, "r", encoding="utf-8") as fin:
            system_details = json.load(fin)

    # Do the actual upload
    system_output = SystemOutputProps(
        data=system_output_file,
        file_type=system_output_file_type,
    )
    metadata = SystemMetadata(
        task=task_type,
        is_private=not public,
        system_name=system_name,
        metric_names=metric_names,
        source_language=source_language,
        target_language=target_language,
        dataset_split=split,
        shared_users=shared_users,
        system_details=system_details,
    )
    if custom_dataset_file:
        # Infer the type from the dataset *path*, and only when a custom
        # dataset is actually being uploaded.
        custom_dataset_file_type = custom_dataset_file_type or infer_file_type(
            custom_dataset_file, task_type
        )
        create_props = SystemCreateProps(
            metadata=metadata,
            system_output=system_output,
            custom_dataset=SystemOutputProps(
                data=custom_dataset_file,
                file_type=custom_dataset_file_type,
            ),
        )
    else:
        # ``dataset`` is guaranteed to be set by the sanity check above.
        metadata.dataset_metadata_id = generate_dataset_id(dataset, sub_dataset)
        create_props = SystemCreateProps(
            metadata=metadata, system_output=system_output
        )

    result: System = self._systems_post(create_props)
    return result.to_dict()

# --- Pass-through API calls that will be deprecated
def systems_post(
    self, system_create_props: SystemCreateProps, **kwargs
) -> Union[System, ApplyResult]:
    """Post a system using the client.

    The public function is deprecated and will be removed. It logs a
    deprecation warning on the ``explainaboard_client`` logger and then
    delegates to ``_systems_post``.

    Args:
        system_create_props: Properties of the system to create.
        **kwargs: Extra keyword arguments forwarded to ``_systems_post``.

    Returns:
        Whatever ``_systems_post`` returns for the given properties.
    """
    logging.getLogger("explainaboard_client").warning(
        "WARNING: systems_post() is deprecated and may be removed in the future."
        " Please use evaluate_system_file() instead."
    )
    return self._systems_post(system_create_props, **kwargs)

def systems_get_by_id(self, system_id: str, **kwargs):
    """Fetch one system by ID through the wrapped API object.

    Pass-through call that is expected to be replaced in the future.
    """
    fetch = self._default_api.systems_get_by_id
    return fetch(system_id, **kwargs)

def systems_delete_by_id(self, system_id: str, **kwargs):
    """Delete one system by ID through the wrapped API object.

    Pass-through call that is expected to be replaced in the future. The
    underlying call's result is discarded; nothing is returned.
    """
    delete = self._default_api.systems_delete_by_id
    delete(system_id, **kwargs)

def systems_get(self, **kwargs):
    """List systems through the wrapped API object.

    Pass-through call that is expected to be replaced in the future.
    """
    fetch = self._default_api.systems_get
    return fetch(**kwargs)

def info_get(self, **kwargs):
    """Retrieve info through the wrapped API object.

    Pass-through call that is expected to be replaced in the future.
    """
    fetch = self._default_api.info_get
    return fetch(**kwargs)

def user_get(self, **kwargs):
    """Retrieve a user through the wrapped API object.

    Pass-through call that is expected to be replaced in the future.
    """
    fetch = self._default_api.user_get
    return fetch(**kwargs)

# --- Private utility functions
def _systems_post(
self, system_create_props: SystemCreateProps, **kwargs
) -> Union[System, ApplyResult]:
"""Post a system using the client."""
if not self._active:
raise RuntimeError("Client is closed.")
loaded_system_output = SystemOutputProps(
data=encode_file_to_base64(system_create_props.system_output.data),
file_type=system_create_props.system_output.file_type,
Expand All @@ -45,4 +198,4 @@ def systems_post(
metadata=system_create_props.metadata,
system_output=loaded_system_output,
)
return super().systems_post(props_with_loaded_file, **kwargs)
return self._default_api.systems_post(props_with_loaded_file, **kwargs)
Loading