Skip to content

Commit

Permalink
feat: improve failure status reason
Browse files Browse the repository at this point in the history
  • Loading branch information
ajberdy committed Oct 24, 2023
1 parent 6e08b88 commit be95c0b
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 8 deletions.
2 changes: 1 addition & 1 deletion base/jobs/docker/1.0/py3/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
amazon-braket-default-simulator==1.20.1
amazon-braket-schemas==1.19.1
amazon-braket-pennylane-plugin==1.21.0
amazon-braket-sdk==1.58.0
amazon-braket-sdk==1.59.1
awscli==1.29.53
botocore==1.31.53
boto3==1.28.53
Expand Down
2 changes: 1 addition & 1 deletion pytorch/jobs/docker/2.0/py3/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
amazon-braket-default-simulator==1.20.1
amazon-braket-schemas==1.19.1
amazon-braket-pennylane-plugin==1.21.0
amazon-braket-sdk==1.58.0
amazon-braket-sdk==1.59.1
awscli==1.29.53
botocore==1.31.53
boto3==1.28.53
Expand Down
21 changes: 16 additions & 5 deletions src/braket_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def unpack_code_and_add_to_path(local_s3_file: str, compression_type: str):
sys.path.append(EXTRACTED_CUSTOMER_CODE_PATH)


def kick_off_customer_script(entry_point: str) -> multiprocessing.Process:
def kick_off_customer_script(entry_point: str, queue: multiprocessing.Queue) -> multiprocessing.Process:
"""
Runs the customer script as a separate process.
Expand All @@ -151,7 +151,13 @@ def kick_off_customer_script(entry_point: str) -> multiprocessing.Process:
customer_module = importlib.import_module(str_module)
customer_method = getattr(customer_module, str_method)

process_kwargs = {"target": customer_method}
def wrapped_customer_method(queue, **kwargs):
    """
    Runs the customer method and reports any exception it raises through the queue.

    The exception is not re-raised; the queue is the only channel through which
    the failure is reported.

    Args:
        queue: Queue that receives the exception raised by the customer method, if any.
        **kwargs: Keyword arguments forwarded unchanged to the customer method.
    """
    # Imported locally so the wrapper does not rely on a module-level import.
    import pickle

    try:
        customer_method(**kwargs)
    except Exception as exc:
        payload = exc
        # A multiprocessing.Queue pickles items on a background feeder thread; an
        # item that cannot be pickled is dropped there, and one that cannot be
        # unpickled fails on the reading side. Either way the real error would be
        # lost, so verify the round trip here and fall back to a plain
        # RuntimeError that still carries the original type name and message.
        try:
            pickle.loads(pickle.dumps(exc))
        except Exception:
            payload = RuntimeError(f"{type(exc).__name__}: {exc}")
        queue.put(payload)

process_kwargs = {"target": wrapped_customer_method, "args": (queue,)}

function_args = try_bind_hyperparameters_to_customer_method(customer_method)
if function_args is not None:
Expand Down Expand Up @@ -186,7 +192,7 @@ def try_bind_hyperparameters_to_customer_method(customer_method: Callable):
return function_args


def join_customer_script(customer_code_process: multiprocessing.Process):
def join_customer_script(customer_code_process: multiprocessing.Process, queue: multiprocessing.Queue):
"""
Joins the process running the customer code.
Expand All @@ -195,6 +201,10 @@ def join_customer_script(customer_code_process: multiprocessing.Process):
"""
try:
customer_code_process.join()

if not queue.empty():
exception = queue.get()
log_failure_and_exit(f"{type(exception).__name__}: {exception}")
except Exception as e:
log_failure_and_exit(f"Job did not exit gracefully.\nException: {e}")

Expand Down Expand Up @@ -265,8 +275,9 @@ def run_customer_code_as_process(entry_point: str) -> int:
int: The exit code of the customer code run.
"""
print("Running Code As Process")
customer_code_process = kick_off_customer_script(entry_point)
join_customer_script(customer_code_process)
queue = multiprocessing.Queue()
customer_code_process = kick_off_customer_script(entry_point, queue)
join_customer_script(customer_code_process, queue)
print("Code Run Finished")
return customer_code_process.exitcode

Expand Down
2 changes: 1 addition & 1 deletion tensorflow/jobs/docker/2.13/py3/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
amazon-braket-default-simulator==1.20.1
amazon-braket-schemas==1.19.1
amazon-braket-pennylane-plugin==1.21.0
amazon-braket-sdk==1.58.0
amazon-braket-sdk==1.59.1
awscli==1.29.53
botocore==1.31.53
boto3==1.28.53
Expand Down

0 comments on commit be95c0b

Please sign in to comment.