diff --git a/Dockerfile b/Dockerfile index c95ef408..d968ecc2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,10 +36,6 @@ FROM builder-py-base as builder-py-3_12 RUN git clone -b v2.3.28 --depth 1 https://github.com/pyenv/pyenv.git $PYENV_ROOT \ && /build_python.sh 3.12.0 # ------------------------------------------------------------------------------ -FROM builder-py-base as builder-py-3_13 -RUN git clone -b v2.3.28 --depth 1 https://github.com/pyenv/pyenv.git $PYENV_ROOT \ - && /build_python.sh 3.13-dev -# ------------------------------------------------------------------------------ FROM python:3.11-slim-bookworm as base ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \ @@ -55,7 +51,6 @@ RUN apt-get -y update \ COPY --link --from=builder-nsjail /nsjail/nsjail /usr/sbin/ COPY --link --from=builder-py-3_12 /lang/ /lang/ -COPY --link --from=builder-py-3_13 /lang/ /lang/ RUN chmod +x /usr/sbin/nsjail \ && ln -s /lang/python/3.12/ /lang/python/default diff --git a/config/gunicorn.conf.py b/config/gunicorn.conf.py index 5e492bb7..02f02a07 100644 --- a/config/gunicorn.conf.py +++ b/config/gunicorn.conf.py @@ -1,6 +1,6 @@ workers = 2 bind = "0.0.0.0:8060" -logger_class = "snekbox.utils.gunicorn.GunicornLogger" +logger_class = "snekbox.logging.GunicornLogger" access_logformat = "%(m)s %(U)s%(q)s %(s)s %(b)s %(L)ss" access_logfile = "-" wsgi_app = "snekbox:SnekAPI()" diff --git a/snekbox/__init__.py b/snekbox/__init__.py index b45960bd..191cec14 100644 --- a/snekbox/__init__.py +++ b/snekbox/__init__.py @@ -9,8 +9,8 @@ __version__ = "0.0.0.0+unknown" from snekbox.api import SnekAPI # noqa: E402 +from snekbox.logging import init_logger, init_sentry # noqa: E402 from snekbox.nsjail import NsJail # noqa: E402 -from snekbox.utils.logging import init_logger, init_sentry # noqa: E402 __all__ = ("NsJail", "SnekAPI", "DEBUG") diff --git a/snekbox/api/resources/eval.py b/snekbox/api/resources/eval.py index 9a53577b..55bba984 100644 --- a/snekbox/api/resources/eval.py +++ b/snekbox/api/resources/eval.py @@ -6,11 +6,10 @@ from falcon.media.validators.jsonschema import validate from snekbox.nsjail import NsJail +from snekbox.snekio import FileAttachment, ParsingError __all__ = ("EvalResource",) -from snekbox.snekio import FileAttachment, ParsingError - log = logging.getLogger(__name__) diff --git a/snekbox/limits/__init__.py b/snekbox/limits/__init__.py new file mode 100644 index 00000000..1f986c76 --- /dev/null +++ b/snekbox/limits/__init__.py @@ -0,0 +1,3 @@ +from . import cgroup, swap, timed + +__all__ = ("cgroup", "swap", "timed") diff --git a/snekbox/utils/cgroup.py b/snekbox/limits/cgroup.py similarity index 100% rename from snekbox/utils/cgroup.py rename to snekbox/limits/cgroup.py diff --git a/snekbox/utils/swap.py b/snekbox/limits/swap.py similarity index 100% rename from snekbox/utils/swap.py rename to snekbox/limits/swap.py diff --git a/snekbox/utils/timed.py b/snekbox/limits/timed.py similarity index 100% rename from snekbox/utils/timed.py rename to snekbox/limits/timed.py diff --git a/snekbox/logging/__init__.py b/snekbox/logging/__init__.py new file mode 100644 index 00000000..c5d14f2c --- /dev/null +++ b/snekbox/logging/__init__.py @@ -0,0 +1,4 @@ +from .gunicorn import GunicornLogger +from .init import FORMAT, init_logger, init_sentry + +__all__ = ("FORMAT", "init_logger", "init_sentry", "GunicornLogger") diff --git a/snekbox/utils/gunicorn.py b/snekbox/logging/gunicorn.py similarity index 97% rename from snekbox/utils/gunicorn.py rename to snekbox/logging/gunicorn.py index 96d2e022..d0ef3e10 100644 --- a/snekbox/utils/gunicorn.py +++ b/snekbox/logging/gunicorn.py @@ -5,7 +5,7 @@ from snekbox import DEBUG -from .logging import FORMAT +from .init import FORMAT __all__ = ("GunicornLogger",) diff --git a/snekbox/utils/logging.py b/snekbox/logging/init.py similarity index 100% rename from snekbox/utils/logging.py rename to snekbox/logging/init.py diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index 9bf20bf9..dc093930 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -2,44 +2,31 @@ import re import subprocess import sys -from collections.abc import Generator +from collections.abc import Iterable, Sequence from contextlib import nullcontext from pathlib import Path from tempfile import NamedTemporaryFile -from typing import Iterable, TypeVar from google.protobuf import text_format -from snekbox import DEBUG, utils +from snekbox import DEBUG, limits from snekbox.config_pb2 import NsJailConfig -from snekbox.filesystem import Size -from snekbox.memfs import MemFS -from snekbox.process import EvalResult -from snekbox.snekio import FileAttachment -from snekbox.utils.timed import time_limit +from snekbox.limits.timed import time_limit +from snekbox.result import EvalError, EvalResult +from snekbox.snekio import FileAttachment, MemFS +from snekbox.snekio.filesystem import Size +from snekbox.utils.iter import iter_lstrip __all__ = ("NsJail",) log = logging.getLogger(__name__) -_T = TypeVar("_T") - # [level][timestamp][PID]? function_signature:line_no? message LOG_PATTERN = re.compile( r"\[(?P(I)|[DWEF])\]\[.+?\](?(2)|(?P\[\d+\] .+?:\d+ )) ?(?P.+)" ) -def iter_lstrip(iterable: Iterable[_T]) -> Generator[_T, None, None]: - """Remove leading falsy objects from an iterable.""" - it = iter(iterable) - for item in it: - if item: - yield item - break - yield from it - - class NsJail: """ Core Snekbox functionality, providing safe execution of Python code. @@ -89,8 +76,8 @@ def __init__( self.files_pattern = files_pattern self.config = self._read_config(config_path) - self.cgroup_version = utils.cgroup.init(self.config) - self.ignore_swap_limits = utils.swap.should_ignore_limit(self.config, self.cgroup_version) + self.cgroup_version = limits.cgroup.init(self.config) + self.ignore_swap_limits = limits.swap.should_ignore_limit(self.config, self.cgroup_version) log.info(f"Assuming cgroup version {self.cgroup_version}.") @@ -162,19 +149,105 @@ def _consume_stdout(self, nsjail: subprocess.Popen) -> str: with nsjail: # We'll consume STDOUT as long as the NsJail subprocess is running. while nsjail.poll() is None: - chars = nsjail.stdout.read(self.read_chunk_size) + try: + chars = nsjail.stdout.read(self.read_chunk_size) + except UnicodeDecodeError as e: + raise EvalError("UnicodeDecodeError: invalid Unicode in output pipe") from e + output_size += sys.getsizeof(chars) output.append(chars) if output_size > self.max_output_size: # Terminate the NsJail subprocess with SIGTERM. # This in turn reaps and kills children with SIGKILL. - log.info("Output exceeded the output limit, sending SIGTERM to NsJail.") + log.info("Output exceeded the output limit. Sending SIGTERM to NsJail.") nsjail.terminate() break return "".join(output) + def _build_args( + self, py_args: Iterable[str], nsjail_args: Iterable[str], log_path: str, fs_home: str + ) -> Sequence[str]: + if self.cgroup_version == 2: + nsjail_args = ("--use_cgroupv2", *nsjail_args) + + if self.ignore_swap_limits: + nsjail_args = ( + "--cgroup_mem_memsw_max", + "0", + "--cgroup_mem_swap_max", + "-1", + *nsjail_args, + ) + + nsjail_args = ( + # Mount `home` with Read/Write access + "--bindmount", + f"{fs_home}:home", + *nsjail_args, + ) + + return [ + self.nsjail_path, + "--config", + self.config_path, + "--log", + log_path, + *nsjail_args, + "--", + self.config.exec_bin.path, + # Filter out empty strings at start of Python args + # (causes issues with python cli) + *iter_lstrip(self.config.exec_bin.arg), + *iter_lstrip(py_args), + ] + + def _write_files(self, home: Path, files: Iterable[FileAttachment]) -> dict[Path, float]: + files_written = {} + for file in files: + try: + f_path = file.save_to(home) + # Allow file to be writable + f_path.chmod(0o777) + # Save the written at time to later check if it was modified + files_written[f_path] = f_path.stat().st_mtime + log.info(f"Created file at {(home / file.path)!r}.") + except OSError as e: + log.info(f"Failed to create file at {(home / file.path)!r}.", exc_info=e) + raise EvalError( + f"{e.__class__.__name__}: Failed to create file '{file.path}'." + ) from e + + return files_written + + def _parse_attachments( + self, fs: MemFS, files_written: dict[Path, float] + ) -> list[FileAttachment]: + try: + with time_limit(self.files_timeout) if self.files_timeout else nullcontext(): + attachments = fs.files_list( + limit=self.files_limit, + pattern=self.files_pattern, + preload_dict=True, + exclude_files=files_written, + timeout=self.files_timeout, + ) + + log.info(f"Found {len(attachments)} files.") + return attachments + except RecursionError as e: + log.info("Recursion error while parsing attachments") + raise EvalError( + "FileParsingError: Exceeded directory depth limit while parsing attachments" + ) from e + except TimeoutError as e: + log.info(f"Exceeded time limit while parsing attachments: {e}") + raise EvalError("TimeoutError: Exceeded time limit while parsing attachments") from e + except Exception as e: + log.exception(f"Unexpected {type(e).__name__} while parse attachments", exc_info=e) + raise EvalError("FileParsingError: Unknown error while parsing attachments") from e + def python3( self, py_args: Iterable[str], @@ -189,119 +262,43 @@ def python3( files: FileAttachments to write to the sandbox prior to running Python. nsjail_args: Overrides for the NsJail configuration. """ - if self.cgroup_version == 2: - nsjail_args = ("--use_cgroupv2", *nsjail_args) - - if self.ignore_swap_limits: - nsjail_args = ( - "--cgroup_mem_memsw_max", - "0", - "--cgroup_mem_swap_max", - "-1", - *nsjail_args, - ) - with NamedTemporaryFile() as nsj_log, MemFS( instance_size=self.memfs_instance_size, home=self.memfs_home, output=self.memfs_output, ) as fs: - nsjail_args = ( - # Mount `home` with Read/Write access - "--bindmount", - f"{fs.home}:home", - *nsjail_args, - ) + args = self._build_args(py_args, nsjail_args, nsj_log.name, str(fs.home)) + try: + files_written = self._write_files(fs.home, files) + + msg = "Executing code..." + if DEBUG: + msg = f"{msg[:-3]} with the arguments {args}." + log.info(msg) - args = [ - self.nsjail_path, - "--config", - self.config_path, - "--log", - nsj_log.name, - *nsjail_args, - "--", - self.config.exec_bin.path, - # Filter out empty strings at start of Python args - # (causes issues with python cli) - *iter_lstrip(self.config.exec_bin.arg), - *iter_lstrip(py_args), - ] - - # Write provided files if any - files_written: dict[Path, float] = {} - for file in files: try: - f_path = file.save_to(fs.home) - # Allow file to be writable - f_path.chmod(0o777) - # Save the written at time to later check if it was modified - files_written[f_path] = f_path.stat().st_mtime - log.info(f"Created file at {(fs.home / file.path)!r}.") - except OSError as e: - log.info(f"Failed to create file at {(fs.home / file.path)!r}.", exc_info=e) - return EvalResult( - args, None, f"{e.__class__.__name__}: Failed to create file '{file.path}'." + nsjail = subprocess.Popen( + args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True ) + except ValueError: + return EvalResult(args, None, "ValueError: embedded null byte") - msg = "Executing code..." - if DEBUG: - msg = f"{msg[:-3]} with the arguments {args}." - log.info(msg) - - try: - nsjail = subprocess.Popen( - args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True - ) - except ValueError: - return EvalResult(args, None, "ValueError: embedded null byte") - - try: output = self._consume_stdout(nsjail) - except UnicodeDecodeError: - return EvalResult(args, None, "UnicodeDecodeError: invalid Unicode in output pipe") - - # When you send signal `N` to a subprocess to terminate it using Popen, it - # will return `-N` as its exit code. As we normally get `N + 128` back, we - # convert negative exit codes to the `N + 128` form. - returncode = -nsjail.returncode + 128 if nsjail.returncode < 0 else nsjail.returncode - - # Parse attachments with time limit - try: - with time_limit(self.files_timeout) if self.files_timeout else nullcontext(): - attachments = fs.files_list( - limit=self.files_limit, - pattern=self.files_pattern, - preload_dict=True, - exclude_files=files_written, - timeout=self.files_timeout, - ) - log.info(f"Found {len(attachments)} files.") - except RecursionError: - log.info("Recursion error while parsing attachments") - return EvalResult( - args, - None, - "FileParsingError: Exceeded directory depth limit while parsing attachments", - ) - except TimeoutError as e: - log.info(f"Exceeded time limit while parsing attachments: {e}") - return EvalResult( - args, None, "TimeoutError: Exceeded time limit while parsing attachments" - ) - except Exception as e: - log.exception(f"Unexpected {type(e).__name__} while parse attachments", exc_info=e) - return EvalResult( - args, None, "FileParsingError: Unknown error while parsing attachments" - ) + attachments = self._parse_attachments(fs, files_written) + log_lines = nsj_log.read().decode("utf-8").splitlines() + except EvalError as e: + return EvalResult(args, None, str(e)) - log_lines = nsj_log.read().decode("utf-8").splitlines() - if not log_lines and returncode == 255: - # NsJail probably failed to parse arguments so log output will still be in stdout - log_lines = output.splitlines() + # When you send signal `N` to a subprocess to terminate it using Popen, it + # will return `-N` as its exit code. As we normally get `N + 128` back, we + # convert negative exit codes to the `N + 128` form. + return_code = -nsjail.returncode + 128 if nsjail.returncode < 0 else nsjail.returncode - self._parse_log(log_lines) + if not log_lines and return_code == 255: + # NsJail probably failed to parse arguments so log output will still be in stdout + log_lines = output.splitlines() - log.info(f"nsjail return code: {returncode}") + self._parse_log(log_lines) + log.info(f"NsJail return code: {return_code}") - return EvalResult(args, returncode, output, files=attachments) + return EvalResult(args, return_code, output, files=attachments) diff --git a/snekbox/process.py b/snekbox/result.py similarity index 81% rename from snekbox/process.py rename to snekbox/result.py index 552b91ad..e11d0e47 100644 --- a/snekbox/process.py +++ b/snekbox/result.py @@ -1,4 +1,4 @@ -"""Utilities for process management.""" +"""Types for representing the result of an evaluation job.""" from collections.abc import Sequence from os import PathLike from subprocess import CompletedProcess @@ -6,6 +6,8 @@ from snekbox.snekio import FileAttachment +__all__ = ("EvalError", "EvalResult") + _T = TypeVar("_T") ArgType = ( str @@ -16,6 +18,10 @@ ) +class EvalError(RuntimeError): + """An error that occurred during evaluation.""" + + class EvalResult(CompletedProcess[_T]): """An evaluation job that has finished running.""" diff --git a/snekbox/snekio/__init__.py b/snekbox/snekio/__init__.py new file mode 100644 index 00000000..e7d2e8f3 --- /dev/null +++ b/snekbox/snekio/__init__.py @@ -0,0 +1,6 @@ +from . import filesystem +from .attachment import FileAttachment, safe_path +from .errors import IllegalPathError, ParsingError +from .memfs import MemFS + +__all__ = ("filesystem", "safe_path", "FileAttachment", "IllegalPathError", "MemFS", "ParsingError") diff --git a/snekbox/snekio.py b/snekbox/snekio/attachment.py similarity index 93% rename from snekbox/snekio.py rename to snekbox/snekio/attachment.py index 821f0578..73e26b8f 100644 --- a/snekbox/snekio.py +++ b/snekbox/snekio/attachment.py @@ -6,6 +6,10 @@ from functools import cached_property from pathlib import Path +from .errors import IllegalPathError, ParsingError + +__all__ = ("safe_path", "FileAttachment") + def safe_path(path: str) -> str: """ @@ -28,14 +32,6 @@ def safe_path(path: str) -> str: return path -class ParsingError(ValueError): - """Raised when an incoming content cannot be parsed.""" - - -class IllegalPathError(ParsingError): - """Raised when a request file has an illegal path.""" - - @dataclass(frozen=True) class FileAttachment: """A file attachment.""" diff --git a/snekbox/snekio/errors.py b/snekbox/snekio/errors.py new file mode 100644 index 00000000..3e710e4d --- /dev/null +++ b/snekbox/snekio/errors.py @@ -0,0 +1,6 @@ +class ParsingError(ValueError): + """Raised when an incoming content cannot be parsed.""" + + +class IllegalPathError(ParsingError): + """Raised when a request file has an illegal path.""" diff --git a/snekbox/filesystem.py b/snekbox/snekio/filesystem.py similarity index 100% rename from snekbox/filesystem.py rename to snekbox/snekio/filesystem.py diff --git a/snekbox/memfs.py b/snekbox/snekio/memfs.py similarity index 99% rename from snekbox/memfs.py rename to snekbox/snekio/memfs.py index 40b57c47..6d4a00b1 100644 --- a/snekbox/memfs.py +++ b/snekbox/snekio/memfs.py @@ -13,8 +13,8 @@ from typing import Type from uuid import uuid4 -from snekbox.filesystem import mount, unmount from snekbox.snekio import FileAttachment +from snekbox.snekio.filesystem import mount, unmount log = logging.getLogger(__name__) diff --git a/snekbox/utils/__init__.py b/snekbox/utils/__init__.py index 010fa65e..16aade71 100644 --- a/snekbox/utils/__init__.py +++ b/snekbox/utils/__init__.py @@ -1,3 +1,3 @@ -from . import cgroup, logging, swap, timed +from . import iter -__all__ = ("cgroup", "logging", "swap", "timed") +__all__ = ("iter",) diff --git a/snekbox/utils/iter.py b/snekbox/utils/iter.py new file mode 100644 index 00000000..841ce575 --- /dev/null +++ b/snekbox/utils/iter.py @@ -0,0 +1,16 @@ +from collections.abc import Generator, Iterable +from typing import TypeVar + +__all__ = ("iter_lstrip",) + +_T = TypeVar("_T") + + +def iter_lstrip(iterable: Iterable[_T]) -> Generator[_T, None, None]: + """Remove leading falsy objects from an iterable.""" + it = iter(iterable) + for item in it: + if item: + yield item + break + yield from it diff --git a/tests/api/__init__.py b/tests/api/__init__.py index 5f20faf1..c84080e8 100644 --- a/tests/api/__init__.py +++ b/tests/api/__init__.py @@ -4,7 +4,7 @@ from falcon import testing from snekbox.api import SnekAPI -from snekbox.process import EvalResult +from snekbox.result import EvalResult class SnekAPITestCase(testing.TestCase): diff --git a/tests/limits/__init__.py b/tests/limits/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_timed.py b/tests/limits/test_timed.py similarity index 95% rename from tests/test_timed.py rename to tests/limits/test_timed.py index e46bd374..8a1119bd 100644 --- a/tests/test_timed.py +++ b/tests/limits/test_timed.py @@ -2,7 +2,7 @@ import time from unittest import TestCase -from snekbox.utils.timed import time_limit +from snekbox.limits.timed import time_limit class TimedTests(TestCase): diff --git a/tests/snekio/__init__.py b/tests/snekio/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_filesystem.py b/tests/snekio/test_filesystem.py similarity index 98% rename from tests/test_filesystem.py rename to tests/snekio/test_filesystem.py index e4d081ff..9f6b76d6 100644 --- a/tests/test_filesystem.py +++ b/tests/snekio/test_filesystem.py @@ -5,7 +5,7 @@ from unittest import TestCase from uuid import uuid4 -from snekbox.filesystem import UnmountFlags, mount, unmount +from snekbox.snekio.filesystem import UnmountFlags, mount, unmount class LibMountTests(TestCase): diff --git a/tests/test_memfs.py b/tests/snekio/test_memfs.py similarity index 93% rename from tests/test_memfs.py rename to tests/snekio/test_memfs.py index 05557267..cbe2fe4d 100644 --- a/tests/test_memfs.py +++ b/tests/snekio/test_memfs.py @@ -4,7 +4,7 @@ from unittest import TestCase, mock from uuid import uuid4 -from snekbox.memfs import MemFS +from snekbox.snekio import MemFS UUID_TEST = uuid4() @@ -12,10 +12,10 @@ class MemFSTests(TestCase): def setUp(self): super().setUp() - self.logger = logging.getLogger("snekbox.memfs") + self.logger = logging.getLogger("snekbox.snekio.memfs") self.logger.setLevel(logging.WARNING) - @mock.patch("snekbox.memfs.uuid4", lambda: UUID_TEST) + @mock.patch("snekbox.snekio.memfs.uuid4", lambda: UUID_TEST) def test_assignment_thread_safe(self): """Test concurrent mounting works in multi-thread environments.""" # Concurrently create MemFS in threads, check only 1 can be created diff --git a/tests/test_snekio.py b/tests/snekio/test_snekio.py similarity index 100% rename from tests/test_snekio.py rename to tests/snekio/test_snekio.py diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py index 5d927c26..d54d31b6 100644 --- a/tests/test_nsjail.py +++ b/tests/test_nsjail.py @@ -9,9 +9,9 @@ from pathlib import Path from textwrap import dedent -from snekbox.filesystem import Size from snekbox.nsjail import NsJail from snekbox.snekio import FileAttachment +from snekbox.snekio.filesystem import Size class NsJailTests(unittest.TestCase): @@ -576,7 +576,7 @@ class NsJailCgroupTests(unittest.TestCase): # This should still pass for v2, even if this test isn't relevant. def test_cgroupv1(self): logging.getLogger("snekbox.nsjail").setLevel(logging.ERROR) - logging.getLogger("snekbox.utils.swap").setLevel(logging.ERROR) + logging.getLogger("snekbox.limits.swap").setLevel(logging.ERROR) config_base = dedent( """