From 8b84b1777a47dd0ebbeb40e86e50ff57e97642ac Mon Sep 17 00:00:00 2001 From: William Di Luigi Date: Mon, 18 Jan 2016 15:29:09 +0100 Subject: [PATCH 01/22] Increase default num boxes --- config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.h b/config.h index 8a5f6d9..ed47be4 100644 --- a/config.h +++ b/config.h @@ -7,7 +7,7 @@ /* Range of UIDs and GIDs reserved for use by the sandboxes. */ #define CONFIG_ISOLATE_FIRST_UID 60000 #define CONFIG_ISOLATE_FIRST_GID 60000 -#define CONFIG_ISOLATE_NUM_BOXES 100 +#define CONFIG_ISOLATE_NUM_BOXES 1000 /* Root of the cgroup hierarchy. */ #define CONFIG_ISOLATE_CGROUP_ROOT "/sys/fs/cgroup" From a3a2c4df2f44993c4785bdf16a1e536c7af308f8 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Fri, 22 Jan 2016 19:54:14 +0100 Subject: [PATCH 02/22] var_len of default environment rules was not initialized This made it impossible to override the default rules (which one is unlikely to do, though :)). --- isolate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isolate.c b/isolate.c index d49fe3f..b73e75e 100644 --- a/isolate.c +++ b/isolate.c @@ -1,7 +1,7 @@ /* * A Process Isolator based on Linux Containers * - * (c) 2012-2015 Martin Mares + * (c) 2012-2016 Martin Mares * (c) 2012-2014 Bernard Blackham */ @@ -330,7 +330,7 @@ static struct env_rule *first_env_rule; static struct env_rule **last_env_rule = &first_env_rule; static struct env_rule default_env_rules[] = { - { "LIBC_FATAL_STDERR_", "1" } + { .var = "LIBC_FATAL_STDERR_", .val = "1", .var_len = 18 }, }; static int From ab6ec57fa14214f7b3b38605b19a9821a9293fb2 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sat, 23 Jan 2016 09:56:08 +0100 Subject: [PATCH 03/22] Implemented "--silent" mode Also documented behavior wrt. std{in,out,err}. 
--- isolate.1.txt | 15 ++++++++++++--- isolate.c | 24 ++++++++++++++++++------ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/isolate.1.txt b/isolate.1.txt index 6ed7b42..5066b72 100644 --- a/isolate.1.txt +++ b/isolate.1.txt @@ -88,15 +88,19 @@ OPTIONS *-i, --stdin=*'file':: Redirect standard input from 'file'. The 'file' has to be accessible - inside the sandbox. + inside the sandbox. Otherwise, standard input is inherited from the + parent process. *-o, --stdout=*'file':: Redirect standard output to 'file'. The 'file' has to be accessible - inside the sandbox. + inside the sandbox. Otherwise, standard output is inherited from the + parent process and the sandbox manager does not write anything to it. *-r, --stderr=*'file':: Redirect standard error output to 'file'. The 'file' has to be accessible - inside the sandbox. + inside the sandbox. Otherwise, standard error output is inherited from + the parent process and both the sandboxed process and the sandbox manager + can write their status messages to it. *-c, --chdir=*'dir':: Change directory to 'dir' before executing the program. This path must be @@ -119,6 +123,11 @@ OPTIONS Tell the sandbox manager to be verbose and report on what is going on. Using *-v* multiple times produces even more jabber. +*-s, --silent*:: + Tell the sandbox manager to keep silence. No status messages are printed + to stderr except for fatal errors of the sandbox itself. The combination of + *--verbose* and *--silent* has an undefined effect. + ENVIRONMENT RULES ----------------- UNIX processes normally inherit all environment variables from their parent. The diff --git a/isolate.c b/isolate.c index b73e75e..62abaa6 100644 --- a/isolate.c +++ b/isolate.c @@ -53,6 +53,7 @@ static int wall_timeout; static int extra_timeout; static int pass_environ; static int verbose; +static int silent; static int fsize_limit; static int memory_limit; static int stack_limit; @@ -227,8 +228,11 @@ err(char *msg, ...) 
char buf[1024]; vsnprintf(buf, sizeof(buf), msg, args); meta_printf("message:%s\n", buf); - fputs(buf, stderr); - fputc('\n', stderr); + if (!silent) + { + fputs(buf, stderr); + fputc('\n', stderr); + } box_exit(1); } @@ -1166,9 +1170,12 @@ box_keeper(void) if (wall_timeout && wall_ms > wall_timeout) err("TO: Time limit exceeded (wall clock)"); flush_line(); - fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall)\n", - total_ms/1000, total_ms%1000, - wall_ms/1000, wall_ms%1000); + if (!silent) + { + fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall)\n", + total_ms/1000, total_ms%1000, + wall_ms/1000, wall_ms%1000); + } box_exit(0); } else if (WIFSIGNALED(stat)) @@ -1430,6 +1437,7 @@ Options:\n\ -M, --meta=\tOutput process information to (name:value)\n\ -q, --quota=,\tSet disk quota to blocks and inodes\n\ --share-net\t\tShare network namespace with the parent process\n\ +-s, --silent\t\tDo not print status messages except for fatal errors\n\ -k, --stack=\tLimit stack size to KB (default: 0=unlimited)\n\ -r, --stderr=\tRedirect stderr to \n\ -i, --stdin=\tRedirect stdin from \n\ @@ -1459,7 +1467,7 @@ enum opt_code { OPT_SHARE_NET, }; -static const char short_opts[] = "b:c:d:eE:i:k:m:M:o:p::q:r:t:vw:x:"; +static const char short_opts[] = "b:c:d:eE:i:k:m:M:o:p::q:r:st:vw:x:"; static const struct option long_opts[] = { { "box-id", 1, NULL, 'b' }, @@ -1480,6 +1488,7 @@ static const struct option long_opts[] = { { "quota", 1, NULL, 'q' }, { "run", 0, NULL, OPT_RUN }, { "share-net", 0, NULL, OPT_SHARE_NET }, + { "silent", 0, NULL, 's' }, { "stack", 1, NULL, 'k' }, { "stderr", 1, NULL, 'r' }, { "stdin", 1, NULL, 'i' }, @@ -1557,6 +1566,9 @@ main(int argc, char **argv) case 'r': redir_stderr = optarg; break; + case 's': + silent++; + break; case 't': timeout = 1000*atof(optarg); break; From c228c566a7301804f42ada5a210c31ce2135005b Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sat, 23 Jan 2016 11:19:51 +0100 Subject: [PATCH 04/22] Clean up signal handling It 
turned out that Isolate's signal handling is prone to race conditions, especially if the machine is under heavy load. Among other problems: o If multiple SIGINT-like signals were received in quick succession, the "UGH" message appeared. o SIGPIPE could have been caught when writing the meta-file to a pipe. This could trigger the same problem as above. o There was a short time window between starting the child process and setting up signal handlers, where an interrupt signal could have killed the master process and left the child running on its own. o If the master process received a SIGSEGV-like signal, it was reported as an error of the sandboxed process instead of a proper internal error. I switched to handling different signals differently: o Interrupt signals are handled synchronously like we already did with SIGALRM. o Signals like SIGSEGV are reported as internal errors after killing the child process. o SIGPIPE, SIGUSR1, and SIGUSR2 are ignored. Also, we set up the signal handlers right before calling clone() and reset them inside the child process, so we should not catch a signal unprepared. --- isolate.c | 113 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 20 deletions(-) diff --git a/isolate.c b/isolate.c index 62abaa6..e1e6878 100644 --- a/isolate.c +++ b/isolate.c @@ -83,7 +83,7 @@ static int cleanup_ownership; static struct timeval start_time; static int ticks_per_sec; static int total_ms, wall_ms; -static volatile sig_atomic_t timer_tick; +static volatile sig_atomic_t timer_tick, interrupt; static int error_pipes[2]; static int write_errors_to_fd; @@ -197,6 +197,13 @@ die(char *msg, ...) char buf[1024]; int n = vsnprintf(buf, sizeof(buf), msg, args); + // If the child process is still running, show no mercy. + if (box_pid > 0) + { + kill(-box_pid, SIGKILL); + kill(box_pid, SIGKILL); + } + if (write_errors_to_fd) { // We are inside the box, have to use error pipe for error reporting. 
@@ -990,7 +997,34 @@ set_quota(void) free(dev); } -/*** The keeper process ***/ +/*** Signal handling in keeper process ***/ + +/* + * Signal handling is tricky. We must set up signal handlers before + * we start the child process (and reset them in the child process). + * Otherwise, there is a short time window where a SIGINT can kill + * us and leave the child process running. + */ + +struct signal_rule { + int signum; + enum { SIGNAL_IGNORE, SIGNAL_INTERRUPT, SIGNAL_FATAL } action; +}; + +static const struct signal_rule signal_rules[] = { + { SIGHUP, SIGNAL_INTERRUPT }, + { SIGINT, SIGNAL_INTERRUPT }, + { SIGQUIT, SIGNAL_INTERRUPT }, + { SIGILL, SIGNAL_FATAL }, + { SIGABRT, SIGNAL_FATAL }, + { SIGFPE, SIGNAL_FATAL }, + { SIGSEGV, SIGNAL_FATAL }, + { SIGPIPE, SIGNAL_IGNORE }, + { SIGTERM, SIGNAL_INTERRUPT }, + { SIGUSR1, SIGNAL_IGNORE }, + { SIGUSR2, SIGNAL_IGNORE }, + { SIGBUS, SIGNAL_FATAL }, +}; static void signal_alarm(int unused UNUSED) @@ -1003,11 +1037,55 @@ signal_alarm(int unused UNUSED) static void signal_int(int signum) { - /* Interrupts are fatal, so no synchronization requirements. */ - meta_printf("exitsig:%d\n", signum); - err("SG: Interrupted"); + /* Interrupts (e.g., SIGINT) are synchronous, too. */ + interrupt = signum; } +static void +signal_fatal(int signum) +{ + /* If we receive SIGSEGV or a similar signal, we try to die gracefully. 
*/ + die("Sandbox keeper received fatal signal %d", signum); +} + +static void +setup_signals(void) +{ + struct sigaction sa_int, sa_fatal; + bzero(&sa_int, sizeof(sa_int)); + sa_int.sa_handler = signal_int; + bzero(&sa_fatal, sizeof(sa_fatal)); + sa_fatal.sa_handler = signal_fatal; + + for (int i=0; i < ARRAY_SIZE(signal_rules); i++) + { + const struct signal_rule *sr = &signal_rules[i]; + switch (sr->action) + { + case SIGNAL_IGNORE: + signal(sr->signum, SIG_IGN); + break; + case SIGNAL_INTERRUPT: + sigaction(sr->signum, &sa_int, NULL); + break; + case SIGNAL_FATAL: + sigaction(sr->signum, &sa_fatal, NULL); + break; + default: + die("Invalid signal rule"); + } + } +} + +static void +reset_signals(void) +{ + for (int i=0; i < ARRAY_SIZE(signal_rules); i++) + signal(signal_rules[i].signum, SIG_DFL); +} + +/*** The keeper process ***/ + #define PROC_BUF_SIZE 4096 static void read_proc_file(char *buf, char *name, int *fdp) @@ -1100,21 +1178,6 @@ box_keeper(void) read_errors_from_fd = error_pipes[0]; close(error_pipes[1]); - struct sigaction sa; - bzero(&sa, sizeof(sa)); - sa.sa_handler = signal_int; - sigaction(SIGHUP, &sa, NULL); - sigaction(SIGINT, &sa, NULL); - sigaction(SIGQUIT, &sa, NULL); - sigaction(SIGILL, &sa, NULL); - sigaction(SIGABRT, &sa, NULL); - sigaction(SIGFPE, &sa, NULL); - sigaction(SIGSEGV, &sa, NULL); - sigaction(SIGPIPE, &sa, NULL); - sigaction(SIGTERM, &sa, NULL); - sigaction(SIGUSR1, &sa, NULL); - sigaction(SIGUSR2, &sa, NULL); - gettimeofday(&start_time, NULL); ticks_per_sec = sysconf(_SC_CLK_TCK); if (ticks_per_sec <= 0) @@ -1122,6 +1185,8 @@ box_keeper(void) if (timeout || wall_timeout) { + struct sigaction sa; + bzero(&sa, sizeof(sa)); sa.sa_handler = signal_alarm; sigaction(SIGALRM, &sa, NULL); alarm(1); @@ -1132,6 +1197,11 @@ box_keeper(void) struct rusage rus; int stat; pid_t p; + if (interrupt) + { + meta_printf("exitsig:%d\n", interrupt); + err("SG: Interrupted"); + } if (timer_tick) { check_timeout(); @@ -1297,6 +1367,7 @@ 
box_inside(void *arg) close(error_pipes[0]); meta_close(); + reset_signals(); cg_enter(); setup_root(); setup_credentials(); @@ -1370,6 +1441,8 @@ run(char **argv) fcntl(error_pipes[i], F_SETFL, fcntl(error_pipes[i], F_GETFL) | O_NONBLOCK) < 0) die("fcntl on pipe: %m"); + setup_signals(); + box_pid = clone( box_inside, // Function to execute as the body of the new process argv, // Pass our stack From 6d7aa39f9edb6ac6ca17e628c2f01210792963a2 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 16:38:00 +0100 Subject: [PATCH 05/22] Call watchdog timer every 100 ms This significantly decreases the latency of killing the process after it exceeds its time limit. Based on a patch by Alexander Crustev. --- isolate.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/isolate.c b/isolate.c index e1e6878..2a8de1a 100644 --- a/isolate.c +++ b/isolate.c @@ -48,6 +48,8 @@ #define UNUSED __attribute__((unused)) #define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) +#define TIMER_INTERVAL_US 100000 + static int timeout; /* milliseconds */ static int wall_timeout; static int extra_timeout; @@ -1031,7 +1033,7 @@ signal_alarm(int unused UNUSED) { /* Time limit checks are synchronous, so we only schedule them there. */ timer_tick = 1; - alarm(1); + msg("[timer]"); } static void @@ -1189,7 +1191,11 @@ box_keeper(void) bzero(&sa, sizeof(sa)); sa.sa_handler = signal_alarm; sigaction(SIGALRM, &sa, NULL); - alarm(1); + struct itimerval timer = { + .it_interval = { .tv_usec = TIMER_INTERVAL_US }, + .it_value = { .tv_usec = TIMER_INTERVAL_US }, + }; + setitimer(ITIMER_REAL, &timer, NULL); } for(;;) From 8af30e70191972a105e457920af9e6cca99654cb Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 16:53:25 +0100 Subject: [PATCH 06/22] Changed default box location to /var/local/lib/isolate/ The previous location in /tmp/box/ was prone to symlink attacks. 
--- Makefile | 3 +++ config.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cd19997..eaadb24 100644 --- a/Makefile +++ b/Makefile @@ -13,11 +13,13 @@ BUILD_COMMIT:=$(shell if git rev-parse >/dev/null 2>/dev/null ; then git describ CFLAGS += -DVERSION='"$(VERSION)"' -DYEAR='"$(YEAR)"' -DBUILD_DATE='"$(BUILD_DATE)"' -DBUILD_COMMIT='"$(BUILD_COMMIT)"' PREFIX = $(DESTDIR)/usr/local +VARPREFIX = $(DESTDIR)/var/local BINDIR = $(PREFIX)/bin DATAROOTDIR = $(PREFIX)/share DATADIR = $(DATAROOTDIR) MANDIR = $(DATADIR)/man MAN1DIR = $(MANDIR)/man1 +BOXDIR = $(VARPREFIX)/lib/isolate isolate: isolate.c config.h $(CC) $(CFLAGS) -o $@ $^ @@ -37,6 +39,7 @@ clean: install: isolate install -D $< $(BINDIR)/$< chmod u+s $(BINDIR)/$< + install -d $(BOXDIR) install-doc: isolate.1 install -D $< $(MAN1DIR)/$< diff --git a/config.h b/config.h index ed47be4..bf38a22 100644 --- a/config.h +++ b/config.h @@ -2,7 +2,7 @@ #define __ISOLATE_CONFIG_H__ /* A directory under which all sandboxes are created. */ -#define CONFIG_ISOLATE_BOX_DIR "/tmp/box" +#define CONFIG_ISOLATE_BOX_DIR "/var/local/lib/isolate" /* Range of UIDs and GIDs reserved for use by the sandboxes. */ #define CONFIG_ISOLATE_FIRST_UID 60000 From 469333345828acc5936f42c77d42c7839fd1f505 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 16:55:46 +0100 Subject: [PATCH 07/22] Makefile: Do not use "asciidoc -D" when building man pages Apparently, the switch is ignored for man pages and newer versions of asciidoc warn on its use. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index eaadb24..1d0cae2 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ isolate: isolate.c config.h $(CC) $(CFLAGS) -o $@ $^ isolate.1: isolate.1.txt - a2x -f manpage -D . $< + a2x -f manpage $< # The dependency on isolate.1 is there to serialize both calls of asciidoc, # which does not name temporary files safely. 
From ce9dad066c99017b44cf37abcfffb085727cbe29 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 17:13:04 +0100 Subject: [PATCH 08/22] Fixed race condition in make_dir() Inspired by pull request #7 by @bblackham and patch by @austrin. --- isolate.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/isolate.c b/isolate.c index 2a8de1a..ddc45eb 100644 --- a/isolate.c +++ b/isolate.c @@ -586,13 +586,21 @@ static void make_dir(char *path) if (sep) *sep = 0; - if (!dir_exists(path) && mkdir(path, 0777) < 0) - die("Cannot create directory %s: %m\n", path); + if (mkdir(path, 0777) < 0 && errno != EEXIST) + die("Cannot create directory %s: %m", path); if (!sep) - return; + break; *sep++ = '/'; } + + // mkdir() above may have returned EEXIST even if the path was not + // a directory. Ensure that it is. + struct stat st; + if (stat(path, &st) < 0) + die("Cannot stat %s: %m", path); + if (!S_ISDIR(st.st_mode)) + die("Cannot create %s: already exists, but not a directory", path); } static void apply_dir_rules(void) From 6b4601788f125da30118f5fde64ecc68dabcbcf2 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 17:25:16 +0100 Subject: [PATCH 09/22] Add a LICENSE file. Closes #4. --- LICENSE | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6022fc3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,12 @@ +Isolate is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +If you have less than 10 copies of the GPL on your system :-), +you can find it at http://www.gnu.org/licenses/. From c8b0eef7aca9903dc2fce383bce9f12152580bd4 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 18:14:23 +0100 Subject: [PATCH 10/22] Source split to several files --- Makefile | 17 +- cg.c | 289 ++++++++++++++++++ isolate.c | 852 ++---------------------------------------------------- isolate.h | 67 +++++ rules.c | 393 +++++++++++++++++++++++++ util.c | 152 ++++++++++ 6 files changed, 937 insertions(+), 833 deletions(-) create mode 100644 cg.c create mode 100644 isolate.h create mode 100644 rules.c create mode 100644 util.c diff --git a/Makefile b/Makefile index 1d0cae2..f56c44d 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,15 @@ # Makefile for Isolate -# (c) 2015 Martin Mares +# (c) 2015--2016 Martin Mares all: isolate isolate.1 isolate.1.html CC=gcc -CFLAGS=-std=gnu99 -Wall -Wextra -Wno-parentheses -Wno-unused-result -Wno-missing-field-initializers +CFLAGS=-std=gnu99 -Wall -Wextra -Wno-parentheses -Wno-unused-result -Wno-missing-field-initializers -D_GNU_SOURCE VERSION=1.1 -YEAR=2015 +YEAR=2016 BUILD_DATE:=$(shell date '+%Y-%m-%d') BUILD_COMMIT:=$(shell if git rev-parse >/dev/null 2>/dev/null ; then git describe --always ; else echo '' ; fi) -CFLAGS += -DVERSION='"$(VERSION)"' -DYEAR='"$(YEAR)"' -DBUILD_DATE='"$(BUILD_DATE)"' -DBUILD_COMMIT='"$(BUILD_COMMIT)"' PREFIX = $(DESTDIR)/usr/local VARPREFIX = $(DESTDIR)/var/local @@ -21,8 +20,13 @@ MANDIR = $(DATADIR)/man MAN1DIR = $(MANDIR)/man1 BOXDIR = $(VARPREFIX)/lib/isolate -isolate: isolate.c config.h - $(CC) $(CFLAGS) -o $@ $^ +isolate: isolate.o util.o rules.o cg.o + $(CC) $(LDFLAGS) -o $@ $^ + +%.o: %.c isolate.h config.h + $(CC) $(CFLAGS) -c -o $@ $< + +isolate.o: CFLAGS += -DVERSION='"$(VERSION)"' -DYEAR='"$(YEAR)"' -DBUILD_DATE='"$(BUILD_DATE)"' -DBUILD_COMMIT='"$(BUILD_COMMIT)"' isolate.1: isolate.1.txt a2x -f manpage $< @@ -33,6 +37,7 @@ isolate.1.html: isolate.1.txt 
isolate.1 a2x -f xhtml -D . $< clean: + rm -f *.o rm -f isolate isolate.1 isolate.1.html rm -f docbook-xsl.css diff --git a/cg.c b/cg.c new file mode 100644 index 0000000..84f9511 --- /dev/null +++ b/cg.c @@ -0,0 +1,289 @@ +/* + * Process Isolator -- Control Groups + * + * (c) 2012-2016 Martin Mares + * (c) 2012-2014 Bernard Blackham + */ + +#include "isolate.h" + +#include +#include +#include +#include +#include +#include + +struct cg_controller_desc { + const char *name; + int optional; +}; + +typedef enum { + CG_MEMORY = 0, + CG_CPUACCT, + CG_CPUSET, + CG_NUM_CONTROLLERS, +} cg_controller; + +static const struct cg_controller_desc cg_controllers[CG_NUM_CONTROLLERS+1] = { + [CG_MEMORY] = { "memory", 0 }, + [CG_CPUACCT] = { "cpuacct", 0 }, + [CG_CPUSET] = { "cpuset", 1 }, + [CG_NUM_CONTROLLERS] = { NULL, 0 }, +}; + +#define FOREACH_CG_CONTROLLER(_controller) \ + for (cg_controller (_controller) = 0; \ + (_controller) < CG_NUM_CONTROLLERS; (_controller)++) + +static const char *cg_controller_name(cg_controller c) +{ + return cg_controllers[c].name; +} + +static int cg_controller_optional(cg_controller c) +{ + return cg_controllers[c].optional; +} + +static char cg_name[256]; + +#define CG_BUFSIZE 1024 + +static void +cg_makepath(char *buf, size_t len, cg_controller c, const char *attr) +{ + const char *cg_root = CONFIG_ISOLATE_CGROUP_ROOT; + snprintf(buf, len, "%s/%s/%s/%s", cg_root, cg_controller_name(c), cg_name, attr); +} + +static int +cg_read(cg_controller controller, const char *attr, char *buf) +{ + int result = 0; + int maybe = 0; + if (attr[0] == '?') + { + attr++; + maybe = 1; + } + + char path[256]; + cg_makepath(path, sizeof(path), controller, attr); + + int fd = open(path, O_RDONLY); + if (fd < 0) + { + if (maybe) + goto fail; + die("Cannot read %s: %m", path); + } + + int n = read(fd, buf, CG_BUFSIZE); + if (n < 0) + { + if (maybe) + goto fail_close; + die("Cannot read %s: %m", path); + } + if (n >= CG_BUFSIZE - 1) + die("Attribute %s too long", 
path); + if (n > 0 && buf[n-1] == '\n') + n--; + buf[n] = 0; + + if (verbose > 1) + msg("CG: Read %s = %s\n", attr, buf); + + result = 1; +fail_close: + close(fd); +fail: + return result; +} + +static void __attribute__((format(printf,3,4))) +cg_write(cg_controller controller, const char *attr, const char *fmt, ...) +{ + int maybe = 0; + if (attr[0] == '?') + { + attr++; + maybe = 1; + } + + va_list args; + va_start(args, fmt); + + char buf[CG_BUFSIZE]; + int n = vsnprintf(buf, sizeof(buf), fmt, args); + if (n >= CG_BUFSIZE) + die("cg_write: Value for attribute %s is too long", attr); + + if (verbose > 1) + msg("CG: Write %s = %s", attr, buf); + + char path[256]; + cg_makepath(path, sizeof(path), controller, attr); + + int fd = open(path, O_WRONLY | O_TRUNC); + if (fd < 0) + { + if (maybe) + goto fail; + else + die("Cannot write %s: %m", path); + } + + int written = write(fd, buf, n); + if (written < 0) + { + if (maybe) + goto fail_close; + else + die("Cannot set %s to %s: %m", path, buf); + } + if (written != n) + die("Short write to %s (%d out of %d bytes)", path, written, n); + +fail_close: + close(fd); +fail: + va_end(args); +} + +void +cg_init(void) +{ + if (!cg_enable) + return; + + char *cg_root = CONFIG_ISOLATE_CGROUP_ROOT; + if (!dir_exists(cg_root)) + die("Control group filesystem at %s not mounted", cg_root); + + snprintf(cg_name, sizeof(cg_name), "box-%d", box_id); + msg("Using control group %s\n", cg_name); +} + +void +cg_prepare(void) +{ + if (!cg_enable) + return; + + struct stat st; + char buf[CG_BUFSIZE]; + char path[256]; + + FOREACH_CG_CONTROLLER(controller) + { + cg_makepath(path, sizeof(path), controller, ""); + if (stat(path, &st) >= 0 || errno != ENOENT) + { + msg("Control group %s already exists, trying to empty it.\n", path); + if (rmdir(path) < 0) + die("Failed to reset control group %s: %m", path); + } + + if (mkdir(path, 0777) < 0 && !cg_controller_optional(controller)) + die("Failed to create control group %s: %m", path); + } + + // If 
cpuset module is enabled, copy allowed cpus and memory nodes from parent group + if (cg_read(CG_CPUSET, "?cpuset.cpus", buf)) + cg_write(CG_CPUSET, "cpuset.cpus", "%s", buf); + if (cg_read(CG_CPUSET, "?cpuset.mems", buf)) + cg_write(CG_CPUSET, "cpuset.mems", "%s", buf); +} + +void +cg_enter(void) +{ + if (!cg_enable) + return; + + msg("Entering control group %s\n", cg_name); + + FOREACH_CG_CONTROLLER(controller) + { + if (cg_controller_optional(controller)) + cg_write(controller, "?tasks", "%d\n", (int) getpid()); + else + cg_write(controller, "tasks", "%d\n", (int) getpid()); + } + + if (cg_memory_limit) + { + cg_write(CG_MEMORY, "memory.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10); + cg_write(CG_MEMORY, "?memory.memsw.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10); + } + + if (cg_timing) + cg_write(CG_CPUACCT, "cpuacct.usage", "0\n"); +} + +int +cg_get_run_time_ms(void) +{ + if (!cg_enable) + return 0; + + char buf[CG_BUFSIZE]; + cg_read(CG_CPUACCT, "cpuacct.usage", buf); + unsigned long long ns = atoll(buf); + return ns / 1000000; +} + +void +cg_stats(void) +{ + if (!cg_enable) + return; + + char buf[CG_BUFSIZE]; + + // Memory usage statistics + unsigned long long mem=0, memsw=0; + if (cg_read(CG_MEMORY, "?memory.max_usage_in_bytes", buf)) + mem = atoll(buf); + if (cg_read(CG_MEMORY, "?memory.memsw.max_usage_in_bytes", buf)) + { + memsw = atoll(buf); + if (memsw > mem) + mem = memsw; + } + if (mem) + meta_printf("cg-mem:%lld\n", mem >> 10); +} + +void +cg_remove(void) +{ + char buf[CG_BUFSIZE]; + + if (!cg_enable) + return; + + FOREACH_CG_CONTROLLER(controller) + { + if (cg_controller_optional(controller)) + { + if (!cg_read(controller, "?tasks", buf)) + continue; + } + else + cg_read(controller, "tasks", buf); + + if (buf[0]) + die("Some tasks left in controller %s of cgroup %s, failed to remove it", + cg_controller_name(controller), cg_name); + + char path[256]; + cg_makepath(path, sizeof(path), controller, ""); + + if 
(rmdir(path) < 0) + die("Cannot remove control group %s: %m", path); + } +} diff --git a/isolate.c b/isolate.c index ddc45eb..1b60fb8 100644 --- a/isolate.c +++ b/isolate.c @@ -5,34 +5,25 @@ * (c) 2012-2014 Bernard Blackham */ -#define _GNU_SOURCE - -#include "config.h" +#include "isolate.h" #include -#include #include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include +#include #include -#include +#include #include -#include +#include +#include +#include /* May not be defined in older glibc headers */ #ifndef MS_PRIVATE @@ -44,38 +35,34 @@ #define MS_REC (1 << 14) #endif -#define NONRET __attribute__((noreturn)) -#define UNUSED __attribute__((unused)) -#define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) - #define TIMER_INTERVAL_US 100000 static int timeout; /* milliseconds */ static int wall_timeout; static int extra_timeout; -static int pass_environ; -static int verbose; +int pass_environ; +int verbose; static int silent; static int fsize_limit; static int memory_limit; static int stack_limit; -static int block_quota; -static int inode_quota; +int block_quota; +int inode_quota; static int max_processes = 1; static char *redir_stdin, *redir_stdout, *redir_stderr; static char *set_cwd; static int share_net; -static int cg_enable; -static int cg_memory_limit; -static int cg_timing; +int cg_enable; +int cg_memory_limit; +int cg_timing; -static int box_id; +int box_id; static char box_dir[1024]; static pid_t box_pid; -static uid_t box_uid; -static gid_t box_gid; +uid_t box_uid; +gid_t box_gid; static uid_t orig_uid; static gid_t orig_gid; @@ -91,52 +78,10 @@ static int error_pipes[2]; static int write_errors_to_fd; static int read_errors_from_fd; -static void die(char *msg, ...) 
NONRET; -static void cg_stats(void); static int get_wall_time_ms(void); static int get_run_time_ms(struct rusage *rus); -static void chowntree(char *path, uid_t uid, gid_t gid); - -/*** Meta-files ***/ - -static FILE *metafile; - -static void -meta_open(const char *name) -{ - if (!strcmp(name, "-")) - { - metafile = stdout; - return; - } - if (setfsuid(getuid()) < 0) - die("Failed to switch FS UID: %m"); - metafile = fopen(name, "w"); - if (setfsuid(geteuid()) < 0) - die("Failed to switch FS UID back: %m"); - if (!metafile) - die("Failed to open metafile '%s'",name); -} - -static void -meta_close(void) -{ - if (metafile && metafile != stdout) - fclose(metafile); -} - -static void __attribute__((format(printf,1,2))) -meta_printf(const char *fmt, ...) -{ - if (!metafile) - return; - - va_list args; - va_start(args, fmt); - vfprintf(metafile, fmt, args); - va_end(args); -} +/*** Messages and exits ***/ static void final_stats(struct rusage *rus) @@ -153,8 +98,6 @@ final_stats(struct rusage *rus) cg_stats(); } -/*** Messages and exits ***/ - static void NONRET box_exit(int rc) { @@ -191,7 +134,7 @@ flush_line(void) } /* Report an error of the sandbox itself */ -static void NONRET __attribute__((format(printf,1,2))) +void NONRET __attribute__((format(printf,1,2))) die(char *msg, ...) { va_list args; @@ -223,7 +166,7 @@ die(char *msg, ...) } /* Report an error of the program inside the sandbox */ -static void NONRET __attribute__((format(printf,1,2))) +void NONRET __attribute__((format(printf,1,2))) err(char *msg, ...) { va_list args; @@ -246,7 +189,7 @@ err(char *msg, ...) } /* Write a message, but only if in verbose mode */ -static void __attribute__((format(printf,1,2))) +void __attribute__((format(printf,1,2))) msg(char *msg, ...) { va_list args; @@ -262,751 +205,6 @@ msg(char *msg, ...) 
va_end(args); } -/*** Utility functions ***/ - -static void * -xmalloc(size_t size) -{ - void *p = malloc(size); - if (!p) - die("Out of memory"); - return p; -} - -static char * -xstrdup(char *str) -{ - char *p = strdup(str); - if (!p) - die("Out of memory"); - return p; -} - -static int dir_exists(char *path) -{ - struct stat st; - return (stat(path, &st) >= 0 && S_ISDIR(st.st_mode)); -} - -static int rmtree_helper(const char *fpath, const struct stat *sb, - int typeflag UNUSED, struct FTW *ftwbuf UNUSED) -{ - if (S_ISDIR(sb->st_mode)) - { - if (rmdir(fpath) < 0) - die("Cannot rmdir %s: %m", fpath); - } - else - { - if (unlink(fpath) < 0) - die("Cannot unlink %s: %m", fpath); - } - return FTW_CONTINUE; -} - -static void -rmtree(char *path) -{ - nftw(path, rmtree_helper, 32, FTW_MOUNT | FTW_PHYS | FTW_DEPTH); -} - -static uid_t chown_uid; -static gid_t chown_gid; - -static int chowntree_helper(const char *fpath, const struct stat *sb UNUSED, - int typeflag UNUSED, struct FTW *ftwbuf UNUSED) -{ - if (lchown(fpath, chown_uid, chown_gid) < 0) - die("Cannot chown %s: %m", fpath); - else - return FTW_CONTINUE; -} - -static void -chowntree(char *path, uid_t uid, gid_t gid) -{ - chown_uid = uid; - chown_gid = gid; - nftw(path, chowntree_helper, 32, FTW_MOUNT | FTW_PHYS); -} - -/*** Environment rules ***/ - -struct env_rule { - char *var; // Variable to match - char *val; // ""=clear, NULL=inherit - int var_len; - struct env_rule *next; -}; - -static struct env_rule *first_env_rule; -static struct env_rule **last_env_rule = &first_env_rule; - -static struct env_rule default_env_rules[] = { - { .var = "LIBC_FATAL_STDERR_", .val = "1", .var_len = 18 }, -}; - -static int -set_env_action(char *a0) -{ - struct env_rule *r = xmalloc(sizeof(*r) + strlen(a0) + 1); - char *a = (char *)(r+1); - strcpy(a, a0); - - char *sep = strchr(a, '='); - if (sep == a) - return 0; - r->var = a; - if (sep) - { - *sep++ = 0; - r->val = sep; - } - else - r->val = NULL; - *last_env_rule = r; - 
last_env_rule = &r->next; - r->next = NULL; - return 1; -} - -static int -match_env_var(char *env_entry, struct env_rule *r) -{ - if (strncmp(env_entry, r->var, r->var_len)) - return 0; - return (env_entry[r->var_len] == '='); -} - -static void -apply_env_rule(char **env, int *env_sizep, struct env_rule *r) -{ - // First remove the variable if already set - int pos = 0; - while (pos < *env_sizep && !match_env_var(env[pos], r)) - pos++; - if (pos < *env_sizep) - { - (*env_sizep)--; - env[pos] = env[*env_sizep]; - env[*env_sizep] = NULL; - } - - // What is the new value? - char *new; - if (r->val) - { - if (!r->val[0]) - return; - new = xmalloc(r->var_len + 1 + strlen(r->val) + 1); - sprintf(new, "%s=%s", r->var, r->val); - } - else - { - pos = 0; - while (environ[pos] && !match_env_var(environ[pos], r)) - pos++; - if (!(new = environ[pos])) - return; - } - - // Add it at the end of the array - env[(*env_sizep)++] = new; - env[*env_sizep] = NULL; -} - -static char ** -setup_environment(void) -{ - // Link built-in rules with user rules - for (int i=ARRAY_SIZE(default_env_rules)-1; i >= 0; i--) - { - default_env_rules[i].next = first_env_rule; - first_env_rule = &default_env_rules[i]; - } - - // Scan the original environment - char **orig_env = environ; - int orig_size = 0; - while (orig_env[orig_size]) - orig_size++; - - // For each rule, reserve one more slot and calculate length - int num_rules = 0; - for (struct env_rule *r = first_env_rule; r; r=r->next) - { - num_rules++; - r->var_len = strlen(r->var); - } - - // Create a new environment - char **env = xmalloc((orig_size + num_rules + 1) * sizeof(char *)); - int size; - if (pass_environ) - { - memcpy(env, environ, orig_size * sizeof(char *)); - size = orig_size; - } - else - size = 0; - env[size] = NULL; - - // Apply the rules one by one - for (struct env_rule *r = first_env_rule; r; r=r->next) - apply_env_rule(env, &size, r); - - // Return the new env and pass some gossip - if (verbose > 1) - { - fprintf(stderr, 
"Passing environment:\n"); - for (int i=0; env[i]; i++) - fprintf(stderr, "\t%s\n", env[i]); - } - return env; -} - -/*** Directory rules ***/ - -struct dir_rule { - char *inside; // A relative path - char *outside; // This can be an absolute path or a relative path starting with "./" - unsigned int flags; // DIR_FLAG_xxx - struct dir_rule *next; -}; - -enum dir_rule_flags { - DIR_FLAG_RW = 1, - DIR_FLAG_NOEXEC = 2, - DIR_FLAG_FS = 4, - DIR_FLAG_MAYBE = 8, - DIR_FLAG_DEV = 16, -}; - -static const char * const dir_flag_names[] = { "rw", "noexec", "fs", "maybe", "dev" }; - -static struct dir_rule *first_dir_rule; -static struct dir_rule **last_dir_rule = &first_dir_rule; - -static int add_dir_rule(char *in, char *out, unsigned int flags) -{ - // Make sure that "in" is relative - while (in[0] == '/') - in++; - if (!*in) - return 0; - - // Check "out" - if (flags & DIR_FLAG_FS) - { - if (!out || out[0] == '/') - return 0; - } - else - { - if (out && out[0] != '/' && strncmp(out, "./", 2)) - return 0; - } - - // Override an existing rule - struct dir_rule *r; - for (r = first_dir_rule; r; r = r->next) - if (!strcmp(r->inside, in)) - break; - - // Add a new rule - if (!r) - { - r = xmalloc(sizeof(*r)); - r->inside = in; - *last_dir_rule = r; - last_dir_rule = &r->next; - r->next = NULL; - } - r->outside = out; - r->flags = flags; - return 1; -} - -static unsigned int parse_dir_option(char *opt) -{ - for (unsigned int i = 0; i < ARRAY_SIZE(dir_flag_names); i++) - if (!strcmp(opt, dir_flag_names[i])) - return 1U << i; - die("Unknown directory option %s", opt); -} - -static int set_dir_action(char *arg) -{ - arg = xstrdup(arg); - - char *colon = strchr(arg, ':'); - unsigned int flags = 0; - while (colon) - { - *colon++ = 0; - char *next = strchr(colon, ':'); - if (next) - *next = 0; - flags |= parse_dir_option(colon); - colon = next; - } - - char *eq = strchr(arg, '='); - if (eq) - { - *eq++ = 0; - return add_dir_rule(arg, (*eq ? 
eq : NULL), flags); - } - else - { - char *out = xmalloc(1 + strlen(arg) + 1); - sprintf(out, "/%s", arg); - return add_dir_rule(arg, out, flags); - } -} - -static void init_dir_rules(void) -{ - set_dir_action("box=./box:rw"); - set_dir_action("bin"); - set_dir_action("dev:dev"); - set_dir_action("lib"); - set_dir_action("lib64:maybe"); - set_dir_action("proc=proc:fs"); - set_dir_action("usr"); -} - -static void make_dir(char *path) -{ - char *sep = (path[0] == '/' ? path+1 : path); - - for (;;) - { - sep = strchr(sep, '/'); - if (sep) - *sep = 0; - - if (mkdir(path, 0777) < 0 && errno != EEXIST) - die("Cannot create directory %s: %m", path); - - if (!sep) - break; - *sep++ = '/'; - } - - // mkdir() above may have returned EEXIST even if the path was not - // a directory. Ensure that it is. - struct stat st; - if (stat(path, &st) < 0) - die("Cannot stat %s: %m", path); - if (!S_ISDIR(st.st_mode)) - die("Cannot create %s: already exists, but not a directory", path); -} - -static void apply_dir_rules(void) -{ - for (struct dir_rule *r = first_dir_rule; r; r=r->next) - { - char *in = r->inside; - char *out = r->outside; - if (!out) - { - msg("Not binding anything on %s\n", r->inside); - continue; - } - - if ((r->flags & DIR_FLAG_MAYBE) && !dir_exists(out)) - { - msg("Not binding %s on %s (does not exist)\n", out, r->inside); - continue; - } - - char root_in[1024]; - snprintf(root_in, sizeof(root_in), "root/%s", in); - make_dir(root_in); - - unsigned long mount_flags = 0; - if (!(r->flags & DIR_FLAG_RW)) - mount_flags |= MS_RDONLY; - if (r->flags & DIR_FLAG_NOEXEC) - mount_flags |= MS_NOEXEC; - if (!(r->flags & DIR_FLAG_DEV)) - mount_flags |= MS_NODEV; - - if (r->flags & DIR_FLAG_FS) - { - msg("Mounting %s on %s (flags %lx)\n", out, in, mount_flags); - if (mount("none", root_in, out, mount_flags, "") < 0) - die("Cannot mount %s on %s: %m", out, in); - } - else - { - mount_flags |= MS_BIND | MS_NOSUID; - msg("Binding %s on %s (flags %lx)\n", out, in, mount_flags); - // 
Most mount flags need remount to work - if (mount(out, root_in, "none", mount_flags, "") < 0 || - mount(out, root_in, "none", MS_REMOUNT | mount_flags, "") < 0) - die("Cannot mount %s on %s: %m", out, in); - } - } -} - -/*** Control groups ***/ - -struct cg_controller_desc { - const char *name; - int optional; -}; - -typedef enum { - CG_MEMORY = 0, - CG_CPUACCT, - CG_CPUSET, - CG_NUM_CONTROLLERS, -} cg_controller; - -static const struct cg_controller_desc cg_controllers[CG_NUM_CONTROLLERS+1] = { - [CG_MEMORY] = { "memory", 0 }, - [CG_CPUACCT] = { "cpuacct", 0 }, - [CG_CPUSET] = { "cpuset", 1 }, - [CG_NUM_CONTROLLERS] = { NULL, 0 }, -}; - -#define FOREACH_CG_CONTROLLER(_controller) \ - for (cg_controller (_controller) = 0; \ - (_controller) < CG_NUM_CONTROLLERS; (_controller)++) - -static const char *cg_controller_name(cg_controller c) -{ - return cg_controllers[c].name; -} - -static int cg_controller_optional(cg_controller c) -{ - return cg_controllers[c].optional; -} - -static char cg_name[256]; - -#define CG_BUFSIZE 1024 - -static void -cg_makepath(char *buf, size_t len, cg_controller c, const char *attr) -{ - const char *cg_root = CONFIG_ISOLATE_CGROUP_ROOT; - snprintf(buf, len, "%s/%s/%s/%s", cg_root, cg_controller_name(c), cg_name, attr); -} - -static int -cg_read(cg_controller controller, const char *attr, char *buf) -{ - int result = 0; - int maybe = 0; - if (attr[0] == '?') - { - attr++; - maybe = 1; - } - - char path[256]; - cg_makepath(path, sizeof(path), controller, attr); - - int fd = open(path, O_RDONLY); - if (fd < 0) - { - if (maybe) - goto fail; - die("Cannot read %s: %m", path); - } - - int n = read(fd, buf, CG_BUFSIZE); - if (n < 0) - { - if (maybe) - goto fail_close; - die("Cannot read %s: %m", path); - } - if (n >= CG_BUFSIZE - 1) - die("Attribute %s too long", path); - if (n > 0 && buf[n-1] == '\n') - n--; - buf[n] = 0; - - if (verbose > 1) - msg("CG: Read %s = %s\n", attr, buf); - - result = 1; -fail_close: - close(fd); -fail: - return result; 
-} - -static void __attribute__((format(printf,3,4))) -cg_write(cg_controller controller, const char *attr, const char *fmt, ...) -{ - int maybe = 0; - if (attr[0] == '?') - { - attr++; - maybe = 1; - } - - va_list args; - va_start(args, fmt); - - char buf[CG_BUFSIZE]; - int n = vsnprintf(buf, sizeof(buf), fmt, args); - if (n >= CG_BUFSIZE) - die("cg_write: Value for attribute %s is too long", attr); - - if (verbose > 1) - msg("CG: Write %s = %s", attr, buf); - - char path[256]; - cg_makepath(path, sizeof(path), controller, attr); - - int fd = open(path, O_WRONLY | O_TRUNC); - if (fd < 0) - { - if (maybe) - goto fail; - else - die("Cannot write %s: %m", path); - } - - int written = write(fd, buf, n); - if (written < 0) - { - if (maybe) - goto fail_close; - else - die("Cannot set %s to %s: %m", path, buf); - } - if (written != n) - die("Short write to %s (%d out of %d bytes)", path, written, n); - -fail_close: - close(fd); -fail: - va_end(args); -} - -static void -cg_init(void) -{ - if (!cg_enable) - return; - - char *cg_root = CONFIG_ISOLATE_CGROUP_ROOT; - if (!dir_exists(cg_root)) - die("Control group filesystem at %s not mounted", cg_root); - - snprintf(cg_name, sizeof(cg_name), "box-%d", box_id); - msg("Using control group %s\n", cg_name); -} - -static void -cg_prepare(void) -{ - if (!cg_enable) - return; - - struct stat st; - char buf[CG_BUFSIZE]; - char path[256]; - - FOREACH_CG_CONTROLLER(controller) - { - cg_makepath(path, sizeof(path), controller, ""); - if (stat(path, &st) >= 0 || errno != ENOENT) - { - msg("Control group %s already exists, trying to empty it.\n", path); - if (rmdir(path) < 0) - die("Failed to reset control group %s: %m", path); - } - - if (mkdir(path, 0777) < 0 && !cg_controller_optional(controller)) - die("Failed to create control group %s: %m", path); - } - - // If cpuset module is enabled, copy allowed cpus and memory nodes from parent group - if (cg_read(CG_CPUSET, "?cpuset.cpus", buf)) - cg_write(CG_CPUSET, "cpuset.cpus", "%s", buf); 
- if (cg_read(CG_CPUSET, "?cpuset.mems", buf)) - cg_write(CG_CPUSET, "cpuset.mems", "%s", buf); -} - -static void -cg_enter(void) -{ - if (!cg_enable) - return; - - msg("Entering control group %s\n", cg_name); - - FOREACH_CG_CONTROLLER(controller) - { - if (cg_controller_optional(controller)) - cg_write(controller, "?tasks", "%d\n", (int) getpid()); - else - cg_write(controller, "tasks", "%d\n", (int) getpid()); - } - - if (cg_memory_limit) - { - cg_write(CG_MEMORY, "memory.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10); - cg_write(CG_MEMORY, "?memory.memsw.limit_in_bytes", "%lld\n", (long long) cg_memory_limit << 10); - } - - if (cg_timing) - cg_write(CG_CPUACCT, "cpuacct.usage", "0\n"); -} - -static int -cg_get_run_time_ms(void) -{ - if (!cg_enable) - return 0; - - char buf[CG_BUFSIZE]; - cg_read(CG_CPUACCT, "cpuacct.usage", buf); - unsigned long long ns = atoll(buf); - return ns / 1000000; -} - -static void -cg_stats(void) -{ - if (!cg_enable) - return; - - char buf[CG_BUFSIZE]; - - // Memory usage statistics - unsigned long long mem=0, memsw=0; - if (cg_read(CG_MEMORY, "?memory.max_usage_in_bytes", buf)) - mem = atoll(buf); - if (cg_read(CG_MEMORY, "?memory.memsw.max_usage_in_bytes", buf)) - { - memsw = atoll(buf); - if (memsw > mem) - mem = memsw; - } - if (mem) - meta_printf("cg-mem:%lld\n", mem >> 10); -} - -static void -cg_remove(void) -{ - char buf[CG_BUFSIZE]; - - if (!cg_enable) - return; - - FOREACH_CG_CONTROLLER(controller) - { - if (cg_controller_optional(controller)) - { - if (!cg_read(controller, "?tasks", buf)) - continue; - } - else - cg_read(controller, "tasks", buf); - - if (buf[0]) - die("Some tasks left in controller %s of cgroup %s, failed to remove it", - cg_controller_name(controller), cg_name); - - char path[256]; - cg_makepath(path, sizeof(path), controller, ""); - - if (rmdir(path) < 0) - die("Cannot remove control group %s: %m", path); - } -} - -/*** Disk quotas ***/ - -static int -path_begins_with(char *path, char *with) 
-{ - while (*with) - if (*path++ != *with++) - return 0; - return (!*with || *with == '/'); -} - -static char * -find_device(char *path) -{ - FILE *f = setmntent("/proc/mounts", "r"); - if (!f) - die("Cannot open /proc/mounts: %m"); - - struct mntent *me; - int best_len = 0; - char *best_dev = NULL; - while (me = getmntent(f)) - { - if (!path_begins_with(me->mnt_fsname, "/dev")) - continue; - if (path_begins_with(path, me->mnt_dir)) - { - int len = strlen(me->mnt_dir); - if (len > best_len) - { - best_len = len; - free(best_dev); - best_dev = xstrdup(me->mnt_fsname); - } - } - } - endmntent(f); - return best_dev; -} - -static void -set_quota(void) -{ - if (!block_quota) - return; - - char cwd[PATH_MAX]; - if (!getcwd(cwd, sizeof(cwd))) - die("getcwd: %m"); - - char *dev = find_device(cwd); - if (!dev) - die("Cannot identify filesystem which contains %s", cwd); - msg("Quota: Mapped path %s to a filesystem on %s\n", cwd, dev); - - // Sanity check - struct stat dev_st, cwd_st; - if (stat(dev, &dev_st) < 0) - die("Cannot identify block device %s: %m", dev); - if (!S_ISBLK(dev_st.st_mode)) - die("Expected that %s is a block device", dev); - if (stat(".", &cwd_st) < 0) - die("Cannot stat cwd: %m"); - if (cwd_st.st_dev != dev_st.st_rdev) - die("Identified %s as a filesystem on %s, but it is obviously false", cwd, dev); - - struct dqblk dq = { - .dqb_bhardlimit = block_quota, - .dqb_bsoftlimit = block_quota, - .dqb_ihardlimit = inode_quota, - .dqb_isoftlimit = inode_quota, - .dqb_valid = QIF_LIMITS, - }; - if (quotactl(QCMD(Q_SETQUOTA, USRQUOTA), dev, box_uid, (caddr_t) &dq) < 0) - die("Cannot set disk quota: %m"); - msg("Quota: Set block quota %d and inode quota %d\n", block_quota, inode_quota); - - free(dev); -} - /*** Signal handling in keeper process ***/ /* diff --git a/isolate.h b/isolate.h new file mode 100644 index 0000000..0e923c4 --- /dev/null +++ b/isolate.h @@ -0,0 +1,67 @@ +/* + * Process Isolator + * + * (c) 2012-2016 Martin Mares + * (c) 2012-2014 Bernard 
Blackham + */ + +#include +#include +#include + +#include "config.h" + +#define NONRET __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) + +/* isolate.c */ + +void die(char *msg, ...) NONRET; +void NONRET __attribute__((format(printf,1,2))) err(char *msg, ...); +void __attribute__((format(printf,1,2))) msg(char *msg, ...); + +extern int pass_environ; +extern int verbose; +extern int block_quota; +extern int inode_quota; +extern int cg_enable; +extern int cg_memory_limit; +extern int cg_timing; + +extern int box_id; +extern uid_t box_uid; +extern gid_t box_gid; + +/* util.c */ + +void *xmalloc(size_t size); +char *xstrdup(char *str); +int dir_exists(char *path); +void rmtree(char *path); +void make_dir(char *path); +void chowntree(char *path, uid_t uid, gid_t gid); + +void meta_open(const char *name); +void meta_close(void); +void __attribute__((format(printf,1,2))) meta_printf(const char *fmt, ...); + +/* rules.c */ + +int set_env_action(char *a0); +char **setup_environment(void); + +void init_dir_rules(void); +int set_dir_action(char *arg); +void apply_dir_rules(void); + +void set_quota(void); + +/* cg.c */ + +void cg_init(void); +void cg_prepare(void); +void cg_enter(void); +int cg_get_run_time_ms(void); +void cg_stats(void); +void cg_remove(void); diff --git a/rules.c b/rules.c new file mode 100644 index 0000000..e2934d8 --- /dev/null +++ b/rules.c @@ -0,0 +1,393 @@ +/* + * Process Isolator -- Rules + * + * (c) 2012-2016 Martin Mares + * (c) 2012-2014 Bernard Blackham + */ + +#include "isolate.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/*** Environment rules ***/ + +struct env_rule { + char *var; // Variable to match + char *val; // ""=clear, NULL=inherit + int var_len; + struct env_rule *next; +}; + +static struct env_rule *first_env_rule; +static struct env_rule **last_env_rule = &first_env_rule; + +static struct env_rule 
default_env_rules[] = { + { .var = "LIBC_FATAL_STDERR_", .val = "1", .var_len = 18 }, +}; + +int +set_env_action(char *a0) +{ + struct env_rule *r = xmalloc(sizeof(*r) + strlen(a0) + 1); + char *a = (char *)(r+1); + strcpy(a, a0); + + char *sep = strchr(a, '='); + if (sep == a) + return 0; + r->var = a; + if (sep) + { + *sep++ = 0; + r->val = sep; + } + else + r->val = NULL; + *last_env_rule = r; + last_env_rule = &r->next; + r->next = NULL; + return 1; +} + +static int +match_env_var(char *env_entry, struct env_rule *r) +{ + if (strncmp(env_entry, r->var, r->var_len)) + return 0; + return (env_entry[r->var_len] == '='); +} + +static void +apply_env_rule(char **env, int *env_sizep, struct env_rule *r) +{ + // First remove the variable if already set + int pos = 0; + while (pos < *env_sizep && !match_env_var(env[pos], r)) + pos++; + if (pos < *env_sizep) + { + (*env_sizep)--; + env[pos] = env[*env_sizep]; + env[*env_sizep] = NULL; + } + + // What is the new value? + char *new; + if (r->val) + { + if (!r->val[0]) + return; + new = xmalloc(r->var_len + 1 + strlen(r->val) + 1); + sprintf(new, "%s=%s", r->var, r->val); + } + else + { + pos = 0; + while (environ[pos] && !match_env_var(environ[pos], r)) + pos++; + if (!(new = environ[pos])) + return; + } + + // Add it at the end of the array + env[(*env_sizep)++] = new; + env[*env_sizep] = NULL; +} + +char ** +setup_environment(void) +{ + // Link built-in rules with user rules + for (int i=ARRAY_SIZE(default_env_rules)-1; i >= 0; i--) + { + default_env_rules[i].next = first_env_rule; + first_env_rule = &default_env_rules[i]; + } + + // Scan the original environment + char **orig_env = environ; + int orig_size = 0; + while (orig_env[orig_size]) + orig_size++; + + // For each rule, reserve one more slot and calculate length + int num_rules = 0; + for (struct env_rule *r = first_env_rule; r; r=r->next) + { + num_rules++; + r->var_len = strlen(r->var); + } + + // Create a new environment + char **env = xmalloc((orig_size + 
num_rules + 1) * sizeof(char *)); + int size; + if (pass_environ) + { + memcpy(env, environ, orig_size * sizeof(char *)); + size = orig_size; + } + else + size = 0; + env[size] = NULL; + + // Apply the rules one by one + for (struct env_rule *r = first_env_rule; r; r=r->next) + apply_env_rule(env, &size, r); + + // Return the new env and pass some gossip + if (verbose > 1) + { + fprintf(stderr, "Passing environment:\n"); + for (int i=0; env[i]; i++) + fprintf(stderr, "\t%s\n", env[i]); + } + return env; +} + +/*** Directory rules ***/ + +struct dir_rule { + char *inside; // A relative path + char *outside; // This can be an absolute path or a relative path starting with "./" + unsigned int flags; // DIR_FLAG_xxx + struct dir_rule *next; +}; + +enum dir_rule_flags { + DIR_FLAG_RW = 1, + DIR_FLAG_NOEXEC = 2, + DIR_FLAG_FS = 4, + DIR_FLAG_MAYBE = 8, + DIR_FLAG_DEV = 16, +}; + +static const char * const dir_flag_names[] = { "rw", "noexec", "fs", "maybe", "dev" }; + +static struct dir_rule *first_dir_rule; +static struct dir_rule **last_dir_rule = &first_dir_rule; + +static int add_dir_rule(char *in, char *out, unsigned int flags) +{ + // Make sure that "in" is relative + while (in[0] == '/') + in++; + if (!*in) + return 0; + + // Check "out" + if (flags & DIR_FLAG_FS) + { + if (!out || out[0] == '/') + return 0; + } + else + { + if (out && out[0] != '/' && strncmp(out, "./", 2)) + return 0; + } + + // Override an existing rule + struct dir_rule *r; + for (r = first_dir_rule; r; r = r->next) + if (!strcmp(r->inside, in)) + break; + + // Add a new rule + if (!r) + { + r = xmalloc(sizeof(*r)); + r->inside = in; + *last_dir_rule = r; + last_dir_rule = &r->next; + r->next = NULL; + } + r->outside = out; + r->flags = flags; + return 1; +} + +static unsigned int parse_dir_option(char *opt) +{ + for (unsigned int i = 0; i < ARRAY_SIZE(dir_flag_names); i++) + if (!strcmp(opt, dir_flag_names[i])) + return 1U << i; + die("Unknown directory option %s", opt); +} + +int 
set_dir_action(char *arg) +{ + arg = xstrdup(arg); + + char *colon = strchr(arg, ':'); + unsigned int flags = 0; + while (colon) + { + *colon++ = 0; + char *next = strchr(colon, ':'); + if (next) + *next = 0; + flags |= parse_dir_option(colon); + colon = next; + } + + char *eq = strchr(arg, '='); + if (eq) + { + *eq++ = 0; + return add_dir_rule(arg, (*eq ? eq : NULL), flags); + } + else + { + char *out = xmalloc(1 + strlen(arg) + 1); + sprintf(out, "/%s", arg); + return add_dir_rule(arg, out, flags); + } +} + +void init_dir_rules(void) +{ + set_dir_action("box=./box:rw"); + set_dir_action("bin"); + set_dir_action("dev:dev"); + set_dir_action("lib"); + set_dir_action("lib64:maybe"); + set_dir_action("proc=proc:fs"); + set_dir_action("usr"); +} + +void apply_dir_rules(void) +{ + for (struct dir_rule *r = first_dir_rule; r; r=r->next) + { + char *in = r->inside; + char *out = r->outside; + if (!out) + { + msg("Not binding anything on %s\n", r->inside); + continue; + } + + if ((r->flags & DIR_FLAG_MAYBE) && !dir_exists(out)) + { + msg("Not binding %s on %s (does not exist)\n", out, r->inside); + continue; + } + + char root_in[1024]; + snprintf(root_in, sizeof(root_in), "root/%s", in); + make_dir(root_in); + + unsigned long mount_flags = 0; + if (!(r->flags & DIR_FLAG_RW)) + mount_flags |= MS_RDONLY; + if (r->flags & DIR_FLAG_NOEXEC) + mount_flags |= MS_NOEXEC; + if (!(r->flags & DIR_FLAG_DEV)) + mount_flags |= MS_NODEV; + + if (r->flags & DIR_FLAG_FS) + { + msg("Mounting %s on %s (flags %lx)\n", out, in, mount_flags); + if (mount("none", root_in, out, mount_flags, "") < 0) + die("Cannot mount %s on %s: %m", out, in); + } + else + { + mount_flags |= MS_BIND | MS_NOSUID; + msg("Binding %s on %s (flags %lx)\n", out, in, mount_flags); + // Most mount flags need remount to work + if (mount(out, root_in, "none", mount_flags, "") < 0 || + mount(out, root_in, "none", MS_REMOUNT | mount_flags, "") < 0) + die("Cannot mount %s on %s: %m", out, in); + } + } +} + +/*** Disk quotas 
***/ + +static int +path_begins_with(char *path, char *with) +{ + while (*with) + if (*path++ != *with++) + return 0; + return (!*with || *with == '/'); +} + +static char * +find_device(char *path) +{ + FILE *f = setmntent("/proc/mounts", "r"); + if (!f) + die("Cannot open /proc/mounts: %m"); + + struct mntent *me; + int best_len = 0; + char *best_dev = NULL; + while (me = getmntent(f)) + { + if (!path_begins_with(me->mnt_fsname, "/dev")) + continue; + if (path_begins_with(path, me->mnt_dir)) + { + int len = strlen(me->mnt_dir); + if (len > best_len) + { + best_len = len; + free(best_dev); + best_dev = xstrdup(me->mnt_fsname); + } + } + } + endmntent(f); + return best_dev; +} + +void +set_quota(void) +{ + if (!block_quota) + return; + + char cwd[PATH_MAX]; + if (!getcwd(cwd, sizeof(cwd))) + die("getcwd: %m"); + + char *dev = find_device(cwd); + if (!dev) + die("Cannot identify filesystem which contains %s", cwd); + msg("Quota: Mapped path %s to a filesystem on %s\n", cwd, dev); + + // Sanity check + struct stat dev_st, cwd_st; + if (stat(dev, &dev_st) < 0) + die("Cannot identify block device %s: %m", dev); + if (!S_ISBLK(dev_st.st_mode)) + die("Expected that %s is a block device", dev); + if (stat(".", &cwd_st) < 0) + die("Cannot stat cwd: %m"); + if (cwd_st.st_dev != dev_st.st_rdev) + die("Identified %s as a filesystem on %s, but it is obviously false", cwd, dev); + + struct dqblk dq = { + .dqb_bhardlimit = block_quota, + .dqb_bsoftlimit = block_quota, + .dqb_ihardlimit = inode_quota, + .dqb_isoftlimit = inode_quota, + .dqb_valid = QIF_LIMITS, + }; + if (quotactl(QCMD(Q_SETQUOTA, USRQUOTA), dev, box_uid, (caddr_t) &dq) < 0) + die("Cannot set disk quota: %m"); + msg("Quota: Set block quota %d and inode quota %d\n", block_quota, inode_quota); + + free(dev); +} diff --git a/util.c b/util.c new file mode 100644 index 0000000..111204d --- /dev/null +++ b/util.c @@ -0,0 +1,152 @@ +/* + * Process Isolator -- Utility Functions + * + * (c) 2012-2016 Martin Mares + * (c) 
2012-2014 Bernard Blackham + */ + +#include "isolate.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +void * +xmalloc(size_t size) +{ + void *p = malloc(size); + if (!p) + die("Out of memory"); + return p; +} + +char * +xstrdup(char *str) +{ + char *p = strdup(str); + if (!p) + die("Out of memory"); + return p; +} + +int +dir_exists(char *path) +{ + struct stat st; + return (stat(path, &st) >= 0 && S_ISDIR(st.st_mode)); +} + +void make_dir(char *path) +{ + char *sep = (path[0] == '/' ? path+1 : path); + + for (;;) + { + sep = strchr(sep, '/'); + if (sep) + *sep = 0; + + if (mkdir(path, 0777) < 0 && errno != EEXIST) + die("Cannot create directory %s: %m", path); + + if (!sep) + break; + *sep++ = '/'; + } + + // mkdir() above may have returned EEXIST even if the path was not + // a directory. Ensure that it is. + struct stat st; + if (stat(path, &st) < 0) + die("Cannot stat %s: %m", path); + if (!S_ISDIR(st.st_mode)) + die("Cannot create %s: already exists, but not a directory", path); +} + + +static int rmtree_helper(const char *fpath, const struct stat *sb, + int typeflag UNUSED, struct FTW *ftwbuf UNUSED) +{ + if (S_ISDIR(sb->st_mode)) + { + if (rmdir(fpath) < 0) + die("Cannot rmdir %s: %m", fpath); + } + else + { + if (unlink(fpath) < 0) + die("Cannot unlink %s: %m", fpath); + } + return FTW_CONTINUE; +} + +void +rmtree(char *path) +{ + nftw(path, rmtree_helper, 32, FTW_MOUNT | FTW_PHYS | FTW_DEPTH); +} + +static uid_t chown_uid; +static gid_t chown_gid; + +static int chowntree_helper(const char *fpath, const struct stat *sb UNUSED, + int typeflag UNUSED, struct FTW *ftwbuf UNUSED) +{ + if (lchown(fpath, chown_uid, chown_gid) < 0) + die("Cannot chown %s: %m", fpath); + else + return FTW_CONTINUE; +} + +void +chowntree(char *path, uid_t uid, gid_t gid) +{ + chown_uid = uid; + chown_gid = gid; + nftw(path, chowntree_helper, 32, FTW_MOUNT | FTW_PHYS); +} + +/*** Meta-files ***/ + +static FILE *metafile; + +void +meta_open(const 
char *name) +{ + if (!strcmp(name, "-")) + { + metafile = stdout; + return; + } + if (setfsuid(getuid()) < 0) + die("Failed to switch FS UID: %m"); + metafile = fopen(name, "w"); + if (setfsuid(geteuid()) < 0) + die("Failed to switch FS UID back: %m"); + if (!metafile) + die("Failed to open metafile '%s'",name); +} + +void +meta_close(void) +{ + if (metafile && metafile != stdout) + fclose(metafile); +} + +void +meta_printf(const char *fmt, ...) +{ + if (!metafile) + return; + + va_list args; + va_start(args, fmt); + vfprintf(metafile, fmt, args); + va_end(args); +} From 4d364d538cc2bb69c8ce4f33702a7749459b1e94 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 19:02:37 +0100 Subject: [PATCH 11/22] Compile-time configuration moved to a run-time config file --- Makefile | 6 ++- cg.c | 8 ++-- config.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++ config.h | 15 -------- default.cf | 14 +++++++ isolate.c | 18 +++------ isolate.h | 12 +++++- 7 files changed, 149 insertions(+), 35 deletions(-) create mode 100644 config.c delete mode 100644 config.h create mode 100644 default.cf diff --git a/Makefile b/Makefile index f56c44d..dee3e48 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,8 @@ BUILD_COMMIT:=$(shell if git rev-parse >/dev/null 2>/dev/null ; then git describ PREFIX = $(DESTDIR)/usr/local VARPREFIX = $(DESTDIR)/var/local +CONFIGDIR = $(PREFIX)/etc +CONFIG = $(CONFIGDIR)/isolate BINDIR = $(PREFIX)/bin DATAROOTDIR = $(PREFIX)/share DATADIR = $(DATAROOTDIR) @@ -20,13 +22,14 @@ MANDIR = $(DATADIR)/man MAN1DIR = $(MANDIR)/man1 BOXDIR = $(VARPREFIX)/lib/isolate -isolate: isolate.o util.o rules.o cg.o +isolate: isolate.o util.o rules.o cg.o config.o $(CC) $(LDFLAGS) -o $@ $^ %.o: %.c isolate.h config.h $(CC) $(CFLAGS) -c -o $@ $< isolate.o: CFLAGS += -DVERSION='"$(VERSION)"' -DYEAR='"$(YEAR)"' -DBUILD_DATE='"$(BUILD_DATE)"' -DBUILD_COMMIT='"$(BUILD_COMMIT)"' +config.o: CFLAGS += -DCONFIG_FILE='"$(CONFIG)"' isolate.1: isolate.1.txt a2x -f 
manpage $< @@ -45,6 +48,7 @@ install: isolate install -D $< $(BINDIR)/$< chmod u+s $(BINDIR)/$< install -d $(BOXDIR) + install -D default.cf $(CONFIG) install-doc: isolate.1 install -D $< $(MAN1DIR)/$< diff --git a/cg.c b/cg.c index 84f9511..3fe7fe7 100644 --- a/cg.c +++ b/cg.c @@ -54,8 +54,7 @@ static char cg_name[256]; static void cg_makepath(char *buf, size_t len, cg_controller c, const char *attr) { - const char *cg_root = CONFIG_ISOLATE_CGROUP_ROOT; - snprintf(buf, len, "%s/%s/%s/%s", cg_root, cg_controller_name(c), cg_name, attr); + snprintf(buf, len, "%s/%s/%s/%s", cf_cg_root, cg_controller_name(c), cg_name, attr); } static int @@ -159,9 +158,8 @@ cg_init(void) if (!cg_enable) return; - char *cg_root = CONFIG_ISOLATE_CGROUP_ROOT; - if (!dir_exists(cg_root)) - die("Control group filesystem at %s not mounted", cg_root); + if (!dir_exists(cf_cg_root)) + die("Control group filesystem at %s not mounted", cf_cg_root); snprintf(cg_name, sizeof(cg_name), "box-%d", box_id); msg("Using control group %s\n", cg_name); diff --git a/config.c b/config.c new file mode 100644 index 0000000..672d911 --- /dev/null +++ b/config.c @@ -0,0 +1,111 @@ +/* + * Process Isolator -- Configuration File + * + * (c) 2016 Martin Mares + */ + +#include "isolate.h" + +#include +#include +#include +#include + +#define MAX_LINE_LEN 1024 + +char *cf_box_root; +char *cf_cg_root; +int cf_first_uid; +int cf_first_gid; +int cf_num_boxes; + +static int line_number; + +static void NONRET +cf_err(char *msg) +{ + die("Error in config file, line %d: %s", line_number, msg); +} + +static char * +cf_string(char *val) +{ + return xstrdup(val); +} + +static int +cf_int(char *val) +{ + char *end; + errno = 0; + long int x = strtol(val, &end, 10); + if (errno || end == val || end && *end) + cf_err("Invalid number"); + if ((long int)(int) x != x) + cf_err("Number out of range"); + return x; +} + +void cf_entry(char *key, char *val) +{ + if (!strcmp(key, "box_root")) + cf_box_root = cf_string(val); + else if 
(!strcmp(key, "cg_root")) + cf_cg_root = cf_string(val); + else if (!strcmp(key, "first_uid")) + cf_first_uid = cf_int(val); + else if (!strcmp(key, "first_gid")) + cf_first_gid = cf_int(val); + else if (!strcmp(key, "num_boxes")) + cf_num_boxes = cf_int(val); + else + cf_err("Unknown configuration item"); +} + +static void +cf_check(void) +{ + if (!cf_box_root || + !cf_cg_root || + !cf_first_uid || + !cf_first_gid || + !cf_num_boxes) + cf_err("Configuration is not complete"); +} + +void +cf_parse(void) +{ + FILE *f = fopen(CONFIG_FILE, "r"); + if (!f) + die("Cannot open %s: %m", CONFIG_FILE); + + char line[MAX_LINE_LEN]; + while (fgets(line, sizeof(line), f)) + { + line_number++; + char *nl = strchr(line, '\n'); + if (!nl) + cf_err("Line not terminated or too long"); + *nl = 0; + + if (!line[0] || line[0] == '#') + continue; + + char *s = line; + while (*s && *s != ' ' && *s != '\t' && *s != '=') + s++; + while (*s == ' ' || *s == '\t') + *s++ = 0; + if (*s != '=') + cf_err("Syntax error, expecting key=value"); + *s++ = 0; + while (*s == ' ' || *s == '\t') + *s++ = 0; + + cf_entry(line, s); + } + + fclose(f); + cf_check(); +} diff --git a/config.h b/config.h deleted file mode 100644 index bf38a22..0000000 --- a/config.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef __ISOLATE_CONFIG_H__ -#define __ISOLATE_CONFIG_H__ - -/* A directory under which all sandboxes are created. */ -#define CONFIG_ISOLATE_BOX_DIR "/var/local/lib/isolate" - -/* Range of UIDs and GIDs reserved for use by the sandboxes. */ -#define CONFIG_ISOLATE_FIRST_UID 60000 -#define CONFIG_ISOLATE_FIRST_GID 60000 -#define CONFIG_ISOLATE_NUM_BOXES 1000 - -/* Root of the cgroup hierarchy. */ -#define CONFIG_ISOLATE_CGROUP_ROOT "/sys/fs/cgroup" - -#endif /* __ISOLATE_CONFIG_H__ */ diff --git a/default.cf b/default.cf new file mode 100644 index 0000000..67306a3 --- /dev/null +++ b/default.cf @@ -0,0 +1,14 @@ +# This is a configuration file for Isolate + +# All sandboxes are created under this directory. 
+# To avoid symlink attacks, this directory and all its ancestors +# must be writeable only to root. +box_root = /var/local/lib/isolate + +# Root of the control group hierarchy +cg_root = /sys/fs/cgroup + +# Block of UIDs and GIDs reserved for sandboxes +first_uid = 60000 +first_gid = 60000 +num_boxes = 1000 diff --git a/isolate.c b/isolate.c index 1b60fb8..21bfb2d 100644 --- a/isolate.c +++ b/isolate.c @@ -597,12 +597,12 @@ box_inside(void *arg) static void box_init(void) { - if (box_id < 0 || box_id >= CONFIG_ISOLATE_NUM_BOXES) - die("Sandbox ID out of range (allowed: 0-%d)", CONFIG_ISOLATE_NUM_BOXES-1); - box_uid = CONFIG_ISOLATE_FIRST_UID + box_id; - box_gid = CONFIG_ISOLATE_FIRST_GID + box_id; + if (box_id < 0 || box_id >= cf_num_boxes) + die("Sandbox ID out of range (allowed: 0-%d)", cf_num_boxes-1); + box_uid = cf_first_uid + box_id; + box_gid = cf_first_gid + box_id; - snprintf(box_dir, sizeof(box_dir), "%s/%d", CONFIG_ISOLATE_BOX_DIR, box_id); + snprintf(box_dir, sizeof(box_dir), "%s/%d", cf_box_root, box_id); make_dir(box_dir); if (chdir(box_dir) < 0) die("chdir(%s): %m", box_dir); @@ -673,13 +673,6 @@ show_version(void) printf("The process isolator " VERSION "\n"); printf("(c) 2012--" YEAR " Martin Mares and Bernard Blackham\n"); printf("Built on " BUILD_DATE " from Git commit " BUILD_COMMIT "\n"); - printf("\nCompile-time configuration:\n"); - printf("Sandbox directory: %s\n", CONFIG_ISOLATE_BOX_DIR); - printf("Sandbox credentials: uid=%u-%u gid=%u-%u\n", - CONFIG_ISOLATE_FIRST_UID, - CONFIG_ISOLATE_FIRST_UID + CONFIG_ISOLATE_NUM_BOXES - 1, - CONFIG_ISOLATE_FIRST_GID, - CONFIG_ISOLATE_FIRST_GID + CONFIG_ISOLATE_NUM_BOXES - 1); } /*** Options ***/ @@ -902,6 +895,7 @@ main(int argc, char **argv) orig_gid = getgid(); umask(022); + cf_parse(); box_init(); cg_init(); diff --git a/isolate.h b/isolate.h index 0e923c4..23eb573 100644 --- a/isolate.h +++ b/isolate.h @@ -9,8 +9,6 @@ #include #include -#include "config.h" - #define NONRET __attribute__((noreturn)) 
#define UNUSED __attribute__((unused)) #define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) @@ -65,3 +63,13 @@ void cg_enter(void); int cg_get_run_time_ms(void); void cg_stats(void); void cg_remove(void); + +/* config.c */ + +extern char *cf_box_root; +extern char *cf_cg_root; +extern int cf_first_uid; +extern int cf_first_gid; +extern int cf_num_boxes; + +void cf_parse(void); From cb04630b4ad4a3a1add6eb052bf2b46fb65e4872 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 22:07:26 +0100 Subject: [PATCH 12/22] Update the manual page to reflect recent changes --- isolate.1.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/isolate.1.txt b/isolate.1.txt index 5066b72..7ea33e7 100644 --- a/isolate.1.txt +++ b/isolate.1.txt @@ -266,14 +266,13 @@ you should also enable the swap controller (+CONFIG_MEMCG_SWAP+). Debian 7.x and newer require enabling the memory and swap cgroup controllers by adding the parameters "cgroup_enable=memory swapaccount=1" to the kernel -command-line, which can be set using GRUB_CMDLINE_LINUX_DEFAULT in +command-line, which can be set using +GRUB_CMDLINE_LINUX_DEFAULT+ in /etc/default/grub. Isolate is designed to run setuid to root. The sub-process inside the sandbox then switches to a non-privileged user ID (different for each *--box-id*). -The range of UIDs available and several filesystem paths are embedded in the -isolate's binary during compilation; please see +config.h+ in the source -tree for description. +The range of UIDs available and several filesystem paths are set in a configuration +file, by default located in /usr/local/etc/isolate. Before you run isolate with control groups, you need to ensure that the cgroup filesystem is enabled and mounted. Most modern Linux distributions already @@ -298,4 +297,4 @@ LICENSE ------- Isolate was written by Martin Mares and Bernard Blackham. It can be distributed and used under the terms of the GNU -General Public License version 2. 
+General Public License version 2 or any later version. From 450096de61b40358f2151c3006785b48c946ec9b Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 24 Jan 2016 22:07:42 +0100 Subject: [PATCH 13/22] Released as version 1.2 --- Makefile | 2 +- TODO | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index dee3e48..1640590 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ all: isolate isolate.1 isolate.1.html CC=gcc CFLAGS=-std=gnu99 -Wall -Wextra -Wno-parentheses -Wno-unused-result -Wno-missing-field-initializers -D_GNU_SOURCE -VERSION=1.1 +VERSION=1.2 YEAR=2016 BUILD_DATE:=$(shell date '+%Y-%m-%d') BUILD_COMMIT:=$(shell if git rev-parse >/dev/null 2>/dev/null ; then git describe --always ; else echo '' ; fi) diff --git a/TODO b/TODO index 685afab..86a6de8 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,6 @@ -Installation Test: ptrace self Test: SIGSTOP Test: ping-pong timing attacks Test: big static memory Examine the use of taskstats for measuring memory Doc: mount -t cgroup none -o cpuset,cpuacct,memory /sys/fs/cgroup -Switch license to GPL2/GPL3 From 8fc6594aa515243d66a104781e1f962022fe3b59 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Tue, 16 Aug 2016 12:04:32 +0300 Subject: [PATCH 14/22] Man page: Do not refer to a non-existent section --- isolate.1.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isolate.1.txt b/isolate.1.txt index 7ea33e7..042322e 100644 --- a/isolate.1.txt +++ b/isolate.1.txt @@ -192,7 +192,7 @@ CONTROL GROUPS -------------- Isolate can make use of system control groups provided by the kernel to constrain programs consisting of multiple processes. Please note -that this feature needs special system setup described in the REQUIREMENTS +that this feature needs special system setup described in the INSTALLATION section. 
*--cg*:: From e7f421b2254dd1e7dbf04510bdc330ff85237ea8 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Tue, 16 Aug 2016 12:33:35 +0200 Subject: [PATCH 15/22] Makefile: Enable prototype warnings --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1640590..ce9bc2d 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ all: isolate isolate.1 isolate.1.html CC=gcc -CFLAGS=-std=gnu99 -Wall -Wextra -Wno-parentheses -Wno-unused-result -Wno-missing-field-initializers -D_GNU_SOURCE +CFLAGS=-std=gnu99 -Wall -Wextra -Wno-parentheses -Wno-unused-result -Wno-missing-field-initializers -Wstrict-prototypes -Wmissing-prototypes -D_GNU_SOURCE VERSION=1.2 YEAR=2016 From 9fa5760cf1e26d76b5d6cebd09ec14c8d2986588 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Tue, 16 Aug 2016 12:50:16 +0200 Subject: [PATCH 16/22] cgroups: Fix inheritance of cpusets It was broken by commit a40942b05ac1a660063a91a5a7452a6fc28f871e. The bug was probably harmless: the cpu/mem sets of the subgroup got initialized to empty strings by the kernel, which caused it to use the settings of the parent group anyway. 
--- cg.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/cg.c b/cg.c index 3fe7fe7..ad2e928 100644 --- a/cg.c +++ b/cg.c @@ -7,6 +7,7 @@ #include "isolate.h" +#include #include #include #include @@ -24,6 +25,7 @@ typedef enum { CG_CPUACCT, CG_CPUSET, CG_NUM_CONTROLLERS, + CG_PARENT = 256, } cg_controller; static const struct cg_controller_desc cg_controllers[CG_NUM_CONTROLLERS+1] = { @@ -39,11 +41,13 @@ static const struct cg_controller_desc cg_controllers[CG_NUM_CONTROLLERS+1] = { static const char *cg_controller_name(cg_controller c) { + assert(c < CG_NUM_CONTROLLERS); return cg_controllers[c].name; } static int cg_controller_optional(cg_controller c) { + assert(c < CG_NUM_CONTROLLERS); return cg_controllers[c].optional; } @@ -54,7 +58,10 @@ static char cg_name[256]; static void cg_makepath(char *buf, size_t len, cg_controller c, const char *attr) { - snprintf(buf, len, "%s/%s/%s/%s", cf_cg_root, cg_controller_name(c), cg_name, attr); + if (c & CG_PARENT) + snprintf(buf, len, "%s/%s/%s", cf_cg_root, cg_controller_name(c & ~CG_PARENT), attr); + else + snprintf(buf, len, "%s/%s/%s/%s", cf_cg_root, cg_controller_name(c), cg_name, attr); } static int @@ -190,9 +197,9 @@ cg_prepare(void) } // If cpuset module is enabled, copy allowed cpus and memory nodes from parent group - if (cg_read(CG_CPUSET, "?cpuset.cpus", buf)) + if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.cpus", buf)) cg_write(CG_CPUSET, "cpuset.cpus", "%s", buf); - if (cg_read(CG_CPUSET, "?cpuset.mems", buf)) + if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.mems", buf)) cg_write(CG_CPUSET, "cpuset.mems", "%s", buf); } From a01a65eac6b2da28501aeb7764a457b28300a351 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Tue, 16 Aug 2016 13:16:34 +0200 Subject: [PATCH 17/22] Added per-box configuration of CPU and NUMA node sets --- cg.c | 9 ++++++--- config.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++- default.cf | 6 ++++++ isolate.h | 9 +++++++++ 4 files changed, 75 
insertions(+), 4 deletions(-) diff --git a/cg.c b/cg.c index ad2e928..67f5206 100644 --- a/cg.c +++ b/cg.c @@ -196,11 +196,14 @@ cg_prepare(void) die("Failed to create control group %s: %m", path); } - // If cpuset module is enabled, copy allowed cpus and memory nodes from parent group + // If the cpuset module is enabled, set up allowed cpus and memory nodes. + // If per-box configuration exists, use it; otherwise, inherit the settings + // from the parent cgroup. + struct cf_per_box *cf = cf_current_box(); if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.cpus", buf)) - cg_write(CG_CPUSET, "cpuset.cpus", "%s", buf); + cg_write(CG_CPUSET, "cpuset.cpus", "%s", cf->cpus ? cf->cpus : buf); if (cg_read(CG_PARENT | CG_CPUSET, "?cpuset.mems", buf)) - cg_write(CG_CPUSET, "cpuset.mems", "%s", buf); + cg_write(CG_CPUSET, "cpuset.mems", "%s", cf->mems ? cf->mems : buf); } void diff --git a/config.c b/config.c index 672d911..e8c989c 100644 --- a/config.c +++ b/config.c @@ -20,6 +20,7 @@ int cf_first_gid; int cf_num_boxes; static int line_number; +static struct cf_per_box *per_box_configs; static void NONRET cf_err(char *msg) @@ -46,7 +47,8 @@ cf_int(char *val) return x; } -void cf_entry(char *key, char *val) +static void +cf_entry_toplevel(char *key, char *val) { if (!strcmp(key, "box_root")) cf_box_root = cf_string(val); @@ -62,6 +64,35 @@ void cf_entry(char *key, char *val) cf_err("Unknown configuration item"); } +static void +cf_entry_compound(char *key, char *subkey, char *val) +{ + if (strncmp(key, "box", 3)) + cf_err("Unknown configuration section"); + int box_id = cf_int(key + 3); + struct cf_per_box *c = cf_per_box(box_id); + + if (!strcmp(subkey, "cpus")) + c->cpus = cf_string(val); + else if (!strcmp(subkey, "mems")) + c->mems = cf_string(val); + else + cf_err("Unknown per-box configuration item"); +} + +static void +cf_entry(char *key, char *val) +{ + char *dot = strchr(key, '.'); + if (!dot) + cf_entry_toplevel(key, val); + else + { + *dot++ = 0; + 
cf_entry_compound(key, dot, val); + } +} + static void cf_check(void) { @@ -109,3 +140,25 @@ cf_parse(void) fclose(f); cf_check(); } + +struct cf_per_box * +cf_per_box(int box_id) +{ + struct cf_per_box *c; + + for (c = per_box_configs; c; c = c->next) + if (c->box_id == box_id) + return c; + + c = xmalloc(sizeof(*c)); + c->next = per_box_configs; + per_box_configs = c; + c->box_id = box_id; + return c; +} + +struct cf_per_box * +cf_current_box(void) +{ + return cf_per_box(box_id); +} diff --git a/default.cf b/default.cf index 67306a3..c0372f5 100644 --- a/default.cf +++ b/default.cf @@ -12,3 +12,9 @@ cg_root = /sys/fs/cgroup first_uid = 60000 first_gid = 60000 num_boxes = 1000 + +# Per-box settings of the set of allowed CPUs and NUMA nodes +# (see linux/Documentation/cgroups/cpusets.txt for precise syntax) + +#box0.cpus = 4-7 +#box0.mems = 1 diff --git a/isolate.h b/isolate.h index 23eb573..61bba2c 100644 --- a/isolate.h +++ b/isolate.h @@ -72,4 +72,13 @@ extern int cf_first_uid; extern int cf_first_gid; extern int cf_num_boxes; +struct cf_per_box { + struct cf_per_box *next; + int box_id; + char *cpus; + char *mems; +}; + void cf_parse(void); +struct cf_per_box *cf_per_box(int box_id); +struct cf_per_box *cf_current_box(void); From a2590eb44da7d41b18612ca6b429081f4339cc84 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Tue, 16 Aug 2016 17:34:47 +0200 Subject: [PATCH 18/22] Added a possibility to choose a parent cgroup --- cg.c | 24 ++++++++++++++++++------ config.c | 3 +++ default.cf | 4 ++++ isolate.h | 1 + 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/cg.c b/cg.c index 67f5206..db16b6d 100644 --- a/cg.c +++ b/cg.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -52,16 +53,18 @@ static int cg_controller_optional(cg_controller c) } static char cg_name[256]; +static char cg_parent_name[256]; #define CG_BUFSIZE 1024 static void cg_makepath(char *buf, size_t len, cg_controller c, const char *attr) { - if (c & CG_PARENT) 
- snprintf(buf, len, "%s/%s/%s", cf_cg_root, cg_controller_name(c & ~CG_PARENT), attr); - else - snprintf(buf, len, "%s/%s/%s/%s", cf_cg_root, cg_controller_name(c), cg_name, attr); + snprintf(buf, len, "%s/%s/%s/%s", + cf_cg_root, + cg_controller_name(c & ~CG_PARENT), + (c & CG_PARENT) ? cg_parent_name : cg_name, + attr); } static int @@ -168,8 +171,17 @@ cg_init(void) if (!dir_exists(cf_cg_root)) die("Control group filesystem at %s not mounted", cf_cg_root); - snprintf(cg_name, sizeof(cg_name), "box-%d", box_id); - msg("Using control group %s\n", cg_name); + if (cf_cg_parent) + { + snprintf(cg_name, sizeof(cg_name), "%s/box-%d", cf_cg_parent, box_id); + snprintf(cg_parent_name, sizeof(cg_parent_name), "%s", cf_cg_parent); + } + else + { + snprintf(cg_name, sizeof(cg_name), "box-%d", box_id); + strcpy(cg_parent_name, "."); + } + msg("Using control group %s under parent %s\n", cg_name, cg_parent_name); } void diff --git a/config.c b/config.c index e8c989c..730b296 100644 --- a/config.c +++ b/config.c @@ -15,6 +15,7 @@ char *cf_box_root; char *cf_cg_root; +char *cf_cg_parent; int cf_first_uid; int cf_first_gid; int cf_num_boxes; @@ -54,6 +55,8 @@ cf_entry_toplevel(char *key, char *val) cf_box_root = cf_string(val); else if (!strcmp(key, "cg_root")) cf_cg_root = cf_string(val); + else if (!strcmp(key, "cg_parent")) + cf_cg_parent = cf_string(val); else if (!strcmp(key, "first_uid")) cf_first_uid = cf_int(val); else if (!strcmp(key, "first_gid")) diff --git a/default.cf b/default.cf index c0372f5..eebd421 100644 --- a/default.cf +++ b/default.cf @@ -8,6 +8,10 @@ box_root = /var/local/lib/isolate # Root of the control group hierarchy cg_root = /sys/fs/cgroup +# If the following variable is defined, the per-box cgroups +# are created as sub-groups of the named cgroup +#cg_parent = boxes + # Block of UIDs and GIDs reserved for sandboxes first_uid = 60000 first_gid = 60000 diff --git a/isolate.h b/isolate.h index 61bba2c..b9fdecb 100644 --- a/isolate.h +++ b/isolate.h @@ 
-68,6 +68,7 @@ void cg_remove(void); extern char *cf_box_root; extern char *cf_cg_root; +extern char *cf_cg_parent; extern int cf_first_uid; extern int cf_first_gid; extern int cf_num_boxes; From 3bf44c474f4777d95f304fa5de04a3972d72a807 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Mon, 10 Oct 2016 16:11:24 +0200 Subject: [PATCH 19/22] Released as version 1.3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ce9bc2d..3fc06f8 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ all: isolate isolate.1 isolate.1.html CC=gcc CFLAGS=-std=gnu99 -Wall -Wextra -Wno-parentheses -Wno-unused-result -Wno-missing-field-initializers -Wstrict-prototypes -Wmissing-prototypes -D_GNU_SOURCE -VERSION=1.2 +VERSION=1.3 YEAR=2016 BUILD_DATE:=$(shell date '+%Y-%m-%d') BUILD_COMMIT:=$(shell if git rev-parse >/dev/null 2>/dev/null ; then git describe --always ; else echo '' ; fi) From e3c13684f5cab245471ee3042b9ea762a6464aad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herman=20Zvonimir=20Do=C5=A1ilovi=C4=87?= Date: Sat, 22 Oct 2016 21:29:39 +0200 Subject: [PATCH 20/22] Added f to short_opts --- isolate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isolate.c b/isolate.c index 21bfb2d..35c5597 100644 --- a/isolate.c +++ b/isolate.c @@ -745,7 +745,7 @@ enum opt_code { OPT_SHARE_NET, }; -static const char short_opts[] = "b:c:d:eE:i:k:m:M:o:p::q:r:st:vw:x:"; +static const char short_opts[] = "b:c:d:eE:f:i:k:m:M:o:p::q:r:st:vw:x:"; static const struct option long_opts[] = { { "box-id", 1, NULL, 'b' }, From 7f55e36a27c17f07bd90e254d0eadefdac09a4d6 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Mon, 24 Oct 2016 13:51:11 +0200 Subject: [PATCH 21/22] Cosmetic: Options should be sorted --- isolate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/isolate.c b/isolate.c index 35c5597..a75b129 100644 --- a/isolate.c +++ b/isolate.c @@ -803,9 +803,6 @@ main(int argc, char **argv) if 
(!set_dir_action(optarg)) usage("Invalid directory specified: %s\n", optarg); break; - case 'f': - fsize_limit = atoi(optarg); - break; case 'e': pass_environ = 1; break; @@ -813,6 +810,9 @@ main(int argc, char **argv) if (!set_env_action(optarg)) usage("Invalid environment specified: %s\n", optarg); break; + case 'f': + fsize_limit = atoi(optarg); + break; case 'k': stack_limit = atoi(optarg); break; From 424e3a6eaa5cbcbff4a1f613eb8269807cb200e7 Mon Sep 17 00:00:00 2001 From: Antoine Pietri Date: Sat, 4 Mar 2017 00:16:55 +0100 Subject: [PATCH 22/22] Makefile: remove exec permission for config and manpage files --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3fc06f8..6508ea7 100644 --- a/Makefile +++ b/Makefile @@ -48,9 +48,9 @@ install: isolate install -D $< $(BINDIR)/$< chmod u+s $(BINDIR)/$< install -d $(BOXDIR) - install -D default.cf $(CONFIG) + install -m 644 -D default.cf $(CONFIG) install-doc: isolate.1 - install -D $< $(MAN1DIR)/$< + install -m 644 -D $< $(MAN1DIR)/$< .PHONY: all clean install install-doc