diff --git a/src/conmon.c b/src/conmon.c index 946b1382..e976b2b6 100644 --- a/src/conmon.c +++ b/src/conmon.c @@ -541,6 +541,7 @@ static gboolean oom_cb_cgroup_v1(int fd, GIOCondition condition, gpointer user_d char *cgroup_event_control_path = (char *)user_data; uint64_t event_count; ssize_t num_read = 0; + gboolean cgroup_removed = FALSE; if ((condition & G_IO_IN) == 0) { /* End of input */ @@ -550,6 +551,20 @@ static gboolean oom_cb_cgroup_v1(int fd, GIOCondition condition, gpointer user_d return G_SOURCE_REMOVE; } + /* Attempt to read the container's cgroup path. + * if the cgroup.memory_control file does not exist, + * we know one of the events on this fd was a cgroup removal + */ + if (access(cgroup_event_control_path, F_OK) < 0) { + ndebugf("Memory cgroup removal event received"); + cgroup_removed = TRUE; + } + + /* there are three cases we need to worry about: + * oom kill happened (1 event) + * cgroup was removed (1 event) + * oom kill happened and cgroup was removed (2 events) + */ num_read = read(fd, &event_count, sizeof(uint64_t)); if (num_read < 0) { nwarn("Failed to read oom event from eventfd"); @@ -570,23 +585,17 @@ static gboolean oom_cb_cgroup_v1(int fd, GIOCondition condition, gpointer user_d ndebugf("Memory cgroup event count: %ld", (long)event_count); if (event_count == 0) { - nwarn("Unexpected event count (zero)"); + nwarn("Unexpected event count (zero) when reading for oom event"); return G_SOURCE_CONTINUE; } - /* attempt to read the container's cgroup path. - * if we can't, the cgroup has probably been cleaned up. - * In all likelihood, this means we received an event on the eventfd - * because the memory.oom_control file was removed, not because of an OOM + + /* if there's only one event, and the cgroup was removed + * we know the event was for a cgroup removal, not an OOM kill */ - if (access(cgroup_event_control_path, F_OK) < 0) { - ndebugf("Memory cgroup removal event received"); - /* if event_count == 1, we know the only event triggered was a cgroup removal - * if it was greater than 1, we know the cgroup oomed, and then was cleaned up. - */ - if (event_count == 1) - return G_SOURCE_CONTINUE; - } + if (event_count == 1 && cgroup_removed) + return G_SOURCE_CONTINUE; + /* we catch the two other cases here, both of which are OOM kill events */ ninfo("OOM event received"); write_oom_files();