Skip to content

Commit

Permalink
Merge pull request #127 from haircommander/oom-fix-race
Browse files Browse the repository at this point in the history
oom: fix potential race between verification steps
  • Loading branch information
giuseppe authored Mar 16, 2020
2 parents 9f3f784 + 4303bb6 commit 51c0e7b
Showing 1 changed file with 22 additions and 13 deletions.
35 changes: 22 additions & 13 deletions src/conmon.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,7 @@ static gboolean oom_cb_cgroup_v1(int fd, GIOCondition condition, gpointer user_d
char *cgroup_event_control_path = (char *)user_data;
uint64_t event_count;
ssize_t num_read = 0;
gboolean cgroup_removed = FALSE;

if ((condition & G_IO_IN) == 0) {
/* End of input */
Expand All @@ -550,6 +551,20 @@ static gboolean oom_cb_cgroup_v1(int fd, GIOCondition condition, gpointer user_d
return G_SOURCE_REMOVE;
}

/* Attempt to read the container's cgroup path.
* if the cgroup.memory_control file does not exist,
* we know one of the events on this fd was a cgroup removal
*/
if (access(cgroup_event_control_path, F_OK) < 0) {
ndebugf("Memory cgroup removal event received");
cgroup_removed = TRUE;
}

/* there are three cases we need to worry about:
* oom kill happened (1 event)
* cgroup was removed (1 event)
* oom kill happened and cgroup was removed (2 events)
*/
num_read = read(fd, &event_count, sizeof(uint64_t));
if (num_read < 0) {
nwarn("Failed to read oom event from eventfd");
Expand All @@ -570,23 +585,17 @@ static gboolean oom_cb_cgroup_v1(int fd, GIOCondition condition, gpointer user_d

ndebugf("Memory cgroup event count: %ld", (long)event_count);
if (event_count == 0) {
nwarn("Unexpected event count (zero)");
nwarn("Unexpected event count (zero) when reading for oom event");
return G_SOURCE_CONTINUE;
}
/* attempt to read the container's cgroup path.
* if we can't, the cgroup has probably been cleaned up.
* In all likelihood, this means we received an event on the eventfd
* because the memory.oom_control file was removed, not because of an OOM

/* if there's only one event, and the cgroup was removed
* we know the event was for a cgroup removal, not an OOM kill
*/
if (access(cgroup_event_control_path, F_OK) < 0) {
ndebugf("Memory cgroup removal event received");
/* if event_count == 1, we know the only event triggered was a cgroup removal
* if it was greater than 1, we know the cgroup oomed, and then was cleaned up.
*/
if (event_count == 1)
return G_SOURCE_CONTINUE;
}
if (event_count == 1 && cgroup_removed)
return G_SOURCE_CONTINUE;

/* we catch the two other cases here, both of which are OOM kill events */
ninfo("OOM event received");
write_oom_files();

Expand Down

0 comments on commit 51c0e7b

Please sign in to comment.