diff --git a/README.md b/README.md index 50045e02..b7226ca3 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,11 @@ such as: /proc/slabinfo /sys/devices/system/cpu /sys/devices/system/cpu/online +/sys/devices/system/node +/sys/devices/system/node/online +/sys/devices/system/node/has_cpu +/sys/devices/system/node/has_memory +/sys/devices/system/node/has_normal_memory ``` are container aware such that the values displayed (e.g. in `/proc/uptime`) diff --git a/src/bindings.h b/src/bindings.h index e35423ac..c142aee1 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -67,6 +67,22 @@ enum lxcfs_virt_t { LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE, #define LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH "/sys/devices/system/cpu/online" + + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE, + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBDIR, + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBFILE, + + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE, +#define LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE_PATH "/sys/devices/system/node/online" + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU, +#define LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU_PATH "/sys/devices/system/node/has_cpu" + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY, +#define LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY_PATH "/sys/devices/system/node/has_memory" + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY, +#define LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY_PATH "/sys/devices/system/node/has_normal_memory" + + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPULIST, + LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPUMAP, }; struct file_info { diff --git a/src/cgroups/cgfsng.c b/src/cgroups/cgfsng.c index 72a4503c..94c7bdb6 100644 --- a/src/cgroups/cgfsng.c +++ b/src/cgroups/cgfsng.c @@ -684,6 +684,22 @@ static char *readat_cpuset(int cgroup_fd) return NULL; } +static char *readat_cpuset_mems(int cgroup_fd) +{ + __do_free char *val = NULL; + + val = readat_file(cgroup_fd, "cpuset.mems"); + if (val && strcmp(val, "") != 0) + return move_ptr(val); + + free_disarm(val); + val = 
readat_file(cgroup_fd, "cpuset.mems.effective"); + if (val && strcmp(val, "") != 0) + return move_ptr(val); + + return NULL; +} + static int cgfsng_get_cpuset_cpus(struct cgroup_ops *ops, const char *cgroup, char **value) { @@ -737,6 +753,59 @@ static int cgfsng_get_cpuset_cpus(struct cgroup_ops *ops, const char *cgroup, return -1; } +static int cgfsng_get_cpuset_mems(struct cgroup_ops *ops, const char *cgroup, + char **value) +{ + __do_close int cgroup_fd = -EBADF; + __do_free char *path = NULL; + char *v; + struct hierarchy *h; + int ret; + + h = ops->get_hierarchy(ops, "cpuset"); + if (!h) + return -1; + + if (!is_unified_hierarchy(h)) + ret = CGROUP_SUPER_MAGIC; + else + ret = CGROUP2_SUPER_MAGIC; + + *value = NULL; + path = must_make_path_relative(cgroup, NULL); + cgroup_fd = openat_safe(h->fd, path); + if (cgroup_fd < 0) + return -1; + + v = readat_cpuset_mems(cgroup_fd); + if (v) { + *value = v; + return ret; + } + + /* + * cpuset.mems and cpuset.mems.effective are empty so we need to look for + * the nearest ancestor with a non-empty cpuset.mems{.effective} file. 
+ */ + for (;;) { + int fd; + + fd = openat_safe(cgroup_fd, "../"); + if (fd < 0 || !is_cgroup_fd(fd)) + return -1; + + close_prot_errno_replace(cgroup_fd, fd); + + v = readat_cpuset_mems(fd); + if (v) { + *value = v; + return ret; + } + } + + return -1; +} + static int cgfsng_get_io(struct cgroup_ops *ops, const char *cgroup, const char *file, char **value) { @@ -1027,6 +1096,7 @@ struct cgroup_ops *cgfsng_ops_init(void) /* cpuset */ cgfsng_ops->get_cpuset_cpus = cgfsng_get_cpuset_cpus; cgfsng_ops->can_use_cpuview = cgfsng_can_use_cpuview; + cgfsng_ops->get_cpuset_mems = cgfsng_get_cpuset_mems; /* blkio */ cgfsng_ops->get_io_service_bytes = cgfsng_get_io_service_bytes; diff --git a/src/cgroups/cgroup.c b/src/cgroups/cgroup.c index 5bf7f62d..222a7b6b 100644 --- a/src/cgroups/cgroup.c +++ b/src/cgroups/cgroup.c @@ -122,3 +122,19 @@ char *get_cpuset(const char *cg) return value; } + +/* + * Read the cpuset.mems for cg + * Return the answer in a newly allocated string which must be freed + */ +char *get_cpuset_mems(const char *cg) +{ + char *value = NULL; + int ret; + + ret = cgroup_ops->get_cpuset_mems(cgroup_ops, cg, &value); + if (ret < 0) + return NULL; + + return value; +} diff --git a/src/cgroups/cgroup.h b/src/cgroups/cgroup.h index 122e8ebf..b4d46b2e 100644 --- a/src/cgroups/cgroup.h +++ b/src/cgroups/cgroup.h @@ -154,6 +154,8 @@ struct cgroup_ops { int (*get_cpuset_cpus)(struct cgroup_ops *ops, const char *cgroup, char **value); bool (*can_use_cpuview)(struct cgroup_ops *ops); + int (*get_cpuset_mems)(struct cgroup_ops *ops, const char *cgroup, + char **value); /* io */ int (*get_io_service_bytes)(struct cgroup_ops *ops, const char *cgroup, @@ -211,5 +213,6 @@ static inline int get_cgroup_fd(const char *controller) extern char *get_pid_cgroup(pid_t pid, const char *contrl); extern char *get_cpuset(const char *cg); +extern char *get_cpuset_mems(const char *cg); #endif diff --git a/src/sysfs_fuse.c b/src/sysfs_fuse.c index 892be893..86208dc8 100644 --- 
a/src/sysfs_fuse.c +++ b/src/sysfs_fuse.c @@ -192,6 +192,21 @@ static int cpumask(char *posscpus, __u32 **bitarr, __u32 *last_set_bit) return 0; } +static int nodemask(char *possnodes, __u32 **bitarr, __u32 *last_set_bit) +{ + __do_free __u32 *possmask = NULL; + int ret; + __u32 poss_last_set_bit = 0; + + ret = lxc_cpumask(possnodes, &possmask, &poss_last_set_bit); + if (ret) + return ret; + + *bitarr = move_ptr(possmask); + *last_set_bit = poss_last_set_bit; + return 0; +} + static int do_cpuset_read(char *cg, char *buf, size_t buflen) { __do_free char *cpuset = NULL; @@ -205,31 +220,743 @@ static int do_cpuset_read(char *cg, char *buf, size_t buflen) if (!cpuset) return 0; - if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) - use_view = true; - else - use_view = false; + if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) + use_view = true; + else + use_view = false; + + if (use_view) + max_cpus = max_cpu_count(cg); + + if (use_view) { + if (max_cpus > 1) + total_len = snprintf(buf, buflen, "0-%d\n", max_cpus - 1); + else + total_len = snprintf(buf, buflen, "0\n"); + } else { + total_len = snprintf(buf, buflen, "%s\n", cpuset); + } + if (total_len < 0 || (size_t)total_len >= buflen) + return log_error(0, "Failed to write to cache"); + + return total_len; +} + +/* + * Get online nodes from cpuset.cpus or cpuset.cpus.effective + * + * Traverse nodes listed by /sys/devices/system/node/online. If + * cpuX specified by cpuset.cpus or cpuset.cpus.effective is listed + * in /sys/devices/system/node/nodeY/cpulist, nodeY is online. 
+ */ +static int do_get_online_nodes_from_cpuset_cpus(char *cg, __u32 **bitarr, __u32 *last_set_bit) +{ + __do_free char *cpuset_cpus = NULL; + __do_free char *node_online = NULL; + __do_free char *node_cpulist = NULL; + __do_free __u32 *bitarr_cpus = NULL; + __do_free __u32 *bitarr_node_online = NULL; + __do_free __u32 *bitarr_node_cpulist = NULL; + __u32 *arr_u32 = zalloc(sizeof(__u32)); + __u32 last_set_bit_cpus = 0; + __u32 last_set_bit_node_online = 0; + __u32 last_set_bit_node_cpulist = 0; + __u32 last_set_bit_node_from_cpuset_cpus = 0; + char node_cpulist_path[BUF_RESERVE_SIZE] = {}; + int ret = 0; + + cpuset_cpus = get_cpuset(cg); + if (!cpuset_cpus) + return 0; + + ret = cpumask(cpuset_cpus, &bitarr_cpus, &last_set_bit_cpus); + if (ret) + return ret; + + node_online = read_file_at(-EBADF, "/sys/devices/system/node/online", PROTECT_OPEN); + if (!node_online) + return -1; + + ret = nodemask(node_online, &bitarr_node_online, &last_set_bit_node_online); + if (ret) + return ret; + + for (__u32 bit = 0; bit <= last_set_bit_node_online; bit++) { + + ret = snprintf(node_cpulist_path, sizeof(node_cpulist_path), + "/sys/devices/system/node/node%u/cpulist", bit); + if (ret < 0 || (size_t)ret >= sizeof(node_cpulist_path)) + continue; + + node_cpulist = read_file_at(-EBADF, node_cpulist_path, PROTECT_OPEN); + if (!node_cpulist) + return -1; + + ret = cpumask(node_cpulist, &bitarr_node_cpulist, &last_set_bit_node_cpulist); + if (ret) + return ret; + + for (__u32 bit_cpu = 0; bit_cpu <= last_set_bit_cpus; bit_cpu++) { + if (is_set(bit_cpu, bitarr_cpus) && is_set(bit_cpu, bitarr_node_cpulist)) { + set_bit(bit, arr_u32); + last_set_bit_node_from_cpuset_cpus = bit; + break; + } + } + } + *last_set_bit = last_set_bit_node_from_cpuset_cpus; + *bitarr = move_ptr(arr_u32); + + return ret; +} + +/* + * Get online nodes + * + * Online nodes come from: + * - cpuset.cpus or cpuset.cpus.effective, indicating which nodes have cpus online + * - cpuset.mems or cpuset.mems.effective, 
indicating which nodes have mems online + */ +static int do_get_online_nodes(char *cg, __u32 **bitarr, __u32 *last_set_bit) +{ + __do_free char *cpuset_mems = NULL; + __do_free __u32 *bitarr_mems = NULL; + __do_free __u32 *bitarr_node_from_cpuset_cpus = NULL; + __u32 last_set_bit_mems = 0; + __u32 last_set_bit_node_from_cpuset_cpus = 0; + int ret = 0; + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr_mems, &last_set_bit_mems); + if (ret) + return ret; + + ret = do_get_online_nodes_from_cpuset_cpus(cg, &bitarr_node_from_cpuset_cpus, + &last_set_bit_node_from_cpuset_cpus); + if (ret) + return ret; + + *last_set_bit = last_set_bit_mems > last_set_bit_node_from_cpuset_cpus ? + last_set_bit_mems : last_set_bit_node_from_cpuset_cpus; + *bitarr_node_from_cpuset_cpus |= *bitarr_mems; + *bitarr = move_ptr(bitarr_node_from_cpuset_cpus); + + return ret; +} + +static int sys_devices_system_cpu_online_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + + if (offset) { + size_t left; + + if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? 
size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/cpu/online", buf, size, d); + prune_init_slice(cg); + + total_len = do_cpuset_read(cg, d->buf, d->buflen); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int sys_devices_system_cpu_online_getsize(const char *path) +{ + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + pid_t initpid; + char buf[BUF_RESERVE_SIZE]; + int buflen = sizeof(buf); + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return get_sysfile_size(path); + prune_init_slice(cg); + + return do_cpuset_read(cg, buf, buflen); +} + +static int sys_devices_system_node_online_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __u32 last_set_bit = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + + if (offset) { + size_t left; + + if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? 
size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/node/online", buf, size, d); + prune_init_slice(cg); + + ret = do_get_online_nodes(cg, &bitarr, &last_set_bit); + if (ret) + return ret; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(d->buf, d->buflen, "%s\n", list); + if (total_len < 0 || total_len >= d->buflen) + return log_error(0, "Failed to write to cache"); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int sys_devices_system_node_online_getsize(const char *path) +{ + __do_free char *cg = NULL, *cpuset_mems = NULL; + struct fuse_context *fc = fuse_get_context(); + pid_t initpid; + __do_free __u32 *bitarr = NULL; + __u32 last_set_bit = 0; + ssize_t total_len = 0; + char list[BUF_RESERVE_SIZE] = {0}; + char buf[BUF_RESERVE_SIZE] = {0}; + int buflen = sizeof(buf); + int ret = 0; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return get_sysfile_size(path); + prune_init_slice(cg); + + ret = do_get_online_nodes(cg, &bitarr, &last_set_bit); + if (ret < 0) + return ret; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(buf, buflen, "%s\n", list); + if (total_len < 0 || total_len >= buflen) + return log_error(0, "Failed to write to cache"); + + return total_len; +} + +/* + * Get the nodes that have CPUs + * + * Nodes list from 
do_get_online_nodes_from_cpuset_cpus is bitwise-anded + * with nodes list from /sys/devices/system/node/has_cpu. + */ +static int sys_devices_system_node_has_cpu_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_cpu = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_cpu = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + __do_free char *has_cpu = NULL; + + if (offset) { + size_t left; + + if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/node/has_cpu", buf, size, d); + prune_init_slice(cg); + + ret = do_get_online_nodes_from_cpuset_cpus(cg, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_cpu = read_file_at(-EBADF, "/sys/devices/system/node/has_cpu", PROTECT_OPEN); + if (!has_cpu) + return -1; + + ret = nodemask(has_cpu, &bitarr_has_cpu, &last_set_bit_has_cpu); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_cpu ? 
+ last_set_bit : last_set_bit_has_cpu; + *bitarr &= *bitarr_has_cpu; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(d->buf, d->buflen, "%s\n", list); + if (total_len < 0 || total_len >= d->buflen) + return log_error(0, "Failed to write to cache"); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int sys_devices_system_node_has_cpu_getsize(const char *path) +{ + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_cpu = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_cpu = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + __do_free char *has_cpu = NULL; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return get_sysfile_size("/sys/devices/system/node/has_cpu"); + prune_init_slice(cg); + + ret = do_get_online_nodes_from_cpuset_cpus(cg, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_cpu = read_file_at(-EBADF, "/sys/devices/system/node/has_cpu", PROTECT_OPEN); + if (!has_cpu) + return -1; + + ret = nodemask(has_cpu, &bitarr_has_cpu, &last_set_bit_has_cpu); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_cpu ? 
+ last_set_bit : last_set_bit_has_cpu; + *bitarr &= *bitarr_has_cpu; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + total_len = ret + 1; + + return total_len; +} + +/* + * Get the nodes that have memory + * + * The node list from cpuset.mems or cpuset.mems.effective is bitwise-ANDed + * with the node list from /sys/devices/system/node/has_memory. + */ +static int sys_devices_system_node_has_memory_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + __do_free char *cpuset_mems = NULL; + __do_free char *has_memory = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_memory = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_memory = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + + if (offset) { + size_t left; + + if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/node/has_memory", buf, size, d); + prune_init_slice(cg); + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_memory = read_file_at(-EBADF, "/sys/devices/system/node/has_memory", PROTECT_OPEN); + if (!has_memory) + return -1; + + ret = nodemask(has_memory, &bitarr_has_memory, &last_set_bit_has_memory); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_memory ? 
+ last_set_bit : last_set_bit_has_memory; + *bitarr &= *bitarr_has_memory; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(d->buf, d->buflen, "%s\n", list); + if (total_len < 0 || total_len >= d->buflen) + return log_error(0, "Failed to write to cache"); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int sys_devices_system_node_has_memory_normal_memory_getsize(const char *path) +{ + __do_free char *cg = NULL; + __do_free char *cpuset_mems = NULL; + __do_free char *has_memory = NULL; + struct fuse_context *fc = fuse_get_context(); + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_memory = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_memory = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return get_sysfile_size(path); + prune_init_slice(cg); + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_memory = read_file_at(-EBADF, path, PROTECT_OPEN); + if (!has_memory) + return -1; + + ret = nodemask(has_memory, &bitarr_has_memory, &last_set_bit_has_memory); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_memory ? 
+ last_set_bit : last_set_bit_has_memory; + *bitarr &= *bitarr_has_memory; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + total_len = ret + 1; + + return total_len; +} + +/* + * Get the nodes that have normal memory + * + * The node list from cpuset.mems or cpuset.mems.effective is bitwise-ANDed + * with the node list from /sys/devices/system/node/has_normal_memory. + */ +static int sys_devices_system_node_has_normal_memory_read(char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + __do_free char *cg = NULL; + __do_free char *cpuset_mems = NULL; + __do_free char *has_normal_memory = NULL; + struct fuse_context *fc = fuse_get_context(); + struct file_info *d = INTTYPE_TO_PTR(fi->fh); + char *cache = d->buf; + pid_t initpid; + ssize_t total_len = 0; + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_has_normal_memory = NULL; + __u32 last_set_bit = 0; + __u32 last_set_bit_has_normal_memory = 0; + int ret = 0; + char list[BUF_RESERVE_SIZE] = {0}; + + if (offset) { + size_t left; + + if (!d->cached) + return 0; + + if (offset > d->size) + return -EINVAL; + + left = d->size - offset; + total_len = left > size ? 
size : left; + memcpy(buf, cache + offset, total_len); + + return total_len; + } + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return read_file_fuse("/sys/devices/system/node/has_normal_memory", buf, size, d); + prune_init_slice(cg); + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr, &last_set_bit); + if (ret) + return ret; + + has_normal_memory = read_file_at(-EBADF, "/sys/devices/system/node/has_normal_memory", PROTECT_OPEN); + if (!has_normal_memory) + return -1; + + ret = nodemask(has_normal_memory, &bitarr_has_normal_memory, &last_set_bit_has_normal_memory); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_has_normal_memory ? + last_set_bit : last_set_bit_has_normal_memory; + *bitarr &= *bitarr_has_normal_memory; + + ret = bitarr_to_list(list, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(list)) + return log_error(0, "Failed to write to cache"); + + total_len = snprintf(d->buf, d->buflen, "%s\n", list); + if (total_len < 0 || total_len >= d->buflen) + return log_error(0, "Failed to write to cache"); + + d->size = (int)total_len; + d->cached = 1; + + if ((size_t)total_len > size) + total_len = size; + + memcpy(buf, d->buf, total_len); + + return total_len; +} + +static int do_cpulist_cpumap_read(const char *path, char *cg, char *buf, size_t buflen) +{ + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_cpulist = NULL; + __do_free char *cpuset = NULL; + __do_free char *cpulist = NULL; + ssize_t total_len = 0; + __u32 last_set_bit = 0; + __u32 last_set_bit_cpulist = 0; + __u32 ndwords = 0; + int path_len = strlen(path); + int ret; + char file_path[BUF_RESERVE_SIZE] = {0}, cpulistmap[BUF_RESERVE_SIZE] = {0}; + bool cpumap; + __u32 i, pos = 0; + + cpuset = get_cpuset(cg); + if (!cpuset) + return 0; + + ret = cpumask(cpuset, &bitarr, 
&last_set_bit); + if (ret) + return ret; - if (use_view) - max_cpus = max_cpu_count(cg); + if((strcmp(path + path_len - 6, "cpumap") == 0)) { + strncpy(file_path, path, path_len -6); + strcpy(file_path + path_len - 6, "cpulist"); + cpumap = true; + } + else { + strcpy(file_path, path); + cpumap = false; + } - if (use_view) { - if (max_cpus > 1) - total_len = snprintf(buf, buflen, "0-%d\n", max_cpus - 1); - else - total_len = snprintf(buf, buflen, "0\n"); + if (file_exists(file_path)) { + cpulist = read_file_at(-EBADF, file_path, PROTECT_OPEN); + if (!cpulist) + return -1; + + if (!isdigit(cpulist[0])) + free_disarm(cpulist); } else { - total_len = snprintf(buf, buflen, "%s\n", cpuset); + log_error(0, "/sys/devices/system/node/node*/cpulist does not exist"); } - if (total_len < 0 || (size_t)total_len >= buflen) - return log_error(0, "Failed to write to cache"); - return total_len; + ret = cpumask(cpulist, &bitarr_cpulist, &last_set_bit_cpulist); + if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_cpulist ? 
+ last_set_bit : last_set_bit_cpulist; + ndwords = last_set_bit / 32 + 1; + for (i = 0; i < ndwords; i++) + *(bitarr + i) &= *(bitarr_cpulist + i); + + if (cpumap) { + for (i = 0; i < ndwords; i++) { + *(bitarr + ndwords - 1 - i) &= *(bitarr_cpulist + ndwords - 1 - i); + if (i) + pos += sprintf(cpulistmap + pos, "%08x,", *(bitarr + ndwords - 1 - i)); + else + pos += sprintf(cpulistmap, "%x,", *(bitarr + ndwords - 1 - i)); + } + cpulistmap[strlen(cpulistmap) - 1] = '\0'; + } + else { + ret = bitarr_to_list(cpulistmap, bitarr, last_set_bit); + if (ret < 0 || (size_t)ret >= sizeof(cpulistmap)) + return log_error(0, "Failed to write to cache"); + } + + total_len = snprintf(buf, buflen, "%s\n", cpulistmap); + if (total_len < 0 || (size_t)total_len >= buflen) + return log_error(0, "Failed to write to cache"); + + return total_len; } -static int sys_devices_system_cpu_online_read(char *buf, size_t size, - off_t offset, - struct fuse_file_info *fi) +static int sys_devices_system_node_nodex_cpulist_cpumap_read(const char *path, char *buf, size_t size, + off_t offset, struct fuse_file_info *fi) { __do_free char *cg = NULL; struct fuse_context *fc = fuse_get_context(); @@ -260,11 +987,10 @@ static int sys_devices_system_cpu_online_read(char *buf, size_t size, cg = get_pid_cgroup(initpid, "cpuset"); if (!cg) - return read_file_fuse("/sys/devices/system/cpu/online", buf, size, d); + return read_file_fuse(path, buf, size, d); prune_init_slice(cg); - total_len = do_cpuset_read(cg, d->buf, d->buflen); - + total_len = do_cpulist_cpumap_read(path, cg, d->buf, d->buflen); d->size = (int)total_len; d->cached = 1; @@ -276,24 +1002,24 @@ static int sys_devices_system_cpu_online_read(char *buf, size_t size, return total_len; } -static int sys_devices_system_cpu_online_getsize(const char *path) +static int sys_devices_system_node_nodex_cpulist_cpumap_getsize(const char *path) { - __do_free char *cg = NULL; - struct fuse_context *fc = fuse_get_context(); - pid_t initpid; - char 
buf[BUF_RESERVE_SIZE]; - int buflen = sizeof(buf); + __do_free char *cg = NULL; + struct fuse_context *fc = fuse_get_context(); + pid_t initpid; + char buf[BUF_RESERVE_SIZE]; + int buflen = sizeof(buf); - initpid = lookup_initpid_in_store(fc->pid); - if (initpid <= 1 || is_shared_pidns(initpid)) - initpid = fc->pid; + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; - cg = get_pid_cgroup(initpid, "cpuset"); - if (!cg) - return get_sysfile_size(path); - prune_init_slice(cg); + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return get_sysfile_size(path); + prune_init_slice(cg); - return do_cpuset_read(cg, buf, buflen); + return do_cpulist_cpumap_read(path, cg, buf, buflen); } static int filler_sys_devices_system_cpu(const char *path, void *buf, @@ -361,6 +1087,179 @@ static int filler_sys_devices_system_cpu(const char *path, void *buf, return 0; } +static int filler_sys_devices_system_node(const char *path, void *buf, + fuse_fill_dir_t filler) +{ + __do_free char *cg = NULL; + __do_closedir DIR *dirp = NULL; + struct fuse_context *fc = fuse_get_context(); + __do_free __u32 *bitarr = NULL; + __u32 last_set_bit = 0; + int ret; + struct dirent *dirent; + pid_t initpid; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return 0; + prune_init_slice(cg); + + ret = do_get_online_nodes(cg, &bitarr, &last_set_bit); + if (ret) + return ret; + + dirp = opendir(path); + if (!dirp) + return -ENOENT; + + for (__u32 bit = 0; bit <= last_set_bit; bit++) { + char node[100]; + + if (!is_set(bit, bitarr)) + continue; + + ret = snprintf(node, sizeof(node), "node%u", bit); + if (ret < 0 || (size_t)ret >= sizeof(node)) + continue; + + if (dir_fillerat(filler, dirp, node, buf, 0) != 0) + return -ENOENT; + } + + while ((dirent = readdir(dirp))) { + char *entry = dirent->d_name; + + if (strlen(entry) 
<= 4) + continue; + entry += 4; + + /* Don't emit entries we already filtered above. */ + if (isdigit(*entry)) + continue; + + if (dirent_fillerat(filler, dirp, dirent, buf, 0) != 0) + return -ENOENT; + } + + return 0; +} + +static int filler_sys_devices_system_node_nodex(const char *path, void *buf, + fuse_fill_dir_t filler) +{ + __do_free __u32 *bitarr = NULL; + __do_free __u32 *bitarr_mems = NULL; + __do_free __u32 *bitarr_cpulist = NULL; + __do_free char *cpulist = NULL; + __do_free char *cg = NULL, *cpuset = NULL, *cpuset_mems = NULL; + __do_closedir DIR *dirp = NULL; + struct fuse_context *fc = fuse_get_context(); + __u32 last_set_bit = 0; + __u32 last_set_bit_mems = 0; + __u32 last_set_bit_cpulist = 0; + __u32 ndwords = 0; + int ret; + struct dirent *dirent; + pid_t initpid; + char cpulist_path[100]; + bool nomem = false; + + initpid = lookup_initpid_in_store(fc->pid); + if (initpid <= 1 || is_shared_pidns(initpid)) + initpid = fc->pid; + + cg = get_pid_cgroup(initpid, "cpuset"); + if (!cg) + return 0; + prune_init_slice(cg); + + cpuset = get_cpuset(cg); + if (!cpuset) + return 0; + + ret = cpumask(cpuset, &bitarr, &last_set_bit); + if (ret) + return ret; + + cpuset_mems = get_cpuset_mems(cg); + if (!cpuset_mems) + return 0; + + ret = nodemask(cpuset_mems, &bitarr_mems, &last_set_bit_mems); + if (ret) + return ret; + int nodex = atoi(path + strlen("/sys/devices/system/node/node")); + if (!is_set(nodex, bitarr_mems)) + nomem = true; + + ret = snprintf(cpulist_path, sizeof(cpulist_path), "%s/cpulist", path); + if (ret < 0 || (size_t)ret >= sizeof(cpulist_path)) + log_error(0, "Failed to write to cpulist buf"); + + if (file_exists(cpulist_path)) { + cpulist = read_file_at(-EBADF, cpulist_path, PROTECT_OPEN); + if (!cpulist) + return -1; + + if (!isdigit(cpulist[0])) + free_disarm(cpulist); + } else { + log_error(0, "/sys/devices/system/node/node*/cpulist does not exist"); + } + + if (cpulist) + ret = cpumask(cpulist, &bitarr_cpulist, &last_set_bit_cpulist); + 
if (ret) + return ret; + + last_set_bit = last_set_bit < last_set_bit_cpulist ? last_set_bit : last_set_bit_cpulist; + ndwords = last_set_bit / 32 + 1; + while (ndwords--) + *(bitarr+ndwords) &= *(bitarr_cpulist+ndwords); + + dirp = opendir(path); + if (!dirp) + return -ENOENT; + + for (__u32 bit = 0; bit <= last_set_bit; bit++) { + char cpu[100]; + + if (!is_set(bit, bitarr)) + continue; + + ret = snprintf(cpu, sizeof(cpu), "cpu%u", bit); + if (ret < 0 || (size_t)ret >= sizeof(cpu)) + continue; + + if (dir_fillerat(filler, dirp, cpu, buf, 0) != 0) + return -ENOENT; + } + + while ((dirent = readdir(dirp))) { + char *entry = dirent->d_name; + + if (nomem && (strncmp(entry, "cpu", strlen("cpu")) != 0)) + continue; + + if (strlen(entry) <= 3) + continue; + entry += 3; + + /* Don't emit entries we already filtered above. */ + if (isdigit(*entry)) + continue; + + if (dirent_fillerat(filler, dirp, dirent, buf, 0) != 0) + return -ENOENT; + } + + return 0; +} + static int get_st_mode(const char *path, mode_t *mode) { struct stat sb; @@ -432,6 +1331,40 @@ static int sys_getattr_legacy(const char *path, struct stat *sb) return 0; } + if (strcmp(path, "/sys/devices/system/node") == 0) { + sb->st_mode = S_IFDIR | 00555; + sb->st_nlink = 2; + return 0; + } + + if (strcmp(path, "/sys/devices/system/node/online") == 0) { + sb->st_size = sys_devices_system_node_online_getsize(path); + sb->st_mode = S_IFREG | 00444; + sb->st_nlink = 1; + return 0; + } + + if (strcmp(path, "/sys/devices/system/node/has_cpu") == 0) { + sb->st_size = sys_devices_system_node_has_cpu_getsize(path); + sb->st_mode = S_IFREG | 00444; + sb->st_nlink = 1; + return 0; + } + + if (strcmp(path, "/sys/devices/system/node/has_memory") == 0) { + sb->st_size = sys_devices_system_node_has_memory_normal_memory_getsize(path); + sb->st_mode = S_IFREG | 00444; + sb->st_nlink = 1; + return 0; + } + + if (strcmp(path, "/sys/devices/system/node/has_normal_memory") == 0) { + sb->st_size = 
sys_devices_system_node_has_memory_normal_memory_getsize(path); + sb->st_mode = S_IFREG | 00444; + sb->st_nlink = 1; + return 0; + } + return -ENOENT; } @@ -440,6 +1373,7 @@ __lxcfs_fuse_ops int sys_getattr(const char *path, struct stat *sb) int ret; struct timespec now; mode_t st_mode; + int path_len = strlen(path); if (!liblxcfs_functional()) return -EIO; @@ -467,6 +1401,19 @@ __lxcfs_fuse_ops int sys_getattr(const char *path, struct stat *sb) if (S_ISREG(st_mode) || S_ISLNK(st_mode)) { if (strcmp(path, "/sys/devices/system/cpu/online") == 0) sb->st_size = sys_devices_system_cpu_online_getsize(path); + else if (strcmp(path, "/sys/devices/system/node/online") == 0) + sb->st_size = sys_devices_system_node_online_getsize(path); + else if (strcmp(path, "/sys/devices/system/node/has_cpu") == 0) + sb->st_size = sys_devices_system_node_has_cpu_getsize(path); + else if (strcmp(path, "/sys/devices/system/node/has_memory") == 0) + sb->st_size = sys_devices_system_node_has_memory_normal_memory_getsize(path); + else if (strcmp(path, "/sys/devices/system/node/has_normal_memory") == 0) + sb->st_size = sys_devices_system_node_has_memory_normal_memory_getsize(path); + else if ((strncmp(path, "/sys/devices/system/node/node", + STRLITERALLEN("/sys/devices/system/node/node")) == 0) && + ((strcmp(path + path_len - strlen("cpulist"), "cpulist") == 0) || + (strcmp(path + path_len - strlen("cpumap"), "cpumap") == 0))) + sb->st_size = sys_devices_system_node_nodex_cpulist_cpumap_getsize(path); else sb->st_size = get_sysfile_size(path); sb->st_mode = st_mode; @@ -498,7 +1445,8 @@ __lxcfs_fuse_ops int sys_write(const char *path, const char *buf, size_t size, if (!liblxcfs_functional()) return -EIO; - if (f->type != LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE) + if (f->type != LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE && + f->type != LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBFILE) return -EINVAL; fd = open(path, O_WRONLY | O_CLOEXEC); @@ -530,7 +1478,8 @@ static int sys_readdir_legacy(const char 
*path, void *buf, fuse_fill_dir_t fille if (strcmp(path, "/sys/devices/system") == 0) { if (dir_filler(filler, buf, ".", 0) != 0 || dir_filler(filler, buf, "..", 0) != 0 || - dirent_filler(filler, path, "cpu", buf, 0) != 0) + dirent_filler(filler, path, "cpu", buf, 0) != 0 || + dirent_filler(filler, path, "node", buf, 0) != 0) return -ENOENT; return 0; @@ -543,6 +1492,17 @@ static int sys_readdir_legacy(const char *path, void *buf, fuse_fill_dir_t fille return 0; } + if (strcmp(path, "/sys/devices/system/node") == 0) { + if (dir_filler(filler, buf, ".", 0) != 0 || + dir_filler(filler, buf, "..", 0) != 0 || + dirent_filler(filler, path, "online", buf, 0) != 0 || + dirent_filler(filler, path, "has_cpu", buf, 0) != 0 || + dirent_filler(filler, path, "has_memory", buf, 0) != 0 || + dirent_filler(filler, path, "has_normal_memory", buf, 0) != 0) + return -ENOENT; + + return 0; + } return 0; } @@ -586,7 +1546,8 @@ __lxcfs_fuse_ops int sys_readdir(const char *path, void *buf, case LXC_TYPE_SYS_DEVICES_SYSTEM: if (dir_filler(filler, buf, ".", 0) != 0 || dir_filler(filler, buf, "..", 0) != 0 || - dirent_filler(filler, path, "cpu", buf, 0) != 0) + dirent_filler(filler, path, "cpu", buf, 0) != 0 || + dirent_filler(filler, path, "node", buf, 0) != 0) return -ENOENT; return 0; case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU: @@ -605,6 +1566,18 @@ __lxcfs_fuse_ops int sys_readdir(const char *path, void *buf, return -ENOENT; } return 0; + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE: + if (dir_filler(filler, buf, ".", 0) != 0 || + dir_filler(filler, buf, "..", 0) != 0) + return -ENOENT; + + return filler_sys_devices_system_node(path, buf, filler); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBDIR: + if (dir_filler(filler, buf, ".", 0) != 0 || + dir_filler(filler, buf, "..", 0) != 0) + return -ENOENT; + + return filler_sys_devices_system_node_nodex(path, buf, filler); } return -EINVAL; @@ -642,6 +1615,16 @@ static int sys_open_legacy(const char *path, struct fuse_file_info *fi) type = 
LXC_TYPE_SYS_DEVICES_SYSTEM_CPU;
 	if (strcmp(path, "/sys/devices/system/cpu/online") == 0)
 		type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE;
+	if (strcmp(path, "/sys/devices/system/node") == 0)
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE; /* the node directory has its own type */
+	if (strcmp(path, "/sys/devices/system/node/online") == 0)
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE;
+	if (strcmp(path, "/sys/devices/system/node/has_cpu") == 0)
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU;
+	if (strcmp(path, "/sys/devices/system/node/has_memory") == 0)
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY;
+	if (strcmp(path, "/sys/devices/system/node/has_normal_memory") == 0)
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY;
 	if (type == -1)
 		return -ENOENT;
 
@@ -670,6 +1653,7 @@ __lxcfs_fuse_ops int sys_open(const char *path, struct fuse_file_info *fi)
 {
 	__do_free struct file_info *info = NULL;
 	int type = -1;
+	int path_len = strlen(path);
 
 	if (!liblxcfs_functional())
 		return -EIO;
@@ -679,6 +1663,22 @@ __lxcfs_fuse_ops int sys_open(const char *path, struct fuse_file_info *fi)
 
 	if (strcmp(path, "/sys/devices/system/cpu/online") == 0) {
 		type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE;
+	} else if (strcmp(path, "/sys/devices/system/node/online") == 0) {
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE;
+	} else if (strcmp(path, "/sys/devices/system/node/has_cpu") == 0) {
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU;
+	} else if (strcmp(path, "/sys/devices/system/node/has_memory") == 0) {
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY;
+	} else if (strcmp(path, "/sys/devices/system/node/has_normal_memory") == 0) {
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY;
+	} else if ((strncmp(path, "/sys/devices/system/node/node",
+			    STRLITERALLEN("/sys/devices/system/node/node")) == 0) &&
+		   (strcmp(path + path_len - strlen("cpulist"), "cpulist") == 0)) {
+		type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPULIST;
+	} else if ((strncmp(path, "/sys/devices/system/node/node",
+
STRLITERALLEN("/sys/devices/system/node/node")) == 0) && + (strcmp(path + path_len - strlen("cpumap"), "cpumap") == 0)) { + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPUMAP; } else if (strncmp(path, "/sys/devices/system/cpu/", STRLITERALLEN("/sys/devices/system/cpu/")) == 0) { int ret; @@ -690,6 +1690,17 @@ __lxcfs_fuse_ops int sys_open(const char *path, struct fuse_file_info *fi) if (S_ISREG(st_mode)) type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE; + } else if (strncmp(path, "/sys/devices/system/node/", + STRLITERALLEN("/sys/devices/system/node/")) == 0) { + int ret; + mode_t st_mode; + + ret = get_st_mode(path, &st_mode); + if (ret) + return ret; + + if (S_ISREG(st_mode)) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBFILE; } if (type == -1) return -ENOENT; @@ -742,6 +1753,19 @@ __lxcfs_fuse_ops int sys_opendir(const char *path, struct fuse_file_info *fi) if (S_ISDIR(st_mode)) type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR; + } else if (strcmp(path, "/sys/devices/system/node") == 0) { + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE; + } else if (strncmp(path, "/sys/devices/system/node/", + STRLITERALLEN("/sys/devices/system/node/")) == 0) { + int ret; + mode_t st_mode; + + ret = get_st_mode(path, &st_mode); + if (ret) + return ret; + + if (S_ISDIR(st_mode)) + type = LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBDIR; } if (type == -1) return -ENOENT; @@ -775,6 +1799,10 @@ static int sys_access_legacy(const char *path, int mask) access(path, R_OK) == 0) return 0; + if (strcmp(path, "/sys/devices/system/node") == 0 && + access(path, R_OK) == 0) + return 0; + /* these are all read-only */ if ((mask & ~R_OK) != 0) return -EACCES; @@ -805,12 +1833,38 @@ static int sys_read_legacy(const char *path, char *buf, size_t size, return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH, buf, size, offset, f); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE: + if (liblxcfs_functional()) + return sys_devices_system_node_online_read(buf, size, offset, fi); + + return 
read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE_PATH, + buf, size, offset, f); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU: + if (liblxcfs_functional()) + return sys_devices_system_node_has_cpu_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU_PATH, + buf, size, offset, f); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY: + if (liblxcfs_functional()) + return sys_devices_system_node_has_memory_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY_PATH, + buf, size, offset, f); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY: + if (liblxcfs_functional()) + return sys_devices_system_node_has_normal_memory_read(buf, size, offset, fi); + + return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY_PATH, + buf, size, offset, f); case LXC_TYPE_SYS_DEVICES: break; case LXC_TYPE_SYS_DEVICES_SYSTEM: break; case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU: break; + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE: + break; } return -EINVAL; @@ -830,7 +1884,19 @@ __lxcfs_fuse_ops int sys_read(const char *path, char *buf, size_t size, switch (f->type) { case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE: return sys_devices_system_cpu_online_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_ONLINE: + return sys_devices_system_node_online_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_CPU: + return sys_devices_system_node_has_cpu_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_MEMORY: + return sys_devices_system_node_has_memory_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_HAS_NORMAL_MEMORY: + return sys_devices_system_node_has_normal_memory_read(buf, size, offset, fi); + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPULIST: + case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_NODEX_CPUMAP: + return sys_devices_system_node_nodex_cpulist_cpumap_read(path, 
buf, size, offset, fi);
 	case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE:
+	case LXC_TYPE_SYS_DEVICES_SYSTEM_NODE_SUBFILE:
 		return read_file_fuse_with_offset(path, buf, size, offset, f);
 	}
 
diff --git a/src/utils.c b/src/utils.c
index 8771be2f..93fdda8a 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -642,3 +642,54 @@ DIR *opathdir(const char *path)
 
 	return dirp;
 }
+
+/*
+ * Convert a bit array into a comma-separated list of ranges, e.g. "0-3,5".
+ *
+ * @list:         Output buffer for the NUL-terminated result. Its capacity is
+ *                not passed in, so the caller must size it for the longest
+ *                list @bitarr can produce.
+ * @bitarr:       Bit array queried through is_set().
+ * @last_set_bit: Highest bit index to examine (inclusive).
+ *
+ * Each run of consecutive set bits is written once: "N" for a single bit,
+ * "first-last" otherwise. Runs are separated by commas without a trailing
+ * separator; if no bit is set @list becomes the empty string.
+ *
+ * Returns the length of the string written (excluding the NUL) or -1 if
+ * formatting fails.
+ */
+int bitarr_to_list(char *list, __u32 *bitarr, __u32 last_set_bit)
+{
+	/* Longest item: separator, two 32-bit decimals joined by '-', and NUL. */
+	enum { ITEM_MAX = sizeof(",4294967295-4294967295") };
+	__u32 bit = 0;
+	int pos = 0;
+
+	list[0] = '\0';
+
+	/*
+	 * The condition tests before it increments, so the loop also ends when
+	 * last_set_bit is the largest __u32 value.
+	 */
+	do {
+		__u32 first = bit;
+		int ret;
+
+		if (!is_set(bit, bitarr))
+			continue;
+
+		/* Extend the run while the next bit is set as well. */
+		while (bit < last_set_bit && is_set(bit + 1, bitarr))
+			bit++;
+
+		if (first == bit)
+			ret = snprintf(list + pos, ITEM_MAX, "%s%u", pos ? "," : "", first);
+		else
+			ret = snprintf(list + pos, ITEM_MAX, "%s%u-%u", pos ? "," : "", first, bit);
+		if (ret < 0 || ret >= ITEM_MAX)
+			return -1;
+
+		pos += ret;
+	} while (bit++ < last_set_bit);
+
+	return pos;
+}
diff --git a/src/utils.h b/src/utils.h
index 0e8d3c86..65a0dc0a 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -74,5 +74,5 @@ static inline bool file_exists(const char *f)
 #define PROTECT_OPEN_WITH_TRAILING_SYMLINKS (O_CLOEXEC | O_NOCTTY | O_RDONLY)
 #define PROTECT_OPEN (PROTECT_OPEN_WITH_TRAILING_SYMLINKS | O_NOFOLLOW)
 extern char *read_file_at(int dfd, const char *fnam, unsigned int o_flags);
-
+extern int bitarr_to_list(char *list, __u32 *bitarr, __u32 last_set_bit);
 #endif /* __LXCFS_UTILS_H */