From ff318d4a45284f737bf3ed9ee3ca8f47d1e4d11b Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 20 Aug 2024 10:21:49 -0400 Subject: [PATCH] dump: Support `--filter` I'm looking at storing OCI container images with their metadata included in the image - giving a "single file" holding all the relevant bits for an image. Specifically a structure like this: ``` /manifest.json /config.json /rootfs ``` I want higher level tooling to be able to read the metadata efficiently. Of course we could directly parse the EROFS image...but a lot of pitfalls there. We could also mount the image but that's pretty inefficient for this. Add support for filtering when generating a dump; this can operate fully unprivileged, and only requires some minor modifications to the library and CLI tooling. Signed-off-by: Colin Walters --- libcomposefs/lcfs-writer-erofs.c | 74 ++++++++++++++++++++++++++++---- libcomposefs/lcfs-writer.c | 13 +++++- libcomposefs/lcfs-writer.h | 13 ++++++ man/composefs-info.md | 7 +++ tests/meson.build | 2 + tests/test-dump-filtered.sh | 21 +++++++++ tools/composefs-info.c | 39 ++++++++++++++++- 7 files changed, 157 insertions(+), 12 deletions(-) create mode 100755 tests/test-dump-filtered.sh diff --git a/libcomposefs/lcfs-writer-erofs.c b/libcomposefs/lcfs-writer-erofs.c index e9dd7946..64293f11 100644 --- a/libcomposefs/lcfs-writer-erofs.c +++ b/libcomposefs/lcfs-writer-erofs.c @@ -1467,11 +1467,12 @@ static const erofs_inode *lcfs_image_get_erofs_inode(struct lcfs_image_data *dat } static struct lcfs_node_s *lcfs_build_node_from_image(struct lcfs_image_data *data, - uint64_t nid); + uint64_t nid, + Hash_table *filter); static int erofs_readdir_block(struct lcfs_image_data *data, struct lcfs_node_s *parent, const uint8_t *block, - size_t block_size) + size_t block_size, Hash_table *filter) { const struct erofs_dirent *dirents = (struct erofs_dirent *)block; size_t dirents_size = lcfs_u16_from_file(dirents[0].nameoff); @@ -1511,7 +1512,11 @@ static int 
erofs_readdir_block(struct lcfs_image_data *data, memcpy(name_buf, child_name, child_name_len); name_buf[child_name_len] = 0; - child = lcfs_build_node_from_image(data, nid); + if (filter != NULL && hash_lookup(filter, name_buf) == NULL) { + continue; + } + + child = lcfs_build_node_from_image(data, nid, NULL); if (child == NULL) { if (errno == ENOTSUP) continue; /* Skip real whiteouts (00-ff) */ @@ -1593,7 +1598,7 @@ static int lcfs_build_node_erofs_xattr(struct lcfs_node_s *node, uint8_t name_in } static struct lcfs_node_s *lcfs_build_node_from_image(struct lcfs_image_data *data, - uint64_t nid) + uint64_t nid, Hash_table *filter) { const erofs_inode *cino; cleanup_node struct lcfs_node_s *node = NULL; @@ -1715,19 +1720,24 @@ static struct lcfs_node_s *lcfs_build_node_from_image(struct lcfs_image_data *da } } - if (erofs_readdir_block(data, node, block_data, block_size) < 0) + if (erofs_readdir_block(data, node, block_data, + block_size, filter) < 0) return NULL; } /* Then inline */ if (tailpacked) { - if (erofs_readdir_block(data, node, tail_data, tail_size) < 0) + if (erofs_readdir_block(data, node, tail_data, + tail_size, filter) < 0) return NULL; } } else if (type == S_IFLNK) { char name_buf[PATH_MAX]; + // Filter only applies to toplevel + assert(filter == NULL); + if (file_size >= PATH_MAX || !tailpacked) { errno = EINVAL; return NULL; @@ -1742,6 +1752,9 @@ static struct lcfs_node_s *lcfs_build_node_from_image(struct lcfs_image_data *da cleanup_free uint8_t *content = NULL; size_t oob_size; + // Filter only applies to toplevel + assert(filter == NULL); + content = malloc(file_size); if (content == NULL) { errno = ENOMEM; @@ -1840,8 +1853,19 @@ static bool node_ht_comparator(const void *d1, const void *d2) return v1->nid == v2->nid; } -struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data, - size_t image_data_size) +static size_t str_ht_hash(const void *entry, size_t table_size) +{ + return hash_string(entry, table_size); +} + +static bool 
str_ht_eq(const void *entry1, const void *entry2) +{ + return strcmp(entry1, entry2) == 0; +} + +struct lcfs_node_s * +lcfs_load_node_from_image_ext(const uint8_t *image_data, size_t image_data_size, + const struct lcfs_read_options_s *opts) { const uint8_t *image_data_end; struct lcfs_image_data data = { image_data, image_data_size }; @@ -1850,6 +1874,8 @@ struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data, uint64_t erofs_root_nid; struct lcfs_node_s *root; + assert(opts); + if (image_data_size < EROFS_BLKSIZ) { errno = EINVAL; return NULL; @@ -1909,9 +1935,39 @@ struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data, return NULL; } - root = lcfs_build_node_from_image(&data, erofs_root_nid); + Hash_table *toplevel_entries_hash = NULL; + if (opts->toplevel_entries) { + toplevel_entries_hash = + hash_initialize(0, NULL, str_ht_hash, str_ht_eq, NULL); + if (toplevel_entries_hash == NULL) { + errno = ENOMEM; + return NULL; + } + for (const char *const *it = opts->toplevel_entries; it && *it; it++) { + const char *name = *it; + if (hash_insert_if_absent(toplevel_entries_hash, name, + NULL) < 0) { + errno = ENOMEM; + return NULL; + } + } + } + + root = lcfs_build_node_from_image(&data, erofs_root_nid, + toplevel_entries_hash); + if (toplevel_entries_hash != NULL) + hash_free(toplevel_entries_hash); hash_free(data.node_hash); return root; } + +struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data, + size_t image_data_size) +{ + struct lcfs_read_options_s opts = { + 0, + }; + return lcfs_load_node_from_image_ext(image_data, image_data_size, &opts); +} \ No newline at end of file diff --git a/libcomposefs/lcfs-writer.c b/libcomposefs/lcfs-writer.c index 209781c2..10ce35c7 100644 --- a/libcomposefs/lcfs-writer.c +++ b/libcomposefs/lcfs-writer.c @@ -843,7 +843,8 @@ int lcfs_version_from_fd(int fd) return r; } -struct lcfs_node_s *lcfs_load_node_from_fd(int fd) +struct lcfs_node_s *lcfs_load_node_from_fd_ext(int fd, + 
const struct lcfs_read_options_s *opts) { struct lcfs_node_s *node; uint8_t *image_data; @@ -864,7 +865,7 @@ struct lcfs_node_s *lcfs_load_node_from_fd(int fd) return NULL; } - node = lcfs_load_node_from_image(image_data, image_data_size); + node = lcfs_load_node_from_image_ext(image_data, image_data_size, opts); if (node == NULL) { errsv = errno; munmap(image_data, image_data_size); @@ -877,6 +878,14 @@ struct lcfs_node_s *lcfs_load_node_from_fd(int fd) return node; } +struct lcfs_node_s *lcfs_load_node_from_fd(int fd) +{ + struct lcfs_read_options_s opts = { + 0, + }; + return lcfs_load_node_from_fd_ext(fd, &opts); +} + int lcfs_node_set_payload(struct lcfs_node_s *node, const char *payload) { char *dup = NULL; diff --git a/libcomposefs/lcfs-writer.h b/libcomposefs/lcfs-writer.h index 10927fb7..b8ef7a72 100644 --- a/libcomposefs/lcfs-writer.h +++ b/libcomposefs/lcfs-writer.h @@ -82,7 +82,20 @@ LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_file(int dirfd, const char * int buildflags); LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data, size_t image_data_size); +struct lcfs_read_options_s { + // If non-NULL, this is a NULL terminated array of filenames; only entries + // for these files will be loaded. At the current time only filenames (not full paths) + // are supported. 
+ const char *const *toplevel_entries; + uint32_t reserved[3]; + void *reserved2[4]; +}; +LCFS_EXTERN struct lcfs_node_s * +lcfs_load_node_from_image_ext(const uint8_t *image_data, size_t image_data_size, + const struct lcfs_read_options_s *opts); LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_fd(int fd); +LCFS_EXTERN struct lcfs_node_s * +lcfs_load_node_from_fd_ext(int fd, const struct lcfs_read_options_s *opts); LCFS_EXTERN int lcfs_version_from_fd(int fd); LCFS_EXTERN const char *lcfs_node_get_xattr(struct lcfs_node_s *node, diff --git a/man/composefs-info.md b/man/composefs-info.md index b57705e6..f598ebee 100644 --- a/man/composefs-info.md +++ b/man/composefs-info.md @@ -46,6 +46,13 @@ can be specified. : This should point to a directory of backing files, and will be used by the **missing-objects** command to know what files are available. +**\-\-filter**=*NAME* +: Only print the root directory and its top-level entries named *NAME* + (a matching directory is printed with all of its contents). Can be + specified multiple times. *NAME* cannot be a full path (it must not + contain `/`). This is intended for tooling that needs to look up metadata + files embedded in the image without loading and printing the entire image. 
+ # SEE ALSO **composefs-info(1)**, **composefs-dump(5)** diff --git a/tests/meson.build b/tests/meson.build index e7068b00..f25ceec8 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -31,6 +31,7 @@ extra_dist = [ 'test-units.sh', 'test-random-fuse.sh', 'test-checksums.sh', + 'test-dump-filtered.sh', 'test-should-fail.sh', 'integration.sh', ] @@ -49,6 +50,7 @@ tools_dir = meson.current_build_dir() / '../tools' test('check-units', find_program('test-units.sh'), args : [tools_dir]) test('check-checksums', find_program('test-checksums.sh'), args : [tools_dir, meson.current_source_dir() / 'assets', ' '.join(test_assets)]) +test('check-dump-filtered', find_program('test-dump-filtered.sh'), args : [tools_dir, meson.current_source_dir() / 'assets']) test('check-random-fuse', find_program('test-random-fuse.sh'), args : [tools_dir], timeout : 300) should_fail_args = [tools_dir] foreach case : test_assets_should_fail diff --git a/tests/test-dump-filtered.sh b/tests/test-dump-filtered.sh new file mode 100755 index 00000000..f3f5bb85 --- /dev/null +++ b/tests/test-dump-filtered.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +BINDIR="$1" +ASSET_DIR="$2" + +. 
$(dirname $0)/test-lib.sh + +set -eu +tmpd=$(mktemp -d) +trap 'rm -rf -- "$tmpd"' EXIT + +${BINDIR}/mkcomposefs --from-file $ASSET_DIR/special.dump $tmpd/out.cfs +${BINDIR}/composefs-info --filter=chardev --filter=whiteout dump $tmpd/out.cfs > $tmpd/dump.txt +foundlines=$(wc -l < $tmpd/dump.txt) +if test "${foundlines}" != "3"; then + fatal "Filtered dump failed, expected 3 lines, found $foundlines" +fi +assert_file_has_content $tmpd/dump.txt '^/ ' +assert_file_has_content $tmpd/dump.txt '^/chardev ' +assert_file_has_content $tmpd/dump.txt '^/whiteout ' +echo "ok" \ No newline at end of file diff --git a/tools/composefs-info.c b/tools/composefs-info.c index 63bc78b5..6c616456 100644 --- a/tools/composefs-info.c +++ b/tools/composefs-info.c @@ -40,6 +40,10 @@ #define ESCAPE_LONE_DASH (1 << 2) const char *opt_basedir_path; +// Counting for a NULL terminated char array...so painful to reimplement each time in C +static size_t n_filters; +static size_t filter_capacity = 1; +static char **opt_filter; int opt_basedir_fd; static locale_t c_locale; @@ -369,6 +373,7 @@ static void usage(const char *argv0) } #define OPT_BASEDIR 100 +#define OPT_FILTER 101 // Most of the rest of this code operates on composefs superblocks. This function // just prints the fsverity digest of the provided files. 
@@ -414,6 +419,12 @@ int main(int argc, char **argv) flag: NULL, val: OPT_BASEDIR }, + { + name: "filter", + has_arg: required_argument, + flag: NULL, + val: OPT_FILTER + }, {}, }; @@ -422,6 +433,24 @@ int main(int argc, char **argv) case OPT_BASEDIR: opt_basedir_path = optarg; break; + case OPT_FILTER: + // Ensure we have space for the NULL terminator + if (opt_filter == NULL || (n_filters + 1) >= filter_capacity) { + filter_capacity *= 2; + opt_filter = reallocarray(opt_filter, filter_capacity, + sizeof(char *)); + if (opt_filter == NULL) + oom(); + } + if (strchr(optarg, '/') != NULL) { + errx(EXIT_FAILURE, + "Filter must be a single name: %s", optarg); + } + opt_filter[n_filters] = strdup(optarg); + if (opt_filter[n_filters] == NULL) + oom(); + n_filters++; + break; case ':': fprintf(stderr, "option needs a value\n"); exit(EXIT_FAILURE); @@ -486,6 +515,10 @@ int main(int argc, char **argv) if (handler_init) handler_data = handler_init(); + // Ensure filters are NULL terminated + if (opt_filter) + opt_filter[n_filters] = NULL; + for (int i = 2; i < argc; i++) { const char *image_path = image_path = argv[i]; @@ -494,7 +527,11 @@ int main(int argc, char **argv) err(EXIT_FAILURE, "Failed to open '%s'", image_path); } - cleanup_node struct lcfs_node_s *root = lcfs_load_node_from_fd(fd); + const char *const *toplevel_entries = (const char *const *)opt_filter; + struct lcfs_read_options_s opts = { .toplevel_entries = + toplevel_entries }; + cleanup_node struct lcfs_node_s *root = + lcfs_load_node_from_fd_ext(fd, &opts); if (root == NULL) { err(EXIT_FAILURE, "Failed to load '%s'", image_path); }