Skip to content

Commit

Permalink
dump: Support --filter
Browse files Browse the repository at this point in the history
I'm looking at storing OCI container images with their metadata
included in the image - giving a "single file" holding all
the relevant bits for an image. Specifically a structure like this:

```
/manifest.json
/config.json
/rootfs
```

I want higher level tooling to be able to read the metadata efficiently.
Of course we could directly parse the EROFS image...but there are a lot of
pitfalls there. We could also mount the image but that's pretty
inefficient for this.

Add support for filtering when generating a dump; this can operate
fully unprivileged, and only requires some minor modifications
to the library and CLI tooling.

Signed-off-by: Colin Walters <[email protected]>
  • Loading branch information
cgwalters committed Aug 20, 2024
1 parent bbda399 commit ff318d4
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 12 deletions.
74 changes: 65 additions & 9 deletions libcomposefs/lcfs-writer-erofs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1467,11 +1467,12 @@ static const erofs_inode *lcfs_image_get_erofs_inode(struct lcfs_image_data *dat
}

static struct lcfs_node_s *lcfs_build_node_from_image(struct lcfs_image_data *data,
uint64_t nid);
uint64_t nid,
Hash_table *filter);

static int erofs_readdir_block(struct lcfs_image_data *data,
struct lcfs_node_s *parent, const uint8_t *block,
size_t block_size)
size_t block_size, Hash_table *filter)
{
const struct erofs_dirent *dirents = (struct erofs_dirent *)block;
size_t dirents_size = lcfs_u16_from_file(dirents[0].nameoff);
Expand Down Expand Up @@ -1511,7 +1512,11 @@ static int erofs_readdir_block(struct lcfs_image_data *data,
memcpy(name_buf, child_name, child_name_len);
name_buf[child_name_len] = 0;

child = lcfs_build_node_from_image(data, nid);
if (filter != NULL && hash_lookup(filter, name_buf) == NULL) {
continue;
}

child = lcfs_build_node_from_image(data, nid, NULL);
if (child == NULL) {
if (errno == ENOTSUP)
continue; /* Skip real whiteouts (00-ff) */
Expand Down Expand Up @@ -1593,7 +1598,7 @@ static int lcfs_build_node_erofs_xattr(struct lcfs_node_s *node, uint8_t name_in
}

static struct lcfs_node_s *lcfs_build_node_from_image(struct lcfs_image_data *data,
uint64_t nid)
uint64_t nid, Hash_table *filter)
{
const erofs_inode *cino;
cleanup_node struct lcfs_node_s *node = NULL;
Expand Down Expand Up @@ -1715,19 +1720,24 @@ static struct lcfs_node_s *lcfs_build_node_from_image(struct lcfs_image_data *da
}
}

if (erofs_readdir_block(data, node, block_data, block_size) < 0)
if (erofs_readdir_block(data, node, block_data,
block_size, filter) < 0)
return NULL;
}

/* Then inline */
if (tailpacked) {
if (erofs_readdir_block(data, node, tail_data, tail_size) < 0)
if (erofs_readdir_block(data, node, tail_data,
tail_size, filter) < 0)
return NULL;
}

} else if (type == S_IFLNK) {
char name_buf[PATH_MAX];

// Filter only applies to toplevel
assert(filter == NULL);

if (file_size >= PATH_MAX || !tailpacked) {
errno = EINVAL;
return NULL;
Expand All @@ -1742,6 +1752,9 @@ static struct lcfs_node_s *lcfs_build_node_from_image(struct lcfs_image_data *da
cleanup_free uint8_t *content = NULL;
size_t oob_size;

// Filter only applies to toplevel
assert(filter == NULL);

content = malloc(file_size);
if (content == NULL) {
errno = ENOMEM;
Expand Down Expand Up @@ -1840,8 +1853,19 @@ static bool node_ht_comparator(const void *d1, const void *d2)
return v1->nid == v2->nid;
}

struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data,
size_t image_data_size)
/* Hasher callback for hash tables keyed by C strings: treats `entry` as a
 * string and delegates to hash_string() with the supplied table size. */
static size_t str_ht_hash(const void *entry, size_t table_size)
{
	const char *key = entry;

	return hash_string(key, table_size);
}

/* Comparator callback for hash tables keyed by C strings: returns true
 * when both entries are byte-for-byte equal NUL-terminated strings. */
static bool str_ht_eq(const void *entry1, const void *entry2)
{
	const char *lhs = entry1;
	const char *rhs = entry2;

	return !strcmp(lhs, rhs);
}

struct lcfs_node_s *
lcfs_load_node_from_image_ext(const uint8_t *image_data, size_t image_data_size,
const struct lcfs_read_options_s *opts)
{
const uint8_t *image_data_end;
struct lcfs_image_data data = { image_data, image_data_size };
Expand All @@ -1850,6 +1874,8 @@ struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data,
uint64_t erofs_root_nid;
struct lcfs_node_s *root;

assert(opts);

if (image_data_size < EROFS_BLKSIZ) {
errno = EINVAL;
return NULL;
Expand Down Expand Up @@ -1909,9 +1935,39 @@ struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data,
return NULL;
}

root = lcfs_build_node_from_image(&data, erofs_root_nid);
Hash_table *toplevel_entries_hash = NULL;
if (opts->toplevel_entries) {
toplevel_entries_hash =
hash_initialize(0, NULL, str_ht_hash, str_ht_eq, NULL);
if (toplevel_entries_hash == NULL) {
errno = ENOMEM;
return NULL;
}
for (const char *const *it = opts->toplevel_entries; it && *it; it++) {
const char *name = *it;
if (hash_insert_if_absent(toplevel_entries_hash, name,
NULL) < 0) {
errno = ENOMEM;
return NULL;
}
}
}

root = lcfs_build_node_from_image(&data, erofs_root_nid,
toplevel_entries_hash);

if (toplevel_entries_hash != NULL)
hash_free(toplevel_entries_hash);
hash_free(data.node_hash);

return root;
}

struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data,
size_t image_data_size)
{
struct lcfs_read_options_s opts = {
0,
};
return lcfs_load_node_from_image_ext(image_data, image_data_size, &opts);
}
13 changes: 11 additions & 2 deletions libcomposefs/lcfs-writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,8 @@ int lcfs_version_from_fd(int fd)
return r;
}

struct lcfs_node_s *lcfs_load_node_from_fd(int fd)
struct lcfs_node_s *lcfs_load_node_from_fd_ext(int fd,
const struct lcfs_read_options_s *opts)
{
struct lcfs_node_s *node;
uint8_t *image_data;
Expand All @@ -864,7 +865,7 @@ struct lcfs_node_s *lcfs_load_node_from_fd(int fd)
return NULL;
}

node = lcfs_load_node_from_image(image_data, image_data_size);
node = lcfs_load_node_from_image_ext(image_data, image_data_size, opts);
if (node == NULL) {
errsv = errno;
munmap(image_data, image_data_size);
Expand All @@ -877,6 +878,14 @@ struct lcfs_node_s *lcfs_load_node_from_fd(int fd)
return node;
}

struct lcfs_node_s *lcfs_load_node_from_fd(int fd)
{
struct lcfs_read_options_s opts = {
0,
};
return lcfs_load_node_from_fd_ext(fd, &opts);
}

int lcfs_node_set_payload(struct lcfs_node_s *node, const char *payload)
{
char *dup = NULL;
Expand Down
13 changes: 13 additions & 0 deletions libcomposefs/lcfs-writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,20 @@ LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_file(int dirfd, const char *
int buildflags);
LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_image(const uint8_t *image_data,
size_t image_data_size);
// Options accepted by lcfs_load_node_from_image_ext() and
// lcfs_load_node_from_fd_ext(). A fully zeroed struct requests the same
// behavior as the non-_ext loaders, which pass exactly that.
struct lcfs_read_options_s {
// If non-NULL, this is a NULL terminated array of filenames; only entries
// of the image's root directory whose name equals one of them are loaded.
// At the current time only plain filenames (not full paths) are supported.
// The array is only read for the duration of the load call in the code
// shown here; the caller keeps ownership.
const char *const *toplevel_entries;
// Not read by the loader code visible here. Presumably reserved for future
// extension without changing the struct size - keep zeroed (TODO confirm).
uint32_t reserved[3];
void *reserved2[4];
};
LCFS_EXTERN struct lcfs_node_s *
lcfs_load_node_from_image_ext(const uint8_t *image_data, size_t image_data_size,
const struct lcfs_read_options_s *opts);
LCFS_EXTERN struct lcfs_node_s *lcfs_load_node_from_fd(int fd);
LCFS_EXTERN struct lcfs_node_s *
lcfs_load_node_from_fd_ext(int fd, const struct lcfs_read_options_s *opts);
LCFS_EXTERN int lcfs_version_from_fd(int fd);

LCFS_EXTERN const char *lcfs_node_get_xattr(struct lcfs_node_s *node,
Expand Down
7 changes: 7 additions & 0 deletions man/composefs-info.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ can be specified.
: This should point to a directory of backing files, and will be used
by the **missing-objects** command to know what files are available.

**\-\-filter**=*NAME*
: Only print top-level entries (direct children of the root directory)
whose name matches *NAME*. Can be specified multiple times to select
several names. *NAME* cannot be a full path (e.g. cannot contain `/`).
This is intended to be used for tooling to efficiently look up metadata
files embedded in the image without loading and printing the entire
image.

# SEE ALSO
**composefs-info(1)**, **composefs-dump(5)**

Expand Down
2 changes: 2 additions & 0 deletions tests/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ extra_dist = [
'test-units.sh',
'test-random-fuse.sh',
'test-checksums.sh',
'test-dump-filtered.sh',
'test-should-fail.sh',
'integration.sh',
]
Expand All @@ -49,6 +50,7 @@ tools_dir = meson.current_build_dir() / '../tools'

test('check-units', find_program('test-units.sh'), args : [tools_dir])
test('check-checksums', find_program('test-checksums.sh'), args : [tools_dir, meson.current_source_dir() / 'assets', ' '.join(test_assets)])
test('check-dump-filtered', find_program('test-dump-filtered.sh'), args : [tools_dir, meson.current_source_dir() / 'assets'])
test('check-random-fuse', find_program('test-random-fuse.sh'), args : [tools_dir], timeout : 300)
should_fail_args = [tools_dir]
foreach case : test_assets_should_fail
Expand Down
21 changes: 21 additions & 0 deletions tests/test-dump-filtered.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Check that `composefs-info --filter=NAME dump` emits only the root entry
# plus the requested top-level entries.
#
# Usage: test-dump-filtered.sh BINDIR ASSET_DIR
#   BINDIR     directory containing mkcomposefs and composefs-info
#   ASSET_DIR  directory containing special.dump
#
# All path expansions are quoted so the test also works when the build or
# source tree lives under a path containing spaces or glob characters.

BINDIR="$1"
ASSET_DIR="$2"

. "$(dirname "$0")/test-lib.sh"

set -eu
tmpd=$(mktemp -d)
trap 'rm -rf -- "$tmpd"' EXIT

# Build an image from the reference dump, then dump it back with two filters.
"${BINDIR}/mkcomposefs" --from-file "$ASSET_DIR/special.dump" "$tmpd/out.cfs"
"${BINDIR}/composefs-info" --filter=chardev --filter=whiteout dump "$tmpd/out.cfs" > "$tmpd/dump.txt"
foundlines=$(wc -l < "$tmpd/dump.txt")
# Expected output: the root directory line and one line per filtered name.
if test "${foundlines}" != "3"; then
    fatal "Filtered dump failed, expected 3 lines, found $foundlines"
fi
assert_file_has_content "$tmpd/dump.txt" '^/ '
assert_file_has_content "$tmpd/dump.txt" '^/chardev '
assert_file_has_content "$tmpd/dump.txt" '^/whiteout '
echo "ok"
39 changes: 38 additions & 1 deletion tools/composefs-info.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
#define ESCAPE_LONE_DASH (1 << 2)

const char *opt_basedir_path;
// State for the repeatable --filter option. opt_filter is a heap-allocated
// array of strdup()ed names that gets NULL terminated before it is handed to
// the loader; n_filters is the number of names stored so far. filter_capacity
// is doubled before each (re)allocation of opt_filter and, once the array
// exists, is its slot count (always leaving room for the NULL terminator).
static size_t n_filters;
static size_t filter_capacity = 1;
static char **opt_filter;
int opt_basedir_fd;

static locale_t c_locale;
Expand Down Expand Up @@ -369,6 +373,7 @@ static void usage(const char *argv0)
}

#define OPT_BASEDIR 100
#define OPT_FILTER 101

// Most of the rest of this code operates on composefs superblocks. This function
// just prints the fsverity digest of the provided files.
Expand Down Expand Up @@ -414,6 +419,12 @@ int main(int argc, char **argv)
flag: NULL,
val: OPT_BASEDIR
},
{
name: "filter",
has_arg: required_argument,
flag: NULL,
val: OPT_FILTER
},
{},
};

Expand All @@ -422,6 +433,24 @@ int main(int argc, char **argv)
case OPT_BASEDIR:
opt_basedir_path = optarg;
break;
case OPT_FILTER:
// Ensure we have space for the NULL terminator
if (opt_filter == NULL || (n_filters + 1) >= filter_capacity) {
filter_capacity *= 2;
opt_filter = reallocarray(opt_filter, filter_capacity,
sizeof(char *));
if (opt_filter == NULL)
oom();
}
if (strchr(optarg, '/') != NULL) {
err(EXIT_FAILURE,
"Filter must be a single name: %s", optarg);
}
opt_filter[n_filters] = strdup(optarg);
if (opt_filter[n_filters] == NULL)
oom();
n_filters++;
break;
case ':':
fprintf(stderr, "option needs a value\n");
exit(EXIT_FAILURE);
Expand Down Expand Up @@ -486,6 +515,10 @@ int main(int argc, char **argv)
if (handler_init)
handler_data = handler_init();

// Ensure filters are NULL terminated
if (opt_filter)
opt_filter[n_filters] = NULL;

for (int i = 2; i < argc; i++) {
const char *image_path = image_path = argv[i];

Expand All @@ -494,7 +527,11 @@ int main(int argc, char **argv)
err(EXIT_FAILURE, "Failed to open '%s'", image_path);
}

cleanup_node struct lcfs_node_s *root = lcfs_load_node_from_fd(fd);
const char *const *toplevel_entries = (const char *const *)opt_filter;
struct lcfs_read_options_s opts = { .toplevel_entries =
toplevel_entries };
cleanup_node struct lcfs_node_s *root =
lcfs_load_node_from_fd_ext(fd, &opts);
if (root == NULL) {
err(EXIT_FAILURE, "Failed to load '%s'", image_path);
}
Expand Down

0 comments on commit ff318d4

Please sign in to comment.