From acd5f65b26400112ad576220a61b60da14ea27e8 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 19 Aug 2024 21:19:36 -0400 Subject: [PATCH 1/5] mkcomposefs: Fail on an empty symlink target This one previously ended up with a NULL pointer deference in the bowels of the EROFS generation. Signed-off-by: Colin Walters --- libcomposefs/lcfs-writer-erofs.c | 1 + tests/assets/should-fail-empty-link-name.dump | 2 ++ tests/meson.build | 1 + tools/mkcomposefs.c | 6 ++++++ 4 files changed, 10 insertions(+) create mode 100644 tests/assets/should-fail-empty-link-name.dump diff --git a/libcomposefs/lcfs-writer-erofs.c b/libcomposefs/lcfs-writer-erofs.c index e9dd7946..566deecc 100644 --- a/libcomposefs/lcfs-writer-erofs.c +++ b/libcomposefs/lcfs-writer-erofs.c @@ -509,6 +509,7 @@ static void compute_erofs_inode_size(struct lcfs_node_s *node) compute_erofs_dir_size(node); } else if (type == S_IFLNK) { node->erofs_n_blocks = 0; + assert(node->payload); node->erofs_tailsize = strlen(node->payload); } else if (type == S_IFREG && file_size > 0) { if (node->content != NULL) { diff --git a/tests/assets/should-fail-empty-link-name.dump b/tests/assets/should-fail-empty-link-name.dump new file mode 100644 index 00000000..28c497de --- /dev/null +++ b/tests/assets/should-fail-empty-link-name.dump @@ -0,0 +1,2 @@ +/ 4096 40555 2 0 0 0 1633950376.0 - - - +/nolinkname 0 120777 1 0 0 0 0.0 - - - diff --git a/tests/meson.build b/tests/meson.build index e7068b00..aa4a6b1d 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -16,6 +16,7 @@ test_assets_should_fail = [ 'should-fail-long-xattr-value.dump', 'should-fail-no-ftype.dump', 'should-fail-empty-name.dump', + 'should-fail-empty-link-name.dump', ] test_assets = test_assets_small + [ diff --git a/tools/mkcomposefs.c b/tools/mkcomposefs.c index a8b096be..7820a33a 100644 --- a/tools/mkcomposefs.c +++ b/tools/mkcomposefs.c @@ -511,6 +511,12 @@ static char *tree_from_dump_line(dump_info *info, const char *line, size_t line_ lcfs_node_set_gid(node, gid); lcfs_node_set_rdev(node, rdev); lcfs_node_set_mtime(node, &mtime); + // Validate that symlinks are non-empty + if ((mode & S_IFMT) == S_IFLNK) { + if (payload == NULL) { + return make_error("Invalid empty symlink"); + } + } if (lcfs_node_set_payload(node, payload) < 0) return make_error("Invalid payload"); if (content) { From 97c597691bb3ffafbef8fde743ccd12ba2023497 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 19 Aug 2024 21:33:52 -0400 Subject: [PATCH 2/5] mkcomposefs: Reject `.` and `..` in paths There's no good reason for us to support this; we should expect paths to be canonicalized. In theory we *could* handle this, but I am doubtful anyone actually relies on it. In EROFS these are supposed to be "hard links" to the relevant directories; the EROFS generation adds them if they don't exist. I tried to do stronger validation at the `lcfs_node_*` level but that is trickier. Let's just reject at the dump file for now. Signed-off-by: Colin Walters --- tests/assets/should-fail-dot-name.dump | 2 ++ tests/assets/should-fail-dotdot-name.dump | 2 ++ tests/meson.build | 2 ++ tools/mkcomposefs.c | 3 +++ 4 files changed, 9 insertions(+) create mode 100644 tests/assets/should-fail-dot-name.dump create mode 100644 tests/assets/should-fail-dotdot-name.dump diff --git a/tests/assets/should-fail-dot-name.dump b/tests/assets/should-fail-dot-name.dump new file mode 100644 index 00000000..c3b5f944 --- /dev/null +++ b/tests/assets/should-fail-dot-name.dump @@ -0,0 +1,2 @@ +/ 4096 40555 2 0 0 0 1633950376.0 - - - +/. 4096 40555 2 0 0 0 1633950376.0 - - - diff --git a/tests/assets/should-fail-dotdot-name.dump b/tests/assets/should-fail-dotdot-name.dump new file mode 100644 index 00000000..34bcbd11 --- /dev/null +++ b/tests/assets/should-fail-dotdot-name.dump @@ -0,0 +1,2 @@ +/ 4096 40555 2 0 0 0 1633950376.0 - - - +/.. 4096 40555 2 0 0 0 1633950376.0 - - - diff --git a/tests/meson.build b/tests/meson.build index aa4a6b1d..18c934d0 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -17,6 +17,8 @@ test_assets_should_fail = [ 'should-fail-no-ftype.dump', 'should-fail-empty-name.dump', 'should-fail-empty-link-name.dump', + 'should-fail-dot-name.dump', + 'should-fail-dotdot-name.dump', ] test_assets = test_assets_small + [ diff --git a/tools/mkcomposefs.c b/tools/mkcomposefs.c index 7820a33a..9af050df 100644 --- a/tools/mkcomposefs.c +++ b/tools/mkcomposefs.c @@ -343,6 +343,9 @@ static char *tree_add_node(dump_info *info, const char *path, struct lcfs_node_s if (!lcfs_node_dirp(parent)) return make_error("Parent must be a directory for %s", path); + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + return make_error("Invalid . or .. in path: %s", path); + } int r = lcfs_node_add_child(parent, node, name); if (r < 0) { From e57b834f708792b5007710c41d10447ef577d3fa Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 19 Aug 2024 21:39:25 -0400 Subject: [PATCH 3/5] tests: Add a test case that directories can't be hardlinked Hooray! We were actually validating this already. Just another corner case I thought of. Signed-off-by: Colin Walters --- tests/assets/should-fail-dir-hardlink.dump | 2 ++ tests/meson.build | 1 + 2 files changed, 3 insertions(+) create mode 100644 tests/assets/should-fail-dir-hardlink.dump diff --git a/tests/assets/should-fail-dir-hardlink.dump b/tests/assets/should-fail-dir-hardlink.dump new file mode 100644 index 00000000..a42ab50a --- /dev/null +++ b/tests/assets/should-fail-dir-hardlink.dump @@ -0,0 +1,2 @@ +/ 4096 40555 2 0 0 0 1633950376.0 - - - +/rootlink 4096 @40555 2 0 0 0 1633950376.0 / - - diff --git a/tests/meson.build b/tests/meson.build index 18c934d0..8bec717b 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -19,6 +19,7 @@ test_assets_should_fail = [ 'should-fail-empty-link-name.dump', 'should-fail-dot-name.dump', 'should-fail-dotdot-name.dump', + 'should-fail-dir-hardlink.dump', ] test_assets = test_assets_small + [ From 57f6e81c94fa2df42f3a40763352d22ac3a3c896 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 20 Aug 2024 07:31:08 -0400 Subject: [PATCH 4/5] writer: Also check for dir hardlinks when canonicalizing tree While we have a check in `mkcomposefs.c`, let's also have one at the C API level because we want to guard against misuse/attack from something directly operating on that API. Signed-off-by: Colin Walters --- libcomposefs/lcfs-writer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libcomposefs/lcfs-writer.c b/libcomposefs/lcfs-writer.c index 209781c2..4b0251f0 100644 --- a/libcomposefs/lcfs-writer.c +++ b/libcomposefs/lcfs-writer.c @@ -216,6 +216,16 @@ int lcfs_compute_tree(struct lcfs_ctx_s *ctx, struct lcfs_node_s *root) node->inode.st_nlink = n_link; } + // Disallow directory hardlinks; the EROFS generation does + // do this specially for `.` and `..`, but that happens after this. + if (node->link_to != NULL) { + if ((node->inode.st_mode & S_IFMT) == S_IFDIR || + (node->link_to->inode.st_mode & S_IFMT) == S_IFDIR) { + errno = EINVAL; + return -1; + } + } + /* Canonical order */ if (node->xattrs) qsort(node->xattrs, node->n_xattrs, From a6b0a65789adc758b0d90536922e1928d98fe658 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Tue, 20 Aug 2024 08:05:43 -0400 Subject: [PATCH 5/5] rust/dumpfile: More validation Matching the C side, but we want to detect errors where we can early on the Rust side here too as it's safer. - Also verify path length here - Deny hardlinked directories - And canonicalize and enforce normal form for paths Signed-off-by: Colin Walters --- rust/composefs/src/dumpfile.rs | 83 ++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/rust/composefs/src/dumpfile.rs b/rust/composefs/src/dumpfile.rs index db403af4..4e6ec22f 100644 --- a/rust/composefs/src/dumpfile.rs +++ b/rust/composefs/src/dumpfile.rs @@ -15,6 +15,7 @@ use std::str::FromStr; use anyhow::Context; use anyhow::{anyhow, Result}; +use libc::S_IFDIR; /// https://github.com/torvalds/linux/blob/47ac09b91befbb6a235ab620c32af719f8208399/include/uapi/linux/limits.h#L15 /// This isn't exposed in libc/rustix, and in any case we should be conservative...if this ever @@ -167,6 +168,10 @@ fn unescape_to_path(s: &str) -> Result> { if v.is_empty() { anyhow::bail!("Invalid empty path"); } + let l = v.len(); + if l > libc::PATH_MAX as usize { + anyhow::bail!("Path is too long: {l} bytes"); + } Ok(v) })?; let r = match v { @@ -176,6 +181,42 @@ fn unescape_to_path(s: &str) -> Result> { Ok(r) } +/// Like [`unescape_to_path`], but also ensures the path is in "canonical" +/// form; this has the same semantics as Rust https://doc.rust-lang.org/std/path/struct.Path.html#method.components +/// which in particular removes `.` and extra `//`. +/// +/// We also deny uplinks `..` and empty paths. +fn unescape_to_path_canonical(s: &str) -> Result> { + let p = unescape_to_path(s)?; + let mut components = p.components(); + let mut r = std::path::PathBuf::new(); + let Some(first) = components.next() else { + anyhow::bail!("Invalid empty path"); + }; + if first != std::path::Component::RootDir { + anyhow::bail!("Invalid non-absolute path"); + } + r.push(first); + for component in components { + match component { + // Prefix is a windows thing; I don't think RootDir or CurDir are reachable + // after the first component has been RootDir. + std::path::Component::Prefix(_) + | std::path::Component::RootDir + | std::path::Component::CurDir => { + anyhow::bail!("Internal error in unescape_to_path_canonical"); + } + std::path::Component::ParentDir => { + anyhow::bail!("Invalid \"..\" in path"); + } + std::path::Component::Normal(_) => { + r.push(component); + } + } + } + Ok(r.into()) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum EscapeMode { Standard, @@ -261,7 +302,7 @@ impl<'p> Entry<'p> { pub fn parse(s: &'p str) -> Result> { let mut components = s.split(' '); let mut next = |name: &str| components.next().ok_or_else(|| anyhow!("Missing {name}")); - let path = unescape_to_path(next("path")?)?; + let path = unescape_to_path_canonical(next("path")?)?; let size = u64::from_str(next("size")?)?; let modeval = next("mode")?; let (is_hardlink, mode) = if let Some((_, rest)) = modeval.split_once('@') { @@ -279,11 +320,16 @@ impl<'p> Entry<'p> { let fsverity_digest = optional_str(next("digest")?); let xattrs = components.map(Xattr::parse).collect::>>()?; + let ty = libc::S_IFMT & mode; let item = if is_hardlink { - let target = unescape_to_path(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; + if ty == S_IFDIR { + anyhow::bail!("Invalid hardlinked directory"); + } + let target = + unescape_to_path_canonical(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; Item::Hardlink { target } } else { - match libc::S_IFMT & mode { + match ty { libc::S_IFREG => Item::Regular { size, nlink, @@ -291,6 +337,9 @@ impl<'p> Entry<'p> { fsverity_digest: fsverity_digest.map(ToOwned::to_owned), }, libc::S_IFLNK => { + // Note that the target of *symlinks* is not required to be in canonical form, + // as we don't actually traverse those links on our own, and we need to support + // symlinks that e.g. contain `//` or other things. let target = unescape_to_path(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; let targetlen = target.as_os_str().as_bytes().len(); @@ -469,6 +518,30 @@ mod tests { #[test] fn test_unescape_path() { assert!(unescape_to_path("").is_err()); + let mut p = std::iter::repeat('a') + .take(libc::PATH_MAX.try_into().unwrap()) + .collect::(); + assert!(unescape_to_path(&p).is_ok()); + p.push('a'); + assert!(unescape_to_path(&p).is_err()); + } + + #[test] + fn test_unescape_path_canonical() { + assert!(unescape_to_path_canonical("foo").is_err()); + assert!(unescape_to_path_canonical("/foo/..").is_err()); + assert!(unescape_to_path_canonical("/foo/../blah").is_err()); + assert_eq!( + unescape_to_path_canonical("///foo/bar//baz") + .unwrap() + .to_str() + .unwrap(), + "/foo/bar/baz" + ); + assert_eq!( + unescape_to_path_canonical("/.").unwrap().to_str().unwrap(), + "/" + ); } #[test] @@ -514,6 +587,10 @@ mod tests { "long xattr value", include_str!("../../../tests/assets/should-fail-long-xattr-value.dump"), ), + ( + "dir hardlink", + include_str!("../../../tests/assets/should-fail-dir-hardlink.dump"), + ), ]; for (name, case) in CASES.iter().copied() { assert!(