diff --git a/rust/composefs/src/dumpfile.rs b/rust/composefs/src/dumpfile.rs index db403af4..c0b05a4c 100644 --- a/rust/composefs/src/dumpfile.rs +++ b/rust/composefs/src/dumpfile.rs @@ -167,6 +167,10 @@ fn unescape_to_path(s: &str) -> Result> { if v.is_empty() { anyhow::bail!("Invalid empty path"); } + let l = v.len(); + if l > libc::PATH_MAX as usize { + anyhow::bail!("Path is too long: {l} bytes"); + } Ok(v) })?; let r = match v { @@ -176,6 +180,42 @@ fn unescape_to_path(s: &str) -> Result> { Ok(r) } +/// Like [`unescape_to_path`], but also ensures the path is in "canonical" +/// form; this has the same semantics as Rust https://doc.rust-lang.org/std/path/struct.Path.html#method.components +/// which in particular removes `.` and extra `//`. +/// +/// We also deny uplinks `..` and empty paths. +fn unescape_to_path_canonical(s: &str) -> Result> { + let p = unescape_to_path(s)?; + let mut components = p.components(); + let mut r = std::path::PathBuf::new(); + let Some(first) = components.next() else { + anyhow::bail!("Invalid empty path"); + }; + if first != std::path::Component::RootDir { + anyhow::bail!("Invalid non-absolute path"); + } + r.push(first); + for component in components { + match component { + // Prefix is a windows thing; I don't think RootDir or CurDir are reachable + // after the first component has been RootDir. + std::path::Component::Prefix(_) + | std::path::Component::RootDir + | std::path::Component::CurDir => { + anyhow::bail!("Internal error in unescape_to_path_canonical"); + } + std::path::Component::ParentDir => { + anyhow::bail!("Invalid \"..\" in path"); + } + std::path::Component::Normal(_) => { + r.push(component); + } + } + } + Ok(r.into()) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum EscapeMode { Standard, @@ -261,7 +301,7 @@ impl<'p> Entry<'p> { pub fn parse(s: &'p str) -> Result> { let mut components = s.split(' '); let mut next = |name: &str| components.next().ok_or_else(|| anyhow!("Missing {name}")); - let path = unescape_to_path(next("path")?)?; + let path = unescape_to_path_canonical(next("path")?)?; let size = u64::from_str(next("size")?)?; let modeval = next("mode")?; let (is_hardlink, mode) = if let Some((_, rest)) = modeval.split_once('@') { @@ -280,7 +320,8 @@ impl<'p> Entry<'p> { let xattrs = components.map(Xattr::parse).collect::>>()?; let item = if is_hardlink { - let target = unescape_to_path(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; + let target = + unescape_to_path_canonical(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; Item::Hardlink { target } } else { match libc::S_IFMT & mode { @@ -291,6 +332,9 @@ impl<'p> Entry<'p> { fsverity_digest: fsverity_digest.map(ToOwned::to_owned), }, libc::S_IFLNK => { + // Note that the target of *symlinks* is not required to be in canonical form, + // as we don't actually traverse those links on our own, and we need to support + // symlinks that e.g. contain `//` or other things. let target = unescape_to_path(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; let targetlen = target.as_os_str().as_bytes().len(); @@ -469,6 +513,30 @@ mod tests { #[test] fn test_unescape_path() { assert!(unescape_to_path("").is_err()); + let mut p = std::iter::repeat('a') + .take(libc::PATH_MAX.try_into().unwrap()) + .collect::(); + assert!(unescape_to_path(&p).is_ok()); + p.push('a'); + assert!(unescape_to_path(&p).is_err()); + } + + #[test] + fn test_unescape_path_canonical() { + assert!(unescape_to_path_canonical("foo").is_err()); + assert!(unescape_to_path_canonical("/foo/..").is_err()); + assert!(unescape_to_path_canonical("/foo/../blah").is_err()); + assert_eq!( + unescape_to_path_canonical("///foo/bar//baz") + .unwrap() + .to_str() + .unwrap(), + "/foo/bar/baz" + ); + assert_eq!( + unescape_to_path_canonical("/.").unwrap().to_str().unwrap(), + "/" + ); } #[test]