From 7894e2beb63923b1b40ee8ff702d89885834ca30 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Thu, 29 Aug 2024 11:06:11 -0700
Subject: [PATCH 1/8] [sled-agent] Replace `du(1)` with a Rust function

Currently, the `zone_bundle::disk_usage` function in sled-agent shells
out to `du(1)` and parses its output. This works fine, but it's a bit
flaky, especially when running the corresponding tests on "weird"
platforms such as NixOS, where the `du(1)` binary doesn't live in
`/usr/bin/du` (see #6479).

This commit replaces the code that shells out to `/usr/bin/du` with a
pure-Rust rewrite. There's a unit test that asserts that the disk usage
calculated by the new implementation matches the value calculated by
`du(1)`, with code for handling the potential inaccuracy due to `du(1)`
operating in terms of 512B blocks on macOS.

Closes #6479
---
 sled-agent/src/zone_bundle.rs | 175 ++++++++++++++++++++++++----------
 1 file changed, 122 insertions(+), 53 deletions(-)

diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs
index 46cee1c415..c0af5d0c32 100644
--- a/sled-agent/src/zone_bundle.rs
+++ b/sled-agent/src/zone_bundle.rs
@@ -1545,61 +1545,44 @@ async fn compute_bundle_utilization(
 
 // Return the number of bytes occupied by the provided directory.
 //
-// This returns an error if:
-//
-// - The "du" command fails
-// - Parsing stdout fails
-// - Parsing the actual size as a u64 fails
+// This returns an error if reading any file or directory within the provided
+// directory fails.
 async fn disk_usage(path: &Utf8PathBuf) -> Result<u64, BundleError> {
-    // Each OS implements slightly different `du` options.
-    //
-    // Linux and illumos support the "apparent" size in bytes, though using
-    // different options. macOS doesn't support bytes at all, and has a minimum
-    // block size of 512.
+    let mut sum = 0;
+    // Rather than recursing, we use a VecDeque as a queue of paths to scan
+    // next. This is for two reasons:
     //
-    // We'll suffer the lower resolution on macOS, and get higher resolution on
-    // the others.
-    cfg_if::cfg_if! {
-        if #[cfg(target_os = "illumos")] {
-            const BLOCK_SIZE: u64 = 1;
-            const DU_ARG: &str = "-A";
-        } else if #[cfg(target_os = "linux")] {
-            const BLOCK_SIZE: u64 = 1;
-            const DU_ARG: &str = "-b";
-        } else if #[cfg(target_os = "macos")] {
-            const BLOCK_SIZE: u64 = 512;
-            const DU_ARG: &str = "-k";
-        } else {
-            compile_error!("unsupported target OS");
+    // 1. If we're walking a very deeply nested directory structure, this helps
+    //    to avoid a potential stack overflow.
+    // 2. Since this is an `async fn` (due to the use of `tokio::fs`), it can't
+    //    contain a recursive call to itself without boxing it. We could work
+    //    around that bit by wrapping the whole thing in `spawn_blocking`,
+    //    instead of using `tokio::fs`.
+    let mut dirs_to_scan = std::collections::VecDeque::new();
+    dirs_to_scan.push_back(path.to_owned());
+
+    // TODO(eliza): since we're using `tokio::fs` here, this could, conceivably,
+    // be parallelized by spawning a bunch of tasks to walk each sub-directory.
+    while let Some(dir) = dirs_to_scan.pop_front() {
+        let mk_dir_error =
+            |err| BundleError::ReadDirectory { directory: dir.clone(), err };
+        let mut dir = tokio::fs::read_dir(&dir).await.map_err(mk_dir_error)?;
+        while let Some(entry) = dir.next_entry().await.map_err(mk_dir_error)? {
+            let mk_error = |err| BundleError::ReadDirectory {
+                directory: Utf8PathBuf::try_from(entry.path())
+                    .unwrap_or_else(|_| path.clone()),
+                err,
+            };
+            if entry.file_type().await.map_err(mk_error)?.is_dir() {
+                let path = entry.path().try_into()?;
+                dirs_to_scan.push_back(path);
+            } else {
+                sum += entry.metadata().await.map_err(mk_error)?.len()
+            }
         }
     }
-    const DU: &str = "/usr/bin/du";
-    let args = &[DU_ARG, "-s", path.as_str()];
-    let output = Command::new(DU).args(args).output().await.map_err(|err| {
-        BundleError::Command { cmd: format!("{DU} {}", args.join(" ")), err }
-    })?;
-    let err = |msg: &str| {
-        BundleError::Cleanup(anyhow!(
-            "failed to fetch disk usage for {}: {}",
-            path,
-            msg,
-        ))
-    };
-    if !output.status.success() {
-        return Err(err("du command failed"));
-    }
-    let Ok(s) = std::str::from_utf8(&output.stdout) else {
-        return Err(err("non-UTF8 stdout"));
-    };
-    let Some(line) = s.lines().next() else {
-        return Err(err("no lines in du output"));
-    };
-    let Some(part) = line.trim().split_ascii_whitespace().next() else {
-        return Err(err("no disk usage size computed in output"));
-    };
-    part.parse()
-        .map(|x: u64| x.saturating_mul(BLOCK_SIZE))
-        .map_err(|_| err("failed to parse du output"))
+
+    Ok(sum)
 }
 
 // Return the quota for a ZFS dataset, or the available size.
@@ -1659,8 +1642,7 @@ async fn zfs_quota(path: &Utf8PathBuf) -> Result<u64, BundleError> {
 
 #[cfg(test)]
 mod tests {
-    use super::disk_usage;
-    use super::Utf8PathBuf;
+    use super::*;
 
     #[tokio::test]
     async fn test_disk_usage() {
@@ -1676,6 +1658,93 @@ mod tests {
         let path = Utf8PathBuf::from("/some/nonexistent/path");
         assert!(disk_usage(&path).await.is_err());
     }
+
+    #[tokio::test]
+    #[cfg(any(
+        target_os = "illumos",
+        target_os = "linux",
+        target_os = "macos"
+    ))]
+    async fn test_disk_usage_matches_du() {
+        // Each OS implements slightly different `du` options.
+        //
+        // Linux and illumos support the "apparent" size in bytes, though using
+        // different options. macOS doesn't support bytes at all, and has a minimum
+        // block size of 512.
+        //
+        // We'll suffer the lower resolution on macOS, and get higher resolution on
+        // the others.
+        cfg_if::cfg_if! {
+            if #[cfg(target_os = "illumos")] {
+                const BLOCK_SIZE: u64 = 1;
+                const DU_ARG: &str = "-A";
+            } else if #[cfg(target_os = "linux")] {
+                const BLOCK_SIZE: u64 = 1;
+                const DU_ARG: &str = "-b";
+            } else if #[cfg(target_os = "macos")] {
+                const BLOCK_SIZE: u64 = 512;
+                const DU_ARG: &str = "-k";
+            } else {
+                compile_error!("unsupported target OS");
+            }
+        }
+
+        async fn disk_usage_du(path: &Utf8PathBuf) -> Result<u64, BundleError> {
+            const DU: &str = "du";
+            let args = &[DU_ARG, "-s", path.as_str()];
+            let output =
+                Command::new(DU).args(args).output().await.map_err(|err| {
+                    BundleError::Command {
+                        cmd: format!("{DU} {}", args.join(" ")),
+                        err,
+                    }
+                })?;
+            let err = |msg: &str| {
+                BundleError::Cleanup(anyhow!(
+                    "failed to fetch disk usage for {}: {}",
+                    path,
+                    msg,
+                ))
+            };
+            if !output.status.success() {
+                return Err(err("du command failed"));
+            }
+            let Ok(s) = std::str::from_utf8(&output.stdout) else {
+                return Err(err("non-UTF8 stdout"));
+            };
+            let Some(line) = s.lines().next() else {
+                return Err(err("no lines in du output"));
+            };
+            let Some(part) = line.trim().split_ascii_whitespace().next() else {
+                return Err(err("no disk usage size computed in output"));
+            };
+            part.parse()
+                .map(|x: u64| x.saturating_mul(BLOCK_SIZE))
+                .map_err(|_| err("failed to parse du output"))
+        }
+
+        let path =
+            Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src"));
+        let usage = dbg!(disk_usage(&path).await)
+            .expect("disk usage for src dir failed");
+        let du_usage =
+            dbg!(disk_usage_du(&path).await).expect("running du failed!");
+
+        // Round down the Rust disk usage result to `du`'s block size on this
+        // system.
+        let usage = if BLOCK_SIZE > 1 {
+            eprintln!("rounding up to `du(1)`'s block size of {BLOCK_SIZE}B");
+            dbg!(usage.div_ceil(BLOCK_SIZE) * BLOCK_SIZE)
+        } else {
+            usage
+        };
+
+        assert_eq!(
+            usage, du_usage,
+            "expected `disk_usage({path})` to == `du -s {path}`\n\
+             {usage}B (`disk_usage`) != {du_usage}B (`du`)",
+        );
+    }
 }
 
 #[cfg(all(target_os = "illumos", test))]

From a265af82c696e76bd0cce265be0de41580d8712e Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Thu, 29 Aug 2024 11:37:40 -0700
Subject: [PATCH 2/8] just stick the whole thing in one big blocking task

---
 sled-agent/src/zone_bundle.rs | 75 +++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs
index c0af5d0c32..77f78261a7 100644
--- a/sled-agent/src/zone_bundle.rs
+++ b/sled-agent/src/zone_bundle.rs
@@ -1548,41 +1548,51 @@ async fn compute_bundle_utilization(
 // This returns an error if reading any file or directory within the provided
 // directory fails.
 async fn disk_usage(path: &Utf8PathBuf) -> Result<u64, BundleError> {
-    let mut sum = 0;
-    // Rather than recursing, we use a VecDeque as a queue of paths to scan
-    // next. This is for two reasons:
-    //
-    // 1. If we're walking a very deeply nested directory structure, this helps
-    //    to avoid a potential stack overflow.
-    // 2. Since this is an `async fn` (due to the use of `tokio::fs`), it can't
-    //    contain a recursive call to itself without boxing it. We could work
-    //    around that bit by wrapping the whole thing in `spawn_blocking`,
-    //    instead of using `tokio::fs`.
-    let mut dirs_to_scan = std::collections::VecDeque::new();
-    dirs_to_scan.push_back(path.to_owned());
-
-    // TODO(eliza): since we're using `tokio::fs` here, this could, conceivably,
-    // be parallelized by spawning a bunch of tasks to walk each sub-directory.
-    while let Some(dir) = dirs_to_scan.pop_front() {
-        let mk_dir_error =
-            |err| BundleError::ReadDirectory { directory: dir.clone(), err };
-        let mut dir = tokio::fs::read_dir(&dir).await.map_err(mk_dir_error)?;
-        while let Some(entry) = dir.next_entry().await.map_err(mk_dir_error)? {
-            let mk_error = |err| BundleError::ReadDirectory {
-                directory: Utf8PathBuf::try_from(entry.path())
-                    .unwrap_or_else(|_| path.clone()),
+    let path = path.clone();
+    // We could, alternatively, just implement this using `tokio::fs` to read
+    // the directory and so on. However, the `tokio::fs` APIs are basically just
+    // a wrapper around `spawn_blocking`-ing code that does the `std::fs`
+    // functions. So, putting the whole thing in one big `spawn_blocking`
+    // closure that just does all the blocking fs ops lets us spend less time
+    // creating and destroying tiny blocking tasks.
+    tokio::task::spawn_blocking(move || {
+        let mut sum = 0;
+        // Rather than recursing, we use a VecDeque as a queue of paths to scan
+        // next. This is so that if we're walking a very deeply nested directory
+        // structure, we don't overflow our stack.
+        //
+        // TODO(eliza): this could, conceivably, be parallelized by spawning a
+        // bunch of blocking tasks to walk each sub-directory, instead of using
+        // a queue. But, is optimizing this really all that important? It's
+        // probably far from the slowest part of zone bundle collection anyway...
+        let mut dirs_to_scan = std::collections::VecDeque::new();
+        dirs_to_scan.push_back(path.to_owned());
+
+        while let Some(dir) = dirs_to_scan.pop_front() {
+            let mk_dir_error = |err| BundleError::ReadDirectory {
+                directory: dir.clone(),
                 err,
             };
-            if entry.file_type().await.map_err(mk_error)?.is_dir() {
-                let path = entry.path().try_into()?;
-                dirs_to_scan.push_back(path);
-            } else {
-                sum += entry.metadata().await.map_err(mk_error)?.len()
+            for entry in std::fs::read_dir(&dir).map_err(mk_dir_error)? {
+                let entry = entry.map_err(mk_dir_error)?;
+                let mk_error = |err| BundleError::ReadDirectory {
+                    directory: Utf8PathBuf::try_from(entry.path())
+                        .unwrap_or_else(|_| path.clone()),
+                    err,
+                };
+                if entry.file_type().map_err(mk_error)?.is_dir() {
+                    let path = entry.path().try_into()?;
+                    dirs_to_scan.push_back(path);
+                } else {
+                    sum += entry.metadata().map_err(mk_error)?.len()
+                }
             }
         }
-    }
 
-    Ok(sum)
+        Ok(sum)
+    })
+    .await
+    .expect("spawned blocking task should not be cancelled or panic")
 }
 
 // Return the quota for a ZFS dataset, or the available size.
@@ -1725,10 +1735,15 @@ mod tests {
 
         let path =
             Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src"));
+        let t0 = std::time::Instant::now();
         let usage = dbg!(disk_usage(&path).await)
             .expect("disk usage for src dir failed");
+        eprintln!("disk_usage({path}) took {:?}", t0.elapsed());
+
+        let t0 = std::time::Instant::now();
         let du_usage =
             dbg!(disk_usage_du(&path).await).expect("running du failed!");
+        eprintln!("du -s {path} took {:?}", t0.elapsed());
 
         // Round down the Rust disk usage result to `du`'s block size on this
         // system.

From ab6aaf05759ac3bc996ecd9ea99ec46670768ab7 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Thu, 29 Aug 2024 12:25:27 -0700
Subject: [PATCH 3/8] review feedback

---
 sled-agent/src/zone_bundle.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs
index 77f78261a7..51ad93ce4d 100644
--- a/sled-agent/src/zone_bundle.rs
+++ b/sled-agent/src/zone_bundle.rs
@@ -1575,8 +1575,8 @@ async fn disk_usage(path: &Utf8PathBuf) -> Result<u64, BundleError> {
             };
             for entry in std::fs::read_dir(&dir).map_err(mk_dir_error)? {
                 let entry = entry.map_err(mk_dir_error)?;
-                let mk_error = |err| BundleError::ReadDirectory {
-                    directory: Utf8PathBuf::try_from(entry.path())
+                let mk_error = |err| BundleError::Metadata {
+                    path: Utf8PathBuf::try_from(entry.path())
                         .unwrap_or_else(|_| path.clone()),
                     err,
                 };
@@ -1745,7 +1745,7 @@ mod tests {
             dbg!(disk_usage_du(&path).await).expect("running du failed!");
         eprintln!("du -s {path} took {:?}", t0.elapsed());
 
-        // Round down the Rust disk usage result to `du`'s block size on this
+        // Round up the Rust disk usage result to `du`'s block size on this
         // system.
         let usage = if BLOCK_SIZE > 1 {
             eprintln!("rounding up to `du(1)`'s block size of {BLOCK_SIZE}B");

From b4e873109e033d68d0dbd804cb693ace27a96485 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Thu, 29 Aug 2024 14:14:02 -0700
Subject: [PATCH 4/8] just use walkdir

---
 Cargo.lock                    |  1 +
 sled-agent/Cargo.toml         |  1 +
 sled-agent/src/zone_bundle.rs | 38 +++++++++--------------------------
 3 files changed, 11 insertions(+), 29 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1febfe6ad9..5220540b2e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6525,6 +6525,7 @@ dependencies = [
  "toml 0.8.19",
  "usdt",
  "uuid",
+ "walkdir",
  "zeroize",
  "zone 0.3.0",
 ]
diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml
index 2aefd8f464..e0559cb354 100644
--- a/sled-agent/Cargo.toml
+++ b/sled-agent/Cargo.toml
@@ -93,6 +93,7 @@ zone.workspace = true
 static_assertions.workspace = true
 omicron-workspace-hack.workspace = true
 slog-error-chain.workspace = true
+walkdir.workspace = true
 
 [target.'cfg(target_os = "illumos")'.dependencies]
 opte-ioctl.workspace = true
diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs
index 51ad93ce4d..d0c8d7d8d2 100644
--- a/sled-agent/src/zone_bundle.rs
+++ b/sled-agent/src/zone_bundle.rs
@@ -582,6 +582,11 @@ pub enum BundleError {
 
     #[error("Instance is terminating")]
     InstanceTerminating,
+
+    /// The `walkdir` crate's errors already include the path which could not be
+    /// read (if one exists), so we can just wrap them directly.
+    #[error(transparent)]
+    WalkDir(#[from] walkdir::Error),
 }
 
 // Helper function to write an array of bytes into the tar archive, with
@@ -1557,35 +1562,10 @@ async fn disk_usage(path: &Utf8PathBuf) -> Result<u64, BundleError> {
     // creating and destroying tiny blocking tasks.
     tokio::task::spawn_blocking(move || {
         let mut sum = 0;
-        // Rather than recursing, we use a VecDeque as a queue of paths to scan
-        // next. This is so that if we're walking a very deeply nested directory
-        // structure, we don't overflow our stack.
-        //
-        // TODO(eliza): this could, conceivably, be parallelized by spawning a
-        // bunch of blocking tasks to walk each sub-directory, instead of using
-        // a queue. But, is optimizing this really all that important? It's
-        // probably far from the slowest part of zone bundle collection anyway...
-        let mut dirs_to_scan = std::collections::VecDeque::new();
-        dirs_to_scan.push_back(path.to_owned());
-
-        while let Some(dir) = dirs_to_scan.pop_front() {
-            let mk_dir_error = |err| BundleError::ReadDirectory {
-                directory: dir.clone(),
-                err,
-            };
-            for entry in std::fs::read_dir(&dir).map_err(mk_dir_error)? {
-                let entry = entry.map_err(mk_dir_error)?;
-                let mk_error = |err| BundleError::Metadata {
-                    path: Utf8PathBuf::try_from(entry.path())
-                        .unwrap_or_else(|_| path.clone()),
-                    err,
-                };
-                if entry.file_type().map_err(mk_error)?.is_dir() {
-                    let path = entry.path().try_into()?;
-                    dirs_to_scan.push_back(path);
-                } else {
-                    sum += entry.metadata().map_err(mk_error)?.len()
-                }
+        for entry in walkdir::WalkDir::new(&path).into_iter() {
+            let entry = entry?;
+            if !entry.file_type().is_dir() {
+                sum += entry.metadata()?.len()
             }
         }
 

From 772cd163c523f21a67ea543ccf55e03ef6e14d5d Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Fri, 30 Aug 2024 09:07:51 -0700
Subject: [PATCH 5/8] rename `disk_usage` to `dir_size`

---
 sled-agent/src/zone_bundle.rs | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs
index d0c8d7d8d2..894ca920a3 100644
--- a/sled-agent/src/zone_bundle.rs
+++ b/sled-agent/src/zone_bundle.rs
@@ -1538,7 +1538,7 @@ async fn compute_bundle_utilization(
         // TODO-correctness: This takes into account the directories themselves,
         // and may be not quite what we want. But it is very easy and pretty
         // close.
-        let bytes_used = disk_usage(dir).await?;
+        let bytes_used = dir_size(dir).await?;
         debug!(log, "computed bytes used"; "bytes_used" => bytes_used);
         out.insert(
             dir.clone(),
@@ -1552,7 +1552,7 @@ async fn compute_bundle_utilization(
 //
 // This returns an error if reading any file or directory within the provided
 // directory fails.
-async fn disk_usage(path: &Utf8PathBuf) -> Result<u64, BundleError> {
+async fn dir_size(path: &Utf8PathBuf) -> Result<u64, BundleError> {
     let path = path.clone();
     // We could, alternatively, just implement this using `tokio::fs` to read
     // the directory and so on. However, the `tokio::fs` APIs are basically just
@@ -1635,10 +1635,10 @@ mod tests {
     use super::*;
 
     #[tokio::test]
-    async fn test_disk_usage() {
+    async fn test_dir_size() {
         let path =
             Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src"));
-        let usage = disk_usage(&path).await.unwrap();
+        let usage = dir_size(&path).await.unwrap();
         // Run `du -As /path/to/omicron/sled-agent/src`, which currently shows this
         // directory is ~450 KiB.
         assert!(
@@ -1646,7 +1646,7 @@ mod tests {
             "sled-agent manifest directory disk usage not correct?"
         );
         let path = Utf8PathBuf::from("/some/nonexistent/path");
-        assert!(disk_usage(&path).await.is_err());
+        assert!(dir_size(&path).await.is_err());
     }
 
     #[tokio::test]
@@ -1655,7 +1655,7 @@ mod tests {
         target_os = "linux",
         target_os = "macos"
     ))]
-    async fn test_disk_usage_matches_du() {
+    async fn test_dir_size_matches_du() {
         // Each OS implements slightly different `du` options.
         //
         // Linux and illumos support the "apparent" size in bytes, though using
@@ -1679,7 +1679,7 @@ mod tests {
             }
         }
 
-        async fn disk_usage_du(path: &Utf8PathBuf) -> Result<u64, BundleError> {
+        async fn dir_size_du(path: &Utf8PathBuf) -> Result<u64, BundleError> {
             const DU: &str = "du";
             let args = &[DU_ARG, "-s", path.as_str()];
             let output =
@@ -1716,13 +1716,13 @@ mod tests {
         let path =
             Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src"));
         let t0 = std::time::Instant::now();
-        let usage = dbg!(disk_usage(&path).await)
-            .expect("disk usage for src dir failed");
-        eprintln!("disk_usage({path}) took {:?}", t0.elapsed());
+        let usage =
+            dbg!(dir_size(&path).await).expect("disk usage for src dir failed");
+        eprintln!("dir_size({path}) took {:?}", t0.elapsed());
 
         let t0 = std::time::Instant::now();
         let du_usage =
-            dbg!(disk_usage_du(&path).await).expect("running du failed!");
+            dbg!(dir_size_du(&path).await).expect("running du failed!");
         eprintln!("du -s {path} took {:?}", t0.elapsed());
 
         // Round up the Rust disk usage result to `du`'s block size on this
@@ -1736,8 +1736,8 @@ mod tests {
 
         assert_eq!(
             usage, du_usage,
-            "expected `disk_usage({path})` to == `du -s {path}`\n\
-             {usage}B (`disk_usage`) != {du_usage}B (`du`)",
+            "expected `dir_size({path})` to == `du -s {path}`\n\
+             {usage}B (`dir_size`) != {du_usage}B (`du`)",
         );
     }
 }

From 92718e074aea017efc8dc3fbce72d28161391f76 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Fri, 30 Aug 2024 12:07:11 -0700
Subject: [PATCH 6/8] print du version in tests

---
 sled-agent/src/zone_bundle.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs
index 894ca920a3..63212719db 100644
--- a/sled-agent/src/zone_bundle.rs
+++ b/sled-agent/src/zone_bundle.rs
@@ -1679,8 +1679,8 @@ mod tests {
             }
         }
 
+        const DU: &str = "du";
         async fn dir_size_du(path: &Utf8PathBuf) -> Result<u64, BundleError> {
-            const DU: &str = "du";
             let args = &[DU_ARG, "-s", path.as_str()];
             let output =
                 Command::new(DU).args(args).output().await.map_err(|err| {
@@ -1713,6 +1713,13 @@ mod tests {
                 .map_err(|_| err("failed to parse du output"))
         }
 
+        let du_output =
+            Command::new(DU).arg("--version").output().await.unwrap();
+        eprintln!(
+            "du --version:\n{}\n",
+            String::from_utf8_lossy(&du_output.stdout)
+        );
+
         let path =
             Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src"));
         let t0 = std::time::Instant::now();

From 8fe21ffaac6f023b2d347fbcbb84a5badd6dced8 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Fri, 30 Aug 2024 12:07:24 -0700
Subject: [PATCH 7/8] what happens if we include dirs

---
 sled-agent/src/zone_bundle.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs
index 63212719db..bb2960674c 100644
--- a/sled-agent/src/zone_bundle.rs
+++ b/sled-agent/src/zone_bundle.rs
@@ -1564,9 +1564,7 @@ async fn dir_size(path: &Utf8PathBuf) -> Result<u64, BundleError> {
         let mut sum = 0;
         for entry in walkdir::WalkDir::new(&path).into_iter() {
             let entry = entry?;
-            if !entry.file_type().is_dir() {
-                sum += entry.metadata()?.len()
-            }
+            sum += entry.metadata()?.len()
         }
 
         Ok(sum)

From dcf0764809f438febc786c73e6c2f600c3cccc9b Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Sat, 31 Aug 2024 09:12:16 -0700
Subject: [PATCH 8/8] only run du test on illumos

---
 sled-agent/src/zone_bundle.rs | 52 ++++++-----------------------------
 1 file changed, 9 insertions(+), 43 deletions(-)

diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs
index bb2960674c..e38d4c963b 100644
--- a/sled-agent/src/zone_bundle.rs
+++ b/sled-agent/src/zone_bundle.rs
@@ -1637,8 +1637,6 @@ mod tests {
         let path =
             Utf8PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/src"));
         let usage = dir_size(&path).await.unwrap();
-        // Run `du -As /path/to/omicron/sled-agent/src`, which currently shows this
-        // directory is ~450 KiB.
         assert!(
             usage >= 1024 * 400,
             "sled-agent manifest directory disk usage not correct?"
@@ -1648,38 +1646,15 @@ mod tests {
     }
 
     #[tokio::test]
-    #[cfg(any(
-        target_os = "illumos",
-        target_os = "linux",
-        target_os = "macos"
-    ))]
+    // Different operating systems ship slightly different versions of `du(1)`,
+    // with differing behaviors. We really only care that the `dir_size`
+    // function behaves the same as the illumos `du(1)`, so skip this test on
+    // other systems.
+    #[cfg_attr(not(target_os = "illumos"), ignore)]
     async fn test_dir_size_matches_du() {
-        // Each OS implements slightly different `du` options.
-        //
-        // Linux and illumos support the "apparent" size in bytes, though using
-        // different options. macOS doesn't support bytes at all, and has a minimum
-        // block size of 512.
-        //
-        // We'll suffer the lower resolution on macOS, and get higher resolution on
-        // the others.
-        cfg_if::cfg_if! {
-            if #[cfg(target_os = "illumos")] {
-                const BLOCK_SIZE: u64 = 1;
-                const DU_ARG: &str = "-A";
-            } else if #[cfg(target_os = "linux")] {
-                const BLOCK_SIZE: u64 = 1;
-                const DU_ARG: &str = "-b";
-            } else if #[cfg(target_os = "macos")] {
-                const BLOCK_SIZE: u64 = 512;
-                const DU_ARG: &str = "-k";
-            } else {
-                compile_error!("unsupported target OS");
-            }
-        }
-
         const DU: &str = "du";
         async fn dir_size_du(path: &Utf8PathBuf) -> Result<u64, BundleError> {
-            let args = &[DU_ARG, "-s", path.as_str()];
+            let args = &["-A", "-s", path.as_str()];
             let output =
                 Command::new(DU).args(args).output().await.map_err(|err| {
                     BundleError::Command {
@@ -1706,9 +1681,7 @@ mod tests {
             let Some(part) = line.trim().split_ascii_whitespace().next() else {
                 return Err(err("no disk usage size computed in output"));
             };
-            part.parse()
-                .map(|x: u64| x.saturating_mul(BLOCK_SIZE))
-                .map_err(|_| err("failed to parse du output"))
+            part.parse().map_err(|_| err("failed to parse du output"))
         }
 
         let du_output =
@@ -1726,19 +1699,12 @@ mod tests {
         eprintln!("dir_size({path}) took {:?}", t0.elapsed());
 
         let t0 = std::time::Instant::now();
+        // Run `du -As /path/to/omicron/sled-agent/src`, which currently shows this
+        // directory is ~450 KiB.
         let du_usage =
             dbg!(dir_size_du(&path).await).expect("running du failed!");
         eprintln!("du -s {path} took {:?}", t0.elapsed());
 
-        // Round up the Rust disk usage result to `du`'s block size on this
-        // system.
-        let usage = if BLOCK_SIZE > 1 {
-            eprintln!("rounding up to `du(1)`'s block size of {BLOCK_SIZE}B");
-            dbg!(usage.div_ceil(BLOCK_SIZE) * BLOCK_SIZE)
-        } else {
-            usage
-        };
-
         assert_eq!(
             usage, du_usage,
             "expected `dir_size({path})` to == `du -s {path}`\n\