Skip to content
This repository has been archived by the owner on Jan 15, 2025. It is now read-only.

Commit

Permalink
Initial "chunking" code
Browse files Browse the repository at this point in the history
This analyzes an ostree commit and splits it into chunks
suitable for output to separate layers in an OCI image.
  • Loading branch information
cgwalters committed Oct 31, 2021
1 parent 98b700e commit d90a1b1
Show file tree
Hide file tree
Showing 7 changed files with 487 additions and 20 deletions.
296 changes: 296 additions & 0 deletions lib/src/chunking.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
//! Split an OSTree commit into separate chunks
// SPDX-License-Identifier: Apache-2.0 OR MIT

use std::borrow::Borrow;
use std::collections::{BTreeMap, BTreeSet};
use std::rc::Rc;

use crate::objgv::*;
use anyhow::Result;
use camino::Utf8PathBuf;
use gvariant::aligned_bytes::TryAsAligned;
use gvariant::{Marker, Structure};
use ostree;
use ostree::prelude::*;
use ostree::{gio, glib};

//const MODULES: &str = "/usr/lib/modules";
/// Path inside the commit whose contents `auto_chunk` splits out into its own chunk.
const FIRMWARE: &str = "/usr/lib/firmware";

/// File attributes requested when enumerating directory children in `extend_chunk`.
const QUERYATTRS: &str = "standard::name,standard::type";

// Size in bytes of the smallest chunk we will emit (constant currently disabled).
// pub(crate) const MIN_CHUNK_SIZE: u32 = 10 * 1024;
/// Maximum number of layers (chunks) we will use.
// We take half the limit of 128.
// https://github.com/ostreedev/ostree-rs-ext/issues/69
pub(crate) const MAX_CHUNKS: u32 = 64;

/// Immutable, reference-counted string.
///
/// Cloning only bumps the reference count, so one checksum string can be
/// shared between several collections without copying its bytes.
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub(crate) struct RcStr(Rc<str>);

/// Allows collections keyed by `RcStr` to be queried with a plain `&str`.
impl Borrow<str> for RcStr {
    fn borrow(&self) -> &str {
        let RcStr(inner) = self;
        inner
    }
}

/// Copies the given string slice into a fresh reference-counted allocation.
impl From<&str> for RcStr {
    fn from(s: &str) -> Self {
        RcStr(s.into())
    }
}

/// A group of content objects, tracked together with their total size.
#[derive(Debug, Default)]
pub(crate) struct Chunk {
/// Maps a content object checksum to its size in bytes and every path that references it.
pub(crate) content: BTreeMap<RcStr, (u64, Vec<Utf8PathBuf>)>,
/// Sum of the sizes of the objects in `content`; each object is counted once
/// regardless of how many paths reference it.
pub(crate) size: u64,
}

/// Reference to a metadata object discovered while walking a commit.
#[derive(Debug)]
pub(crate) enum Meta {
/// Hex checksum of a directory tree object.
DirTree(RcStr),
/// Hex checksum of a directory metadata object.
DirMeta(RcStr),
}

impl Meta {
    /// The ostree object type matching this variant.
    pub(crate) fn objtype(&self) -> ostree::ObjectType {
        match *self {
            Self::DirTree(..) => ostree::ObjectType::DirTree,
            Self::DirMeta(..) => ostree::ObjectType::DirMeta,
        }
    }

    /// The hex checksum carried by this entry, whichever variant it is.
    pub(crate) fn checksum(&self) -> &str {
        // Both variants wrap the same payload type, so one binding covers them.
        let (Self::DirTree(csum) | Self::DirMeta(csum)) = self;
        &csum.0
    }
}

/// Result of analyzing one commit: its metadata objects plus its content split into chunks.
#[derive(Debug, Default)]
pub(crate) struct Chunking {
/// Accumulated byte length of the dirtree and dirmeta variants loaded by the recursive walk.
pub(crate) metadata_size: u64,
/// Checksum of the commit that was analyzed, as resolved from the requested rev.
pub(crate) commit: Box<str>,
/// Metadata objects recorded by the walk.
pub(crate) meta: Vec<Meta>,
/// Content objects that have not been moved into any entry of `chunks`.
pub(crate) remainder: Chunk,
/// Chunks split off from `remainder` so far.
pub(crate) chunks: Vec<Chunk>,
}

// pub(crate) struct ChunkConfig {
// pub(crate) min_size: u32,
// pub(crate) max_chunks: u32,
// }
//
// impl Default for ChunkConfig {
// fn default() -> Self {
// Self {
// min_size: MIN_CHUNK_SIZE,
// max_chunks: MAX_CHUNKS,
// }
// }
// }

/// Mutable state threaded through `generate_chunking_recurse`.
#[derive(Default)]
struct Generation {
/// Path of the directory currently being visited.
path: Utf8PathBuf,
/// Running total of the byte length of the metadata variants loaded so far.
metadata_size: u64,
/// Metadata objects recorded so far.
meta: Vec<Meta>,
/// Checksums of the dirtree objects that have already been visited.
dirtree_found: BTreeSet<RcStr>,
/// Checksums of the dirmeta objects that have been recorded.
dirmeta_found: BTreeSet<RcStr>,
}

/// Recursively walk the directory tree `dt`, recording everything it references.
///
/// * Every file's content object is registered in `chunk`, keyed by checksum,
///   together with the path it was found at. An object referenced from several
///   paths contributes to `chunk.size` only once.
/// * Every child dirtree and dirmeta object is recorded in `gen.meta` exactly
///   once, and its serialized length is added to `gen.metadata_size`.
///
/// `gen.path` must name the directory that `dt` describes; it is extended while
/// descending into a child and restored before returning.
///
/// # Errors
///
/// Fails if the variant data is not suitably aligned or if any referenced
/// object cannot be loaded from `repo`.
fn generate_chunking_recurse(
    repo: &ostree::Repo,
    gen: &mut Generation,
    chunk: &mut Chunk,
    dt: &glib::Variant,
) -> Result<()> {
    let dt = dt.data_as_bytes();
    let dt = dt.try_as_aligned()?;
    let dt = gv_dirtree!().cast(dt);
    let (files, dirs) = dt.to_tuple();
    // A reusable buffer to avoid heap allocating these
    let mut hexbuf = [0u8; 64];
    for file in files {
        let (name, csum) = file.to_tuple();
        let fpath = gen.path.join(name.to_str());
        hex::encode_to_slice(csum, &mut hexbuf)?;
        let checksum = std::str::from_utf8(&hexbuf)?;
        let (_, meta, _) = repo.load_file(checksum, gio::NONE_CANCELLABLE)?;
        // We know this API returns this value; it only has a return nullable because the
        // caller can pass NULL to skip it.
        let meta = meta.unwrap();
        let size = meta.size() as u64;
        let entry = chunk.content.entry(RcStr::from(checksum)).or_default();
        entry.0 = size;
        // Only the first path referencing an object counts towards the chunk size.
        let first = entry.1.is_empty();
        if first {
            chunk.size += size;
        }
        entry.1.push(fpath);
    }
    for item in dirs {
        let (name, contents_csum, meta_csum) = item.to_tuple();
        let name = name.to_str();
        // Extend our current path
        gen.path.push(name);
        hex::encode_to_slice(contents_csum, &mut hexbuf)?;
        let checksum_s = std::str::from_utf8(&hexbuf)?;
        if !gen.dirtree_found.contains(checksum_s) {
            let checksum = RcStr::from(checksum_s);
            gen.dirtree_found.insert(RcStr::clone(&checksum));
            gen.meta.push(Meta::DirTree(checksum));
            let child_v = repo.load_variant(ostree::ObjectType::DirTree, checksum_s)?;
            gen.metadata_size += child_v.data_as_bytes().as_ref().len() as u64;
            generate_chunking_recurse(repo, gen, chunk, &child_v)?;
        }
        hex::encode_to_slice(meta_csum, &mut hexbuf)?;
        let checksum_s = std::str::from_utf8(&hexbuf)?;
        // Deduplicate against the dirmeta set (not the dirtree one), otherwise a
        // dirmeta shared by many directories is recorded and sized once per use.
        if !gen.dirmeta_found.contains(checksum_s) {
            let checksum = RcStr::from(checksum_s);
            gen.dirmeta_found.insert(RcStr::clone(&checksum));
            let child_v = repo.load_variant(ostree::ObjectType::DirMeta, checksum_s)?;
            gen.metadata_size += child_v.data_as_bytes().as_ref().len() as u64;
            gen.meta.push(Meta::DirMeta(checksum));
        }
        // We did a push above, so pop must succeed.
        assert!(gen.path.pop());
    }
    Ok(())
}

impl Chunk {
    /// Create an empty chunk.
    fn new() -> Self {
        Self::default()
    }

    /// Transfer the object identified by `checksum` from `self` into `dest`,
    /// adjusting both size totals.
    ///
    /// Returns `true` if the object was present in `self` and has been moved,
    /// `false` if there was nothing to move.
    fn move_obj(&mut self, dest: &mut Self, checksum: &str) -> bool {
        // In most cases, we expect the object to exist in the source. However, it's
        // convenient here to simply ignore objects which were already moved into
        // a chunk.
        let (key, (size, paths)) = match self.content.remove_entry(checksum) {
            Some(entry) => entry,
            None => return false,
        };
        let previous = dest.content.insert(key, (size, paths));
        debug_assert!(previous.is_none());
        self.size -= size;
        dest.size += size;
        true
    }

    // fn split(self) -> (Self, Self) {
    //     todo!()
    // }
}

impl Chunking {
/// Generate an initial single chunk.
pub(crate) fn new(repo: &ostree::Repo, rev: &str) -> Result<Self> {
// Find the target commit
let rev = repo.resolve_rev(rev, false)?.unwrap();

// Load and parse the commit object
let (commit_v, _) = repo.load_commit(&rev)?;
let commit_v = commit_v.data_as_bytes();
let commit_v = commit_v.try_as_aligned()?;
let commit = gv_commit!().cast(commit_v);
let commit = commit.to_tuple();

// Find the root directory tree
let contents_checksum = &hex::encode(commit.6);
let contents_v = repo.load_variant(ostree::ObjectType::DirTree, contents_checksum)?;

// Load it all into a single chunk
let mut gen: Generation = Default::default();
gen.path = Utf8PathBuf::from("/");
let mut chunk: Chunk = Default::default();
generate_chunking_recurse(repo, &mut gen, &mut chunk, &contents_v)?;

let chunking = Chunking {
commit: Box::from(rev.as_str()),
metadata_size: gen.metadata_size,
meta: gen.meta,
remainder: chunk,
..Default::default()
};
Ok(chunking)
}

/// Find the object named by `path` in `src`, and move it to `dest`.
fn extend_chunk(
repo: &ostree::Repo,
src: &mut Chunk,
dest: &mut Chunk,
path: &ostree::RepoFile,
) -> Result<()> {
let cancellable = gio::NONE_CANCELLABLE;
let ft = path.query_file_type(gio::FileQueryInfoFlags::NOFOLLOW_SYMLINKS, cancellable);
if ft == gio::FileType::Directory {
let e = path.enumerate_children(
QUERYATTRS,
gio::FileQueryInfoFlags::NOFOLLOW_SYMLINKS,
cancellable,
)?;
for child in e {
let childi = child?;
let child = path.child(childi.name());
let child = child.downcast::<ostree::RepoFile>().unwrap();
Self::extend_chunk(repo, src, dest, &child)?;
}
} else {
let checksum = path.checksum().unwrap();
src.move_obj(dest, checksum.as_str());
}
Ok(())
}

/// Create a new chunk from the provided filesystem paths.
pub(crate) fn chunk_paths<'a>(
&mut self,
repo: &ostree::Repo,
paths: impl IntoIterator<Item = &'a str>,
) -> Result<()> {
// Do nothing if we've hit our max.
if self.chunks.len() as u32 == MAX_CHUNKS {
return Ok(());
}
let cancellable = gio::NONE_CANCELLABLE;
let (root, _) = repo.read_commit(&self.commit, cancellable)?;
let root = root.downcast::<ostree::RepoFile>().unwrap();
let mut chunk = Chunk::new();
for path in paths {
let child = root.resolve_relative_path(path);
if !child.query_exists(cancellable) {
continue;
}
let child = child.downcast::<ostree::RepoFile>().unwrap();
Self::extend_chunk(repo, &mut self.remainder, &mut chunk, &child)?;
}
self.chunks.push(chunk);
Ok(())
}

/// Apply built-in heuristics to automatically create chunks.
pub(crate) fn auto_chunk(&mut self, repo: &ostree::Repo) -> Result<()> {
self.chunk_paths(repo, [FIRMWARE])?;
Ok(())
}
}

/// Write a human-readable summary of `src` to stdout: the metadata size, then
/// one line per chunk, then the remainder.
pub(crate) fn print(src: &Chunking) {
    let metadata = glib::format_size(src.metadata_size);
    println!("Metadata: {}", metadata);

    src.chunks.iter().enumerate().for_each(|(idx, chunk)| {
        let objects = chunk.content.len();
        let size = glib::format_size(chunk.size);
        println!("Chunk {}: objects:{} size:{}", idx, objects, size);
    });

    let rest = &src.remainder;
    let size = glib::format_size(rest.size);
    println!("Remainder: objects:{} size:{}", rest.content.len(), size);
}
38 changes: 35 additions & 3 deletions lib/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use std::ffi::OsString;
use structopt::StructOpt;

use crate::container::store::{LayeredImageImporter, PrepareResult};
use crate::container::{Config, ImageReference, UnencapsulateOptions, OstreeImageReference};
use crate::container::{Config, ImageReference, OstreeImageReference, UnencapsulateOptions};

fn parse_imgref(s: &str) -> Result<OstreeImageReference> {
OstreeImageReference::try_from(s)
Expand Down Expand Up @@ -118,6 +118,10 @@ enum ContainerOpts {
/// Corresponds to the Dockerfile `CMD` instruction.
#[structopt(long)]
cmd: Option<Vec<String>>,

#[structopt(long)]
/// Output in multiple blobs
ex_chunked: bool,
},

/// Commands for working with (possibly layered, non-encapsulated) container images.
Expand Down Expand Up @@ -206,6 +210,19 @@ struct ImaSignOpts {
key: String,
}

/// Experimental options
#[derive(Debug, StructOpt)]
enum ExperimentalOpts {
    /// Print chunking
    PrintChunks {
        /// Path to the repository
        #[structopt(long)]
        repo: String,
        /// The ostree ref or commit
        rev: String,
    },
}

/// Toplevel options for extended ostree functionality.
#[derive(Debug, StructOpt)]
#[structopt(name = "ostree-ext")]
Expand All @@ -217,6 +234,8 @@ enum Opt {
Container(ContainerOpts),
/// IMA signatures
ImaSign(ImaSignOpts),
/// Experimental/debug CLI
Experimental(ExperimentalOpts),
}

/// Import a tar archive containing an ostree commit.
Expand Down Expand Up @@ -310,13 +329,15 @@ async fn container_export(
imgref: &ImageReference,
labels: BTreeMap<String, String>,
cmd: Option<Vec<String>>,
chunked: bool,
) -> Result<()> {
let repo = &ostree::Repo::open_at(libc::AT_FDCWD, repo, gio::NONE_CANCELLABLE)?;
let config = Config {
labels: Some(labels),
cmd,
};
let pushed = crate::container::encapsulate(repo, rev, &config, &imgref).await?;
let opts = Some(crate::container::ExportOpts { chunked });
let pushed = crate::container::encapsulate(repo, rev, &config, opts, &imgref).await?;
println!("{}", pushed);
Ok(())
}
Expand Down Expand Up @@ -417,6 +438,7 @@ where
imgref,
labels,
cmd,
ex_chunked,
} => {
let labels: Result<BTreeMap<_, _>> = labels
.into_iter()
Expand All @@ -429,7 +451,8 @@ where
Ok((k.to_string(), v.to_string()))
})
.collect();
container_export(&repo, &rev, &imgref, labels?, cmd).await

container_export(&repo, &rev, &imgref, labels?, cmd, ex_chunked).await
}
ContainerOpts::Image(opts) => match opts {
ContainerImageOpts::List { repo } => {
Expand Down Expand Up @@ -476,5 +499,14 @@ where
},
},
Opt::ImaSign(ref opts) => ima_sign(opts),
Opt::Experimental(ref opts) => match opts {
ExperimentalOpts::PrintChunks { repo, rev } => {
let repo = &ostree::Repo::open_at(libc::AT_FDCWD, &repo, gio::NONE_CANCELLABLE)?;
let mut chunks = crate::chunking::Chunking::new(repo, rev)?;
chunks.auto_chunk(repo)?;
crate::chunking::print(&chunks);
Ok(())
}
},
}
}
Loading

0 comments on commit d90a1b1

Please sign in to comment.