diff --git a/Cargo.toml b/Cargo.toml
index 94d73a2..53031a5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,3 +1,3 @@
 [workspace]
-members = ["src/api", "src/vmm", "src/cli"]
+members = ["src/api", "src/vmm", "src/cli", "src/fs-gen"]
 resolver = "2"
diff --git a/src/fs-gen/Cargo.toml b/src/fs-gen/Cargo.toml
new file mode 100644
index 0000000..6bccfdb
--- /dev/null
+++ b/src/fs-gen/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "fs-gen"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+clap = { version = "4.5.3", features = ["derive", "wrap_help", "string"] }
+dircpy = "0.3.16"
+fuse-backend-rs = "0.12.0"
+flate2 = "1.0.28"
+once_cell = "1.19.0"
+regex = "1.10.4"
+reqwest = { version = "0.12.3", features = ["blocking", "json"] }
+serde = { version = "1.0.197", features = ["derive"] }
+serde_json = "1.0.115"
+signal-hook = "0.3.17"
+tar = "0.4.40"
+validator = { version = "0.17.0", features = ["derive"] }
+anyhow = "1.0.82"
diff --git a/src/fs-gen/src/cli_args.rs b/src/fs-gen/src/cli_args.rs
new file mode 100644
index 0000000..dae89d6
--- /dev/null
+++ b/src/fs-gen/src/cli_args.rs
@@ -0,0 +1,81 @@
+use std::{env, path::PathBuf};
+
+use clap::{command, error::ErrorKind, CommandFactory, Parser};
+use regex::Regex;
+
+use once_cell::sync::Lazy;
+
+// So, for any of you who may be scared, this is the regex from the OCI Distribution
+// Specification for the image name + the tag. It is anchored with `^` and `$` so that the
+// whole argument has to match, not just a substring of it.
+static RE_IMAGE_NAME: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"^[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*(\/[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*)*:[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$").unwrap()
+});
+
+/// Convert an OCI image into a CPIO file
+#[derive(Parser, Debug)]
+#[command(version, about, long_about = None)]
+pub struct CliArgs {
+    /// The name of the image to download
+    pub image_name: String,
+
+    /// The path to the output file
+    #[arg(short='o', long="output", default_value=get_default_output_file().into_os_string())]
+    pub output_file: PathBuf,
+
+    /// The path to the temporary folder
+    #[arg(short='t', long="tempdir", default_value=get_default_temp_directory().into_os_string())]
+    pub temp_directory: PathBuf,
+
+    /// The host path to the guest agent binary
+    pub agent_host_path: PathBuf,
+}
+
+impl CliArgs {
+    /// Get the cli arguments with additional validation
+    ///
+    /// Reports a clap usage error and exits the process when validation fails.
+    pub fn get_args() -> Self {
+        let args = CliArgs::parse();
+
+        args.validate_image();
+        args.validate_host_path();
+
+        args
+    }
+
+    /// Check that the image name matches the `name:tag` pattern
+    fn validate_image(&self) {
+        if !RE_IMAGE_NAME.is_match(&self.image_name) {
+            exit_with_invalid_value(format!("Invalid image name: \"{}\"", self.image_name));
+        }
+    }
+
+    /// Check that the agent binary is an existing file on the host
+    fn validate_host_path(&self) {
+        if !self.agent_host_path.is_file() {
+            exit_with_invalid_value(format!(
+                "File not found for agent binary: \"{}\"",
+                self.agent_host_path.to_string_lossy()
+            ));
+        }
+    }
+}
+
+/// Report an invalid argument value through clap, then exit the process.
+fn exit_with_invalid_value(message: String) -> ! {
+    CliArgs::command()
+        .error(ErrorKind::InvalidValue, message)
+        .exit()
+}
+
+/// Get the default path for the temporary folder.
+fn get_default_temp_directory() -> PathBuf {
+    // Use a relative path instead of panicking when the current directory is unavailable
+    env::current_dir().unwrap_or_default().join(".cloudlet_temp/")
+}
+
+/// Get the default output path for the cpio file.
+fn get_default_output_file() -> PathBuf {
+    env::current_dir().unwrap_or_default().join("initramfs.img")
+}
diff --git a/src/fs-gen/src/image_builder.rs b/src/fs-gen/src/image_builder.rs
new file mode 100644
index 0000000..1d471f9
--- /dev/null
+++ b/src/fs-gen/src/image_builder.rs
@@ -0,0 +1,171 @@
+use std::{
+    fs,
+    path::{Path, PathBuf},
+    sync::Arc,
+    thread,
+};
+
+use anyhow::anyhow;
+use anyhow::{Context, Ok, Result};
+use fuse_backend_rs::{
+    api::{filesystem::Layer, server::Server},
+    overlayfs::{config::Config, OverlayFs},
+    passthrough::{self, PassthroughFs},
+    transport::{FuseChannel, FuseSession},
+};
+
+/// Pairs the FUSE server with the session channel it reads requests from
+pub struct FuseServer {
+    server: Arc<Server<Arc<OverlayFs>>>,
+    ch: FuseChannel,
+}
+
+type BoxedLayer = Box<dyn Layer<Inode = u64, Handle = u64> + Send + Sync>;
+
+/// Initializes a passthrough fs for a given layer
+/// a passthrough fs is just a dummy implementation to map to the physical disk
+/// # Usage
+/// ```
+/// let passthrough_layer = new_passthroughfs_layer("/path/to/layer")
+/// ```
+fn new_passthroughfs_layer(rootdir: &str) -> Result<BoxedLayer> {
+    let config = passthrough::Config {
+        root_dir: String::from(rootdir),
+        xattr: true,
+        do_import: true,
+        ..Default::default()
+    };
+    let fs = Box::new(PassthroughFs::<()>::new(config)?);
+    fs.import()
+        .with_context(|| format!("Failed to create the passthrough layer: {}", rootdir))?;
+    Ok(fs as BoxedLayer)
+}
+
+/// Ensure a destination folder is created
+///
+/// An already existing folder is not an error; any other failure is returned with its cause.
+fn ensure_folder_created(output_folder: &Path) -> Result<()> {
+    if let Err(err) = fs::create_dir(output_folder) {
+        // If the folder already exists, we're fine
+        if err.kind() != std::io::ErrorKind::AlreadyExists {
+            return Err(anyhow!(
+                "Failed to create folder {}: {}",
+                output_folder.display(),
+                err
+            ));
+        }
+    }
+
+    Ok(())
+}
+
+/// Merges all the layers into a single folder for further manipulation
+/// It works by instantiating an overlay fs via FUSE then copying the files to the desired target
+/// # Usage
+/// ```
+/// merge_layer(vec!["source/layer_1", "source/layer_2"], "/tmp/fused_layers")
+/// ```
+pub fn merge_layer(blob_paths: &[PathBuf], output_folder: &Path) -> Result<()> {
+    // Stack all lower layers
+    let mut lower_layers = Vec::new();
+    for lower in blob_paths {
+        lower_layers.push(Arc::new(new_passthroughfs_layer(&lower.to_string_lossy())?));
+    }
+
+    let mountpoint = Path::new("/tmp/cloudlet_internal");
+    let fs_name = "cloudlet_overlay";
+
+    ensure_folder_created(mountpoint)?;
+    ensure_folder_created(output_folder)?;
+
+    // Setup the overlay fs config
+    let config = Config {
+        work: "/work".into(),
+        mountpoint: output_folder.to_string_lossy().into(),
+        do_import: true,
+        ..Default::default()
+    };
+
+    let fs = OverlayFs::new(None, lower_layers, config)
+        .with_context(|| "Failed to construct the Overlay fs struct !".to_string())?;
+    fs.import()
+        .with_context(|| "Failed to initialize the overlay fs".to_string())?;
+
+    // Enable a fuse session to make the fs available
+    let mut se = FuseSession::new(mountpoint, fs_name, "", true)
+        .with_context(|| "Failed to construct the Fuse session")?;
+    se.set_allow_other(false);
+    se.mount()
+        .with_context(|| "Failed to mount the overlay fs".to_string())?;
+
+    // Fuse session
+    let mut server = FuseServer {
+        server: Arc::new(Server::new(Arc::new(fs))),
+        ch: se
+            .new_channel()
+            .with_context(|| "Failed to create a new channel".to_string())?,
+    };
+
+    let handle = thread::spawn(move || {
+        let _ = server.svc_loop();
+    });
+
+    println!("copy starting !");
+    // So now we need to copy the files. The result is only checked once the session has
+    // been torn down, so that a failed copy does not skip the unmount below.
+    let copy_result = dircpy::copy_dir(mountpoint, output_folder).with_context(|| {
+        format!(
+            "Failed to copy directories into the output folder: {}",
+            output_folder.to_string_lossy()
+        )
+    });
+    if copy_result.is_ok() {
+        println!("copy finished");
+    }
+
+    // Unmount sessions so it can be re-used in later executions of the program
+    se.wake()
+        .with_context(|| "Failed to exit the fuse session".to_string())?;
+    se.umount()
+        .with_context(|| "Failed to unmount the fuse session".to_string())?;
+
+    let _ = handle.join();
+    copy_result
+}
+
+impl FuseServer {
+    /// Run a loop to execute requests from the FUSE session
+    ///
+    /// Returns once the channel yields no request or a message fails with `EncodeMessage`.
+    pub fn svc_loop(&mut self) -> Result<()> {
+        println!("entering server loop");
+        loop {
+            let request = self
+                .ch
+                .get_request()
+                .with_context(|| "Failed to get message from fuse session".to_string())?;
+
+            // No request means there is nothing left to serve: leave the loop
+            let Some((reader, writer)) = request else {
+                println!("fuse server exits");
+                break;
+            };
+
+            if let Err(e) = self
+                .server
+                .handle_message(reader, writer.into(), None, None)
+            {
+                match e {
+                    fuse_backend_rs::Error::EncodeMessage(_ebadf) => {
+                        break;
+                    }
+                    _ => {
+                        eprintln!("Handling fuse message failed");
+                        continue;
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/src/fs-gen/src/image_loader.rs b/src/fs-gen/src/image_loader.rs
new file mode 100644
index 0000000..45c77bb
--- /dev/null
+++ b/src/fs-gen/src/image_loader.rs
@@ -0,0 +1,156 @@
+use flate2::read::GzDecoder;
+use reqwest::blocking::{Client, Response};
+use std::error::Error;
+use std::fs::create_dir_all;
+use std::path::{Path, PathBuf};
+use tar::Archive;
+
+/// Download every layer of an image from Docker Hub and unpack them
+///
+/// `image_name` is `name[:tag]`, the tag defaulting to `latest`. Every layer is unpacked into
+/// its own sub-folder of `output_file`; the paths of these folders are returned.
+pub fn download_image_fs(
+    image_name: &str,
+    output_file: PathBuf,
+) -> Result<Vec<PathBuf>, Box<dyn Error>> {
+    // Get image's name and tag
+    let (image_name, tag) = image_name.split_once(':').unwrap_or((image_name, "latest"));
+
+    // Download image manifest
+    let mut manifest_json = download_manifest(image_name, tag)?;
+
+    // Verify if it's a manifest or a manifest list
+    if manifest_json["layers"].as_array().is_none() {
+        let manifests = manifest_json["manifests"].as_array().ok_or_else(|| {
+            format!(
+                "Couldn't find a Docker V2 or OCI manifest for {}:{}",
+                image_name, tag
+            )
+        })?;
+
+        println!("Manifest list found. Looking for an amd64 manifest...");
+        // Get a manifest for amd64 architecture from the manifest list. Indexing a JSON value
+        // with a missing key yields `Null`, so entries without a platform are skipped instead
+        // of causing a panic.
+        let amd64_manifest = manifests
+            .iter()
+            .find(|manifest| manifest["platform"]["architecture"].as_str() == Some("amd64"))
+            .ok_or("This image doesn't support amd64 architecture")?;
+        let amd64_digest = amd64_manifest["digest"]
+            .as_str()
+            .ok_or("The amd64 manifest has no digest")?
+            .to_owned();
+
+        println!("Downloading manifest for amd64 architecture...");
+        manifest_json = download_manifest(image_name, &amd64_digest)?;
+    }
+
+    let layers = manifest_json["layers"]
+        .as_array()
+        .ok_or("Couldn't find image layers in the manifest.")?;
+
+    create_dir_all(&output_file)?;
+
+    download_layers(layers, image_name, &output_file)
+}
+
+/// Request an anonymous pull token for a `library/` image from Docker Hub
+fn fetch_pull_token(client: &Client, image_name: &str) -> Result<String, Box<dyn Error>> {
+    let token_json: serde_json::Value = client
+        .get(format!("https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/{image_name}:pull"))
+        .send()?
+        .error_for_status()?
+        .json()?;
+
+    let token = token_json["token"]
+        .as_str()
+        .ok_or("The Docker Hub authentication response contains no token")?;
+
+    Ok(token.to_owned())
+}
+
+/// Download the manifest (or manifest list) of an image for a tag or a digest
+fn download_manifest(image_name: &str, digest: &str) -> Result<serde_json::Value, Box<dyn Error>> {
+    // Create a reqwest HTTP client
+    let client = Client::new();
+
+    // Get a token for anonymous authentication to Docker Hub
+    let token = fetch_pull_token(&client, image_name)?;
+
+    // Query Docker Hub API to get the image manifest
+    let manifest_url = format!(
+        "https://registry-1.docker.io/v2/library/{}/manifests/{}",
+        image_name, digest
+    );
+
+    let manifest_response = client
+        .get(manifest_url)
+        .header(
+            "Accept",
+            "application/vnd.docker.distribution.manifest.v2+json",
+        )
+        .header(
+            "Accept",
+            "application/vnd.docker.distribution.manifest.list.v2+json",
+        )
+        .header("Accept", "application/vnd.oci.image.manifest.v1+json")
+        .header("Accept", "application/vnd.oci.image.index.v1+json")
+        .bearer_auth(token)
+        .send()?
+        .error_for_status()?;
+
+    let manifest_json: serde_json::Value = manifest_response.json()?;
+
+    println!("{}", manifest_json);
+
+    Ok(manifest_json)
+}
+
+/// Unpack a gzip compressed tarball into `output_dir`
+fn unpack_tarball(tar: GzDecoder<Response>, output_dir: &Path) -> Result<(), Box<dyn Error>> {
+    Archive::new(tar).unpack(output_dir)?;
+    Ok(())
+}
+
+/// Download and unpack every layer of the manifest, returning one folder per layer
+fn download_layers(
+    layers: &[serde_json::Value],
+    image_name: &str,
+    output_dir: &Path,
+) -> Result<Vec<PathBuf>, Box<dyn Error>> {
+    let client = Client::new();
+
+    // Get a token for anonymous authentication to Docker Hub
+    let token = fetch_pull_token(&client, image_name)?;
+
+    let mut layer_paths = Vec::with_capacity(layers.len());
+
+    println!("Downloading and unpacking layers:");
+
+    // Download and unpack each layer
+    for layer in layers {
+        let digest = layer["digest"]
+            .as_str()
+            .ok_or("A layer of the manifest has no digest")?;
+        let layer_url = format!(
+            "https://registry-1.docker.io/v2/library/{}/blobs/{}",
+            image_name, digest
+        );
+
+        // Fail on an HTTP error instead of handing an error body to the gzip decoder
+        let response = client
+            .get(&layer_url)
+            .bearer_auth(&token)
+            .send()?
+            .error_for_status()?;
+
+        print!(" - {}", digest);
+
+        let output_path = output_dir.join(digest);
+
+        unpack_tarball(GzDecoder::new(response), &output_path)?;
+        println!(" - unpacked");
+        layer_paths.push(output_path);
+    }
+    Ok(layer_paths)
+}
diff --git a/src/fs-gen/src/initramfs_generator.rs b/src/fs-gen/src/initramfs_generator.rs
new file mode 100644
index 0000000..76027dc
--- /dev/null
+++ b/src/fs-gen/src/initramfs_generator.rs
@@ -0,0 +1,60 @@
+use std::fs::{File, Permissions};
+use std::io::Write;
+use std::os::unix::fs::PermissionsExt;
+use std::path::Path;
+use std::process::{Command, Stdio};
+
+/// Content of the `init` script written at the root of the initramfs
+const INIT_FILE: &[u8] = b"#! /bin/sh
+#
+# Cloudlet initramfs generation
+#
+mount -t devtmpfs dev /dev
+mount -t proc proc /proc
+mount -t sysfs sysfs /sys
+ip link set up dev lo
+
+exec /sbin/getty -n -l /bin/sh 115200 /dev/console
+poweroff -f
+";
+
+/// Write the executable `init` script into the folder `path`
+///
+/// # Panics
+/// Panics if the file cannot be created, written or made executable.
+pub fn create_init_file(path: &Path) {
+    let file_path = path.join("init");
+    let mut file = File::create(file_path).expect("Could not create init file");
+
+    file.write_all(INIT_FILE)
+        .expect("Could not write init file");
+    file.set_permissions(Permissions::from_mode(0o755))
+        .expect("Could not set permissions of init file");
+}
+
+/// Pack the content of `root_directory` into an lzma compressed cpio archive at `output`
+///
+/// # Panics
+/// Panics if the output file cannot be created or if the packaging command fails.
+pub fn generate_initramfs(root_directory: &Path, output: &Path) {
+    let file = File::create(output).expect("Could not create initramfs file");
+    file.set_permissions(Permissions::from_mode(0o644))
+        .expect("Could not set permissions");
+
+    println!("Generating initramfs...");
+
+    let status = Command::new("sh")
+        .current_dir(root_directory)
+        .stdout(Stdio::from(file))
+        .arg("-c")
+        .arg("find . -print0 | cpio -0 --create --owner=root:root --format=newc | xz -9 --format=lzma")
+        .status()
+        .expect("Failed to package initramfs");
+
+    // Do not announce success when the packaging command reported a failure
+    if !status.success() {
+        panic!("Failed to package initramfs: {}", status);
+    }
+
+    println!("Initramfs generated!");
+}
diff --git a/src/fs-gen/src/main.rs b/src/fs-gen/src/main.rs
new file mode 100644
index 0000000..8229e85
--- /dev/null
+++ b/src/fs-gen/src/main.rs
@@ -0,0 +1,44 @@
+use std::{fs::remove_dir_all, process::ExitCode};
+
+use crate::initramfs_generator::{create_init_file, generate_initramfs};
+use image_builder::merge_layer;
+
+mod cli_args;
+mod image_builder;
+mod image_loader;
+mod initramfs_generator;
+
+fn main() -> ExitCode {
+    let args = cli_args::CliArgs::get_args();
+    println!("Hello, world!, {:?}", args);
+
+    let layers_subdir = args.temp_directory.join("layers/");
+    let overlay_subdir = args.temp_directory.join("overlay/");
+
+    // TODO: better organise layers and OverlayFS build in the temp directory
+    let exit_code = match image_loader::download_image_fs(&args.image_name, layers_subdir) {
+        Err(e) => {
+            eprintln!("Error: {}", e);
+            ExitCode::FAILURE
+        }
+        Ok(layers_paths) => {
+            println!("Image downloaded successfully! Layers' paths:");
+            for path in &layers_paths {
+                println!(" - {}", path.display());
+            }
+
+            merge_layer(&layers_paths, &overlay_subdir).expect("Merging layers failed");
+            create_init_file(&overlay_subdir);
+            generate_initramfs(&overlay_subdir, &args.output_file);
+            ExitCode::SUCCESS
+        }
+    };
+
+    // cleanup of temporary directory, also on the error path so that partially downloaded
+    // layers are not left behind
+    if args.temp_directory.exists() {
+        remove_dir_all(&args.temp_directory).expect("Could not remove temporary directory");
+    }
+
+    exit_code
+}
diff --git a/src/fs-gen/test b/src/fs-gen/test
new file mode 100644
index 0000000..e69de29