Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prepare-var: Do not remount stateroot var in-place, and unmount the temporary var mount after real /var is mounted #3363

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 66 additions & 90 deletions src/libostree/ostree-impl-system-generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,31 +126,32 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha
#endif
}

// Resolve symlink to return osname
static gboolean
_ostree_sysroot_parse_bootlink_aboot (const char *bootlink, char **out_osname, GError **error)
write_unit_file (int dir_fd, const char *path, GCancellable *cancellable, GError **error, const char *fmt, ...)
{
static gsize regex_initialized;
static GRegex *regex;
g_autofree char *symlink_val = glnx_readlinkat_malloc (-1, bootlink, NULL, error);
if (!symlink_val)
return glnx_prefix_error (error, "Failed to read '%s' symlink", bootlink);

if (g_once_init_enter (&regex_initialized))
{
regex = g_regex_new ("^deploy/([^/]+)/", 0, 0, NULL);
g_assert (regex);
g_once_init_leave (&regex_initialized, 1);
}

g_autoptr (GMatchInfo) match = NULL;
if (!g_regex_match (regex, symlink_val, 0, &match))
return glnx_throw (error,
"Invalid aboot symlink in /ostree, expected symlink to resolve to "
"deploy/OSNAME/... instead it resolves to '%s'",
symlink_val);

*out_osname = g_match_info_fetch (match, 1);
g_auto (GLnxTmpfile) tmpf = {
0,
};
if (!glnx_open_tmpfile_linkable_at (dir_fd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error))
return FALSE;
g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE);
gsize bytes_written;
va_list args;
va_start (args, fmt);
const gboolean r = g_output_stream_vprintf (outstream, &bytes_written, cancellable, error, fmt, args);
va_end (args);
if (!r)
return FALSE;
if (!g_output_stream_flush (outstream, cancellable, error))
return FALSE;
g_clear_object (&outstream);
/* It should be readable */
if (!glnx_fchmod (tmpf.fd, 0644, error))
return FALSE;
/* Error out if somehow it already exists, that'll help us debug conflicts */
if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, dir_fd, path,
error))
return FALSE;
return TRUE;
}

Expand All @@ -163,22 +164,37 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
/* Not currently cancellable, but define a var in case we care later */
GCancellable *cancellable = NULL;
/* Some path constants to avoid typos */
static const char fstab_path[] = "/etc/fstab";
static const char var_path[] = "/var";
const char *fstab_path = "/etc/fstab";
const char *var_dst = "/var";
const char *var_src = OTCORE_RUN_OSTREE_PRIVATE "/var";

/* Written by ostree-sysroot-deploy.c. We parse out the stateroot here since we
* need to know it to mount /var. Unfortunately we can't easily use the
* libostree API to find the booted deployment since /boot might not have been
* mounted yet.
/* Prepare to write to the output unit dir; we use the "normal" dir
* that overrides /usr, but not /etc.
*/
g_autofree char *stateroot = NULL;
if (is_aboot)
{
if (!_ostree_sysroot_parse_bootlink_aboot (ostree_target, &stateroot, error))
return glnx_prefix_error (error, "Parsing aboot stateroot");
}
else if (!_ostree_sysroot_parse_bootlink (ostree_target, NULL, &stateroot, NULL, NULL, error))
return glnx_prefix_error (error, "Parsing stateroot");
glnx_autofd int normal_dir_dfd = -1;
if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error))
return FALSE;

/* Generate a unit to unmount var_src */
if (!write_unit_file (normal_dir_dfd, "ostree-unmount-temp-var.service", cancellable, error,
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
"[Unit]\n"
"Documentation=man:ostree(1)\n"
"ConditionPathIsMountPoint=%s\n"
"After=var.mount\n"
"\n"
"[Service]\n"
"Type=oneshot\n"
"ExecStart=/usr/bin/umount --lazy %s\n",
var_src, var_src))
return FALSE;

if (!glnx_shutil_mkdir_p_at (normal_dir_dfd, "local-fs.target.wants", 0755, cancellable,
error))
return FALSE;
if (symlinkat ("../ostree-unmount-temp-var.service", normal_dir_dfd,
"local-fs.target.wants/ostree-unmount-temp-var.service") < 0)
return glnx_throw_errno_prefix (error, "symlinkat");

/* Load /etc/fstab if it exists, and look for a /var mount */
g_autoptr (OtLibMountFile) fstab = setmntent (fstab_path, "re");
Expand All @@ -199,7 +215,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
path_kill_slashes (where);

/* We're only looking for /var here */
if (strcmp (where, var_path) != 0)
if (strcmp (where, var_dst) != 0)
continue;

found_var_mnt = TRUE;
Expand All @@ -211,59 +227,19 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
if (found_var_mnt)
return TRUE;

/* Prepare to write to the output unit dir; we use the "normal" dir
* that overrides /usr, but not /etc.
*/
glnx_autofd int normal_dir_dfd = -1;
if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error))
return FALSE;

/* Generate our bind mount unit */
const char *stateroot_var_path = glnx_strjoina ("/sysroot/ostree/deploy/", stateroot, "/var");

g_auto (GLnxTmpfile) tmpf = {
0,
};
if (!glnx_open_tmpfile_linkable_at (normal_dir_dfd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error))
return FALSE;
g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE);
gsize bytes_written;
/* This code is inspired by systemd's fstab-generator.c.
*
* Note that our unit doesn't run if systemd.volatile is enabled;
* see https://github.com/ostreedev/ostree/pull/856
*
* To avoid having submounts of /var propagate into $stateroot/var, the mount
* is made with slave+shared propagation. This means that /var will receive
* mount events from the parent /sysroot mount, but not vice versa. Adding a
* shared peer group below the slave group means that submounts of /var will
* inherit normal shared propagation. See mount_namespaces(7), Linux
* Documentation/filesystems/sharedsubtree.txt and
* https://github.com/ostreedev/ostree/issues/2086. This also happens in
* ostree-prepare-root.c for the INITRAMFS_MOUNT_VAR case.
*/
if (!g_output_stream_printf (outstream, &bytes_written, cancellable, error,
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
"[Unit]\n"
"Documentation=man:ostree(1)\n"
"ConditionKernelCommandLine=!systemd.volatile\n"
"Before=local-fs.target\n"
"\n"
"[Mount]\n"
"Where=%s\n"
"What=%s\n"
"Options=bind,slave,shared\n",
var_path, stateroot_var_path))
return FALSE;
if (!g_output_stream_flush (outstream, cancellable, error))
return FALSE;
g_clear_object (&outstream);
/* It should be readable */
if (!glnx_fchmod (tmpf.fd, 0644, error))
return FALSE;
/* Error out if somehow it already exists, that'll help us debug conflicts */
if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, normal_dir_dfd, "var.mount",
error))
if (!write_unit_file (normal_dir_dfd, "var.mount", cancellable, error,
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
"[Unit]\n"
"Documentation=man:ostree(1)\n"
"ConditionKernelCommandLine=!systemd.volatile\n"
"Before=local-fs.target\n"
"\n"
"[Mount]\n"
"Where=%s\n"
"What=%s\n"
"Options=bind\n",
var_dst, var_src))
return FALSE;

/* And ensure it's required; newer systemd will auto-inject fs dependencies
Expand Down
42 changes: 20 additions & 22 deletions src/switchroot/ostree-prepare-root.c
Original file line number Diff line number Diff line change
Expand Up @@ -389,19 +389,16 @@ main (int argc, char *argv[])

g_autofree char *expected_digest = NULL;

// For now we just stick the transient root on the default /run tmpfs;
// however, see
// https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36
// which bumps up these defaults for the rootfs a bit.
g_autofree char *root_upperdir
= root_transient ? g_build_filename (OTCORE_RUN_OSTREE_PRIVATE, "root/upper", NULL)
: NULL;
g_autofree char *root_workdir
= root_transient ? g_build_filename (OTCORE_RUN_OSTREE_PRIVATE, "root/work", NULL) : NULL;

// Propagate these options for transient root, if provided
if (root_transient)
{
// For now we just stick the transient root on the default /run tmpfs;
// however, see
// https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36
// which bumps up these defaults for the rootfs a bit.
const char *root_upperdir = OTCORE_RUN_OSTREE_PRIVATE "/root/upper";
const char *root_workdir = OTCORE_RUN_OSTREE_PRIVATE "/root/work";

if (!glnx_shutil_mkdir_p_at (AT_FDCWD, root_upperdir, 0755, NULL, &error))
errx (EXIT_FAILURE, "Failed to create %s: %s", root_upperdir, error->message);
if (!glnx_shutil_mkdir_p_at (AT_FDCWD, root_workdir, 0700, NULL, &error))
Expand Down Expand Up @@ -611,6 +608,8 @@ main (int argc, char *argv[])
err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr");
}

const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var";

/* Prepare /var.
* When a read-only sysroot is configured, this adds a dedicated bind-mount (to itself)
* so that the stateroot location stays writable. */
Expand All @@ -623,6 +622,12 @@ main (int argc, char *argv[])
err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", srcpath);
}

/* Bind-mount var to var_dir */
if (mount ("../../var", var_dir, NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to bind mount %s", var_dir);
if (mount (NULL, var_dir, NULL, MS_SLAVE | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to change %s to slave mount", var_dir);

/* When running under systemd, /var will be handled by a 'var.mount' unit outside
* of initramfs.
* Systemd auto-detection can be overridden by a marker file under /run. */
Expand All @@ -640,18 +645,11 @@ main (int argc, char *argv[])
*/
if (mount_var)
{
if (mount ("../../var", TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to bind mount ../../var to var");

/* To avoid having submounts of /var propagate into $stateroot/var, the
* mount is made with slave+shared propagation. See the comment in
* ostree-impl-system-generator.c when /var isn't mounted in the
* initramfs for further explanation.
*/
if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SLAVE | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to change /var to slave mount");
if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SHARED | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to change /var to slave+shared mount");
if (mount (var_dir, TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to bind mount %s to /var", var_dir);

if (umount2 (var_dir, MNT_DETACH) < 0)
err (EXIT_FAILURE, "failed to umount %s", var_dir);
}

/* This can be used by other things to signal ostree is in use */
Expand Down
Loading