diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000000..8dd405bb1193 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,44 @@ +name: CI + +on: [push] + +jobs: + build-native-deb: + runs-on: ubuntu-22.04 + container: + image: debian:testing + steps: + - name: Installing Dependencies + run: | + apt update > /dev/null 2>&1 + apt install -y linux-image-amd64 linux-headers-amd64 debhelper-compat devscripts > /dev/null 2>&1 + + - name: Checkout + uses: actions/checkout@v2 + + - name: Build deb package + run: | + mk-build-deps --build-dep contrib/debian/control + apt install -y ./*.deb + sh autogen.sh + ./configure + cp -a contrib/debian debian + sed 's/@CFGOPTS@/--enable-debuginfo/g' debian/rules.in > debian/rules + chmod +x debian/rules + dch -b -M --force-distribution --distribution bullseye-truenas-unstable 'Tagged from zfs CI' + debuild -us -uc -b + debian/rules override_dh_binary-modules + + - name: Create artifacts dir + run: mkdir artifacts + if: success() + + - name: Move artifacts + run: mv ../*.deb artifacts + if: success() + + - uses: actions/upload-artifact@v1 + with: + name: zfs-native + path: artifacts + if: success() diff --git a/.github/workflows/docker_image.yml b/.github/workflows/docker_image.yml new file mode 100644 index 000000000000..d9d68d69a85c --- /dev/null +++ b/.github/workflows/docker_image.yml @@ -0,0 +1,28 @@ +name: build_image + +on: + push: + branches: + - 'truenas/zfs-2.3-release' + +jobs: + docker: + runs-on: ubuntu-latest + steps: + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Build and push + id: docker_build + uses: docker/build-push-action@v2 + with: + push: true + tags: ixsystems/zfs:latest + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/.github/workflows/scripts/generate-summary.sh b/.github/workflows/scripts/generate-summary.sh index b5d89208a5d8..5f19aa4081fc 100755 --- a/.github/workflows/scripts/generate-summary.sh +++ b/.github/workflows/scripts/generate-summary.sh @@ -103,9 +103,7 @@ ERRLOGS=0 if [ ! -f Summary/Summary.md ]; then # first call, we do the default summary (~500k) echo -n > Summary.md - summarize_s "Sanity Tests Ubuntu 20.04" Logs-20.04-sanity summarize_s "Sanity Tests Ubuntu 22.04" Logs-22.04-sanity - summarize_f "Functional Tests Ubuntu 20.04" Logs-20.04-functional summarize_f "Functional Tests Ubuntu 22.04" Logs-22.04-functional cat Summary.md >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/zfs-linux.yml b/.github/workflows/zfs-linux.yml index e6b705c86055..13111f6d9847 100644 --- a/.github/workflows/zfs-linux.yml +++ b/.github/workflows/zfs-linux.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - os: [20.04, 22.04] + os: [22.04] runs-on: ubuntu-${{ matrix.os }} steps: - uses: actions/checkout@v4 @@ -32,7 +32,7 @@ jobs: strategy: fail-fast: false matrix: - os: [20.04, 22.04] + os: [22.04] needs: build uses: ./.github/workflows/zfs-linux-tests.yml with: diff --git a/.gitignore b/.gitignore index a2cb92dd5406..b40208702029 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,7 @@ !configure.ac !copy-builtin !COPYRIGHT +!Dockerfile !LICENSE !Makefile.am !META @@ -84,6 +85,8 @@ modules.order Makefile Makefile.in changelog +nfs41acl.h +nfs41acl_xdr.c *.patch *.orig *.tmp diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000000..4ff1abdd8df1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +FROM debian:bookworm-slim + +ENV DEBIAN_FRONTEND noninteractive + +RUN apt-get update + +RUN apt-get install -y \ + debhelper-compat \ + devscripts \ + linux-headers-amd64 + +ENV WORK_DIR /zfs_app/zfs +WORKDIR ${WORK_DIR} + +ADD . ${WORK_DIR}/ + +RUN mk-build-deps --build-dep contrib/debian/control +RUN apt install -y ./*.deb +RUN sh autogen.sh +RUN ./configure +RUN cp -a contrib/debian debian +RUN sed 's/@CFGOPTS@/--enable-debuginfo/g' debian/rules.in > debian/rules +RUN chmod +x debian/rules +RUN dch -b -M --force-distribution --distribution bullseye-truenas-unstable "Tagged from ixsystems/zfs CI" +RUN debuild -us -uc -b +RUN rm ../openzfs-zfs-dracut_*.deb +RUN rm ../openzfs-zfs-initramfs_*.deb +RUN apt-get install -y ../*.deb diff --git a/Makefile.am b/Makefile.am index 11e45dae8255..f49766a45acd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -119,9 +119,10 @@ cstyle_line = -exec ${top_srcdir}/scripts/cstyle.pl -cpP {} + endif CHECKS += cstyle cstyle: - $(AM_V_at)find $(top_srcdir) -name build -prune \ + $(AM_V_at)find $(top_srcdir) -name build -prune -o -name zfsd -prune \ -o -type f -name '*.[hc]' \ ! -name 'zfs_config.*' ! -name '*.mod.c' \ + ! -name 'nfs41acl_xdr.c' ! -name 'nfs41acl.h' \ ! -name 'opt_global.h' ! -name '*_if*.h' \ ! -name 'zstd_compat_wrapper.h' \ ! -path './module/zstd/lib/*' \ diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 96040976e53e..08536bef9da2 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -64,6 +64,9 @@ include $(srcdir)/%D%/zpool/Makefile.am include $(srcdir)/%D%/zpool_influxdb/Makefile.am include $(srcdir)/%D%/zstream/Makefile.am +if BUILD_FREEBSD +include $(srcdir)/%D%/zfsd/Makefile.am +endif if BUILD_LINUX mounthelper_PROGRAMS += mount.zfs diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index d0372608c723..4b040056b161 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -238,8 +238,9 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH); (void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, &enc_sysfs_path); - +#ifndef TRUENAS_SCALE_NEVER_WHOLEDISK (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); +#endif (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline); (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted); @@ -1040,7 +1041,9 @@ vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path) if (!nvl) return (0); +#ifndef TRUENAS_SCALE_NEVER_WHOLEDISK (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); +#endif return (wholedisk); } @@ -1088,10 +1091,10 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data) zpool_close(zhp); return (0); } - +#ifndef TRUENAS_SCALE_NEVER_WHOLEDISK (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); - +#endif if (wholedisk) { char *tmp; path = strrchr(path, '/'); diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index af56147a969b..bcf774e04d7d 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -14,7 +14,7 @@ # Email will only be sent if ZED_EMAIL_ADDR is defined. # Enabled by default; comment to disable. # -ZED_EMAIL_ADDR="root" +#ZED_EMAIL_ADDR="root" ## # Name or path of executable responsible for sending notifications via email; diff --git a/cmd/zfsd/.gitignore b/cmd/zfsd/.gitignore new file mode 100644 index 000000000000..5e3efca4d908 --- /dev/null +++ b/cmd/zfsd/.gitignore @@ -0,0 +1 @@ +/zfsd diff --git a/cmd/zfsd/Makefile.am b/cmd/zfsd/Makefile.am new file mode 100644 index 000000000000..0371d389bee3 --- /dev/null +++ b/cmd/zfsd/Makefile.am @@ -0,0 +1,25 @@ +zfsd_CFLAGS = $(AM_CFLAGS) +zfsd_CXXFLAGS = $(AM_CXXFLAGS) +zfsd_CPPFLAGS = $(AM_CPPFLAGS) + +sbin_PROGRAMS += zfsd + +zfsd_SOURCES = \ + %D%/callout.cc \ + %D%/case_file.cc \ + %D%/vdev.cc \ + %D%/vdev_iterator.cc \ + %D%/zfsd.cc \ + %D%/zfsd_event.cc \ + %D%/zfsd_exception.cc \ + %D%/zfsd_main.cc \ + %D%/zpool_list.cc + +zfsd_LDADD = \ + libnvpair.la \ + libuutil.la \ + libzfs_core.la \ + libzfs.la + +zfsd_LDADD += -lrt -lprivatedevdctl -lgeom -lbsdxml -lsbuf +zfsd_LDFLAGS = -pthread diff --git a/cmd/zfsd/callout.cc b/cmd/zfsd/callout.cc new file mode 100644 index 000000000000..3e5cd5779559 --- /dev/null +++ b/cmd/zfsd/callout.cc @@ -0,0 +1,220 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file callout.cc + * + * \brief Implementation of the Callout class - multi-client + * timer services built on top of the POSIX interval timer. + */ + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" + +std::list Callout::s_activeCallouts; +bool Callout::s_alarmFired(false); + +void +Callout::Init() +{ + signal(SIGALRM, Callout::AlarmSignalHandler); +} + +bool +Callout::Stop() +{ + if (!IsPending()) + return (false); + + for (std::list::iterator it(s_activeCallouts.begin()); + it != s_activeCallouts.end(); it++) { + if (*it != this) + continue; + + it = s_activeCallouts.erase(it); + if (it != s_activeCallouts.end()) { + + /* + * Maintain correct interval for the + * callouts that follow the just removed + * entry. + */ + timeradd(&(*it)->m_interval, &m_interval, + &(*it)->m_interval); + } + break; + } + m_pending = false; + return (true); +} + +bool +Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg) +{ + bool cancelled(false); + + if (!timerisset(&interval)) + throw ZfsdException("Callout::Reset: interval of 0"); + + cancelled = Stop(); + + m_interval = interval; + m_func = func; + m_arg = arg; + m_pending = true; + + std::list::iterator it(s_activeCallouts.begin()); + for (; it != s_activeCallouts.end(); it++) { + + if (timercmp(&(*it)->m_interval, &m_interval, <=)) { + /* + * Decrease our interval by those that come + * before us. + */ + timersub(&m_interval, &(*it)->m_interval, &m_interval); + } else { + /* + * Account for the time between the newly + * inserted event and those that follow. + */ + timersub(&(*it)->m_interval, &m_interval, + &(*it)->m_interval); + break; + } + } + s_activeCallouts.insert(it, this); + + + if (s_activeCallouts.front() == this) { + itimerval timerval = { {0, 0}, m_interval }; + + setitimer(ITIMER_REAL, &timerval, NULL); + } + + return (cancelled); +} + +void +Callout::AlarmSignalHandler(int) +{ + s_alarmFired = true; + ZfsDaemon::WakeEventLoop(); +} + +void +Callout::ExpireCallouts() +{ + if (!s_alarmFired) + return; + + s_alarmFired = false; + if (s_activeCallouts.empty()) { + /* Callout removal/SIGALRM race was lost. */ + return; + } + + /* + * Expire the first callout (the one we used to set the + * interval timer) as well as any callouts following that + * expire at the same time (have a zero interval from + * the callout before it). + */ + do { + Callout *cur(s_activeCallouts.front()); + s_activeCallouts.pop_front(); + cur->m_pending = false; + cur->m_func(cur->m_arg); + } while (!s_activeCallouts.empty() + && timerisset(&s_activeCallouts.front()->m_interval) == 0); + + if (!s_activeCallouts.empty()) { + Callout *next(s_activeCallouts.front()); + itimerval timerval = { { 0, 0 }, next->m_interval }; + + setitimer(ITIMER_REAL, &timerval, NULL); + } +} + +timeval +Callout::TimeRemaining() const +{ + /* + * Outline: Add the m_interval for each callout in s_activeCallouts + * ahead of this, except for the first callout. Add to that the result + * of getitimer (That's because the first callout stores its original + * interval setting while the timer is ticking). + */ + itimerval timervalToAlarm; + timeval timeToExpiry; + std::list::iterator it; + + if (!IsPending()) { + timeToExpiry.tv_sec = INT_MAX; + timeToExpiry.tv_usec = 999999; /*maximum normalized value*/ + return (timeToExpiry); + } + + timerclear(&timeToExpiry); + getitimer(ITIMER_REAL, &timervalToAlarm); + timeval& timeToAlarm = timervalToAlarm.it_value; + timeradd(&timeToExpiry, &timeToAlarm, &timeToExpiry); + + it =s_activeCallouts.begin(); + it++; /*skip the first callout in the list*/ + for (; it != s_activeCallouts.end(); it++) { + timeradd(&timeToExpiry, &(*it)->m_interval, &timeToExpiry); + if ((*it) == this) + break; + } + return (timeToExpiry); +} diff --git a/cmd/zfsd/callout.h b/cmd/zfsd/callout.h new file mode 100644 index 000000000000..d6b83bcfe5f5 --- /dev/null +++ b/cmd/zfsd/callout.h @@ -0,0 +1,185 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file callout.h + * + * \brief Interface for timer based callback services. + * + * Header requirements: + * + * #include + * + * #include + */ + +#ifndef _CALLOUT_H_ +#define _CALLOUT_H_ + +/** + * \brief Type of the function callback from a Callout. + */ +typedef void CalloutFunc_t(void *); + +/** + * \brief Interface to a schedulable one-shot timer with the granularity + * of the system clock (see setitimer(2)). + * + * Determination of callback expiration is triggered by the SIGALRM + * signal. Callout callbacks are always delivered from Zfsd's event + * processing loop. + * + * Periodic actions can be triggered via the Callout mechanisms by + * resetting the Callout from within its callback. + */ +class Callout +{ +public: + + /** + * Initialize the Callout subsystem. + */ + static void Init(); + + /** + * Function called (via SIGALRM) when our interval + * timer expires. + */ + static void AlarmSignalHandler(int); + + /** + * Execute callbacks for all callouts that have the same + * expiration time as the first callout in the list. + */ + static void ExpireCallouts(); + + /** Constructor. */ + Callout(); + + /** + * Returns true if callout has not been stopped, + * or deactivated since the last time the callout was + * reset. + */ + bool IsActive() const; + + /** + * Returns true if callout is still waiting to expire. + */ + bool IsPending() const; + + /** + * Disestablish a callout. + */ + bool Stop(); + + /** + * \brief Establish or change a timeout. + * + * \param interval Timeval indicating the time which must elapse + * before this callout fires. + * \param func Pointer to the callback function + * \param arg Argument pointer to pass to callback function + * + * \return Cancellation status. + * true: The previous callback was pending and therefore + * was cancelled. + * false: The callout was not pending at the time of this + * reset request. + * In all cases, a new callout is established. + */ + bool Reset(const timeval &interval, CalloutFunc_t *func, void *arg); + + /** + * \brief Calculate the remaining time until this Callout's timer + * expires. + * + * The return value will be slightly greater than the actual time to + * expiry. + * + * If the callout is not pending, returns INT_MAX. + */ + timeval TimeRemaining() const; + +private: + /** + * All active callouts sorted by expiration time. The callout + * with the nearest expiration time is at the head of the list. + */ + static std::list s_activeCallouts; + + /** + * The interval timer has expired. This variable is set from + * signal handler context and tested from Zfsd::EventLoop() + * context via ExpireCallouts(). + */ + static bool s_alarmFired; + + /** + * Time, relative to others in the active list, until + * this callout is fired. + */ + timeval m_interval; + + /** Callback function argument. */ + void *m_arg; + + /** + * The callback function associated with this timer + * entry. + */ + CalloutFunc_t *m_func; + + /** State of this callout. */ + bool m_pending; +}; + +//- Callout public const methods ---------------------------------------------- +inline bool +Callout::IsPending() const +{ + return (m_pending); +} + +//- Callout public methods ---------------------------------------------------- +inline +Callout::Callout() + : m_arg(0), + m_func(NULL), + m_pending(false) +{ + timerclear(&m_interval); +} + +#endif /* CALLOUT_H_ */ diff --git a/cmd/zfsd/case_file.cc b/cmd/zfsd/case_file.cc new file mode 100644 index 000000000000..5064d3c266dd --- /dev/null +++ b/cmd/zfsd/case_file.cc @@ -0,0 +1,1195 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file case_file.cc + * + * We keep case files for any leaf vdev that is not in the optimal state. + * However, we only serialize to disk those events that need to be preserved + * across reboots. For now, this is just a log of soft errors which we + * accumulate in order to mark a device as degraded. + */ +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd_event.h" +#include "case_file.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); + +/*============================ Namespace Control =============================*/ +using std::hex; +using std::ifstream; +using std::stringstream; +using std::setfill; +using std::setw; + +using DevdCtl::Event; +using DevdCtl::EventFactory; +using DevdCtl::EventList; +using DevdCtl::Guid; +using DevdCtl::ParseException; + +/*--------------------------------- CaseFile ---------------------------------*/ +//- CaseFile Static Data ------------------------------------------------------- + +CaseFileList CaseFile::s_activeCases; +const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; +const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; + +//- CaseFile Static Public Methods --------------------------------------------- +CaseFile * +CaseFile::Find(Guid poolGUID, Guid vdevGUID) +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + + if (((*curCase)->PoolGUID() != poolGUID + && Guid::InvalidGuid() != poolGUID) + || (*curCase)->VdevGUID() != vdevGUID) + continue; + + /* + * We only carry one active case per-vdev. + */ + return (*curCase); + } + return (NULL); +} + +void +CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases) +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + if (((*curCase)->PoolGUID() != poolGUID && + Guid::InvalidGuid() != poolGUID) || + (*curCase)->VdevGUID() != vdevGUID) + continue; + + /* + * We can have multiple cases for spare vdevs + */ + cases.push_back(*curCase); + if (!(*curCase)->IsSpare()) { + return; + } + } +} + +CaseFile * +CaseFile::Find(const string &physPath) +{ + CaseFile *result = NULL; + + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + + if ((*curCase)->PhysicalPath() != physPath) + continue; + + if (result != NULL) { + syslog(LOG_WARNING, "Multiple casefiles found for " + "physical path %s. " + "This is most likely a bug in zfsd", + physPath.c_str()); + } + result = *curCase; + } + return (result); +} + + +void +CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) +{ + CaseFileList::iterator casefile; + for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ + CaseFileList::iterator next = casefile; + next++; + if (poolGUID == (*casefile)->PoolGUID()) + (*casefile)->ReEvaluate(event); + casefile = next; + } +} + +CaseFile & +CaseFile::Create(Vdev &vdev) +{ + CaseFile *activeCase; + + activeCase = Find(vdev.PoolGUID(), vdev.GUID()); + if (activeCase == NULL) + activeCase = new CaseFile(vdev); + + return (*activeCase); +} + +void +CaseFile::DeSerialize() +{ + struct dirent **caseFiles; + + int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, + DeSerializeSelector, /*compar*/NULL)); + + if (numCaseFiles == -1) + return; + if (numCaseFiles == 0) { + free(caseFiles); + return; + } + + for (int i = 0; i < numCaseFiles; i++) { + + DeSerializeFile(caseFiles[i]->d_name); + free(caseFiles[i]); + } + free(caseFiles); +} + +bool +CaseFile::Empty() +{ + return (s_activeCases.empty()); +} + +void +CaseFile::LogAll() +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) + (*curCase)->Log(); +} + +void +CaseFile::PurgeAll() +{ + /* + * Serialize casefiles before deleting them so that they can be reread + * and revalidated during BuildCaseFiles. + * CaseFiles remove themselves from this list on destruction. + */ + while (s_activeCases.size() != 0) { + CaseFile *casefile = s_activeCases.front(); + casefile->Serialize(); + delete casefile; + } + +} + +int +CaseFile::IsSpare() +{ + return (m_is_spare); +} + +//- CaseFile Public Methods ---------------------------------------------------- +bool +CaseFile::RefreshVdevState() +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); + if (casePool == NULL) + return (false); + + Vdev vd(casePool, CaseVdev(casePool)); + if (vd.DoesNotExist()) + return (false); + + m_vdevState = vd.State(); + m_vdevPhysPath = vd.PhysicalPath(); + return (true); +} + +bool +CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); + int flags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE; + + if (pool == NULL || !RefreshVdevState()) { + /* + * The pool or vdev for this case file is no longer + * part of the configuration. This can happen + * if we process a device arrival notification + * before seeing the ZFS configuration change + * event. + */ + syslog(LOG_INFO, + "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " + "Closing\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + + /* + * Since this event was not used to close this + * case, do not report it as consumed. + */ + return (/*consumed*/false); + } + + if (VdevState() > VDEV_STATE_CANT_OPEN) { + /* + * For now, newly discovered devices only help for + * devices that are missing. In the future, we might + * use a newly inserted spare to replace a degraded + * or faulted device. + */ + syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", + PoolGUIDString().c_str(), VdevGUIDString().c_str()); + return (/*consumed*/false); + } + + if (vdev != NULL + && ( vdev->PoolGUID() == m_poolGUID + || vdev->PoolGUID() == Guid::InvalidGuid()) + && vdev->GUID() == m_vdevGUID) { + + if (IsSpare()) + flags |= ZFS_ONLINE_SPARE; + if (zpool_vdev_online(pool, vdev->GUIDString().c_str(), + flags, &m_vdevState) != 0) { + syslog(LOG_ERR, + "Failed to online vdev(%s/%s:%s): %s: %s\n", + zpool_get_name(pool), vdev->GUIDString().c_str(), + devPath.c_str(), libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + return (/*consumed*/false); + } + + syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", + zpool_get_name(pool), vdev->GUIDString().c_str(), + devPath.c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + + /* + * Check the vdev state post the online action to see + * if we can retire this case. + */ + CloseIfSolved(); + + return (/*consumed*/true); + } + + /* + * If the auto-replace policy is enabled, and we have physical + * path information, try a physical path replacement. + */ + if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): AutoReplace not set. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + if (PhysicalPath().empty()) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): No physical path information. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + if (physPath != PhysicalPath()) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): Physical path mismatch. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + /* Write a label on the newly inserted disk. */ + if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { + syslog(LOG_ERR, + "Replace vdev(%s/%s) by physical path (label): %s: %s\n", + zpool_get_name(pool), VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + return (/*consumed*/false); + } + + syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", + PoolGUIDString().c_str(), VdevGUIDString().c_str(), + devPath.c_str()); + return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); +} + +bool +CaseFile::ReEvaluate(const ZfsEvent &event) +{ + bool consumed(false); + + if (event.Value("type") == "misc.fs.zfs.vdev_remove") { + /* + * The Vdev we represent has been removed from the + * configuration. This case is no longer of value. + */ + Close(); + + return (/*consumed*/true); + } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { + /* This Pool has been destroyed. Discard the case */ + Close(); + + return (/*consumed*/true); + } else if (event.Value("type") == "misc.fs.zfs.config_sync") { + RefreshVdevState(); + if (VdevState() < VDEV_STATE_HEALTHY) + consumed = ActivateSpare(); + } + + + if (event.Value("class") == "resource.fs.zfs.removed") { + bool spare_activated; + + if (!RefreshVdevState()) { + /* + * The pool or vdev for this case file is no longer + * part of the configuration. This can happen + * if we process a device arrival notification + * before seeing the ZFS configuration change + * event. + */ + syslog(LOG_INFO, + "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " + "unconfigured. Closing\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + /* + * Close the case now so we won't waste cycles in the + * system rescan + */ + Close(); + + /* + * Since this event was not used to close this + * case, do not report it as consumed. + */ + return (/*consumed*/false); + } + + /* + * Discard any tentative I/O error events for + * this case. They were most likely caused by the + * hot-unplug of this device. + */ + PurgeTentativeEvents(); + + /* Try to activate spares if they are available */ + spare_activated = ActivateSpare(); + + /* + * Rescan the drives in the system to see if a recent + * drive arrival can be used to solve this case. + */ + ZfsDaemon::RequestSystemRescan(); + + /* + * Consume the event if we successfully activated a spare. + * Otherwise, leave it in the unconsumed events list so that the + * future addition of a spare to this pool might be able to + * close the case + */ + consumed = spare_activated; + } else if (event.Value("class") == "resource.fs.zfs.statechange") { + RefreshVdevState(); + /* + * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to + * activate a hotspare. Otherwise, ignore the event + */ + if (VdevState() == VDEV_STATE_FAULTED || + VdevState() == VDEV_STATE_DEGRADED || + VdevState() == VDEV_STATE_CANT_OPEN) + (void) ActivateSpare(); + consumed = true; + } + else if (event.Value("class") == "ereport.fs.zfs.io" || + event.Value("class") == "ereport.fs.zfs.checksum") { + + m_tentativeEvents.push_front(event.DeepCopy()); + RegisterCallout(event); + consumed = true; + } + + bool closed(CloseIfSolved()); + + return (consumed || closed); +} + +/* Find a Vdev containing the vdev with the given GUID */ +static nvlist_t* +find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid) +{ + nvlist_t **vdevChildren; + int error; + unsigned ch, numChildren; + + error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, + &vdevChildren, &numChildren); + + if (error != 0 || numChildren == 0) + return (NULL); + + for (ch = 0; ch < numChildren; ch++) { + nvlist *result; + Vdev vdev(pool_config, vdevChildren[ch]); + + if (vdev.GUID() == child_guid) + return (config); + + result = find_parent(pool_config, vdevChildren[ch], child_guid); + if (result != NULL) + return (result); + } + + return (NULL); +} + +bool +CaseFile::ActivateSpare() { + nvlist_t *config, *nvroot, *parent_config; + nvlist_t **spares; + const char *devPath, *poolname, *vdev_type; + u_int nspares, i; + int error; + + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); + if (zhp == NULL) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " + "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID); + return (false); + } + poolname = zpool_get_name(zhp); + config = zpool_get_config(zhp, NULL); + if (config == NULL) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " + "config for pool %s", poolname); + return (false); + } + error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); + if (error != 0){ + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " + "tree for pool %s", poolname); + return (false); + } + + parent_config = find_parent(config, nvroot, m_vdevGUID); + if (parent_config != NULL) { + const char *parent_type; + + /* + * Don't activate spares for members of a "replacing" vdev. + * They're already dealt with. Sparing them will just drag out + * the resilver process. + */ + error = nvlist_lookup_string(parent_config, + ZPOOL_CONFIG_TYPE, &parent_type); + if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0) + return (false); + } + + nspares = 0; + nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, + &nspares); + if (nspares == 0) { + /* The pool has no spares configured */ + syslog(LOG_INFO, "CaseFile::ActivateSpare: " + "No spares available for pool %s", poolname); + return (false); + } + for (i = 0; i < nspares; i++) { + uint64_t *nvlist_array; + vdev_stat_t *vs; + uint_t nstats; + + if (nvlist_lookup_uint64_array(spares[i], + ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " + "find vdev stats for pool %s, spare %d", + poolname, i); + return (false); + } + vs = reinterpret_cast(nvlist_array); + + if ((vs->vs_aux != VDEV_AUX_SPARED) + && (vs->vs_state == VDEV_STATE_HEALTHY)) { + /* We found a usable spare */ + break; + } + } + + if (i == nspares) { + /* No available spares were found */ + return (false); + } + + error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath); + if (error != 0) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " + "the path of pool %s, spare %d. Error %d", + poolname, i, error); + return (false); + } + + error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type); + if (error != 0) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine " + "the vdev type of pool %s, spare %d. Error %d", + poolname, i, error); + return (false); + } + + return (Replace(vdev_type, devPath, /*isspare*/true)); +} + +void +CaseFile::RegisterCallout(const Event &event) +{ + timeval now, countdown, elapsed, timestamp, zero, remaining; + + gettimeofday(&now, 0); + timestamp = event.GetTimestamp(); + timersub(&now, ×tamp, &elapsed); + timersub(&s_removeGracePeriod, &elapsed, &countdown); + /* + * If countdown is <= zero, Reset the timer to the + * smallest positive time value instead + */ + timerclear(&zero); + if (timercmp(&countdown, &zero, <=)) { + timerclear(&countdown); + countdown.tv_usec = 1; + } + + remaining = m_tentativeTimer.TimeRemaining(); + + if (!m_tentativeTimer.IsPending() + || timercmp(&countdown, &remaining, <)) + m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this); +} + + +bool +CaseFile::CloseIfSolved() +{ + if (m_events.empty() + && m_tentativeEvents.empty()) { + + /* + * We currently do not track or take actions on + * devices in the degraded or faulted state. + * Once we have support for spare pools, we'll + * retain these cases so that any spares added in + * the future can be applied to them. + */ + switch (VdevState()) { + case VDEV_STATE_HEALTHY: + /* No need to keep cases for healthy vdevs */ + Close(); + return (true); + case VDEV_STATE_REMOVED: + case VDEV_STATE_CANT_OPEN: + /* + * Keep open. We may solve it with a newly inserted + * device. + */ + case VDEV_STATE_FAULTED: + case VDEV_STATE_DEGRADED: + /* + * Keep open. We may solve it with the future + * addition of a spare to the pool + */ + case VDEV_STATE_UNKNOWN: + case VDEV_STATE_CLOSED: + case VDEV_STATE_OFFLINE: + /* + * Keep open? This may not be the correct behavior, + * but it's what we've always done + */ + ; + } + + /* + * Re-serialize the case in order to remove any + * previous event data. + */ + Serialize(); + } + + return (false); +} + +void +CaseFile::Log() +{ + syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(), + VdevGUIDString().c_str(), PhysicalPath().c_str()); + syslog(LOG_INFO, "\tVdev State = %s\n", + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + if (m_tentativeEvents.size() != 0) { + syslog(LOG_INFO, "\t=== Tentative Events ===\n"); + for (EventList::iterator event(m_tentativeEvents.begin()); + event != m_tentativeEvents.end(); event++) + (*event)->Log(LOG_INFO); + } + if (m_events.size() != 0) { + syslog(LOG_INFO, "\t=== Events ===\n"); + for (EventList::iterator event(m_events.begin()); + event != m_events.end(); event++) + (*event)->Log(LOG_INFO); + } +} + +//- CaseFile Static Protected Methods ------------------------------------------ +void +CaseFile::OnGracePeriodEnded(void *arg) +{ + CaseFile &casefile(*static_cast(arg)); + + casefile.OnGracePeriodEnded(); +} + +int +CaseFile::DeSerializeSelector(const struct dirent *dirEntry) +{ + uint64_t poolGUID; + uint64_t vdevGUID; + + if (dirEntry->d_type == DT_REG + && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", + &poolGUID, &vdevGUID) == 2) + return (1); + return (0); +} + +void +CaseFile::DeSerializeFile(const char *fileName) +{ + string fullName(s_caseFilePath + '/' + fileName); + CaseFile *existingCaseFile(NULL); + CaseFile *caseFile(NULL); + + try { + uint64_t poolGUID; + uint64_t vdevGUID; + nvlist_t *vdevConf; + + if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", + &poolGUID, &vdevGUID) != 2) { + throw ZfsdException("CaseFile::DeSerialize: " + "Unintelligible CaseFile filename %s.\n", fileName); + } + existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID)); + if (existingCaseFile != NULL) { + /* + * If the vdev is already degraded or faulted, + * there's no point in keeping the state around + * that we use to put a drive into the degraded + * state. However, if the vdev is simply missing, + * preserve the case data in the hopes that it will + * return. + */ + caseFile = existingCaseFile; + vdev_state curState(caseFile->VdevState()); + if (curState > VDEV_STATE_CANT_OPEN + && curState < VDEV_STATE_HEALTHY) { + unlink(fileName); + return; + } + } else { + ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); + if (zpl.empty() + || (vdevConf = VdevIterator(zpl.front()) + .Find(vdevGUID)) == NULL) { + /* + * Either the pool no longer exists + * or this vdev is no longer a member of + * the pool. + */ + unlink(fullName.c_str()); + return; + } + + /* + * Any vdev we find that does not have a case file + * must be in the healthy state and thus worthy of + * continued SERD data tracking. + */ + caseFile = new CaseFile(Vdev(zpl.front(), vdevConf)); + } + + ifstream caseStream(fullName.c_str()); + if (!caseStream) + throw ZfsdException("CaseFile::DeSerialize: Unable to " + "read %s.\n", fileName); + + caseFile->DeSerialize(caseStream); + } catch (const ParseException &exp) { + + exp.Log(); + if (caseFile != existingCaseFile) + delete caseFile; + + /* + * Since we can't parse the file, unlink it so we don't + * trip over it again. + */ + unlink(fileName); + } catch (const ZfsdException &zfsException) { + + zfsException.Log(); + if (caseFile != existingCaseFile) + delete caseFile; + } +} + +//- CaseFile Protected Methods ------------------------------------------------- +CaseFile::CaseFile(const Vdev &vdev) + : m_poolGUID(vdev.PoolGUID()), + m_vdevGUID(vdev.GUID()), + m_vdevState(vdev.State()), + m_vdevPhysPath(vdev.PhysicalPath()), + m_is_spare(vdev.IsSpare()) +{ + stringstream guidString; + + guidString << m_vdevGUID; + m_vdevGUIDString = guidString.str(); + guidString.str(""); + guidString << m_poolGUID; + m_poolGUIDString = guidString.str(); + + s_activeCases.push_back(this); + + syslog(LOG_INFO, "Creating new CaseFile:\n"); + Log(); +} + +CaseFile::~CaseFile() +{ + PurgeEvents(); + PurgeTentativeEvents(); + m_tentativeTimer.Stop(); + s_activeCases.remove(this); +} + +void +CaseFile::PurgeEvents() +{ + for (EventList::iterator event(m_events.begin()); + event != m_events.end(); event++) + delete *event; + + m_events.clear(); +} + +void +CaseFile::PurgeTentativeEvents() +{ + for (EventList::iterator event(m_tentativeEvents.begin()); + event != m_tentativeEvents.end(); event++) + delete *event; + + m_tentativeEvents.clear(); +} + +void +CaseFile::SerializeEvList(const EventList events, int fd, + const char* prefix) const +{ + if (events.empty()) + return; + for (EventList::const_iterator curEvent = events.begin(); + curEvent != events.end(); curEvent++) { + const string &eventString((*curEvent)->GetEventString()); + + // TODO: replace many write(2) calls with a single writev(2) + if (prefix) + write(fd, prefix, strlen(prefix)); + write(fd, eventString.c_str(), eventString.length()); + } +} + +void +CaseFile::Serialize() +{ + stringstream saveFile; + + saveFile << setfill('0') + << s_caseFilePath << "/" + << "pool_" << PoolGUIDString() + << "_vdev_" << VdevGUIDString() + << ".case"; + + if (m_events.empty() && m_tentativeEvents.empty()) { + unlink(saveFile.str().c_str()); + return; + } + + int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644)); + if (fd == -1) { + syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n", + saveFile.str().c_str()); + return; + } + SerializeEvList(m_events, fd); + SerializeEvList(m_tentativeEvents, fd, "tentative "); + close(fd); +} + +/* + * XXX: This method assumes that events may not contain embedded newlines. If + * ever events can contain embedded newlines, then CaseFile must switch + * serialization formats + */ +void +CaseFile::DeSerialize(ifstream &caseStream) +{ + string evString; + const EventFactory &factory(ZfsDaemon::Get().GetFactory()); + + caseStream >> std::noskipws >> std::ws; + while (caseStream.good()) { + /* + * Outline: + * read the beginning of a line and check it for + * "tentative". If found, discard "tentative". + * Create a new event + * continue + */ + EventList* destEvents; + const string tentFlag("tentative "); + string line; + std::stringbuf lineBuf; + + caseStream.get(lineBuf); + caseStream.ignore(); /*discard the newline character*/ + line = lineBuf.str(); + if (line.compare(0, tentFlag.size(), tentFlag) == 0) { + /* Discard "tentative" */ + line.erase(0, tentFlag.size()); + destEvents = &m_tentativeEvents; + } else { + destEvents = &m_events; + } + Event *event(Event::CreateEvent(factory, line)); + if (event != NULL) { + destEvents->push_back(event); + RegisterCallout(*event); + } + } +} + +void +CaseFile::Close() +{ + /* + * This case is no longer relevant. Clean up our + * serialization file, and delete the case. + */ + syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n", + PoolGUIDString().c_str(), VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + + /* + * Serialization of a Case with no event data, clears the + * Serialization data for that event. + */ + PurgeEvents(); + Serialize(); + + delete this; +} + +void +CaseFile::OnGracePeriodEnded() +{ + bool should_fault, should_degrade; + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); + + m_events.splice(m_events.begin(), m_tentativeEvents); + should_fault = ShouldFault(); + should_degrade = ShouldDegrade(); + + if (should_fault || should_degrade) { + if (zhp == NULL + || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) { + /* + * Either the pool no longer exists + * or this vdev is no longer a member of + * the pool. + */ + Close(); + return; + } + + } + + /* A fault condition has priority over a degrade condition */ + if (ShouldFault()) { + /* Fault the vdev and close the case. */ + if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID, + VDEV_AUX_ERR_EXCEEDED) == 0) { + syslog(LOG_INFO, "Faulting vdev(%s/%s)", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + return; + } + else { + syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + } + } + else if (ShouldDegrade()) { + /* Degrade the vdev and close the case. */ + if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID, + VDEV_AUX_ERR_EXCEEDED) == 0) { + syslog(LOG_INFO, "Degrading vdev(%s/%s)", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + return; + } + else { + syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + } + } + Serialize(); +} + +Vdev +CaseFile::BeingReplacedBy(zpool_handle_t *zhp) { + Vdev vd(zhp, CaseVdev(zhp)); + std::list children; + std::list::iterator children_it; + + Vdev parent(vd.Parent()); + Vdev replacing(NonexistentVdev); + + /* + * To determine whether we are being replaced by another spare that + * is still working, then make sure that it is currently spared and + * that the spare is either resilvering or healthy. If any of these + * conditions fail, then we are not being replaced by a spare. + * + * If the spare is healthy, then the case file should be closed very + * soon after this check. + */ + if (parent.DoesNotExist() + || parent.Name(zhp, /*verbose*/false) != "spare") + return (NonexistentVdev); + + children = parent.Children(); + children_it = children.begin(); + for (;children_it != children.end(); children_it++) { + Vdev child = *children_it; + + /* Skip our vdev. */ + if (child.GUID() == VdevGUID()) + continue; + /* + * Accept the first child that doesn't match our GUID, or + * any resilvering/healthy device if one exists. + */ + if (replacing.DoesNotExist() || child.IsResilvering() + || child.State() == VDEV_STATE_HEALTHY) + replacing = child; + } + + return (replacing); +} + +bool +CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) { + nvlist_t *nvroot, *newvd; + const char *poolname; + string oldstr(VdevGUIDString()); + bool retval = true; + + /* Figure out what pool we're working on */ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); + if (zhp == NULL) { + syslog(LOG_ERR, "CaseFile::Replace: could not find pool for " + "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID); + return (false); + } + poolname = zpool_get_name(zhp); + Vdev vd(zhp, CaseVdev(zhp)); + Vdev replaced(BeingReplacedBy(zhp)); + + if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) { + /* If we are already being replaced by a working spare, pass. */ + if (replaced.IsResilvering() + || replaced.State() == VDEV_STATE_HEALTHY) { + syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already " + "replaced", VdevGUIDString().c_str(), path); + return (/*consumed*/false); + } + /* + * If we have already been replaced by a spare, but that spare + * is broken, we must spare the spare, not the original device. + */ + oldstr = replaced.GUIDString(); + syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing " + "broken spare %s instead", VdevGUIDString().c_str(), + path, oldstr.c_str()); + } + + /* + * Build a root vdev/leaf vdev configuration suitable for + * zpool_vdev_attach. Only enough data for the kernel to find + * the device (i.e. type and disk device node path) are needed. + */ + nvroot = NULL; + newvd = NULL; + + if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0 + || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) { + syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate " + "configuration data.", poolname, oldstr.c_str()); + if (nvroot != NULL) + nvlist_free(nvroot); + return (false); + } + if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0 + || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 + || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 + || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, + &newvd, 1) != 0) { + syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize " + "configuration data.", poolname, oldstr.c_str()); + nvlist_free(newvd); + nvlist_free(nvroot); + return (true); + } + + /* Data was copied when added to the root vdev. */ + nvlist_free(newvd); + + retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot, + /*replace*/B_TRUE, /*rebuild*/ B_FALSE) == 0); + if (retval) + syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n", + poolname, oldstr.c_str(), path); + else + syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n", + poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + nvlist_free(nvroot); + + return (retval); +} + +/* Does the argument event refer to a checksum error? */ +static bool +IsChecksumEvent(const Event* const event) +{ + return ("ereport.fs.zfs.checksum" == event->Value("type")); +} + +/* Does the argument event refer to an IO error? */ +static bool +IsIOEvent(const Event* const event) +{ + return ("ereport.fs.zfs.io" == event->Value("type")); +} + +bool +CaseFile::ShouldDegrade() const +{ + return (std::count_if(m_events.begin(), m_events.end(), + IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT); +} + +bool +CaseFile::ShouldFault() const +{ + return (std::count_if(m_events.begin(), m_events.end(), + IsIOEvent) > ZFS_DEGRADE_IO_COUNT); +} + +nvlist_t * +CaseFile::CaseVdev(zpool_handle_t *zhp) const +{ + return (VdevIterator(zhp).Find(VdevGUID())); +} diff --git a/cmd/zfsd/case_file.h b/cmd/zfsd/case_file.h new file mode 100644 index 000000000000..d7475d331a76 --- /dev/null +++ b/cmd/zfsd/case_file.h @@ -0,0 +1,452 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file case_file.h + * + * CaseFile objects aggregate vdev faults that may require ZFSD action + * in order to maintain the health of a ZFS pool. + * + * Header requirements: + * + * #include + * + * #include "callout.h" + * #include "zfsd_event.h" + */ +#ifndef _CASE_FILE_H_ +#define _CASE_FILE_H_ + +/*=========================== Forward Declarations ===========================*/ +class CaseFile; +class Vdev; + +/*============================= Class Definitions ============================*/ +/*------------------------------- CaseFileList -------------------------------*/ +/** + * CaseFileList is a specialization of the standard list STL container. + */ +typedef std::list< CaseFile *> CaseFileList; + +/*--------------------------------- CaseFile ---------------------------------*/ +/** + * A CaseFile object is instantiated anytime a vdev for an active pool + * experiences an I/O error, is faulted by ZFS, or is determined to be + * missing/removed. + * + * A vdev may have at most one CaseFile. + * + * CaseFiles are retired when a vdev leaves an active pool configuration + * or an action is taken to resolve the issues recorded in the CaseFile. + * + * Logging a case against a vdev does not imply that an immediate action + * to resolve a fault is required or even desired. For example, a CaseFile + * must accumulate a number of I/O errors in order to flag a device as + * degraded. + * + * Vdev I/O errors are not recorded in ZFS label inforamation. For this + * reasons, CaseFile%%s with accumulated I/O error events are serialized + * to the file system so that they survive across boots. Currently all + * other fault types can be reconstructed from ZFS label information, so + * CaseFile%%s for missing, faulted, or degradded members are just recreated + * at ZFSD startup instead of being deserialized from the file system. + */ +class CaseFile +{ +public: + /** + * \brief Find a CaseFile object by a vdev's pool/vdev GUID tuple. + * + * \param poolGUID Pool GUID for the vdev of the CaseFile to find. + * If InvalidGuid, then only match the vdev GUID + * instead of both pool and vdev GUIDs. + * \param vdevGUID Vdev GUID for the vdev of the CaseFile to find. + * + * \return If found, a pointer to a valid CaseFile object. + * Otherwise NULL. + */ + static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID); + + /** + * \brief Find multiple CaseFile objects by a vdev's pool/vdev + * GUID tuple (special case for spare vdevs) + * + * \param poolGUID Pool GUID for the vdev of the CaseFile to find. + * If InvalidGuid, then only match the vdev GUID + * instead of both pool and vdev GUIDs. + * \param vdevGUID Vdev GUID for the vdev of the CaseFile to find. + * \param caseList List of cases associated with the vdev. + */ + static void Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID, + CaseFileList &caseList); + + /** + * \brief Find a CaseFile object by a vdev's current/last known + * physical path. + * + * \param physPath Physical path of the vdev of the CaseFile to find. + * + * \return If found, a pointer to a valid CaseFile object. + * Otherwise NULL. + */ + static CaseFile *Find(const string &physPath); + + /** + * \brief ReEvaluate all open cases whose pool guid matches the argument + * + * \param poolGUID Only reevaluate cases for this pool + * \param event Try to consume this event with the casefile + */ + static void ReEvaluateByGuid(DevdCtl::Guid poolGUID, + const ZfsEvent &event); + + /** + * \brief Create or return an existing active CaseFile for the + * specified vdev. + * + * \param vdev The vdev object for which to find/create a CaseFile. + * + * \return A reference to a valid CaseFile object. + */ + static CaseFile &Create(Vdev &vdev); + + /** + * \brief Deserialize all serialized CaseFile objects found in + * the file system. + */ + static void DeSerialize(); + + /** + * \brief returns true if there are no CaseFiles + */ + static bool Empty(); + + /** + * \brief Emit syslog data on all active CaseFile%%s in the system. + */ + static void LogAll(); + + /** + * \brief Destroy the in-core cache of CaseFile data. + * + * This routine does not disturb the on disk, serialized, CaseFile + * data. + */ + static void PurgeAll(); + + DevdCtl::Guid PoolGUID() const; + DevdCtl::Guid VdevGUID() const; + vdev_state VdevState() const; + const string &PoolGUIDString() const; + const string &VdevGUIDString() const; + const string &PhysicalPath() const; + + /** + * \brief Attempt to resolve this CaseFile using the disk + * resource at the given device/physical path/vdev object + * tuple. + * + * \param devPath The devfs path for the disk resource. + * \param physPath The physical path information reported by + * the disk resource. + * \param vdev If the disk contains ZFS label information, + * a pointer to the disk label's vdev object + * data. Otherwise NULL. + * + * \return True if this event was consumed by this CaseFile. + */ + bool ReEvaluate(const string &devPath, const string &physPath, + Vdev *vdev); + + /** + * \brief Update this CaseFile in light of the provided ZfsEvent. + * + * Must be virtual so it can be overridden in the unit tests + * + * \param event The ZfsEvent to evaluate. + * + * \return True if this event was consumed by this CaseFile. + */ + virtual bool ReEvaluate(const ZfsEvent &event); + + /** + * \brief Register an itimer callout for the given event, if necessary + */ + virtual void RegisterCallout(const DevdCtl::Event &event); + + /** + * \brief Close a case if it is no longer relevant. + * + * This method deals with cases tracking soft errors. Soft errors + * will be discarded should a remove event occur within a short period + * of the soft errors being reported. We also discard the events + * if the vdev is marked degraded or failed. + * + * \return True if the case is closed. False otherwise. + */ + bool CloseIfSolved(); + + /** + * \brief Emit data about this CaseFile via syslog(3). + */ + void Log(); + + /** + * \brief Whether we should degrade this vdev + */ + bool ShouldDegrade() const; + + /** + * \brief Whether we should fault this vdev + */ + bool ShouldFault() const; + + /** + * \brief If this vdev is spare + */ + int IsSpare(); + +protected: + enum { + /** + * The number of soft errors on a vdev required + * to transition a vdev from healthy to degraded + * status. + */ + ZFS_DEGRADE_IO_COUNT = 50 + }; + + static CalloutFunc_t OnGracePeriodEnded; + + /** + * \brief scandir(3) filter function used to find files containing + * serialized CaseFile data. + * + * \param dirEntry Directory entry for the file to filter. + * + * \return Non-zero for a file to include in the selection, + * otherwise 0. + */ + static int DeSerializeSelector(const struct dirent *dirEntry); + + /** + * \brief Given the name of a file containing serialized events from a + * CaseFile object, create/update an in-core CaseFile object + * representing the serialized data. + * + * \param fileName The name of a file containing serialized events + * from a CaseFile object. + */ + static void DeSerializeFile(const char *fileName); + + /** Constructor. */ + CaseFile(const Vdev &vdev); + + /** + * Destructor. + * Must be virtual so it can be subclassed in the unit tests + */ + virtual ~CaseFile(); + + /** + * \brief Reload state for the vdev associated with this CaseFile. + * + * \return True if the refresh was successful. False if the system + * has no record of the pool or vdev for this CaseFile. + */ + virtual bool RefreshVdevState(); + + /** + * \brief Free all events in the m_events list. + */ + void PurgeEvents(); + + /** + * \brief Free all events in the m_tentativeEvents list. + */ + void PurgeTentativeEvents(); + + /** + * \brief Commit to file system storage. + */ + void Serialize(); + + /** + * \brief Retrieve event data from a serialization stream. + * + * \param caseStream The serializtion stream to parse. + */ + void DeSerialize(std::ifstream &caseStream); + + /** + * \brief Serializes the supplied event list and writes it to fd + * + * \param prefix If not NULL, this prefix will be prepended to + * every event in the file. + */ + void SerializeEvList(const DevdCtl::EventList events, int fd, + const char* prefix=NULL) const; + + /** + * \brief Unconditionally close a CaseFile. + */ + virtual void Close(); + + /** + * \brief Callout callback invoked when the remove timer grace + * period expires. + * + * If no remove events are received prior to the grace period + * firing, then any tentative events are promoted and counted + * against the health of the vdev. + */ + void OnGracePeriodEnded(); + + /** + * \brief Attempt to activate a spare on this case's pool. + * + * Call this whenever a pool becomes degraded. It will look for any + * spare devices and activate one to replace the casefile's vdev. It + * will _not_ close the casefile; that should only happen when the + * missing drive is replaced or the user promotes the spare. + * + * \return True if a spare was activated + */ + bool ActivateSpare(); + + /** + * \brief replace a pool's vdev with another + * + * \param vdev_type The type of the new vdev. Usually either + * VDEV_TYPE_DISK or VDEV_TYPE_FILE + * \param path The file system path to the new vdev + * \param isspare Whether the new vdev is a spare + * + * \return true iff the replacement was successful + */ + bool Replace(const char* vdev_type, const char* path, bool isspare); + + /** + * \brief Which vdev, if any, is replacing ours. + * + * \param zhp Pool handle state from the caller context + * + * \return the vdev that is currently replacing ours, + * or NonexistentVdev if there isn't one. + */ + Vdev BeingReplacedBy(zpool_handle_t *zhp); + + /** + * \brief All CaseFiles being tracked by ZFSD. + */ + static CaseFileList s_activeCases; + + /** + * \brief The file system path to serialized CaseFile data. + */ + static const string s_caseFilePath; + + /** + * \brief The time ZFSD waits before promoting a tentative event + * into a permanent event. + */ + static const timeval s_removeGracePeriod; + + /** + * \brief A list of soft error events counted against the health of + * a vdev. + */ + DevdCtl::EventList m_events; + + /** + * \brief A list of soft error events waiting for a grace period + * expiration before being counted against the health of + * a vdev. + */ + DevdCtl::EventList m_tentativeEvents; + + DevdCtl::Guid m_poolGUID; + DevdCtl::Guid m_vdevGUID; + vdev_state m_vdevState; + string m_poolGUIDString; + string m_vdevGUIDString; + string m_vdevPhysPath; + int m_is_spare; + + /** + * \brief Callout activated when a grace period + */ + Callout m_tentativeTimer; + +private: + nvlist_t *CaseVdev(zpool_handle_t *zhp) const; +}; + +inline DevdCtl::Guid +CaseFile::PoolGUID() const +{ + return (m_poolGUID); +} + +inline DevdCtl::Guid +CaseFile::VdevGUID() const +{ + return (m_vdevGUID); +} + +inline vdev_state +CaseFile::VdevState() const +{ + return (m_vdevState); +} + +inline const string & +CaseFile::PoolGUIDString() const +{ + return (m_poolGUIDString); +} + +inline const string & +CaseFile::VdevGUIDString() const +{ + return (m_vdevGUIDString); +} + +inline const string & +CaseFile::PhysicalPath() const +{ + return (m_vdevPhysPath); +} + +#endif /* _CASE_FILE_H_ */ diff --git a/cmd/zfsd/vdev.cc b/cmd/zfsd/vdev.cc new file mode 100644 index 000000000000..a0e089a5bdf5 --- /dev/null +++ b/cmd/zfsd/vdev.cc @@ -0,0 +1,358 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file vdev.cc + * + * Implementation of the Vdev class. + */ +#include +#include +#include +#include + +#include +/* + * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with + * C++ flush methods + */ +#undef flush + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "vdev.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); +/*============================ Namespace Control =============================*/ +using std::string; +using std::stringstream; + +//- Special objects ----------------------------------------------------------- +Vdev NonexistentVdev; + +//- Vdev Inline Public Methods ------------------------------------------------ +/*=========================== Class Implementations ==========================*/ +/*----------------------------------- Vdev -----------------------------------*/ + +/* Special constructor for NonexistentVdev. */ +Vdev::Vdev() + : m_poolConfig(NULL), + m_config(NULL) +{} + +bool +Vdev::VdevLookupPoolGuid() +{ + uint64_t guid; + if (nvlist_lookup_uint64(m_poolConfig, ZPOOL_CONFIG_POOL_GUID, &guid)) + return (false); + m_poolGUID = guid; + return (true); +} + +void +Vdev::VdevLookupGuid() +{ + uint64_t guid; + if (nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_GUID, &guid) != 0) + throw ZfsdException("Unable to extract vdev GUID " + "from vdev config data."); + m_vdevGUID = guid; +} + +Vdev::Vdev(zpool_handle_t *pool, nvlist_t *config) + : m_poolConfig(zpool_get_config(pool, NULL)), + m_config(config) +{ + if (!VdevLookupPoolGuid()) + throw ZfsdException("Can't extract pool GUID from handle."); + VdevLookupGuid(); +} + +Vdev::Vdev(nvlist_t *poolConfig, nvlist_t *config) + : m_poolConfig(poolConfig), + m_config(config) +{ + if (!VdevLookupPoolGuid()) + throw ZfsdException("Can't extract pool GUID from config."); + VdevLookupGuid(); +} + +Vdev::Vdev(nvlist_t *labelConfig) + : m_poolConfig(labelConfig), + m_config(labelConfig) +{ + /* + * Spares do not have a Pool GUID. Tolerate its absence. + * Code accessing this Vdev in a context where the Pool GUID is + * required will find it invalid (as it is upon Vdev construction) + * and act accordingly. + */ + (void) VdevLookupPoolGuid(); + VdevLookupGuid(); + + try { + m_config = VdevIterator(labelConfig).Find(m_vdevGUID); + } catch (const ZfsdException &exp) { + /* + * When reading a spare's label, it is normal not to find + * a list of vdevs + */ + m_config = NULL; + } +} + +bool +Vdev::IsSpare() const +{ + uint64_t is_spare(0); + + if (m_config == NULL) + return (false); + + (void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_IS_SPARE, &is_spare); + return (bool(is_spare)); +} + +vdev_state +Vdev::State() const +{ + uint64_t *nvlist_array; + vdev_stat_t *vs; + uint_t vsc; + + if (m_config == NULL) { + /* + * If we couldn't find the list of vdevs, that normally means + * that this is an available hotspare. In that case, we will + * presume it to be healthy. Even if this spare had formerly + * been in use, been degraded, and been replaced, the act of + * replacement wipes the degraded bit from the label. So we + * have no choice but to presume that it is healthy. + */ + return (VDEV_STATE_HEALTHY); + } + + if (nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_VDEV_STATS, + &nvlist_array, &vsc) == 0) { + vs = reinterpret_cast(nvlist_array); + return (static_cast(vs->vs_state)); + } + + /* + * Stats are not available. This vdev was created from a label. + * Synthesize a state based on available data. + */ + uint64_t faulted(0); + uint64_t degraded(0); + (void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_FAULTED, &faulted); + (void)nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_DEGRADED, °raded); + if (faulted) + return (VDEV_STATE_FAULTED); + if (degraded) + return (VDEV_STATE_DEGRADED); + return (VDEV_STATE_HEALTHY); +} + +std::list +Vdev::Children() +{ + nvlist_t **vdevChildren; + int result; + u_int numChildren; + std::list children; + + if (m_poolConfig == NULL || m_config == NULL) + return (children); + + result = nvlist_lookup_nvlist_array(m_config, + ZPOOL_CONFIG_CHILDREN, &vdevChildren, &numChildren); + if (result != 0) + return (children); + + for (u_int c = 0;c < numChildren; c++) + children.push_back(Vdev(m_poolConfig, vdevChildren[c])); + + return (children); +} + +Vdev +Vdev::RootVdev() +{ + nvlist_t *rootVdev; + + if (m_poolConfig == NULL) + return (NonexistentVdev); + + if (nvlist_lookup_nvlist(m_poolConfig, ZPOOL_CONFIG_VDEV_TREE, + &rootVdev) != 0) + return (NonexistentVdev); + return (Vdev(m_poolConfig, rootVdev)); +} + +/* + * Find our parent. This requires doing a traversal of the config; we can't + * cache it as leaf vdevs may change their pool config location (spare, + * replacing, mirror, etc). + */ +Vdev +Vdev::Parent() +{ + std::list to_examine; + std::list children; + std::list::iterator children_it; + + to_examine.push_back(RootVdev()); + for (;;) { + if (to_examine.empty()) + return (NonexistentVdev); + Vdev vd = to_examine.front(); + if (vd.DoesNotExist()) + return (NonexistentVdev); + to_examine.pop_front(); + children = vd.Children(); + children_it = children.begin(); + for (;children_it != children.end(); children_it++) { + Vdev child = *children_it; + + if (child.GUID() == GUID()) + return (vd); + to_examine.push_front(child); + } + } +} + +bool +Vdev::IsAvailableSpare() const +{ + /* If we have a pool guid, we cannot be an available spare. */ + if (PoolGUID()) + return (false); + + return (true); +} + +bool +Vdev::IsSpare() +{ + uint64_t spare; + if (nvlist_lookup_uint64(m_config, ZPOOL_CONFIG_IS_SPARE, &spare) != 0) + return (false); + return (spare != 0); +} + +bool +Vdev::IsActiveSpare() const +{ + vdev_stat_t *vs; + uint_t c; + + if (m_poolConfig == NULL) + return (false); + + (void) nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_VDEV_STATS, + reinterpret_cast(&vs), &c); + if (vs == NULL || vs->vs_aux != VDEV_AUX_SPARED) + return (false); + return (true); +} + +bool +Vdev::IsResilvering() const +{ + pool_scan_stat_t *ps = NULL; + uint_t c; + + if (State() != VDEV_STATE_HEALTHY) + return (false); + + (void) nvlist_lookup_uint64_array(m_config, ZPOOL_CONFIG_SCAN_STATS, + reinterpret_cast(&ps), &c); + if (ps == NULL || ps->pss_func != POOL_SCAN_RESILVER) + return (false); + return (true); +} + +string +Vdev::GUIDString() const +{ + stringstream vdevGUIDString; + + vdevGUIDString << GUID(); + return (vdevGUIDString.str()); +} + +string +Vdev::Name(zpool_handle_t *zhp, bool verbose) const +{ + return (zpool_vdev_name(g_zfsHandle, zhp, m_config, + verbose ? B_TRUE : B_FALSE)); +} + +string +Vdev::Path() const +{ + const char *path(NULL); + + if ((m_config != NULL) + && (nvlist_lookup_string(m_config, ZPOOL_CONFIG_PATH, &path) == 0)) + return (path); + + return (""); +} + +string +Vdev::PhysicalPath() const +{ + const char *path(NULL); + + if ((m_config != NULL) && (nvlist_lookup_string(m_config, + ZPOOL_CONFIG_PHYS_PATH, &path) == 0)) + return (path); + + return (""); +} diff --git a/cmd/zfsd/vdev.h b/cmd/zfsd/vdev.h new file mode 100644 index 000000000000..322efc8f4e53 --- /dev/null +++ b/cmd/zfsd/vdev.h @@ -0,0 +1,188 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file vdev.h + * + * Definition of the Vdev class. + * + * Header requirements: + * + * #include + * #include + * + * #include + */ +#ifndef _VDEV_H_ +#define _VDEV_H_ + +/*=========================== Forward Declarations ===========================*/ +struct zpool_handle; +typedef struct zpool_handle zpool_handle_t; + +struct nvlist; +typedef struct nvlist nvlist_t; + +/*============================= Class Definitions ============================*/ +/*----------------------------------- Vdev -----------------------------------*/ +/** + * \brief Wrapper class for a vdev's name/value configuration list + * simplifying access to commonly used vdev attributes. + */ +class Vdev +{ +public: + /** + * \brief Instantiate a vdev object for a vdev that is a member + * of an imported pool. + * + * \param pool The pool object containing the vdev with + * configuration data provided in vdevConfig. + * \param vdevConfig Vdev configuration data. + * + * This method should be used whenever dealing with vdev's + * enumerated via the ZpoolList class. The in-core configuration + * data for a vdev does not contain all of the items found in + * the on-disk label. This requires the vdev class to augment + * the data in vdevConfig with data found in the pool object. + */ + Vdev(zpool_handle_t *pool, nvlist_t *vdevConfig); + + /** + * \brief Instantiate a vdev object for a vdev that is a member + * of a pool configuration. + * + * \param poolConfig The pool configuration containing the vdev + * configuration data provided in vdevConfig. + * \param vdevConfig Vdev configuration data. + * + * This method should be used whenever dealing with vdev's + * enumerated via the ZpoolList class. The in-core configuration + * data for a vdev does not contain all of the items found in + * the on-disk label. This requires the vdev class to augment + * the data in vdevConfig with data found in the pool object. + */ + Vdev(nvlist_t *poolConfig, nvlist_t *vdevConfig); + + /** + * \brief Instantiate a vdev object from a ZFS label stored on + * the device. + * + * \param vdevConfig The name/value list retrieved by reading + * the label information on a leaf vdev. + */ + Vdev(nvlist_t *vdevConfig); + + /** + * \brief No-op copy constructor for nonexistent vdevs. + */ + Vdev(); + + /** + * \brief No-op virtual destructor, since this class has virtual + * functions. + */ + virtual ~Vdev(); + bool DoesNotExist() const; + + /** + * \brief Return a list of the vdev's children. + */ + std::list Children(); + + virtual DevdCtl::Guid GUID() const; + bool IsSpare() const; + virtual DevdCtl::Guid PoolGUID() const; + virtual vdev_state State() const; + std::string Path() const; + virtual std::string PhysicalPath() const; + std::string GUIDString() const; + nvlist_t *PoolConfig() const; + nvlist_t *Config() const; + Vdev Parent(); + Vdev RootVdev(); + std::string Name(zpool_handle_t *, bool verbose) const; + bool IsSpare(); + bool IsAvailableSpare() const; + bool IsActiveSpare() const; + bool IsResilvering() const; + +private: + void VdevLookupGuid(); + bool VdevLookupPoolGuid(); + DevdCtl::Guid m_poolGUID; + DevdCtl::Guid m_vdevGUID; + nvlist_t *m_poolConfig; + nvlist_t *m_config; +}; + +//- Special objects ----------------------------------------------------------- +extern Vdev NonexistentVdev; + +//- Vdev Inline Public Methods ------------------------------------------------ +inline Vdev::~Vdev() +{ +} + +inline DevdCtl::Guid +Vdev::PoolGUID() const +{ + return (m_poolGUID); +} + +inline DevdCtl::Guid +Vdev::GUID() const +{ + return (m_vdevGUID); +} + +inline nvlist_t * +Vdev::PoolConfig() const +{ + return (m_poolConfig); +} + +inline nvlist_t * +Vdev::Config() const +{ + return (m_config); +} + +inline bool +Vdev::DoesNotExist() const +{ + return (m_config == NULL); +} + +#endif /* _VDEV_H_ */ diff --git a/cmd/zfsd/vdev_iterator.cc b/cmd/zfsd/vdev_iterator.cc new file mode 100644 index 000000000000..c23399ed68a8 --- /dev/null +++ b/cmd/zfsd/vdev_iterator.cc @@ -0,0 +1,172 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file vdev_iterator.cc + * + * Implementation of the VdevIterator class. + */ +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include +#include + +#include "vdev.h" +#include "vdev_iterator.h" +#include "zfsd_exception.h" + +/*============================ Namespace Control =============================*/ +using DevdCtl::Guid; + +/*=========================== Class Implementations ==========================*/ +/*------------------------------- VdevIterator -------------------------------*/ +VdevIterator::VdevIterator(zpool_handle_t *pool) + : m_poolConfig(zpool_get_config(pool, NULL)) +{ + Reset(); +} + +VdevIterator::VdevIterator(nvlist_t *poolConfig) + : m_poolConfig(poolConfig) +{ + Reset(); +} + +void +VdevIterator::Reset() +{ + nvlist_t *rootVdev; + nvlist **cache_child; + nvlist **spare_child; + int result; + uint_t cache_children; + uint_t spare_children; + + result = nvlist_lookup_nvlist(m_poolConfig, + ZPOOL_CONFIG_VDEV_TREE, + &rootVdev); + if (result != 0) + throw ZfsdException(m_poolConfig, "Unable to extract " + "ZPOOL_CONFIG_VDEV_TREE from pool."); + m_vdevQueue.assign(1, rootVdev); + result = nvlist_lookup_nvlist_array(rootVdev, + ZPOOL_CONFIG_L2CACHE, + &cache_child, + &cache_children); + if (result == 0) + for (uint_t c = 0; c < cache_children; c++) + m_vdevQueue.push_back(cache_child[c]); + result = nvlist_lookup_nvlist_array(rootVdev, + ZPOOL_CONFIG_SPARES, + &spare_child, + &spare_children); + if (result == 0) + for (uint_t c = 0; c < spare_children; c++) + m_vdevQueue.push_back(spare_child[c]); +} + +nvlist_t * +VdevIterator::Next() +{ + nvlist_t *vdevConfig; + + if (m_vdevQueue.empty()) + return (NULL); + + for (;;) { + nvlist_t **vdevChildren; + int result; + u_int numChildren; + + vdevConfig = m_vdevQueue.front(); + m_vdevQueue.pop_front(); + + /* Expand non-leaf vdevs. */ + result = nvlist_lookup_nvlist_array(vdevConfig, + ZPOOL_CONFIG_CHILDREN, + &vdevChildren, &numChildren); + if (result != 0) { + /* leaf vdev */ + break; + } + + /* + * Insert children at the head of the queue to effect a + * depth first traversal of the tree. + */ + m_vdevQueue.insert(m_vdevQueue.begin(), vdevChildren, + vdevChildren + numChildren); + } + + return (vdevConfig); +} + +void +VdevIterator::Each(VdevCallback_t *callBack, void *callBackArg) +{ + nvlist_t *vdevConfig; + + Reset(); + while ((vdevConfig = Next()) != NULL) { + Vdev vdev(m_poolConfig, vdevConfig); + + if (callBack(vdev, callBackArg)) + break; + } +} + +nvlist_t * +VdevIterator::Find(Guid vdevGUID) +{ + nvlist_t *vdevConfig; + + Reset(); + while ((vdevConfig = Next()) != NULL) { + Vdev vdev(m_poolConfig, vdevConfig); + + if (vdev.GUID() == vdevGUID) + return (vdevConfig); + } + return (NULL); +} diff --git a/cmd/zfsd/vdev_iterator.h b/cmd/zfsd/vdev_iterator.h new file mode 100644 index 000000000000..435582ec1f84 --- /dev/null +++ b/cmd/zfsd/vdev_iterator.h @@ -0,0 +1,123 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file vdev_iterator.h + * + * VdevIterator class definition. + * + * Header requirements: + * + * #include + */ +#ifndef _VDEV_ITERATOR_H_ +#define _VDEV_ITERATOR_H_ + +/*=========================== Forward Declarations ===========================*/ +struct zpool_handle; +typedef struct zpool_handle zpool_handle_t; + +struct nvlist; +typedef struct nvlist nvlist_t; + +class Vdev; + +/*============================= Class Definitions ============================*/ +/*------------------------------- VdevIterator -------------------------------*/ +typedef bool VdevCallback_t(Vdev &vdev, void *cbArg); + +/** + * \brief VdevIterator provides mechanisms for traversing and searching + * the leaf vdevs contained in a ZFS pool configuration. + */ +class VdevIterator +{ +public: + /** + * \brief Instantiate a VdevIterator for the given ZFS pool. + * + * \param pool The ZFS pool to traverse/search. + */ + VdevIterator(zpool_handle_t *pool); + + /** + * \brief Instantiate a VdevIterator for the given ZFS pool. + * + * \param poolConfig The configuration data for the ZFS pool + * to traverse/search. + */ + VdevIterator(nvlist_t *poolConfig); + + /** + * \brief Reset this iterator's cursor so that Next() will + * report the first member of the pool. + */ + void Reset(); + + /** + * \brief Report the leaf vdev at this iterator's cursor and increment + * the cursor to the next leaf pool member. + */ + nvlist_t *Next(); + + /** + * \brief Traverse the entire pool configuration starting its + * first member, returning a vdev object with the given + * vdev GUID if found. + * + * \param vdevGUID The vdev GUID of the vdev object to find. + * + * \return A Vdev object for the matching vdev if found. Otherwise + * NULL. + * + * Upon return, the VdevIterator's cursor points to the vdev just + * past the returned vdev or end() if no matching vdev is found. + */ + nvlist_t *Find(DevdCtl::Guid vdevGUID); + + /** + * \brief Perform the specified operation on each leaf member of + * a pool's vdev membership. + * + * \param cb Callback function to execute for each member. + * \param cbArg Argument to pass to cb. + */ + void Each(VdevCallback_t *cb, void *cbArg); + +private: + nvlist_t *m_poolConfig; + std::list m_vdevQueue; +}; + +#endif /* _VDEV_ITERATOR_H_ */ diff --git a/cmd/zfsd/zfsd.8 b/cmd/zfsd/zfsd.8 new file mode 100644 index 000000000000..ce7550b891a3 --- /dev/null +++ b/cmd/zfsd/zfsd.8 @@ -0,0 +1,154 @@ +.\"- +.\" Copyright (c) 2016 Allan Jude +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd April 18, 2020 +.Dt ZFSD 8 +.Os +.Sh NAME +.Nm zfsd +.Nd ZFS fault management daemon +.Sh SYNOPSIS +.Nm +.Op Fl d +.Sh DESCRIPTION +.Nm +attempts to resolve ZFS faults that the kernel can't resolve by itself. +It listens to +.Xr devctl 4 +events, which are how the kernel notifies userland of events such as I/O +errors and disk removals. +.Nm +attempts to resolve these faults by activating or deactivating hot spares +and onlining offline vdevs. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl d +Run in the foreground instead of daemonizing. +.El +.Pp +System administrators never interact with +.Nm +directly. +Instead, they control its behavior indirectly through zpool configuration. +There are two ways to influence +.Nm : +assigning hotspares and setting pool properties. +Currently, only the +.Em autoreplace +property has any effect. +See +.Xr zpool 8 +for details. +.Pp +.Nm +will attempt to resolve the following types of fault: +.Bl -tag -width a +.It device removal +When a leaf vdev disappears, +.Nm +will activate any available hotspare. +.It device arrival +When a new GEOM device appears, +.Nm +will attempt to read its ZFS label, if any. +If it matches a previously removed vdev on an active pool, +.Nm +will online it. +Once resilvering completes, any active hotspare will detach automatically. +.Pp +If the new device has no ZFS label but its physical path matches the +physical path of a previously removed vdev on an active pool, and that +pool has the autoreplace property set, then +.Nm +will replace the missing vdev with the newly arrived device. +Once resilvering completes, any active hotspare will detach automatically. +.It vdev degrade or fault events +If a vdev becomes degraded or faulted, +.Nm +will activate any available hotspare. +.It I/O errors +If a leaf vdev generates more than 50 I/O errors in a 60 second period, then +.Nm +will mark that vdev as +.Em FAULTED . +ZFS will no longer issue any I/Os to it. +.Nm +will activate a hotspare if one is available. +.It Checksum errors +If a leaf vdev generates more than 50 checksum errors in a 60 second +period, then +.Nm +will mark that vdev as +.Em DEGRADED . +ZFS will still use it, but zfsd will activate a spare anyway. +.It Spare addition +If the system administrator adds a hotspare to a pool that is already degraded, +.Nm +will activate the spare. +.It Resilver complete +.Nm +will detach any hotspare once a permanent replacement finishes resilvering. +.It Physical path change +If the physical path of an existing disk changes, +.Nm +will attempt to replace any missing disk with the same physical path, +if its pool's autoreplace property is set. +.El +.Pp +.Nm +will log interesting events and its actions to syslog with facility +.Em daemon +and identity +.Op zfsd . +.El +.Sh FILES +.Bl -tag -width a -compact +.It Pa /var/db/zfsd/cases +When +.Nm +exits, it serializes any unresolved casefiles here, +then reads them back in when next it starts up. +.El +.Sh SEE ALSO +.Xr devctl 4 , +.Xr zpool 8 +.Sh HISTORY +.Nm +first appeared in +.Fx 11.0 . +.Sh AUTHORS +.Nm +was originally written by +.An Justin Gibbs Aq Mt gibbs@FreeBSD.org +and +.An Alan Somers Aq Mt asomers@FreeBSD.org +.Sh TODO +In the future, +.Nm +should be able to resume a pool that became suspended due to device +removals, if enough missing devices have returned. diff --git a/cmd/zfsd/zfsd.cc b/cmd/zfsd/zfsd.cc new file mode 100644 index 000000000000..29c6b1ae22e2 --- /dev/null +++ b/cmd/zfsd/zfsd.cc @@ -0,0 +1,449 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zfsd.cc + * + * The ZFS daemon consumes kernel devdctl(4) event data via devd(8)'s + * unix domain socket in order to react to system changes that impact + * the function of ZFS storage pools. The goal of this daemon is to + * provide similar functionality to the Solaris ZFS Diagnostic Engine + * (zfs-diagnosis), the Solaris ZFS fault handler (zfs-retire), and + * the Solaris ZFS vdev insertion agent (zfs-mod sysevent handler). + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd_event.h" +#include "case_file.h" +#include "vdev.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); + +/*================================== Macros ==================================*/ +#define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) + +/*============================ Namespace Control =============================*/ +using DevdCtl::Event; +using DevdCtl::EventFactory; +using DevdCtl::EventList; + +/*================================ Global Data ===============================*/ +int g_debug = 0; +libzfs_handle_t *g_zfsHandle; + +/*--------------------------------- ZfsDaemon --------------------------------*/ +//- ZfsDaemon Static Private Data ---------------------------------------------- +ZfsDaemon *ZfsDaemon::s_theZfsDaemon; +bool ZfsDaemon::s_logCaseFiles; +bool ZfsDaemon::s_terminateEventLoop; +char ZfsDaemon::s_pidFilePath[] = "/var/run/zfsd.pid"; +pidfh *ZfsDaemon::s_pidFH; +int ZfsDaemon::s_signalPipeFD[2]; +bool ZfsDaemon::s_systemRescanRequested(false); +EventFactory::Record ZfsDaemon::s_registryEntries[] = +{ + { Event::NOTIFY, "GEOM", &GeomEvent::Builder }, + { Event::NOTIFY, "ZFS", &ZfsEvent::Builder } +}; + +//- ZfsDaemon Static Public Methods -------------------------------------------- +ZfsDaemon & +ZfsDaemon::Get() +{ + return (*s_theZfsDaemon); +} + +void +ZfsDaemon::WakeEventLoop() +{ + write(s_signalPipeFD[1], "+", 1); +} + +void +ZfsDaemon::RequestSystemRescan() +{ + s_systemRescanRequested = true; + ZfsDaemon::WakeEventLoop(); +} + +void +ZfsDaemon::Run() +{ + ZfsDaemon daemon; + + while (s_terminateEventLoop == false) { + + try { + daemon.DisconnectFromDevd(); + + if (daemon.ConnectToDevd() == false) { + sleep(30); + continue; + } + + daemon.DetectMissedEvents(); + + daemon.EventLoop(); + + } catch (const DevdCtl::Exception &exp) { + exp.Log(); + } + } + + daemon.DisconnectFromDevd(); +} + +//- ZfsDaemon Private Methods -------------------------------------------------- +ZfsDaemon::ZfsDaemon() + : Consumer(/*defBuilder*/NULL, s_registryEntries, + NUM_ELEMENTS(s_registryEntries)) +{ + if (s_theZfsDaemon != NULL) + errx(1, "Multiple ZfsDaemon instances created. Exiting"); + + s_theZfsDaemon = this; + + if (pipe(s_signalPipeFD) != 0) + errx(1, "Unable to allocate signal pipe. Exiting"); + + if (fcntl(s_signalPipeFD[0], F_SETFL, O_NONBLOCK) == -1) + errx(1, "Unable to set pipe as non-blocking. Exiting"); + + if (fcntl(s_signalPipeFD[1], F_SETFL, O_NONBLOCK) == -1) + errx(1, "Unable to set pipe as non-blocking. Exiting"); + + signal(SIGHUP, ZfsDaemon::RescanSignalHandler); + signal(SIGINFO, ZfsDaemon::InfoSignalHandler); + signal(SIGINT, ZfsDaemon::QuitSignalHandler); + signal(SIGTERM, ZfsDaemon::QuitSignalHandler); + signal(SIGUSR1, ZfsDaemon::RescanSignalHandler); + + g_zfsHandle = libzfs_init(); + if (g_zfsHandle == NULL) + errx(1, "Unable to initialize ZFS library. Exiting"); + + Callout::Init(); + InitializeSyslog(); + OpenPIDFile(); + + if (g_debug == 0) + daemon(0, 0); + + UpdatePIDFile(); +} + +ZfsDaemon::~ZfsDaemon() +{ + PurgeCaseFiles(); + ClosePIDFile(); +} + +void +ZfsDaemon::PurgeCaseFiles() +{ + CaseFile::PurgeAll(); +} + +bool +ZfsDaemon::VdevAddCaseFile(Vdev &vdev, void *cbArg) +{ + if (vdev.State() != VDEV_STATE_HEALTHY) + CaseFile::Create(vdev); + + return (/*break early*/false); +} + +void +ZfsDaemon::BuildCaseFiles() +{ + ZpoolList zpl; + ZpoolList::iterator pool; + + /* Add CaseFiles for vdevs with issues. */ + for (pool = zpl.begin(); pool != zpl.end(); pool++) + VdevIterator(*pool).Each(VdevAddCaseFile, NULL); + + /* De-serialize any saved cases. */ + CaseFile::DeSerialize(); + + /* Simulate config_sync events to force CaseFile reevaluation */ + for (pool = zpl.begin(); pool != zpl.end(); pool++) { + char evString[160]; + Event *event; + nvlist_t *config; + uint64_t poolGUID; + const char *poolname; + + poolname = zpool_get_name(*pool); + config = zpool_get_config(*pool, NULL); + if (config == NULL) { + syslog(LOG_ERR, "ZFSDaemon::BuildCaseFiles: Could not " + "find pool config for pool %s", poolname); + continue; + } + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, + &poolGUID) != 0) { + syslog(LOG_ERR, "ZFSDaemon::BuildCaseFiles: Could not " + "find pool guid for pool %s", poolname); + continue; + } + + + snprintf(evString, 160, "!system=ZFS subsystem=ZFS " + "type=misc.fs.zfs.config_sync sub_type=synthesized " + "pool_name=%s pool_guid=%" PRIu64 "\n", poolname, poolGUID); + event = Event::CreateEvent(GetFactory(), string(evString)); + if (event != NULL) { + event->Process(); + delete event; + } + } +} + +void +ZfsDaemon::RescanSystem() +{ + struct gmesh mesh; + struct gclass *mp; + struct ggeom *gp; + struct gprovider *pp; + int result; + + /* + * The devdctl system doesn't replay events for new consumers + * of the interface. Emit manufactured DEVFS arrival events + * for any devices that already before we started or during + * periods where we've lost our connection to devd. + */ + result = geom_gettree(&mesh); + if (result != 0) { + syslog(LOG_ERR, "ZfsDaemon::RescanSystem: " + "geom_gettree failed with error %d\n", result); + return; + } + + const string evStart("!system=DEVFS subsystem=CDEV type=CREATE " + "sub_type=synthesized cdev="); + LIST_FOREACH(mp, &mesh.lg_class, lg_class) { + LIST_FOREACH(gp, &mp->lg_geom, lg_geom) { + LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { + Event *event; + + string evString(evStart + pp->lg_name + "\n"); + event = Event::CreateEvent(GetFactory(), + evString); + if (event != NULL) { + if (event->Process()) + SaveEvent(*event); + delete event; + } + } + } + } + geom_deletetree(&mesh); +} + +void +ZfsDaemon::DetectMissedEvents() +{ + do { + PurgeCaseFiles(); + + /* + * Discard any events waiting for us. We don't know + * if they still apply to the current state of the + * system. + */ + FlushEvents(); + + BuildCaseFiles(); + + /* + * If the system state has changed during our + * interrogation, start over. + */ + } while (s_terminateEventLoop == false && EventsPending()); + + RescanSystem(); +} + +void +ZfsDaemon::EventLoop() +{ + while (s_terminateEventLoop == false) { + struct pollfd fds[2]; + int result; + + if (s_logCaseFiles == true) { + EventList::iterator event(m_unconsumedEvents.begin()); + s_logCaseFiles = false; + CaseFile::LogAll(); + while (event != m_unconsumedEvents.end()) + (*event++)->Log(LOG_INFO); + } + + Callout::ExpireCallouts(); + + /* Wait for data. */ + fds[0].fd = m_devdSockFD; + fds[0].events = POLLIN; + fds[0].revents = 0; + fds[1].fd = s_signalPipeFD[0]; + fds[1].events = POLLIN; + fds[1].revents = 0; + result = poll(fds, NUM_ELEMENTS(fds), /*timeout*/INFTIM); + if (result == -1) { + if (errno == EINTR) + continue; + else + err(1, "Polling for devd events failed"); + } else if (result == 0) { + errx(1, "Unexpected result of 0 from poll. Exiting"); + } + + if ((fds[0].revents & POLLIN) != 0) + ProcessEvents(); + + if ((fds[1].revents & POLLIN) != 0) { + static char discardBuf[128]; + + /* + * This pipe exists just to close the signal + * race. Its contents are of no interest to + * us, but we must ensure that future signals + * have space in the pipe to write. + */ + while (read(s_signalPipeFD[0], discardBuf, + sizeof(discardBuf)) > 0) + ; + } + + if (s_systemRescanRequested == true) { + s_systemRescanRequested = false; + syslog(LOG_INFO, "System Rescan request processed."); + RescanSystem(); + } + + if ((fds[0].revents & POLLERR) != 0) { + syslog(LOG_INFO, "POLLERROR detected on devd socket."); + break; + } + + if ((fds[0].revents & POLLHUP) != 0) { + syslog(LOG_INFO, "POLLHUP detected on devd socket."); + break; + } + } +} +//- ZfsDaemon staic Private Methods -------------------------------------------- +void +ZfsDaemon::InfoSignalHandler(int) +{ + s_logCaseFiles = true; + ZfsDaemon::WakeEventLoop(); +} + +void +ZfsDaemon::RescanSignalHandler(int) +{ + RequestSystemRescan(); +} + +void +ZfsDaemon::QuitSignalHandler(int) +{ + s_terminateEventLoop = true; + ZfsDaemon::WakeEventLoop(); +} + +void +ZfsDaemon::OpenPIDFile() +{ + pid_t otherPID; + + s_pidFH = pidfile_open(s_pidFilePath, 0600, &otherPID); + if (s_pidFH == NULL) { + if (errno == EEXIST) + errx(1, "already running as PID %d. Exiting", otherPID); + warn("cannot open PID file"); + } +} + +void +ZfsDaemon::UpdatePIDFile() +{ + if (s_pidFH != NULL) + pidfile_write(s_pidFH); +} + +void +ZfsDaemon::ClosePIDFile() +{ + if (s_pidFH != NULL) + pidfile_remove(s_pidFH); +} + +void +ZfsDaemon::InitializeSyslog() +{ + openlog("zfsd", LOG_NDELAY, LOG_DAEMON); +} + diff --git a/cmd/zfsd/zfsd.h b/cmd/zfsd/zfsd.h new file mode 100644 index 000000000000..7b4019c7ae10 --- /dev/null +++ b/cmd/zfsd/zfsd.h @@ -0,0 +1,228 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file zfsd.h + * + * Class definitions and supporting data strutures for the ZFS fault + * management daemon. + * + * Header requirements: + * + * #include + * + * #include + * + * #include + * #include + * #include + * + * #include + * #include + * #include + * #include + * + * #include "vdev_iterator.h" + */ +#ifndef _ZFSD_H_ +#define _ZFSD_H_ + +/*=========================== Forward Declarations ===========================*/ +struct pidfh; + +struct zpool_handle; +typedef struct zpool_handle zpool_handle_t; + +struct zfs_handle; +typedef struct libzfs_handle libzfs_handle_t; + +struct nvlist; +typedef struct nvlist nvlist_t; + +typedef int LeafIterFunc(zpool_handle_t *, nvlist_t *, void *); + +/*================================ Global Data ===============================*/ +extern int g_debug; +extern libzfs_handle_t *g_zfsHandle; + +/*============================= Class Definitions ============================*/ +/*--------------------------------- ZfsDaemon --------------------------------*/ +/** + * Static singleton orchestrating the operations of the ZFS daemon program. + */ +class ZfsDaemon : public DevdCtl::Consumer +{ +public: + /** Return the ZfsDaemon singleton. */ + static ZfsDaemon &Get(); + + /** + * Used by signal handlers to ensure, in a race free way, that + * the event loop will perform at least one more full loop + * before sleeping again. + */ + static void WakeEventLoop(); + + /** + * Schedules a rescan of devices in the system for potential + * candidates to replace a missing vdev. The scan is performed + * during the next run of the event loop. + */ + static void RequestSystemRescan(); + + /** Daemonize and perform all functions of the ZFS daemon. */ + static void Run(); + +private: + ZfsDaemon(); + ~ZfsDaemon(); + + static VdevCallback_t VdevAddCaseFile; + + /** Purge our cache of outstanding ZFS issues in the system. */ + void PurgeCaseFiles(); + + /** Build a cache of outstanding ZFS issues in the system. */ + void BuildCaseFiles(); + + /** + * Iterate over all known issues and attempt to solve them + * given resources currently available in the system. + */ + void RescanSystem(); + + /** + * Interrogate the system looking for previously unknown + * faults that occurred either before ZFSD was started, + * or during a period of lost communication with Devd. + */ + void DetectMissedEvents(); + + /** + * Wait for and process event source activity. + */ + void EventLoop(); + + /** + * Signal handler for which our response is to + * log the current state of the daemon. + * + * \param sigNum The signal caught. + */ + static void InfoSignalHandler(int sigNum); + + /** + * Signal handler for which our response is to + * request a case rescan. + * + * \param sigNum The signal caught. + */ + static void RescanSignalHandler(int sigNum); + + /** + * Signal handler for which our response is to + * gracefully terminate. + * + * \param sigNum The signal caught. + */ + static void QuitSignalHandler(int sigNum); + + /** + * Open and lock our PID file. + */ + static void OpenPIDFile(); + + /** + * Update our PID file with our PID. + */ + static void UpdatePIDFile(); + + /** + * Close and release the lock on our PID file. + */ + static void ClosePIDFile(); + + /** + * Perform syslog configuration. + */ + static void InitializeSyslog(); + + static ZfsDaemon *s_theZfsDaemon; + + /** + * Set to true when our program is signaled to + * gracefully exit. + */ + static bool s_logCaseFiles; + + /** + * Set to true when our program is signaled to + * gracefully exit. + */ + static bool s_terminateEventLoop; + + /** + * The canonical path and file name of zfsd's PID file. + */ + static char s_pidFilePath[]; + + /** + * Control structure for PIDFILE(3) API. + */ + static pidfh *s_pidFH; + + /** + * Pipe file descriptors used to close races with our + * signal handlers. + */ + static int s_signalPipeFD[2]; + + /** + * Flag controlling a rescan from ZFSD's event loop of all + * GEOM providers in the system to find candidates for solving + * cases. + */ + static bool s_systemRescanRequested; + + /** + * Flag controlling whether events can be queued. This boolean + * is set during event replay to ensure that events for pools or + * devices no longer in the system are not retained forever. + */ + static bool s_consumingEvents; + + static DevdCtl::EventFactory::Record s_registryEntries[]; +}; + +#endif /* _ZFSD_H_ */ diff --git a/cmd/zfsd/zfsd_event.cc b/cmd/zfsd/zfsd_event.cc new file mode 100644 index 000000000000..45fc7adbb31b --- /dev/null +++ b/cmd/zfsd/zfsd_event.cc @@ -0,0 +1,488 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zfsd_event.cc + */ +#include +#include +#include +#include +#include + +#include + +#include +#include +/* + * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with + * C++ flush methods + */ +#undef flush +#undef __init +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd_event.h" +#include "case_file.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); +/*============================ Namespace Control =============================*/ +using DevdCtl::Event; +using DevdCtl::Guid; +using DevdCtl::NVPairMap; +using std::stringstream; + +/*=========================== Class Implementations ==========================*/ + +/*-------------------------------- GeomEvent --------------------------------*/ + +//- GeomEvent Static Public Methods ------------------------------------------- +Event * +GeomEvent::Builder(Event::Type type, + NVPairMap &nvPairs, + const string &eventString) +{ + return (new GeomEvent(type, nvPairs, eventString)); +} + +//- GeomEvent Virtual Public Methods ------------------------------------------ +Event * +GeomEvent::DeepCopy() const +{ + return (new GeomEvent(*this)); +} + +bool +GeomEvent::Process() const +{ + /* + * We only use GEOM events to repair damaged pools. So return early if + * there are no damaged pools + */ + if (CaseFile::Empty()) + return (false); + + /* + * We are only concerned with arrivals and physical path changes, + * because those can be used to satisfy online and autoreplace + * operations + */ + if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE") + return (false); + + /* Log the event since it is of interest. */ + Log(LOG_INFO); + + string devPath; + if (!DevPath(devPath)) + return (false); + + int devFd(open(devPath.c_str(), O_RDONLY)); + if (devFd == -1) + return (false); + + bool inUse; + bool degraded; + nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded)); + + string physPath; + bool havePhysPath(PhysicalPath(physPath)); + + string devName; + DevName(devName); + close(devFd); + + if (inUse && devLabel != NULL) { + OnlineByLabel(devPath, physPath, devLabel); + } else if (degraded) { + syslog(LOG_INFO, "%s is marked degraded. Ignoring " + "as a replace by physical path candidate.\n", + devName.c_str()); + } else if (havePhysPath) { + /* + * TODO: attempt to resolve events using every casefile + * that matches this physpath + */ + CaseFile *caseFile(CaseFile::Find(physPath)); + if (caseFile != NULL) { + syslog(LOG_INFO, + "Found CaseFile(%s:%s:%s) - ReEvaluating\n", + caseFile->PoolGUIDString().c_str(), + caseFile->VdevGUIDString().c_str(), + zpool_state_to_name(caseFile->VdevState(), + VDEV_AUX_NONE)); + caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL); + } + } + return (false); +} + +//- GeomEvent Protected Methods ----------------------------------------------- +GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs, + const string &eventString) + : DevdCtl::GeomEvent(type, nvpairs, eventString) +{ +} + +GeomEvent::GeomEvent(const GeomEvent &src) + : DevdCtl::GeomEvent::GeomEvent(src) +{ +} + +nvlist_t * +GeomEvent::ReadLabel(int devFd, bool &inUse, bool °raded) +{ + pool_state_t poolState; + char *poolName; + boolean_t b_inuse; + int nlabels; + + inUse = false; + degraded = false; + poolName = NULL; + if (zpool_in_use(g_zfsHandle, devFd, &poolState, + &poolName, &b_inuse) == 0) { + nvlist_t *devLabel = NULL; + + inUse = b_inuse == B_TRUE; + if (poolName != NULL) + free(poolName); + + if (zpool_read_label(devFd, &devLabel, &nlabels) != 0) + return (NULL); + /* + * If we find a disk with fewer than the maximum number of + * labels, it might be the whole disk of a partitioned disk + * where ZFS resides on a partition. In that case, we should do + * nothing and wait for the partition to appear. Or, the disk + * might be damaged. In that case, zfsd should do nothing and + * wait for the sysadmin to decide. + */ + if (nlabels != VDEV_LABELS || devLabel == NULL) { + nvlist_free(devLabel); + return (NULL); + } + + try { + Vdev vdev(devLabel); + degraded = vdev.State() != VDEV_STATE_HEALTHY; + return (devLabel); + } catch (ZfsdException &exp) { + string devName = fdevname(devFd); + string devPath = _PATH_DEV + devName; + string context("GeomEvent::ReadLabel: " + + devPath + ": "); + + exp.GetString().insert(0, context); + exp.Log(); + nvlist_free(devLabel); + } + } + return (NULL); +} + +bool +GeomEvent::OnlineByLabel(const string &devPath, const string& physPath, + nvlist_t *devConfig) +{ + bool ret = false; + try { + CaseFileList case_list; + /* + * A device with ZFS label information has been + * inserted. If it matches a device for which we + * have a case, see if we can solve that case. + */ + syslog(LOG_INFO, "Interrogating VDEV label for %s\n", + devPath.c_str()); + Vdev vdev(devConfig); + CaseFile::Find(vdev.PoolGUID(),vdev.GUID(), case_list); + for (CaseFileList::iterator curr = case_list.begin(); + curr != case_list.end(); curr++) { + ret |= (*curr)->ReEvaluate(devPath, physPath, &vdev); + } + return (ret); + + } catch (ZfsdException &exp) { + string context("GeomEvent::OnlineByLabel: " + devPath + ": "); + + exp.GetString().insert(0, context); + exp.Log(); + } + return (ret); +} + + +/*--------------------------------- ZfsEvent ---------------------------------*/ +//- ZfsEvent Static Public Methods --------------------------------------------- +DevdCtl::Event * +ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs, + const string &eventString) +{ + return (new ZfsEvent(type, nvpairs, eventString)); +} + +//- ZfsEvent Virtual Public Methods -------------------------------------------- +Event * +ZfsEvent::DeepCopy() const +{ + return (new ZfsEvent(*this)); +} + +bool +ZfsEvent::Process() const +{ + string logstr(""); + + if (!Contains("class") && !Contains("type")) { + syslog(LOG_ERR, + "ZfsEvent::Process: Missing class or type data."); + return (false); + } + + /* On config syncs, replay any queued events first. */ + if (Value("type").find("misc.fs.zfs.config_sync") == 0) { + /* + * Even if saved events are unconsumed the second time + * around, drop them. Any events that still can't be + * consumed are probably referring to vdevs or pools that + * no longer exist. + */ + ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true); + CaseFile::ReEvaluateByGuid(PoolGUID(), *this); + } + + if (Value("type").find("misc.fs.zfs.") == 0) { + /* Configuration changes, resilver events, etc. */ + ProcessPoolEvent(); + return (false); + } + + if (!Contains("pool_guid") || !Contains("vdev_guid")) { + /* Only currently interested in Vdev related events. */ + return (false); + } + + CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); + if (caseFile != NULL) { + Log(LOG_INFO); + syslog(LOG_INFO, "Evaluating existing case file\n"); + caseFile->ReEvaluate(*this); + return (false); + } + + /* Skip events that can't be handled. */ + Guid poolGUID(PoolGUID()); + /* If there are no replicas for a pool, then it's not manageable. */ + if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) { + stringstream msg; + msg << "No replicas available for pool " << poolGUID; + msg << ", ignoring"; + Log(LOG_INFO); + syslog(LOG_INFO, "%s", msg.str().c_str()); + return (false); + } + + /* + * Create a case file for this vdev, and have it + * evaluate the event. + */ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); + if (zpl.empty()) { + stringstream msg; + int priority = LOG_INFO; + msg << "ZfsEvent::Process: Event for unknown pool "; + msg << poolGUID << " "; + msg << "queued"; + Log(LOG_INFO); + syslog(priority, "%s", msg.str().c_str()); + return (true); + } + + nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID()); + if (vdevConfig == NULL) { + stringstream msg; + int priority = LOG_INFO; + msg << "ZfsEvent::Process: Event for unknown vdev "; + msg << VdevGUID() << " "; + msg << "queued"; + Log(LOG_INFO); + syslog(priority, "%s", msg.str().c_str()); + return (true); + } + + Vdev vdev(zpl.front(), vdevConfig); + caseFile = &CaseFile::Create(vdev); + if (caseFile->ReEvaluate(*this) == false) { + stringstream msg; + int priority = LOG_INFO; + msg << "ZfsEvent::Process: Unconsumed event for vdev("; + msg << zpool_get_name(zpl.front()) << ","; + msg << vdev.GUID() << ") "; + msg << "queued"; + Log(LOG_INFO); + syslog(priority, "%s", msg.str().c_str()); + return (true); + } + return (false); +} + +//- ZfsEvent Protected Methods ------------------------------------------------- +ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs, + const string &eventString) + : DevdCtl::ZfsEvent(type, nvpairs, eventString) +{ +} + +ZfsEvent::ZfsEvent(const ZfsEvent &src) + : DevdCtl::ZfsEvent(src) +{ +} + +/* + * Sometimes the kernel won't detach a spare when it is no longer needed. This + * can happen for example if a drive is removed, then either the pool is + * exported or the machine is powered off, then the drive is reinserted, then + * the machine is powered on or the pool is imported. ZFSD must detach these + * spares itself. + */ +void +ZfsEvent::CleanupSpares() const +{ + Guid poolGUID(PoolGUID()); + ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID); + if (!zpl.empty()) { + zpool_handle_t* hdl; + + hdl = zpl.front(); + VdevIterator(hdl).Each(TryDetach, (void*)hdl); + } +} + +void +ZfsEvent::ProcessPoolEvent() const +{ + bool degradedDevice(false); + + /* The pool is destroyed. Discard any open cases */ + if (Value("type") == "misc.fs.zfs.pool_destroy") { + Log(LOG_INFO); + CaseFile::ReEvaluateByGuid(PoolGUID(), *this); + return; + } + + CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID())); + if (caseFile != NULL) { + if (caseFile->VdevState() != VDEV_STATE_UNKNOWN + && caseFile->VdevState() < VDEV_STATE_HEALTHY) + degradedDevice = true; + + Log(LOG_INFO); + caseFile->ReEvaluate(*this); + } + else if (Value("type") == "misc.fs.zfs.resilver_finish") + { + /* + * It's possible to get a resilver_finish event with no + * corresponding casefile. For example, if a damaged pool were + * exported, repaired, then reimported. + */ + Log(LOG_INFO); + CleanupSpares(); + } + + if (Value("type") == "misc.fs.zfs.vdev_remove" + && degradedDevice == false) { + + /* See if any other cases can make use of this device. */ + Log(LOG_INFO); + ZfsDaemon::RequestSystemRescan(); + } +} + +bool +ZfsEvent::TryDetach(Vdev &vdev, void *cbArg) +{ + /* + * Outline: + * if this device is a spare, and its parent includes one healthy, + * non-spare child, then detach this device. + */ + zpool_handle_t *hdl(static_cast(cbArg)); + + if (vdev.IsSpare()) { + std::list siblings; + std::list::iterator siblings_it; + boolean_t cleanup = B_FALSE; + + Vdev parent = vdev.Parent(); + siblings = parent.Children(); + + /* Determine whether the parent should be cleaned up */ + for (siblings_it = siblings.begin(); + siblings_it != siblings.end(); + siblings_it++) { + Vdev sibling = *siblings_it; + + if (!sibling.IsSpare() && + sibling.State() == VDEV_STATE_HEALTHY) { + cleanup = B_TRUE; + break; + } + } + + if (cleanup) { + syslog(LOG_INFO, "Detaching spare vdev %s from pool %s", + vdev.Path().c_str(), zpool_get_name(hdl)); + zpool_vdev_detach(hdl, vdev.Path().c_str()); + } + + } + + /* Always return false, because there may be other spares to detach */ + return (false); +} diff --git a/cmd/zfsd/zfsd_event.h b/cmd/zfsd/zfsd_event.h new file mode 100644 index 000000000000..fd3f9f7c5200 --- /dev/null +++ b/cmd/zfsd/zfsd_event.h @@ -0,0 +1,144 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file dev_ctl_event.h + * + * \brief Class hierarchy used to express events received via + * the devdctl API. + * + * Header requirements: + * #include + * #include + * #include + * + * #include + * #include + */ + +#ifndef _ZFSD_EVENT_H_ +#define _ZFSD_EVENT_H_ + +/*============================ Namespace Control =============================*/ +using std::string; + +/*=========================== Forward Declarations ===========================*/ +struct zpool_handle; +typedef struct zpool_handle zpool_handle_t; + +struct nvlist; +typedef struct nvlist nvlist_t; + +/*--------------------------------- ZfsEvent ---------------------------------*/ +class ZfsEvent : public DevdCtl::ZfsEvent +{ +public: + /** Specialized DevdCtlEvent object factory for ZFS events. */ + static BuildMethod Builder; + + virtual DevdCtl::Event *DeepCopy() const; + + /** + * Interpret and perform any actions necessary to + * consume the event. + * \return True if this event should be queued for later reevaluation + */ + virtual bool Process() const; + +protected: + /** DeepCopy Constructor. */ + ZfsEvent(const ZfsEvent &src); + + /** Constructor */ + ZfsEvent(Type, DevdCtl::NVPairMap &, const string &); + + /** + * Detach any spares that are no longer needed, but were not + * automatically detached by the kernel + */ + virtual void CleanupSpares() const; + virtual void ProcessPoolEvent() const; + static VdevCallback_t TryDetach; +}; + +class GeomEvent : public DevdCtl::GeomEvent +{ +public: + static BuildMethod Builder; + + virtual DevdCtl::Event *DeepCopy() const; + + virtual bool Process() const; + +protected: + /** DeepCopy Constructor. */ + GeomEvent(const GeomEvent &src); + + /** Constructor */ + GeomEvent(Type, DevdCtl::NVPairMap &, const string &); + + /** + * Attempt to match the ZFS labeled device at devPath with an active + * CaseFile for a missing vdev. If a CaseFile is found, attempt + * to re-integrate the device with its pool. + * + * \param devPath The devfs path to the potential leaf vdev. + * \param physPath The physical path string reported by the device + * at devPath. + * \param devConfig The ZFS label information found on the device + * at devPath. + * + * \return true if the event that caused the online action can + * be considered consumed. + */ + static bool OnlineByLabel(const string &devPath, + const string& physPath, + nvlist_t *devConfig); + + /** + * \brief Read and return label information for a device. + * + * \param devFd The device from which to read ZFS label information. + * \param inUse The device is part of an active or potentially + * active configuration. + * \param degraded The device label indicates the vdev is not healthy. + * + * \return If label information is available, an nvlist describing + * the vdev configuraiton found on the device specified by + * devFd. Otherwise NULL. + */ + static nvlist_t *ReadLabel(int devFd, bool &inUse, bool °raded); + +}; +#endif /*_ZFSD_EVENT_H_ */ diff --git a/cmd/zfsd/zfsd_exception.cc b/cmd/zfsd/zfsd_exception.cc new file mode 100644 index 000000000000..e83dd0f6e506 --- /dev/null +++ b/cmd/zfsd/zfsd_exception.cc @@ -0,0 +1,135 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file zfsd_exception + * + * Implementation of the ZfsdException class. + */ +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include + +#include "vdev.h" +#include "zfsd_exception.h" + +__FBSDID("$FreeBSD$"); +/*============================ Namespace Control =============================*/ +using std::endl; +using std::string; +using std::stringstream; + +/*=========================== Class Implementations ==========================*/ +/*------------------------------- ZfsdException ------------------------------*/ +ZfsdException::ZfsdException(const char *fmt, ...) + : DevdCtl::Exception(), + m_poolConfig(NULL), + m_vdevConfig(NULL) +{ + va_list ap; + + va_start(ap, fmt); + FormatLog(fmt, ap); + va_end(ap); +} + +ZfsdException::ZfsdException(zpool_handle_t *pool, const char *fmt, ...) + : DevdCtl::Exception(), + m_poolConfig(zpool_get_config(pool, NULL)), + m_vdevConfig(NULL) +{ + va_list ap; + + va_start(ap, fmt); + FormatLog(fmt, ap); + va_end(ap); +} + +ZfsdException::ZfsdException(nvlist_t *poolConfig, const char *fmt, ...) + : DevdCtl::Exception(), + m_poolConfig(poolConfig), + m_vdevConfig(NULL) +{ + va_list ap; + + va_start(ap, fmt); + FormatLog(fmt, ap); + va_end(ap); +} + +void +ZfsdException::Log() const +{ + stringstream output; + + if (m_poolConfig != NULL) { + + output << "Pool "; + + const char *poolName; + if (nvlist_lookup_string(m_poolConfig, ZPOOL_CONFIG_POOL_NAME, + &poolName) == 0) + output << poolName; + else + output << "Unknown"; + output << ": "; + } + + if (m_vdevConfig != NULL) { + + if (m_poolConfig != NULL) { + Vdev vdev(m_poolConfig, m_vdevConfig); + + output << "Vdev " << vdev.GUID() << ": "; + } else { + Vdev vdev(m_vdevConfig); + + output << "Pool " << vdev.PoolGUID() << ": "; + output << "Vdev " << vdev.GUID() << ": "; + } + } + + output << m_log << endl; + syslog(LOG_ERR, "%s", output.str().c_str()); +} + diff --git a/cmd/zfsd/zfsd_exception.h b/cmd/zfsd/zfsd_exception.h new file mode 100644 index 000000000000..5170b2d0dbb1 --- /dev/null +++ b/cmd/zfsd/zfsd_exception.h @@ -0,0 +1,109 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file zfsd_exception.h + * + * Definition of the ZfsdException class hierarchy. All exceptions + * explicitly thrown by Zfsd are defined here. + * + * Header requirements: + * #include + * + * #include + */ +#ifndef _ZFSD_EXCEPTION_H_ +#define _ZFSD_EXCEPTION_H_ + +/*=========================== Forward Declarations ===========================*/ +struct zpool_handle; +typedef struct zpool_handle zpool_handle_t; + +struct nvlist; +typedef struct nvlist nvlist_t; + +/*============================= Class Definitions ============================*/ +/*------------------------------- ZfsdException ------------------------------*/ +/** + * \brief Class allowing unified reporting/logging of exceptional events. + */ +class ZfsdException : public DevdCtl::Exception +{ +public: + /** + * \brief ZfsdException constructor allowing arbitrary string + * data to be reported. + * + * \param fmt Printf-like string format specifier. + */ + ZfsdException(const char *fmt, ...); + + /** + * \brief ZfsdException constructor allowing arbitrary string + * data to be reported and associated with the configuration + * data for a ZFS pool. + * + * \param pool Pool handle describing the pool to which this + * exception is associated. + * \param fmt Printf-like string format specifier. + * + * Instantiation with this method is used to report global + * pool errors. + */ + ZfsdException(zpool_handle_t *pool, const char *, ...); + + /** + * \brief ZfsdException constructor allowing arbitrary string + * data to be reported and associated with the configuration + * data for a ZFS pool. + * + * \param poolConfig Pool configuration describing the pool to + * which this exception is associated. + * \param fmt Printf-like string format specifier. + * + * Instantiation with this method is used to report global + * pool errors. + */ + ZfsdException(nvlist_t *poolConfig, const char *, ...); + + /** + * \brief Emit exception data to syslog(3). + */ + virtual void Log() const; +private: + nvlist_t *m_poolConfig; + nvlist_t *m_vdevConfig; +}; + +#endif /* _ZFSD_EXCEPTION_H_ */ diff --git a/cmd/zfsd/zfsd_main.cc b/cmd/zfsd/zfsd_main.cc new file mode 100644 index 000000000000..f090631e21f2 --- /dev/null +++ b/cmd/zfsd/zfsd_main.cc @@ -0,0 +1,90 @@ +/*- + * Copyright (c) 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Alan Somers (Spectra Logic Corporation) + */ + +/** + * \file zfsd_main.cc + * + * main function for the ZFS Daemon. Separated to facilitate testing. + * + */ + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "vdev_iterator.h" +#include "zfsd.h" + +__FBSDID("$FreeBSD$"); + +/*=============================== Program Main ===============================*/ +static void +usage() +{ + fprintf(stderr, "usage: %s [-d]\n", getprogname()); + exit(1); +} + +/** + * Program entry point. + */ +int +main(int argc, char **argv) +{ + int ch; + + while ((ch = getopt(argc, argv, "d")) != -1) { + switch (ch) { + case 'd': + g_debug++; + break; + default: + usage(); + } + } + + ZfsDaemon::Run(); + + return (0); +} diff --git a/cmd/zfsd/zpool_list.cc b/cmd/zfsd/zpool_list.cc new file mode 100644 index 000000000000..82c35736df13 --- /dev/null +++ b/cmd/zfsd/zpool_list.cc @@ -0,0 +1,122 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file zpool_list.cc + * + * Implementation of the ZpoolList class. + */ +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "vdev.h" +#include "vdev_iterator.h" +#include "zpool_list.h" +#include "zfsd.h" + +/*============================ Namespace Control =============================*/ +using DevdCtl::Guid; + +/*=========================== Class Implementations ==========================*/ +/*--------------------------------- ZpoolList --------------------------------*/ +bool +ZpoolList::ZpoolAll(zpool_handle_t *pool, nvlist_t *poolConfig, void *cbArg) +{ + return (true); +} + +bool +ZpoolList::ZpoolByGUID(zpool_handle_t *pool, nvlist_t *poolConfig, + void *cbArg) +{ + Guid *desiredPoolGUID(static_cast(cbArg)); + uint64_t poolGUID; + + /* We are only intested in the pool that matches our pool GUID. */ + return (nvlist_lookup_uint64(poolConfig, ZPOOL_CONFIG_POOL_GUID, + &poolGUID) == 0 + && poolGUID == (uint64_t)*desiredPoolGUID); +} + +bool +ZpoolList::ZpoolByName(zpool_handle_t *pool, nvlist_t *poolConfig, void *cbArg) +{ + const string &desiredPoolName(*static_cast(cbArg)); + + /* We are only intested in the pool that matches our pool GUID. */ + return (desiredPoolName == zpool_get_name(pool)); +} + +int +ZpoolList::LoadIterator(zpool_handle_t *pool, void *data) +{ + ZpoolList *zpl(reinterpret_cast(data)); + nvlist_t *poolConfig(zpool_get_config(pool, NULL)); + + if (zpl->m_filter(pool, poolConfig, zpl->m_filterArg)) + zpl->push_back(pool); + else + zpool_close(pool); + return (0); +} + +ZpoolList::ZpoolList(PoolFilter_t *filter, void * filterArg) + : m_filter(filter), + m_filterArg(filterArg) +{ + zpool_iter(g_zfsHandle, LoadIterator, this); +} + +ZpoolList::~ZpoolList() +{ + for (iterator it(begin()); it != end(); it++) + zpool_close(*it); + + clear(); +} diff --git a/cmd/zfsd/zpool_list.h b/cmd/zfsd/zpool_list.h new file mode 100644 index 000000000000..7d1b176ac710 --- /dev/null +++ b/cmd/zfsd/zpool_list.h @@ -0,0 +1,133 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file zpool_list.h + * + * ZpoolList class definition. ZpoolList is a standard container + * allowing filtering and iteration of imported ZFS pool information. + * + * Header requirements: + * + * #include + * #include + */ +#ifndef _ZPOOL_LIST_H_ +#define _ZPOOL_LIST_H_ + +/*============================ Namespace Control =============================*/ +using std::string; + +/*=========================== Forward Declarations ===========================*/ +struct zpool_handle; +typedef struct zpool_handle zpool_handle_t; + +struct nvlist; +typedef struct nvlist nvlist_t; + +class Vdev; + +/*============================= Class Definitions ============================*/ +/*--------------------------------- ZpoolList --------------------------------*/ +class ZpoolList; +typedef bool PoolFilter_t(zpool_handle_t *pool, nvlist_t *poolConfig, + void *filterArg); + +/** + * \brief Container of imported ZFS pool data. + * + * ZpoolList is a convenience class that converts libzfs's ZFS + * pool methods into a standard list container. + */ +class ZpoolList : public std::list +{ +public: + /** + * \brief Utility ZpoolList construction filter that causes all + * pools known to the system to be included in the + * instantiated ZpoolList. + */ + static PoolFilter_t ZpoolAll; + + /** + * \brief Utility ZpoolList construction filter that causes only + * a pool known to the system and having the specified GUID + * to be included in the instantiated ZpoolList. + */ + static PoolFilter_t ZpoolByGUID; + + /** + * \brief Utility ZpoolList construction filter that causes only + * pools known to the system and having the specified name + * to be included in the instantiated ZpoolList. + */ + static PoolFilter_t ZpoolByName; + + /** + * \brief ZpoolList constructor + * + * \param filter The filter function to use when constructing + * the ZpoolList. This may be one of the static + * utility filters defined for ZpoolList or a + * user defined function. + * \param filterArg A single argument to pass into the filter function + * when it is invoked on each candidate pool. + */ + ZpoolList(PoolFilter_t *filter = ZpoolAll, void *filterArg = NULL); + ~ZpoolList(); + +private: + /** + * \brief Helper routine used to populate the internal + * data store of ZFS pool objects using libzfs's + * zpool_iter() function. + * + * \param pool The ZFS pool object to filter. + * \param data User argument passed through zpool_iter(). + */ + static int LoadIterator(zpool_handle_t *pool, void *data); + + /** + * \brief The filter with which this ZpoolList was constructed. + */ + PoolFilter_t *m_filter; + + /** + * \brief The filter argument with which this ZpoolList was + * constructed. + */ + void *m_filterArg; +}; + +#endif /* _ZPOOL_ITERATOR_H_ */ diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index fbd4b81dfacc..0ba326473e6c 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -989,8 +989,12 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv, boolean_t replacing) * magic value left by the previous filesystem. */ verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path)); +#ifdef TRUENAS_SCALE_NEVER_WHOLEDISK + wholedisk = B_FALSE; +#else verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk)); +#endif if (!wholedisk) { /* @@ -1155,9 +1159,11 @@ is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force, &child, &children) != 0) { verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path)); +#ifndef TRUENAS_SCALE_NEVER_WHOLEDISK if (strcmp(type, VDEV_TYPE_DISK) == 0) verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk)); +#endif /* * As a generic check, we look to see if this is a replace of a diff --git a/config/Rules.am b/config/Rules.am index 9c0714c82513..8b43fd6cd4ca 100644 --- a/config/Rules.am +++ b/config/Rules.am @@ -29,6 +29,15 @@ if BUILD_FREEBSD AM_CFLAGS += -fPIC -Werror -Wno-unknown-pragmas -Wno-enum-conversion AM_CFLAGS += -include $(top_srcdir)/include/os/freebsd/spl/sys/ccompile.h AM_CFLAGS += -I/usr/include -I/usr/local/include + +AM_CXXFLAGS = -std=c++17 -Wall -Wstrict-prototypes -fno-strict-aliasing +AM_CXXFLAGS += $(NO_OMIT_FRAME_POINTER) +AM_CXXFLAGS += $(DEBUG_CFLAGS) +AM_CXXFLAGS += $(ASAN_CFLAGS) +AM_CXXFLAGS += $(CODE_COVERAGE_FLAGS) $(NO_FORMAT_ZERO_LENGTH) +AM_CXXFLAGS += -fPIC -Werror -Wno-unknown-pragmas -Wno-enum-conversion +AM_CXXFLAGS += -include $(top_srcdir)/include/os/freebsd/spl/sys/ccompile.h +AM_CXXFLAGS += -I/usr/include -I/usr/local/include endif AM_CPPFLAGS += -D_GNU_SOURCE @@ -44,6 +53,12 @@ AM_CPPFLAGS += -DPKGDATADIR=\"$(pkgdatadir)\" AM_CPPFLAGS += $(DEBUG_CPPFLAGS) AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS) AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-@ac_system_l@-user\" +if BUILD_LINUX +# NAS-118236: Vdevs from CORE are labeled with whole_disk=1, but we did not +# create the partition table in ZFS and must not repartition them. We know +# SCALE never creates wholedisk vdevs, so just ignore the config. +AM_CPPFLAGS += -DTRUENAS_SCALE_NEVER_WHOLEDISK +endif if ASAN_ENABLED AM_CPPFLAGS += -DZFS_ASAN_ENABLED diff --git a/config/kernel-fpu.m4 b/config/kernel-fpu.m4 index edfde1a02d30..f419f2982594 100644 --- a/config/kernel-fpu.m4 +++ b/config/kernel-fpu.m4 @@ -62,7 +62,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ ], [ kernel_fpu_begin(); kernel_fpu_end(); - ], [], [ZFS_META_LICENSE]) + ], [], []) ZFS_LINUX_TEST_SRC([__kernel_fpu], [ #include @@ -77,7 +77,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [ ], [ __kernel_fpu_begin(); __kernel_fpu_end(); - ], [], [ZFS_META_LICENSE]) + ], [], []) ZFS_LINUX_TEST_SRC([kernel_neon], [ #include @@ -92,7 +92,7 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ dnl # Legacy kernel dnl # AC_MSG_CHECKING([whether kernel fpu is available]) - ZFS_LINUX_TEST_RESULT([kernel_fpu_license], [ + ZFS_LINUX_TEST_RESULT([kernel_fpu], [ AC_MSG_RESULT(kernel_fpu_*) AC_DEFINE(HAVE_KERNEL_FPU, 1, [kernel has kernel_fpu_* functions]) @@ -102,7 +102,7 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [ dnl # dnl # Linux 4.2 kernel dnl # - ZFS_LINUX_TEST_RESULT_SYMBOL([__kernel_fpu_license], + ZFS_LINUX_TEST_RESULT_SYMBOL([__kernel_fpu], [__kernel_fpu_begin], [arch/x86/kernel/fpu/core.c arch/x86/kernel/i387.c], [ AC_MSG_RESULT(__kernel_fpu_*) diff --git a/configure.ac b/configure.ac index 2ce049c58219..45de74006346 100644 --- a/configure.ac +++ b/configure.ac @@ -49,6 +49,8 @@ AC_CONFIG_HEADERS([zfs_config.h], [ LT_INIT AC_PROG_INSTALL AC_PROG_CC +AC_PROG_CXX +AC_PROG_LIBTOOL AC_PROG_LN_S PKG_PROG_PKG_CONFIG AM_PROG_AS diff --git a/contrib/debian/changelog.in b/contrib/debian/changelog.in index 9ced39b1f7b5..a36bfb1aab85 100644 --- a/contrib/debian/changelog.in +++ b/contrib/debian/changelog.in @@ -17,3 +17,152 @@ openzfs-linux (2.1.99-1) unstable; urgency=low * This packaging is a fork of Debian zfs-linux 2.1.6-2 release. -- Umer Saleem Fri, 11 Oct 2022 15:00:00 -0400 +openzfs-linux (2.1.12-0) unstable; urgency=medium + + * Merge tag zfs-2.1.12 + + -- Ameer Hamza Wed, 14 Jun 2023 09:00:00 -0500 + +openzfs-linux (2.1.11-0) unstable; urgency=medium + + * Merge tag zfs-2.1.11 + + -- Ameer Hamza Thu, 20 Apr 2023 13:00:00 -0500 + +openzfs-linux (2.1.10-0) unstable; urgency=medium + + * Merge tag zfs-2.1.10 + + -- Ameer Hamza Mon, 17 Apr 2023 12:00:00 -0500 + +openzfs-linux (2.1.9-0) unstable; urgency=medium + + * Merge tag zfs-2.1.9 + + -- Ryan Moeller Wed, 25 Jan 2023 14:00:00 -0500 + +openzfs-linux (2.1.8-0) unstable; urgency=medium + + * Merge tag zfs-2.1.8 + + -- Ryan Moeller Fri, 20 Jan 2023 12:30:00 -0500 + +openzfs-linux (2.1.7-1) unstable; urgency=medium + + * Integrate native Debian packaging with TrueNAS ZFS. + * This packaging is a fork of Debian zfs-linux 2.1.6-2 release. + + -- Umer Saleem Wed, 14 Dec 2022 15:00:00 -0400 + +openzfs (2.1.7-0) unstable; urgency=medium + + * Merge tag zfs-2.1.7 + * Expose libzutil error info in libpc_handle_t + * Build packages with debug symbols + * Add support for overlayfs for docker + * Optimize microzaps + * zed: Avoid core dump if wholedisk property does not exist + * zed: post a udev change event from spa_vdev_attach() + * SCALE: ignore wholedisk + * Skip trivial permission checks on xattrs for POSIX ACL type + + -- Ryan Moeller Wed, 07 Dec 2022 16:00:00 -0500 + +openzfs (2.1.6-0) unstable; urgency=medium + + * Merge tag zfs-2.1.6 + * zed: mark disks as REMOVED when they are removed + * Provide kfpu_begin/end from spl + * Add snapshots_changed as property + * Add createtxg sort support for simple snapshot iterator + * Expose ZFS dataset case sensitivity setting via sb_opts + + -- Ryan Moeller Wed, 22 Jun 2022 16:00:00 -0500 + +openzfs (2.1.5-0) unstable; urgency=medium + + * Merged tag zfs-2.1.5 + + -- Ryan Moeller Wed, 22 Jun 2022 16:00:00 -0500 + +openzfs (2.1.4-1) unstable; urgency=medium + + * Merged from zfs-2.1.5-staging + * Remove wrong assertion in log spacemap + * Add debug symbols to the truenas package + + -- Ryan Moeller Thu, 02 Jun 2022 08:00:00 -0500 + +openzfs (2.1.4-0) unstable; urgency=medium + + * Merged OpenZFS zfs-2.1.4 + * Also merged from zfs-2.1.5-staging + * Fix zfs send -V + * Expose zpool guids through kstats + + -- Ryan Moeller Wed, 18 May 2022 14:00:00 -0500 + +openzfs (2.1.3-0) unstable; urgency=medium + + * Merged OpenZFS zfs-2.1.3 + + -- Ryan Moeller Thu, 10 Mar 2021 16:00:00 -0400 + +openzfs (2.1.2-1) unstable; urgency=medium + + * Merged OpenZFS zfs-2.1.2-release + * Implement FS_IOC_GETVERSION + * Improve log spacemap load time after unclean export + + -- Ryan Moeller Mon, 20 Dec 2021 10:00:00 -0400 + +openzfs (2.1.2-0) unstable; urgency=medium + + * Merged OpenZFS zfs-2.1.2-staging + * Updated Python build dependencies + * Linux 5.14 build fixes + + -- Ryan Moeller Wed, 03 Nov 2021 10:00:00 -0400 + +openzfs (2.1.1-0) unstable; urgency=medium + + * Merged OpenZFS 2.1.1 + * Removed feature@xattr_compat and xattr_fallback property + + -- Ryan Moeller Thu, 07 Oct 2021 14:09:30 -0400 + +openzfs (2.1.0-0) unstable; urgency=medium + + * Rebased to OpenZFS 2.1.0 + + -- Ryan Moeller Thu, 01 Apr 2021 13:00:00 -0500 + +openzfs (2.0.4-0) unstable; urgency=medium + + * Rebased to OpenZFS 2.0.4 + + -- Ryan Moeller Tue, 09 Mar 2021 14:00:00 -0500 + +openzfs (2.0.3-0) unstable; urgency=medium + + * Rebased to OpenZFS 2.0.3 + + -- Ryan Moeller Wed, 24 Feb 2021 15:00:00 -0500 + +openzfs (2.0.2-0) unstable; urgency=medium + + * Rebased to OpenZFS 2.0.2 + + -- Ryan Moeller Mon, 01 Feb 2021 15:00:00 -0500 + +openzfs (2.0.1-0) unstable; urgency=medium + + * Rebased to OpenZFS 2.0.1 + + -- Ryan Moeller Thu, 07 Jan 2021 17:34:28 -0500 + +openzfs (2.0.0-0) unstable; urgency=medium + + * Initial package for TrueNAS SCALE based on OpenZFS 2.0.0 + + -- Ryan Moeller Mon, 14 Sep 2020 22:01:55 -0400 diff --git a/contrib/debian/control b/contrib/debian/control index e56fbf0f1c93..1f1a98a5ba99 100644 --- a/contrib/debian/control +++ b/contrib/debian/control @@ -4,13 +4,13 @@ Priority: optional Maintainer: ZFS on Linux specific mailing list Build-Depends: debhelper-compat (= 12), dh-python, - dh-sequence-dkms | dkms (>> 2.1.1.2-5), + dh-sequence-dkms, libaio-dev, libblkid-dev, - libcurl4-openssl-dev, libelf-dev, libpam0g-dev, libssl-dev | libssl1.0-dev, + libtirpc-dev, libtool, libudev-dev, lsb-release, @@ -35,6 +35,7 @@ Depends: ${misc:Depends}, ${shlibs:Depends} Breaks: libnvpair1, libnvpair3 Replaces: libnvpair1, libnvpair3, libnvpair3linux Conflicts: libnvpair3linux +Provides: libnvpair3 Description: Solaris name-value library for Linux This library provides routines for packing and unpacking nv pairs for transporting data across process boundaries, transporting between @@ -46,6 +47,7 @@ Architecture: linux-any Depends: libpam-runtime, ${misc:Depends}, ${shlibs:Depends} Replaces: libpam-zfs Conflicts: libpam-zfs +Provides: pam-zfs-key Description: PAM module for managing encryption keys for ZFS OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, @@ -61,6 +63,7 @@ Depends: ${misc:Depends}, ${shlibs:Depends} Breaks: libuutil1, libuutil3 Replaces: libuutil1, libuutil3, libuutil3linux Conflicts: libuutil3linux +Provides: libuutil3 Description: Solaris userland utility library for Linux This library provides a variety of glue functions for ZFS on Linux: * libspl: The Solaris Porting Layer userland library, which provides APIs @@ -84,7 +87,7 @@ Depends: libssl-dev | libssl1.0-dev, ${misc:Depends} Replaces: libzfslinux-dev Conflicts: libzfslinux-dev -Provides: libnvpair-dev, libuutil-dev +Provides: libnvpair-dev, libuutil-dev, libzfs5-devel Description: OpenZFS filesystem development files for Linux Header files and static libraries for compiling software against libraries of OpenZFS filesystem. @@ -102,6 +105,7 @@ Recommends: libcurl4 Breaks: libzfs2, libzfs4 Replaces: libzfs2, libzfs4, libzfs4linux Conflicts: libzfs4linux +Provides: libzfs5 Description: OpenZFS filesystem library for Linux - general support OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, @@ -130,6 +134,7 @@ Depends: ${misc:Depends}, ${shlibs:Depends} Breaks: libzpool2, libzpool5 Replaces: libzpool2, libzpool5, libzpool5linux Conflicts: libzpool5linux +Provides: libzpool5 Description: OpenZFS pool library for Linux OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, @@ -146,6 +151,7 @@ Depends: python3-cffi, ${python3:Depends} Replaces: python3-pyzfs Conflicts: python3-pyzfs +Provides: python3-pyzfs Description: wrapper for libzfs_core C library libzfs_core is intended to be a stable interface for programmatic administration of ZFS. This wrapper provides one-to-one wrappers for @@ -197,6 +203,7 @@ Recommends: openzfs-zfs-zed, openzfs-zfsutils (>= ${source:Version}), ${linux:Re Suggests: debhelper Breaks: spl-dkms (<< 0.8.0~rc1) Replaces: spl-dkms, zfs-dkms +Conflicts: zfs-dkms Provides: openzfs-zfs-modules Description: OpenZFS filesystem kernel modules for Linux OpenZFS is a storage platform that encompasses the functionality of @@ -216,6 +223,7 @@ Depends: busybox-initramfs | busybox-static | busybox, Breaks: zfsutils-linux (<= 0.7.11-2) Replaces: zfsutils-linux (<= 0.7.11-2), zfs-initramfs Conflicts: zfs-initramfs +Provides: zfs-initramfs Description: OpenZFS root filesystem capabilities for Linux - initramfs OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, @@ -232,6 +240,7 @@ Depends: dracut, ${misc:Depends} Conflicts: zfs-dracut Replaces: zfs-dracut +Provides: zfs-dracut Description: OpenZFS root filesystem capabilities for Linux - dracut OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, @@ -248,10 +257,11 @@ Depends: openzfs-libnvpair3 (= ${binary:Version}), openzfs-libuutil3 (= ${binary:Version}), openzfs-libzfs4 (= ${binary:Version}), openzfs-libzpool5 (= ${binary:Version}), + openzfs-zfs-modules | openzfs-zfs-dkms, python3, ${misc:Depends}, ${shlibs:Depends} -Recommends: lsb-base, openzfs-zfs-modules | openzfs-zfs-dkms, openzfs-zfs-zed +Recommends: lsb-base, openzfs-zfs-zed Breaks: openrc, spl (<< 0.7.9-2), spl-dkms (<< 0.8.0~rc1), @@ -262,7 +272,7 @@ Conflicts: zfs, zfs-fuse, zfsutils-linux Suggests: nfs-kernel-server, samba-common-bin (>= 3.0.23), openzfs-zfs-initramfs | openzfs-zfs-dracut -Provides: openzfsutils +Provides: openzfs Description: command-line tools to manage OpenZFS filesystems OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, @@ -316,6 +326,7 @@ Recommends: nfs-kernel-server Breaks: zfsutils-linux (<= 0.7.9-2) Replaces: zfsutils-linux (<= 0.7.9-2), zfs-test Conflicts: zutils, zfs-test +Provides: zfs-test Description: OpenZFS test infrastructure and support scripts OpenZFS is a storage platform that encompasses the functionality of traditional filesystems and volume managers. It supports data checksums, diff --git a/contrib/debian/control.modules.in b/contrib/debian/control.modules.in index 34eb7fafba7c..e6e2b9b5be20 100644 --- a/contrib/debian/control.modules.in +++ b/contrib/debian/control.modules.in @@ -5,7 +5,7 @@ Maintainer: ZFS on Linux specific mailing list Build-Depends: debhelper-compat (= 10), dkms (>> 2.1.1.2-5), libtool, - linux-headers-_KVERS_ | raspberrypi-kernel-headers + linux-headers-_KVERS_ Standards-Version: 4.3.0 Homepage: http://www.openzfs.org/ Vcs-Git: https://github.com/openzfs/zfs.git @@ -14,7 +14,7 @@ Vcs-Browser: https://github.com/openzfs/zfs Package: openzfs-zfs-modules-_KVERS_ Architecture: _ARCH_ Provides: openzfs-zfs-modules -Depends: linux-image-_KVERS_ | raspberrypi-kernel +Depends: linux-image-_KVERS_ | linux-image-amd64 Recommends: openzfsutils Replaces: zfs-modules-_KVERS_ Conflicts: zfs-modules-_KVERS_ diff --git a/contrib/debian/rules.in b/contrib/debian/rules.in index a3a05efacb50..38c7038a701f 100755 --- a/contrib/debian/rules.in +++ b/contrib/debian/rules.in @@ -25,7 +25,7 @@ PARALLEL = $(subst parallel=,,$(filter parallel=%,$(DEB_BUILD_OPTIONS))) NJOBS = -j$(or $(PARALLEL),$(NUM_CPUS),1) %: - dh $@ --with autoreconf,dkms,python3,sphinxdoc + dh $@ --with autoreconf,python3,sphinxdoc override_dh_autoreconf: @# Embed the downstream version in the module. @@ -163,7 +163,6 @@ override_dh_installinit: dh_installinit -r --no-restart-after-upgrade --name zfs-import dh_installinit -r --no-restart-after-upgrade --name zfs-mount dh_installinit -r --no-restart-after-upgrade --name zfs-load-key - dh_installinit -R --name zfs-share dh_installinit -R --name zfs-zed override_dh_installsystemd: diff --git a/contrib/initramfs/scripts/local-top/zfs b/contrib/initramfs/scripts/local-top/zfs index 6b80e9f43607..bae1707bdaba 100755 --- a/contrib/initramfs/scripts/local-top/zfs +++ b/contrib/initramfs/scripts/local-top/zfs @@ -48,6 +48,8 @@ activate_vg() } udev_settle -activate_vg +# TrueNAS SCALE doesn't boot from pools on top of LVM, and the scan can take a +# significant amount of time on systems with a large number of disks. Skip it. +#activate_vg exit 0 diff --git a/contrib/initramfs/scripts/zfs b/contrib/initramfs/scripts/zfs index 0a2bd2efda7a..8d54f16bb16b 100644 --- a/contrib/initramfs/scripts/zfs +++ b/contrib/initramfs/scripts/zfs @@ -283,6 +283,7 @@ load_module_initrd() while true; do # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear. + echo "1" > /sys/kernel/wait_for_device_probe if command -v wait_for_udev > /dev/null 2>&1 ; then wait_for_udev 10 elif command -v wait_for_dev > /dev/null 2>&1 ; then diff --git a/etc/Makefile.am b/etc/Makefile.am index 7187762d3802..df7795097d4e 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -21,6 +21,14 @@ SUBSTFILES += $(sysconf_zfs_DATA) SHELLCHECKSCRIPTS += $(sysconf_zfs_DATA) $(call SHELLCHECK_OPTS,$(sysconf_zfs_DATA)): SHELLCHECK_SHELL = sh +if BUILD_FREEBSD +rcdir = $(sysconfdir)/rc.d +rc_SCRIPTS = %D%/rc.d/zfsd + +SUBSTFILES += $(rc_SCRIPTS) +SHELLCHECKSCRIPTS += $(rc_SCRIPTS) +$(call SHELLCHECK_OPTS,$(rc_SCRIPTS)): SHELLCHECK_SHELL = sh +endif if BUILD_LINUX initconf_DATA = \ diff --git a/etc/rc.d/.gitignore b/etc/rc.d/.gitignore new file mode 100644 index 000000000000..5e3efca4d908 --- /dev/null +++ b/etc/rc.d/.gitignore @@ -0,0 +1 @@ +/zfsd diff --git a/etc/rc.d/zfsd.in b/etc/rc.d/zfsd.in new file mode 100644 index 000000000000..942b03f1e4df --- /dev/null +++ b/etc/rc.d/zfsd.in @@ -0,0 +1,14 @@ +#!/bin/sh + +# PROVIDE: zfsd +# REQUIRE: devd zfs +# KEYWORD: nojail shutdown + +. /etc/rc.subr + +name="zfsd" +rcvar="zfsd_enable" +command="@sbindir@/${name}" + +load_rc_config ${name} +run_rc_command "${1}" diff --git a/etc/systemd/system/50-zfs.preset b/etc/systemd/system/50-zfs.preset index e4056a92cd98..d2da85d330eb 100644 --- a/etc/systemd/system/50-zfs.preset +++ b/etc/systemd/system/50-zfs.preset @@ -3,7 +3,7 @@ enable zfs-import-cache.service disable zfs-import-scan.service enable zfs-import.target enable zfs-mount.service -enable zfs-share.service +disable zfs-share.service enable zfs-zed.service enable zfs-volume-wait.service enable zfs.target diff --git a/etc/systemd/system/zfs-share.service.in b/etc/systemd/system/zfs-share.service.in index 1a6342a06fec..7c90f74a684a 100644 --- a/etc/systemd/system/zfs-share.service.in +++ b/etc/systemd/system/zfs-share.service.in @@ -17,4 +17,4 @@ EnvironmentFile=-@initconfdir@/zfs ExecStart=@sbindir@/zfs share -a [Install] -WantedBy=zfs.target +WantedBy=multi-user.target diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h index 699b8a571824..4e99c2d1c212 100644 --- a/include/os/linux/kernel/linux/simd_x86.h +++ b/include/os/linux/kernel/linux/simd_x86.h @@ -106,29 +106,8 @@ #define kfpu_init() 0 #define kfpu_fini() ((void) 0) -#if defined(HAVE_UNDERSCORE_KERNEL_FPU) -#define kfpu_begin() \ -{ \ - preempt_disable(); \ - __kernel_fpu_begin(); \ -} -#define kfpu_end() \ -{ \ - __kernel_fpu_end(); \ - preempt_enable(); \ -} - -#elif defined(HAVE_KERNEL_FPU) -#define kfpu_begin() kernel_fpu_begin() -#define kfpu_end() kernel_fpu_end() - -#else -/* - * This case is unreachable. When KERNEL_EXPORTS_X86_FPU is defined then - * either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined. - */ -#error "Unreachable kernel configuration" -#endif +extern void kfpu_begin(void); +extern void kfpu_end(void); #else /* defined(KERNEL_EXPORTS_X86_FPU) */ diff --git a/include/os/linux/spl/rpc/xdr.h b/include/os/linux/spl/rpc/xdr.h index 5b621fa9c863..606566113e1c 100644 --- a/include/os/linux/spl/rpc/xdr.h +++ b/include/os/linux/spl/rpc/xdr.h @@ -22,6 +22,7 @@ #define _SPL_RPC_XDR_H #include +#include /* * XDR enums and types. diff --git a/include/os/linux/spl/sys/acl.h b/include/os/linux/spl/sys/acl.h index 5cd7a56b86ec..d66b523b985e 100644 --- a/include/os/linux/spl/sys/acl.h +++ b/include/os/linux/spl/sys/acl.h @@ -83,6 +83,8 @@ typedef struct ace_object { #define ACL_PROTECTED 0x0002 #define ACL_DEFAULTED 0x0004 #define ACL_FLAGS_ALL (ACL_AUTO_INHERIT|ACL_PROTECTED|ACL_DEFAULTED) +#define ACL_IS_TRIVIAL 0x10000 +#define ACL_IS_DIR 0x20000 #define ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04 #define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index 91a4751fffb0..cfb2a6ed3e9f 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -106,6 +106,15 @@ zpl_chmod_acl(struct inode *ip) } #endif /* CONFIG_FS_POSIX_ACL */ +#if defined(HAVE_IOPS_PERMISSION_USERNS) +extern int zpl_permission(struct user_namespace *userns, struct inode *ip, + int mask); +#elif defined(HAVE_IOPS_PERMISSION_IDMAP) +extern int zpl_permission(struct mnt_idmap *idmap, struct inode *ip, int mask); +#else +extern int zpl_permission(struct inode *ip, int mask); +#endif + extern xattr_handler_t *zpl_xattr_handlers[]; /* zpl_ctldir.c */ diff --git a/include/sys/zfs_acl.h b/include/sys/zfs_acl.h index e19288528849..a137393548d8 100644 --- a/include/sys/zfs_acl.h +++ b/include/sys/zfs_acl.h @@ -211,6 +211,8 @@ void zfs_acl_ids_free(zfs_acl_ids_t *); boolean_t zfs_acl_ids_overquota(struct zfsvfs *, zfs_acl_ids_t *, uint64_t); int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *); int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *); +int zfs_stripacl(struct znode *, cred_t *); + void zfs_acl_rele(void *); void zfs_oldace_byteswap(ace_t *, int); void zfs_ace_byteswap(void *, size_t, boolean_t); diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 14410b153130..0379d1f52cb7 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3272,8 +3272,10 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &pathname) == 0) { uint64_t wholedisk = 0; +#ifndef TRUENAS_SCALE_NEVER_WHOLEDISK (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); +#endif /* * XXX - L2ARC 1.0 devices can't support expansion. @@ -4468,7 +4470,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, !(name_flags & VDEV_NAME_PATH)) { path = zfs_strip_path(path); } - +#ifndef TRUENAS_SCALE_NEVER_WHOLEDISK /* * Remove the partition from the path if this is a whole disk. */ @@ -4477,6 +4479,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, == 0 && value && !(name_flags & VDEV_NAME_PATH)) { return (zfs_strip_partition(path)); } +#endif } else { path = type; diff --git a/lib/libzfs/os/linux/libzfs_pool_os.c b/lib/libzfs/os/linux/libzfs_pool_os.c index 7b18e31c8674..b87f53c06f57 100644 --- a/lib/libzfs/os/linux/libzfs_pool_os.c +++ b/lib/libzfs/os/linux/libzfs_pool_os.c @@ -87,6 +87,7 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg) return (0); } +#ifndef TRUENAS_SCALE_NEVER_WHOLEDISK /* * Read the EFI label from the config, if a label does not exist then * pass back the error to the caller. If the caller has passed a non-NULL @@ -118,6 +119,7 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb) } return (err); } +#endif /* * determine where a partition starts on a disk in the current @@ -126,6 +128,9 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb) static diskaddr_t find_start_block(nvlist_t *config) { +#ifdef TRUENAS_SCALE_NEVER_WHOLEDISK + (void) config; +#else nvlist_t **child; uint_t c, children; diskaddr_t sb = MAXOFFSET_T; @@ -149,6 +154,7 @@ find_start_block(nvlist_t *config) return (sb); } } +#endif return (MAXOFFSET_T); } diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index f7026119b730..506b7deff5b7 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -660,23 +660,18 @@ platform, the behavior is the same as if it were set to .Sy off . .Bl -tag -compact -offset 4n -width "posixacl" .It Sy off -default on Linux, when a file system has the +when a file system has the .Sy acltype property set to off then ACLs are disabled. .It Sy noacl an alias for .Sy off .It Sy nfsv4 -default on -.Fx , -indicates that NFSv4-style ZFS ACLs should be used. +default, indicates that NFSv4-style ZFS ACLs should be used. These ACLs can be managed with the .Xr getfacl 1 and .Xr setfacl 1 . -The -.Sy nfsv4 -ZFS ACL type is not yet supported on Linux. .It Sy posix indicates POSIX ACLs should be used. POSIX ACLs are specific to Linux and are not functional on other platforms. diff --git a/module/Kbuild.in b/module/Kbuild.in index 0472a9348c13..c14fa4ec1bce 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -74,6 +74,7 @@ SPL_OBJS := \ spl-cred.o \ spl-err.o \ spl-generic.o \ + spl-kfpu.o \ spl-kmem-cache.o \ spl-kmem.o \ spl-kstat.o \ diff --git a/module/Makefile.in b/module/Makefile.in index 9b34b3dfaec7..d42d9cc1f804 100644 --- a/module/Makefile.in +++ b/module/Makefile.in @@ -53,6 +53,7 @@ FMAKE = env -u MAKEFLAGS make $(FMAKEFLAGS) modules-Linux: mkdir -p $(sort $(dir $(spl-objs) $(spl-))) mkdir -p $(sort $(dir $(zfs-objs) $(zfs-))) + @# Build the kernel modules. $(MAKE) -C @LINUX_OBJ@ $(if @KERNEL_CC@,CC=@KERNEL_CC@) \ $(if @KERNEL_LD@,LD=@KERNEL_LD@) $(if @KERNEL_LLVM@,LLVM=@KERNEL_LLVM@) \ M="$$PWD" @KERNEL_MAKE@ CONFIG_ZFS=m modules @@ -161,7 +162,7 @@ cppcheck-FreeBSD: cppcheck: cppcheck-@ac_system@ distdir: - cd @srcdir@ && find . -name '*.[chS]' -exec sh -c 'for f; do mkdir -p $$distdir/$${f%/*}; cp @srcdir@/$$f $$distdir/$$f; done' _ {} + + cd @srcdir@ && find . -name '*.[chxS]' -exec sh -c 'for f; do mkdir -p $$distdir/$${f%/*}; cp @srcdir@/$$f $$distdir/$$f; done' _ {} + cp @srcdir@/Makefile.bsd $$distdir/Makefile.bsd gen-zstd-symbols: diff --git a/module/os/freebsd/zfs/zfs_acl.c b/module/os/freebsd/zfs/zfs_acl.c index 20466aeaaa05..fb47013d06ef 100644 --- a/module/os/freebsd/zfs/zfs_acl.c +++ b/module/os/freebsd/zfs/zfs_acl.c @@ -2029,6 +2029,12 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) return (error); } +int +zfs_stripacl(znode_t *zp, cred_t *cr) +{ + return (SET_ERROR(EOPNOTSUPP)); +} + /* * Check accesses of interest (AoI) against attributes of the dataset * such as read-only. Returns zero if no AoI conflict with dataset diff --git a/module/os/linux/spl/spl-kfpu.c b/module/os/linux/spl/spl-kfpu.c new file mode 100644 index 000000000000..9e781c9f768f --- /dev/null +++ b/module/os/linux/spl/spl-kfpu.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2022 iXsystems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef KERNEL_EXPORTS_X86_FPU + +#include + +#if defined(HAVE_KERNEL_FPU) + +void +// cppcheck-suppress syntaxError +kfpu_begin(void) +{ + kernel_fpu_begin(); +} + +void +// cppcheck-suppress syntaxError +kfpu_end(void) +{ + kernel_fpu_end(); +} + +#elif defined(HAVE_UNDERSCORE_KERNEL_FPU) + +void +// cppcheck-suppress syntaxError +kfpu_begin(void) +{ + preempt_disable(); + __kernel_fpu_begin(); +} + +void +// cppcheck-suppress syntaxError +kfpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} + +#else +/* + * This case should be unreachable. When KERNEL_EXPORTS_X86_FPU is defined + * then either HAVE_UNDERSCORE_KERNEL_FPU or HAVE_KERNEL_FPU must be defined. + */ +#error "Unreachable kernel configuration" +#endif + +EXPORT_SYMBOL(kfpu_begin); +EXPORT_SYMBOL(kfpu_end); + +#endif /* KERNEL_EXPORTS_X86_FPU */ diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c index 5d1b4383412a..a3801c605dd6 100644 --- a/module/os/linux/zfs/policy.c +++ b/module/os/linux/zfs/policy.c @@ -33,6 +33,7 @@ #include #include #include +#include /* * The passed credentials cannot be directly verified because Linux only @@ -103,13 +104,56 @@ secpolicy_sys_config(const cred_t *cr, boolean_t checkonly) * Like secpolicy_vnode_access() but we get the actual wanted mode and the * current mode of the file, not the missing bits. * - * Enforced in the Linux VFS. + * If filesystem is using NFSv4 ACLs, validate the current mode + * and the wanted mode are the same, otherwise access fails. + * + * If using POSIX ACLs or no ACLs, enforced in the Linux VFS. */ int secpolicy_vnode_access2(const cred_t *cr, struct inode *ip, uid_t owner, mode_t curmode, mode_t wantmode) { - return (0); + mode_t remainder = ~curmode & wantmode; + uid_t uid = crgetuid(cr); + if ((ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_NFSV4) || + (remainder == 0)) { + return (0); + } + + if (uid == 0) + return (0); + +#if defined(CONFIG_USER_NS) + if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) + return (EPERM); +#endif + + /* + * There are some situations in which capabilities + * may allow overriding the DACL. + */ + if (S_ISDIR(ip->i_mode)) { + if (!(wantmode & S_IWUSR) && + (priv_policy_user(cr, CAP_DAC_READ_SEARCH, EPERM) == 0)) { + return (0); + } + if (priv_policy_user(cr, CAP_DAC_OVERRIDE, EPERM) == 0) { + return (0); + } + return (EACCES); + } + + if ((wantmode == S_IRUSR) && + (priv_policy_user(cr, CAP_DAC_READ_SEARCH, EPERM) == 0)) { + return (0); + } + + if (!(remainder & S_IXUSR) && + (priv_policy_user(cr, CAP_DAC_OVERRIDE, EPERM) == 0)) { + return (0); + } + + return (EACCES); } /* diff --git a/module/os/linux/zfs/zfs_acl.c b/module/os/linux/zfs/zfs_acl.c index 48abbc010917..e83672c468fa 100644 --- a/module/os/linux/zfs/zfs_acl.c +++ b/module/os/linux/zfs/zfs_acl.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -1963,8 +1964,8 @@ zfs_acl_ids_overquota(zfsvfs_t *zv, zfs_acl_ids_t *acl_ids, uint64_t projid) /* * Retrieve a file's ACL */ -int -zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) +static int +zfs_getacl_impl(znode_t *zp, vsecattr_t *vsecp, boolean_t stripped, cred_t *cr) { zfs_acl_t *aclp; ulong_t mask; @@ -1975,21 +1976,21 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES); - if (mask == 0) - return (SET_ERROR(ENOSYS)); - - if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr, - zfs_init_idmap))) - return (error); - - mutex_enter(&zp->z_acl_lock); - - error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); - if (error != 0) { - mutex_exit(&zp->z_acl_lock); - return (error); + if (stripped) { + mode_t mode = ZTOI(zp)->i_mode; + aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); + (aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; + zfs_acl_chmod(S_ISDIR(mode), mode, B_TRUE, + (ZTOZSB(zp)->z_acl_mode == ZFS_ACL_GROUPMASK), aclp); + } else { + error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); + if (error != 0) + return (error); } + mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT | + VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES); + /* * Scan ACL to determine number of ACEs */ @@ -2054,11 +2055,37 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) vsecp->vsa_aclflags |= ACL_PROTECTED; if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT) vsecp->vsa_aclflags |= ACL_AUTO_INHERIT; + if (zp->z_pflags & ZFS_ACL_TRIVIAL) + vsecp->vsa_aclflags |= ACL_IS_TRIVIAL; + if (S_ISDIR(ZTOI(zp)->i_mode)) + vsecp->vsa_aclflags |= ACL_IS_DIR; + } + + return (0); +} + +int +zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) +{ + int error; + ulong_t mask; + + mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT | + VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES); + + if (mask == 0) + return (SET_ERROR(ENOSYS)); + + if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr, + zfs_init_idmap))) { + return (error); } + mutex_enter(&zp->z_acl_lock); + error = zfs_getacl_impl(zp, vsecp, B_FALSE, cr); mutex_exit(&zp->z_acl_lock); - return (0); + return (error); } int @@ -2119,12 +2146,11 @@ zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_mode, /* * Set a file's ACL */ -int -zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) +static int +zfs_setacl_impl(znode_t *zp, vsecattr_t *vsecp, cred_t *cr) { zfsvfs_t *zfsvfs = ZTOZSB(zp); zilog_t *zilog = zfsvfs->z_log; - ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT); dmu_tx_t *tx; int error; zfs_acl_t *aclp; @@ -2132,16 +2158,6 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) boolean_t fuid_dirtied; uint64_t acl_obj; - if (mask == 0) - return (SET_ERROR(ENOSYS)); - - if (zp->z_pflags & ZFS_IMMUTABLE) - return (SET_ERROR(EPERM)); - - if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr, - zfs_init_idmap))) - return (error); - error = zfs_vsec_2_aclp(zfsvfs, ZTOI(zp)->i_mode, vsecp, cr, &fuidp, &aclp); if (error) @@ -2156,9 +2172,6 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) (zp->z_pflags & V4_ACL_WIDE_FLAGS); } top: - mutex_enter(&zp->z_acl_lock); - mutex_enter(&zp->z_lock); - tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); @@ -2189,12 +2202,15 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { - mutex_exit(&zp->z_acl_lock); - mutex_exit(&zp->z_lock); - if (error == ERESTART) { + mutex_exit(&zp->z_acl_lock); + mutex_exit(&zp->z_lock); + dmu_tx_wait(tx); dmu_tx_abort(tx); + + mutex_enter(&zp->z_acl_lock); + mutex_enter(&zp->z_lock); goto top; } dmu_tx_abort(tx); @@ -2216,9 +2232,90 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) zfs_fuid_info_free(fuidp); dmu_tx_commit(tx); + return (error); +} + +int +zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) +{ + ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT); + int error; + + if (mask == 0) + return (SET_ERROR(ENOSYS)); + + if (zp->z_pflags & ZFS_IMMUTABLE) + return (SET_ERROR(EPERM)); + + if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr, + zfs_init_idmap))) { + return (error); + } + + mutex_enter(&zp->z_acl_lock); + mutex_enter(&zp->z_lock); + + error = zfs_setacl_impl(zp, vsecp, cr); + mutex_exit(&zp->z_lock); mutex_exit(&zp->z_acl_lock); + return (error); +} + + +int +zfs_stripacl(znode_t *zp, cred_t *cr) +{ + int error; + zfsvfs_t *zfsvfs = ZTOZSB(zp); + vsecattr_t vsec = { + .vsa_mask = VSA_ACE_ALLTYPES | VSA_ACECNT | VSA_ACE | + VSA_ACE_ACLFLAGS + }; + + if ((error = zfs_enter(zfsvfs, FTAG)) != 0) + return (error); + + if ((error = zfs_verify_zp(zp)) != 0) + goto done; + + if (zp->z_pflags & ZFS_IMMUTABLE) { + error = SET_ERROR(EPERM); + goto done; + } + + if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, B_FALSE, cr, + zfs_init_idmap))) + goto done; + + if (zp->z_pflags & ZFS_ACL_TRIVIAL) { + // ACL is already stripped. Nothing to do. + error = 0; + goto done; + } + + mutex_enter(&zp->z_acl_lock); + error = zfs_getacl_impl(zp, &vsec, B_TRUE, cr); + if (error) { + mutex_exit(&zp->z_acl_lock); + goto done; + } + + mutex_enter(&zp->z_lock); + + error = zfs_setacl_impl(zp, &vsec, cr); + + mutex_exit(&zp->z_lock); + mutex_exit(&zp->z_acl_lock); + + kmem_free(vsec.vsa_aclentp, vsec.vsa_aclentsz); + + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zfsvfs->z_log, 0); + +done: + zfs_exit(zfsvfs, FTAG); return (error); } @@ -2345,8 +2442,11 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, break; case OWNING_GROUP: who = gowner; - zfs_fallthrough; + checkit = zfs_groupmember(zfsvfs, who, cr); + break; case ACE_IDENTIFIER_GROUP: + who = zfs_gid_to_vfsgid(mnt_ns, zfs_i_user_ns(ZTOI(zp)), + who); checkit = zfs_groupmember(zfsvfs, who, cr); break; case ACE_EVERYONE: @@ -2357,6 +2457,8 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, default: if (entry_type == 0) { uid_t newid; + who = zfs_uid_to_vfsuid(mnt_ns, + zfs_i_user_ns(ZTOI(zp)), who); newid = zfs_fuid_map_id(zfsvfs, who, cr, ZFS_ACE_USER); @@ -2523,7 +2625,7 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, * Also note: DOS R/O is ignored for directories. */ if ((v4_mode & WRITE_MASK_DATA) && - S_ISDIR(ZTOI(zp)->i_mode) && + !S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_READONLY)) { return (SET_ERROR(EPERM)); } diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index a52f08868d96..d315355be93b 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -336,9 +336,15 @@ xattr_changed_cb(void *arg, uint64_t newval) zfsvfs_t *zfsvfs = arg; if (newval == ZFS_XATTR_OFF) { +#ifdef SB_LARGEXATTR + zfsvfs->z_sb->s_flags &= ~SB_LARGEXATTR; +#endif zfsvfs->z_flags &= ~ZSB_XATTR; } else { zfsvfs->z_flags |= ZSB_XATTR; +#ifdef SB_LARGEXATTR + zfsvfs->z_sb->s_flags |= SB_LARGEXATTR; +#endif if (newval == ZFS_XATTR_SA) zfsvfs->z_xattr_sa = B_TRUE; @@ -353,12 +359,17 @@ acltype_changed_cb(void *arg, uint64_t newval) zfsvfs_t *zfsvfs = arg; switch (newval) { - case ZFS_ACLTYPE_NFSV4: case ZFS_ACLTYPE_OFF: zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF; zfsvfs->z_sb->s_flags &= ~SB_POSIXACL; +#ifdef SB_NFSV4ACL + zfsvfs->z_sb->s_flags &= ~SB_NFSV4ACL; +#endif break; case ZFS_ACLTYPE_POSIX: +#ifdef SB_NFSV4ACL + zfsvfs->z_sb->s_flags &= ~SB_NFSV4ACL; +#endif #ifdef CONFIG_FS_POSIX_ACL zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIX; zfsvfs->z_sb->s_flags |= SB_POSIXACL; @@ -367,6 +378,13 @@ acltype_changed_cb(void *arg, uint64_t newval) zfsvfs->z_sb->s_flags &= ~SB_POSIXACL; #endif /* CONFIG_FS_POSIX_ACL */ break; + case ZFS_ACLTYPE_NFSV4: + zfsvfs->z_acl_type = ZFS_ACLTYPE_NFSV4; + zfsvfs->z_sb->s_flags &= ~SB_POSIXACL; +#ifdef SB_NFSV4ACL + zfsvfs->z_sb->s_flags |= SB_NFSV4ACL; +#endif + break; default: break; } diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 9803c7fecb5c..9a12b361c16c 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -595,6 +595,22 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl, os = zfsvfs->z_os; zilog = zfsvfs->z_log; + /* + * For compatibility purposes with data migrated from FreeBSD + * (which will have NFSv4 ACL type), BSD file creation semantics + * are forced rather than System V. Hence on new file creation + * if NFSV4ACL we inherit GID from parent rather than take current + * process GID. This makes S_ISGID on directories a de-facto + * no-op, but we still honor setting / removing it and normal + * inheritance of the bit on new directories in case user changes + * the underlying ACL type. + */ + if ((vap->va_mask & ATTR_MODE) && + S_ISDIR(ZTOI(dzp)->i_mode) && + (zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4)) { + vap->va_gid = KGID_TO_SGID(ZTOI(dzp)->i_gid); + } + if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { zfs_exit(zfsvfs, FTAG); @@ -1221,6 +1237,22 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp, return (error); zilog = zfsvfs->z_log; + /* + * For compatibility purposes with data migrated from FreeBSD + * (which will have NFSv4 ACL type), BSD file creation semantics + * are forced rather than System V. Hence on new file creation + * if NFSV4ACL we inherit GID from parent rather than take current + * process GID. This makes S_ISGID on directories a de-facto + * no-op, but we still honor setting / removing it and normal + * inheritance of the bit on new directories in case user changes + * the underlying ACL type. + */ + if ((vap->va_mask & ATTR_MODE) && + S_ISDIR(ZTOI(dzp)->i_mode) && + (zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4)) { + vap->va_gid = KGID_TO_SGID(ZTOI(dzp)->i_gid); + } + if (dzp->z_pflags & ZFS_XATTR) { zfs_exit(zfsvfs, FTAG); return (SET_ERROR(EINVAL)); @@ -1979,10 +2011,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns) goto out3; } - if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) { - err = SET_ERROR(EPERM); - goto out3; - } + /* ZFS_READONLY will be handled in zfs_zaccess() */ /* * Verify timestamps doesn't overflow 32 bits. diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index ad1753f7a071..b25f3e9224f3 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -816,6 +816,7 @@ const struct inode_operations zpl_inode_operations = { .get_acl = zpl_get_acl, #endif /* HAVE_GET_INODE_ACL */ #endif /* CONFIG_FS_POSIX_ACL */ + .permission = zpl_permission, }; #ifdef HAVE_RENAME2_OPERATIONS_WRAPPER @@ -862,6 +863,7 @@ const struct inode_operations zpl_dir_inode_operations = { .get_acl = zpl_get_acl, #endif /* HAVE_GET_INODE_ACL */ #endif /* CONFIG_FS_POSIX_ACL */ + .permission = zpl_permission, #ifdef HAVE_RENAME2_OPERATIONS_WRAPPER }, .rename2 = zpl_rename2, @@ -909,4 +911,5 @@ const struct inode_operations zpl_special_inode_operations = { .get_acl = zpl_get_acl, #endif /* HAVE_GET_INODE_ACL */ #endif /* CONFIG_FS_POSIX_ACL */ + .permission = zpl_permission, }; diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 0a82b8858eb8..e7d2a8fdaf1c 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -230,6 +230,9 @@ __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs) case ZFS_ACLTYPE_POSIX: seq_puts(seq, ",posixacl"); break; + case ZFS_ACLTYPE_NFSV4: + seq_puts(seq, ",nfs4acl"); + break; default: seq_puts(seq, ",noacl"); break; diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c index 4e4f5210f85d..1b9c354fcec5 100644 --- a/module/os/linux/zfs/zpl_xattr.c +++ b/module/os/linux/zfs/zpl_xattr.c @@ -80,11 +80,34 @@ #include #include #include +#include #include #include #include #include +#define NFS41ACL_XATTR "system.nfs4_acl_xdr" + +static const struct { + int kmask; + int zfsperm; +} mask2zfs[] = { + { MAY_READ, ACE_READ_DATA }, + { MAY_WRITE, ACE_WRITE_DATA }, + { MAY_EXEC, ACE_EXECUTE }, +#ifdef SB_NFSV4ACL + { MAY_DELETE, ACE_DELETE }, + { MAY_DELETE_CHILD, ACE_DELETE_CHILD }, + { MAY_WRITE_ATTRS, ACE_WRITE_ATTRIBUTES }, + { MAY_WRITE_NAMED_ATTRS, ACE_WRITE_NAMED_ATTRS }, + { MAY_WRITE_ACL, ACE_WRITE_ACL }, + { MAY_WRITE_OWNER, ACE_WRITE_OWNER }, +#endif +}; + +#define POSIX_MASKS (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_OPEN) +#define GENERIC_MASK(mask) ((mask & ~POSIX_MASKS) == 0) + enum xattr_permission { XAPERM_DENY, XAPERM_ALLOW, @@ -250,6 +273,14 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) goto out1; rw_enter(&zp->z_xattr_lock, RW_READER); + if ((zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4) && + ((zp->z_pflags & ZFS_ACL_TRIVIAL) == 0)) { + error = zpl_xattr_filldir(&xf, NFS41ACL_XATTR, + sizeof (NFS41ACL_XATTR) - 1); + if (error) + goto out; + } + if (zfsvfs->z_use_sa && zp->z_is_sa) { error = zpl_xattr_list_sa(&xf); if (error) @@ -1457,6 +1488,469 @@ static xattr_handler_t zpl_xattr_acl_default_handler = { #endif /* CONFIG_FS_POSIX_ACL */ +/* + * zpl_permission() gets called by linux kernel whenever it checks + * inode_permission via inode->i_op->permission. The general preference + * is to defer to the standard in-kernel permission check (generic_permission) + * wherever possible. + * + * https://www.kernel.org/doc/Documentation/filesystems/vfs.txt + */ +int +#if defined(HAVE_IOPS_PERMISSION_USERNS) +zpl_permission(struct user_namespace *userns, struct inode *ip, int mask) +#elif defined(HAVE_IOPS_PERMISSION_IDMAP) +zpl_permission(struct mnt_idmap *idmap, struct inode *ip, int mask) +#else +zpl_permission(struct inode *ip, int mask) +#endif +{ + int to_check = 0, i, ret; + cred_t *cr = NULL; + zfsvfs_t *zfsvfs = NULL; + + /* + * If NFSv4 ACLs are not being used, go back to + * generic_permission(). If ACL is trivial and the + * mask is representable by POSIX permissions, then + * also go back to generic_permission(). + */ + if ((ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_NFSV4) || + ((ITOZ(ip)->z_pflags & ZFS_ACL_TRIVIAL && GENERIC_MASK(mask)))) { +#if defined(HAVE_IOPS_PERMISSION_USERNS) + return (generic_permission(userns, ip, mask)); +#elif defined(HAVE_IOPS_PERMISSION_IDMAP) + return (generic_permission(idmap, ip, mask)); +#else + return (generic_permission(ip, mask)); +#endif + } + + for (i = 0; i < ARRAY_SIZE(mask2zfs); i++) { + if (mask & mask2zfs[i].kmask) { + to_check |= mask2zfs[i].zfsperm; + } + } + + /* + * We're being asked to check something that doesn't contain an + * NFSv4 ACE. Pass back to default kernel permissions check. + */ + if (to_check == 0) { +#if defined(HAVE_IOPS_PERMISSION_USERNS) + return (generic_permission(userns, ip, mask)); +#elif defined(HAVE_IOPS_PERMISSION_IDMAP) + return (generic_permission(idmap, ip, mask)); +#else + return (generic_permission(ip, mask)); +#endif + } + + /* + * Fast path for execute checks. Do not use zfs_fastaccesschk_execute + * since it may end up granting execute access in presence of explicit + * deny entry for user / group, and it also read the ZFS ACL + * (non-cached) which we wish to avoid in RCU. + */ + if ((to_check == ACE_EXECUTE) && + (ITOZ(ip)->z_pflags & ZFS_NO_EXECS_DENIED)) + return (0); + + /* + * inode permission operation may be called in rcu-walk mode + * (mask & MAY_NOT_BLOCK). If in rcu-walk mode, the filesystem must + * check the permission without blocking or storing to the inode. + * + * If a situation is encountered that rcu-walk cannot handle, + * return -ECHILD and it will be called again in ref-walk mode. + */ + cr = CRED(); + crhold(cr); + + /* + * There are some situations in which capabilities may allow overriding + * the DACL. Skip reading ACL if requested permissions are fully + * satisfied by capabilities. + */ + + /* + * CAP_DAC_OVERRIDE may override RWX on directories, and RW on other + * files. Execute may also be overriden if at least one execute bit is + * set. This behavior is not formally documented, but is covered in + * commit messages and code comments in namei.c. + * + * CAP_DAC_READ_SEARCH may bypass file read permission checks and + * directory read and execute permission checks. + */ + if (S_ISDIR(ip->i_mode)) { +#ifdef SB_NFSV4ACL + if (!(mask & (MAY_WRITE | NFS41ACL_WRITE_ALL))) { +#else + if (!(mask & MAY_WRITE)) { +#endif + if (capable(CAP_DAC_READ_SEARCH)) { + crfree(cr); + return (0); + } + } + if (capable(CAP_DAC_OVERRIDE)) { + crfree(cr); + return (0); + } + } + + if (!(mask & MAY_EXEC) || (ip->i_mode & S_IXUGO)) { + if (capable(CAP_DAC_OVERRIDE)) { + crfree(cr); + return (0); + } + } + + if ((to_check == ACE_READ_DATA) && + capable(CAP_DAC_READ_SEARCH)) { + crfree(cr); + return (0); + } + + if (mask & MAY_NOT_BLOCK) { + crfree(cr); + return (-ECHILD); + } + + zfsvfs = ZTOZSB(ITOZ(ip)); + + if ((ret = -zfs_enter_verify_zp(zfsvfs, ITOZ(ip), FTAG)) != 0) + return (ret); + +#if defined(HAVE_IOPS_PERMISSION_USERNS) + ret = -zfs_zaccess(ITOZ(ip), to_check, V_ACE_MASK, B_FALSE, cr, + userns); +#elif defined(HAVE_IOPS_PERMISSION_IDMAP) + ret = -zfs_zaccess(ITOZ(ip), to_check, V_ACE_MASK, B_FALSE, cr, + idmap); +#else + ret = -zfs_zaccess(ITOZ(ip), to_check, V_ACE_MASK, B_FALSE, cr, + zfs_init_idmap); +#endif + + zfs_exit(zfsvfs, FTAG); + + crfree(cr); + return (ret); +} + +#define ACEI4_SPECIAL_WHO 1 +#define ACE4_SPECIAL_OWNER 1 +#define ACE4_SPECIAL_GROUP 2 +#define ACE4_SPECIAL_EVERYONE 3 +#define NFS41ACL_MAX_ACES MAX_ACL_ENTRIES +#define NFS41_FLAGS (ACE_DIRECTORY_INHERIT_ACE| \ + ACE_FILE_INHERIT_ACE| \ + ACE_NO_PROPAGATE_INHERIT_ACE| \ + ACE_INHERIT_ONLY_ACE| \ + ACE_INHERITED_ACE| \ + ACE_IDENTIFIER_GROUP) + +/* + * Macros for sanity checks related to XDR and ACL buffer sizes + */ +#define ACE4ELEM 5 +#define ACE4SIZE (ACE4ELEM * sizeof (u32)) +#define XDRBASE (2 * sizeof (u32)) + +#define ACES_TO_XDRSIZE(x) (XDRBASE + (x * ACE4SIZE)) +#define XDRSIZE_TO_ACES(x) ((x - XDRBASE) / ACE4SIZE) +#define XDRSIZE_IS_VALID(x) ((x >= XDRBASE) && \ + (((x - XDRBASE) % ACE4SIZE) == 0)) + +static int +__zpl_xattr_nfs41acl_list(struct inode *ip, char *list, size_t list_size, + const char *name, size_t name_len) +{ + char *xattr_name = NFS41ACL_XATTR; + size_t xattr_size = sizeof (NFS41ACL_XATTR); + + if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_NFSV4) + return (0); + + if (list && xattr_size <= list_size) + memcpy(list, xattr_name, xattr_size); + + return (xattr_size); +} +ZPL_XATTR_LIST_WRAPPER(zpl_xattr_nfs41acl_list); + +static int +acep_to_nfsace4i(const ace_t *acep, u32 *xattrbuf) +{ + u32 who = 0, iflag = 0; + + switch (acep->a_flags & ACE_TYPE_FLAGS) { + case ACE_OWNER: + iflag = ACEI4_SPECIAL_WHO; + who = ACE4_SPECIAL_OWNER; + break; + + case ACE_GROUP|ACE_IDENTIFIER_GROUP: + iflag = ACEI4_SPECIAL_WHO; + who = ACE4_SPECIAL_GROUP; + break; + + case ACE_EVERYONE: + iflag = ACEI4_SPECIAL_WHO; + who = ACE4_SPECIAL_EVERYONE; + break; + + case ACE_IDENTIFIER_GROUP: + case 0: + who = acep->a_who; + break; + + default: + dprintf("Unknown ACE_TYPE_FLAG 0x%08x\n", + acep->a_flags & ACE_TYPE_FLAGS); + return (-EINVAL); + } + + *xattrbuf++ = htonl(acep->a_type); + *xattrbuf++ = htonl(acep->a_flags & NFS41_FLAGS); + *xattrbuf++ = htonl(iflag); + *xattrbuf++ = htonl(acep->a_access_mask); + *xattrbuf++ = htonl(who); + + return (0); +} + +static int +zfsacl_to_nfsacl41i(const vsecattr_t vsecp, u32 *xattrbuf) +{ + int i, error = 0; + ace_t *acep = NULL; + + *xattrbuf++ = htonl(vsecp.vsa_aclflags); + *xattrbuf++ = htonl(vsecp.vsa_aclcnt); + + for (i = 0; i < vsecp.vsa_aclcnt; i++, xattrbuf += ACE4ELEM) { + acep = vsecp.vsa_aclentp + (i * sizeof (ace_t)); + + error = acep_to_nfsace4i(acep, xattrbuf); + if (error) + break; + } + + return (error); +} + +static int +nfsace4i_to_acep(const u32 *xattrbuf, ace_t *acep) +{ + u32 iflag, id; + + acep->a_type = ntohl(*(xattrbuf++)); + acep->a_flags = ntohl(*(xattrbuf++)) & NFS41_FLAGS; + iflag = ntohl(*(xattrbuf++)); + acep->a_access_mask = ntohl(*(xattrbuf++)); + id = ntohl(*(xattrbuf++)); + + if (iflag & ACEI4_SPECIAL_WHO) { + switch (id) { + case ACE4_SPECIAL_OWNER: + acep->a_flags |= ACE_OWNER; + acep->a_who = -1; + break; + + case ACE4_SPECIAL_GROUP: + acep->a_flags |= (ACE_GROUP | ACE_IDENTIFIER_GROUP); + acep->a_who = -1; + break; + + case ACE4_SPECIAL_EVERYONE: + acep->a_flags |= ACE_EVERYONE; + acep->a_who = -1; + break; + + default: + dprintf("Unknown id 0x%08x\n", id); + return (-EINVAL); + } + } else { + acep->a_who = id; + } + + return (0); +} + +static int +nfsacl41i_to_zfsacl(const u32 *xattrbuf, size_t bufsz, vsecattr_t *vsecp) +{ + int error; + int i; + + vsecp->vsa_aclflags = ntohl(*(xattrbuf++)); + vsecp->vsa_aclcnt = ntohl(*(xattrbuf++)); + bufsz -= (2 * sizeof (u32)); + vsecp->vsa_aclentsz = vsecp->vsa_aclcnt * sizeof (ace_t); + + if (bufsz != (vsecp->vsa_aclcnt * ACE4SIZE)) { + dprintf("Embedded ACL count [%d] is larger than " + "can fit in provided buffer size: %zu\n", + vsecp->vsa_aclcnt, bufsz); + return (-ERANGE); + } + + vsecp->vsa_aclentp = kmem_alloc(vsecp->vsa_aclentsz, KM_SLEEP); + + for (i = 0; i < vsecp->vsa_aclcnt; i++, xattrbuf += ACE4ELEM) { + ace_t *acep = vsecp->vsa_aclentp + (i * sizeof (ace_t)); + + error = nfsace4i_to_acep(xattrbuf, acep); + if (error) { + kmem_free(vsecp->vsa_aclentp, vsecp->vsa_aclentsz); + return (error); + } + } + + return (0); +} + +static int +__zpl_xattr_nfs41acl_get(struct inode *ip, const char *name, + void *buffer, size_t size) +{ + vsecattr_t vsecp = {0}; + cred_t *cr = CRED(); + int ret, fl; + size_t xdr_size; + + /* xattr_resolve_name will do this for us if this is defined */ +#ifndef HAVE_XATTR_HANDLER_NAME + if (strcmp(name, "") != 0) + return (-EINVAL); +#endif + + if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_NFSV4) + return (-EOPNOTSUPP); + + if (size == 0) { + /* + * API user may send 0 size so that we + * return size of buffer needed for ACL. + */ + crhold(cr); + vsecp.vsa_mask = VSA_ACECNT; + ret = -zfs_getsecattr(ITOZ(ip), &vsecp, ATTR_NOACLCHECK, cr); + if (ret) { + return (ret); + } + crfree(cr); + ret = ACES_TO_XDRSIZE(vsecp.vsa_aclcnt); + return (ret); + } + + if (size < ACES_TO_XDRSIZE(1)) { + return (-EINVAL); + } + + vsecp.vsa_mask = VSA_ACE_ALLTYPES | VSA_ACECNT | VSA_ACE | + VSA_ACE_ACLFLAGS; + + crhold(cr); + fl = capable(CAP_DAC_OVERRIDE) ? ATTR_NOACLCHECK : 0; + ret = -zfs_getsecattr(ITOZ(ip), &vsecp, fl, cr); + crfree(cr); + + if (ret) { + return (ret); + } + + if (vsecp.vsa_aclcnt == 0) { + ret = -ENODATA; + goto nfs4acl_get_out; + } + + xdr_size = ACES_TO_XDRSIZE(vsecp.vsa_aclcnt); + if (xdr_size > size) { + ret = -ERANGE; + goto nfs4acl_get_out; + } + + ret = zfsacl_to_nfsacl41i(vsecp, (u32 *)buffer); + if (ret == 0) + ret = xdr_size; + +nfs4acl_get_out: + kmem_free(vsecp.vsa_aclentp, vsecp.vsa_aclentsz); + + return (ret); +} +ZPL_XATTR_GET_WRAPPER(zpl_xattr_nfs41acl_get); + +static int +__zpl_xattr_nfs41acl_set(zidmap_t *mnt_ns, + struct inode *ip, const char *name, + const void *value, size_t size, int flags) +{ + (void) mnt_ns; + cred_t *cr = CRED(); + int error, fl, naces; + vsecattr_t vsecp = { .vsa_mask = (VSA_ACE | VSA_ACE_ACLFLAGS) }; + + if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_NFSV4) + return (-EOPNOTSUPP); + + if (value == NULL && size == 0) { + crhold(cr); + error = zfs_stripacl(ITOZ(ip), cr); + crfree(cr); + return (error); + } + + /* xdr data is 4-byte aligned */ + if (((ulong_t)value % 4) != 0) { + return (-EINVAL); + } + + naces = XDRSIZE_TO_ACES(size); + if (naces > NFS41ACL_MAX_ACES) { + return (-E2BIG); + } + + if (!XDRSIZE_IS_VALID(size)) { + return (-EINVAL); + } + + error = nfsacl41i_to_zfsacl((u32 *)value, size, &vsecp); + if (error) + return (error); + + crhold(cr); + fl = capable(CAP_DAC_OVERRIDE) ? ATTR_NOACLCHECK : 0; + error = -zfs_setsecattr(ITOZ(ip), &vsecp, fl, cr); + crfree(cr); + + kmem_free(vsecp.vsa_aclentp, vsecp.vsa_aclentsz); + return (error); +} +ZPL_XATTR_SET_WRAPPER(zpl_xattr_nfs41acl_set); + +/* + * ACL access xattr namespace handlers. + * + * Use .name instead of .prefix when available. xattr_resolve_name will match + * whole name and reject anything that has .name only as prefix. + */ +xattr_handler_t zpl_xattr_nfs41acl_handler = +{ +#ifdef HAVE_XATTR_HANDLER_NAME + .name = NFS41ACL_XATTR, +#else + .prefix = NFS41ACL_XATTR, +#endif + .list = zpl_xattr_nfs41acl_list, + .get = zpl_xattr_nfs41acl_get, + .set = zpl_xattr_nfs41acl_set, +}; + xattr_handler_t *zpl_xattr_handlers[] = { &zpl_xattr_security_handler, &zpl_xattr_trusted_handler, @@ -1465,6 +1959,7 @@ xattr_handler_t *zpl_xattr_handlers[] = { &zpl_xattr_acl_access_handler, &zpl_xattr_acl_default_handler, #endif /* CONFIG_FS_POSIX_ACL */ + &zpl_xattr_nfs41acl_handler, NULL }; @@ -1493,6 +1988,10 @@ zpl_xattr_handler(const char *name) return (&zpl_xattr_acl_default_handler); #endif /* CONFIG_FS_POSIX_ACL */ + if (strncmp(name, NFS41ACL_XATTR, + sizeof (NFS41ACL_XATTR)) == 0) + return (&zpl_xattr_nfs41acl_handler); + return (NULL); } diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 764993b45e7c..31425802f97f 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -436,13 +436,7 @@ zfs_prop_init(void) PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "discard | groupmask | passthrough | restricted", "ACLMODE", acl_mode_table, sfeatures); - zprop_register_index(ZFS_PROP_ACLTYPE, "acltype", -#ifdef __linux__ - /* Linux doesn't natively support ZFS's NFSv4-style ACLs. */ - ZFS_ACLTYPE_OFF, -#else - ZFS_ACLTYPE_NFSV4, -#endif + zprop_register_index(ZFS_PROP_ACLTYPE, "acltype", ZFS_ACLTYPE_NFSV4, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "off | nfsv4 | posix", "ACLTYPE", acltype_table, sfeatures); zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 001f774a6d16..7b489e03d306 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -373,7 +373,7 @@ zvol_set_volthreading(const char *name, boolean_t value) { zvol_state_t *zv = zvol_find_by_name(name, RW_NONE); if (zv == NULL) - return (ENOENT); + return (-1); zv->zv_threading = value; mutex_exit(&zv->zv_state_lock); return (0); diff --git a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.zcp b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.zcp index 10ef8e7f839a..1fd23ef02922 100644 --- a/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.zcp +++ b/tests/zfs-tests/tests/functional/channel_program/synctask_core/tst.get_index_props.zcp @@ -27,11 +27,8 @@ props['checksum'] = {{'on', 'default'}, {nil, nil}} props['dedup'] = {{'off', 'default'}, {nil, nil}} props['compression'] = {{'off', 'default'}, {nil, nil}} props['snapdir'] = {{'hidden', 'default'}, {nil, nil}} -if os == "Linux" then - props['acltype'] = {{'off', 'default'}, {'off', 'default'}} -elseif os == "FreeBSD" then - props['aclmode'] = {{'discard', 'default'}, {'discard', 'default'}} -end +props['acltype'] = {{'nfsv4', 'default'}, {'nfsv4', 'default'}} +props['aclmode'] = {{'discard', 'default'}, {'discard', 'default'}} props['aclinherit'] = {{'restricted','default'}, {nil, nil}} props['copies'] = {{'1', 'default'}, {nil, nil}} props['primarycache'] = {{'all', 'default'}, {'all', 'default'}} diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_001_pos.ksh index 935a85285e8e..f6b0c395f048 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_001_pos.ksh @@ -62,13 +62,12 @@ typeset -r uint64_max="18446744073709551615" typeset zfs_props=("type" used available creation volsize referenced \ compressratio mounted origin recordsize quota reservation mountpoint \ sharenfs checksum compression atime devices exec readonly setuid \ - snapdir aclinherit canmount primarycache secondarycache version \ - usedbychildren usedbydataset usedbyrefreservation usedbysnapshots \ - filesystem_limit snapshot_limit filesystem_count snapshot_count) + snapdir aclinherit aclmode acltype canmount primarycache secondarycache \ + version usedbychildren usedbydataset usedbyrefreservation usedbysnapshots) if is_freebsd; then - typeset zfs_props_os=(jailed aclmode) + typeset zfs_props_os=(jailed) else - typeset zfs_props_os=(zoned acltype) + typeset zfs_props_os=(zoned) fi typeset userquota_props=(userquota@root groupquota@root userused@root \ groupused@root) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_002_pos.ksh index 65986f6bd1a2..461fd4112e93 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_002_pos.ksh @@ -50,12 +50,12 @@ typeset options=(" " p r H) typeset zfs_props=("type" used available creation volsize referenced \ compressratio mounted origin recordsize quota reservation mountpoint \ sharenfs checksum compression atime devices exec readonly setuid \ - snapdir aclinherit canmount primarycache secondarycache version \ - usedbychildren usedbydataset usedbyrefreservation usedbysnapshots) + snapdir aclinherit aclmode acltype canmount primarycache secondarycache \ + version usedbychildren usedbydataset usedbyrefreservation usedbysnapshots) if is_freebsd; then - typeset zfs_props_os=(jailed aclmode) + typeset zfs_props_os=(jailed) else - typeset zfs_props_os=(zoned acltype) + typeset zfs_props_os=(zoned) fi typeset userquota_props=(userquota@root groupquota@root userused@root \ groupused@root) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_005_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_005_neg.ksh index d9be907909db..7b3f3a472dd5 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_005_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_005_neg.ksh @@ -48,12 +48,12 @@ typeset val_opts=(p r H) typeset v_props=(type used available creation volsize referenced compressratio \ mounted origin recordsize quota reservation mountpoint sharenfs checksum \ compression atime devices exec readonly setuid snapdir version \ - aclinherit canmount primarycache secondarycache \ + aclinherit aclmode acltype canmount primarycache secondarycache \ usedbychildren usedbydataset usedbyrefreservation usedbysnapshots) if is_freebsd; then - typeset v_props_os=(jailed aclmode) + typeset v_props_os=(jailed) else - typeset v_props_os=(zoned acltype) + typeset v_props_os=(zoned) fi typeset userquota_props=(userquota@root groupquota@root userused@root \ groupused@root) diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_008_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_008_pos.ksh index 6be432036845..1cb69d8fe0e3 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_008_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_008_pos.ksh @@ -53,13 +53,13 @@ set -A options " " "-r" "-H" "-p" "-rHp" "-o name" \ set -A props type used available creation volsize referenced compressratio \ mounted origin recordsize quota reservation mountpoint sharenfs \ checksum compression atime devices exec readonly setuid snapdir \ - aclinherit canmount primarycache secondarycache version \ + aclinherit aclmode acltype canmount primarycache secondarycache \ usedbychildren usedbydataset usedbyrefreservation usedbysnapshots \ userquota@root groupquota@root userused@root groupused@root if is_freebsd; then - set -A props ${props[*]} jailed aclmode + set -A props ${props[*]} jailed else - set -A props ${props[*]} zoned acltype + set -A props ${props[*]} zoned fi set -A dataset $TESTPOOL/$TESTCTR $TESTPOOL/$TESTFS $TESTPOOL/$TESTVOL \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh index 5686b63b41cc..f9a8f22f5285 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_get/zfs_get_009_pos.ksh @@ -52,16 +52,16 @@ fi log_assert "'zfs get -d ' should get expected output." log_onexit depth_fs_cleanup -set -A all_props type used available creation volsize referenced \ - compressratio mounted origin recordsize quota reservation mountpoint \ - sharenfs checksum compression atime devices exec readonly setuid \ - snapdir aclinherit canmount primarycache secondarycache version \ +set -A all_props type used available creation volsize referenced compressratio \ + mounted origin recordsize quota reservation mountpoint sharenfs \ + checksum compression atime devices exec readonly setuid snapdir \ + aclinherit aclmode acltype canmount primarycache secondarycache \ usedbychildren usedbydataset usedbyrefreservation usedbysnapshots \ userquota@root groupquota@root userused@root groupused@root if is_freebsd; then - set -A all_props ${all_props[*]} jailed aclmode + set -A all_props ${all_props[*]} jailed else - set -A all_props ${all_props[*]} zoned acltype + set -A all_props ${all_props[*]} zoned fi depth_fs_setup diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh index 5e8f1d7053e8..7eadb03793a4 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh @@ -45,6 +45,8 @@ verify_runnable "global" +log_unsupported "SKIP because auto partitioning removed for SCALE" + function cleanup { poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh index 095f3bc05e66..bccfaa27e31c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh @@ -38,6 +38,8 @@ verify_runnable "global" +log_unsupported "SKIP because auto partitioning removed for SCALE" + function cleanup { log_must zinject -c all diff --git a/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh index c0387e1d3235..d1bf2e8641c3 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh @@ -50,6 +50,7 @@ verify_runnable "both" if is_linux; then + log_unsupported "SKIP because auto partitioning removed for SCALE" # Add one 512b scsi_debug device (4Kn would generate IO errors) # NOTE: must be larger than other "file" vdevs and minimum SPA devsize: # add 32m of fudge diff --git a/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh index ae56ee9919bf..b6b9f789ed30 100755 --- a/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/fault/auto_replace_001_pos.ksh @@ -49,6 +49,8 @@ verify_runnable "both" +log_unsupported "SKIP because auto partitioning removed for SCALE" + if ! is_physical_device $DISKS; then log_unsupported "Unsupported disks for this test." fi diff --git a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_001.ksh b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_001.ksh index e7187935e532..820e9e544ef9 100755 --- a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_001.ksh +++ b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_001.ksh @@ -36,6 +36,8 @@ verify_runnable "global" +log_unsupported "SKIP: nfsv4 acls incompatible with id-mapped mounts for now." + export WORKDIR=$TESTDIR/idmap_test export IDMAPDIR=$TESTDIR/idmap_dest diff --git a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_002.ksh b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_002.ksh index 8cba90ea58b7..bb1d379af27b 100755 --- a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_002.ksh +++ b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_002.ksh @@ -37,6 +37,8 @@ verify_runnable "global" +log_unsupported "SKIP: nfsv4 acls incompatible with id-mapped mounts for now." + export WORKDIR=$TESTDIR/idmap_test export IDMAPDIR=$TESTDIR/idmap_dest diff --git a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_003.ksh b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_003.ksh index 1f1a2aec655e..84e7de13789a 100755 --- a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_003.ksh +++ b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_003.ksh @@ -39,6 +39,8 @@ verify_runnable "global" +log_unsupported "SKIP: nfsv4 acls incompatible with id-mapped mounts for now." + export WORKDIR=$TESTDIR/idmap_test export IDMAPDIR=$TESTDIR/idmap_dest diff --git a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_004.ksh b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_004.ksh index 89f2f750d23c..3e44da2e0553 100755 --- a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_004.ksh +++ b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_004.ksh @@ -38,6 +38,8 @@ verify_runnable "global" +log_unsupported "SKIP: nfsv4 acls incompatible with id-mapped mounts for now." + export WORKDIR=$TESTDIR/idmap_test export IDMAPDIR=$TESTDIR/idmap_dest diff --git a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_005.ksh b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_005.ksh index a4ecea92c127..c270dfb8f0d5 100755 --- a/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_005.ksh +++ b/tests/zfs-tests/tests/functional/idmap_mount/idmap_mount_005.ksh @@ -41,6 +41,8 @@ verify_runnable "global" +log_unsupported "SKIP: nfsv4 acls incompatible with id-mapped mounts for now." + export WORKDIR=$TESTDIR/idmap_test export IDMAPDIR=$TESTDIR/idmap_dest diff --git a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh index 1f203e1dc551..cabbeff719f3 100755 --- a/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/inheritance/inherit_001_pos.ksh @@ -373,6 +373,8 @@ function scan_state { #state-file # set -A prop "checksum" "" \ "compression" "" \ + "aclmode" "" \ + "acltype" "" \ "atime" "" \ "sharenfs" "" \ "recordsize" "recsize" \ @@ -386,13 +388,21 @@ set -A prop "checksum" "" \ # above must have a corresponding entry in the two arrays below. # -set -A def_val "on" "on" "on" \ +set -A def_val "on" \ + "on" \ + "discard" \ + "nfsv4" \ + "on" \ "off" "" \ "hidden" \ "off" \ "all" -set -A local_val "off" "off" "off" \ +set -A local_val "off" \ + "off" \ + "groupmask" \ + "off" \ + "off" \ "on" "" \ "visible" \ "off" \ @@ -401,15 +411,6 @@ set -A local_val "off" "off" "off" \ # # Add system specific values # -if is_linux; then - prop+=("acltype" "") - def_val+=("off") - local_val+=("off") -else - prop+=("aclmode" "") - def_val+=("discard") - local_val+=("groupmask") -fi if is_illumos; then prop+=("mountpoint" "") def_val+=("")