diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6912e34fab5a..cd67ece4097c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -13,7 +13,7 @@ /src/pybind/mgr/rook @ceph/orchestrators /src/pybind/mgr/cephadm @ceph/orchestrators /src/pybind/mgr/test_orchestrator @ceph/orchestrators -/src/python-common/ceph/deployment @ceph/orchestrators +/src/python-common @ceph/orchestrators /qa/workunits/cephadm @ceph/orchestrators /qa/tasks/cephadm.py @ceph/orchestrators /qa/tasks/cephadm_cases @ceph/orchestrators @@ -132,6 +132,7 @@ README* @ceph/doc-writers /src/test/run-rbd* @ceph/rbd /src/test/test_rbd* @ceph/rbd /src/tools/rbd* @ceph/rbd +/systemd/ceph-rbd-mirror* @ceph/rbd /systemd/rbdmap.service.in @ceph/rbd /udev/50-rbd.rules @ceph/rbd @@ -163,6 +164,10 @@ README* @ceph/doc-writers /src/cls/rgw_gc @ceph/rgw /src/cls/user @ceph/rgw /src/cls/version @ceph/rgw +/src/mrgw.sh @ceph/rgw +/src/mrun @ceph/rgw +/src/mstart.sh @ceph/rgw +/src/mstop.sh @ceph/rgw /src/rgw @ceph/rgw /src/s3select @ceph/rgw /src/spawn @ceph/rgw diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 98b7d53d8119..d23134597030 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -17,6 +17,6 @@ updates: schedule: interval: "daily" commit-message: - prefix: ".github/workflows:" + prefix: ".github:" pull-request-branch-name: separator: "-" diff --git a/.github/labeler.yml b/.github/labeler.yml index 60c1bc5f0882..cc32be385012 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -154,6 +154,7 @@ crimson: - src/crimson/** - src/test/crimson/** - qa/suites/crimson-rados/** + - src/seastar/** dashboard: - src/pybind/mgr/dashboard/** @@ -207,21 +208,73 @@ CI: - .github/** rbd: + - doc/dev/rbd* + - doc/man/8/ceph-rbdnamer.rst - doc/man/8/rbd* - doc/rbd/** + - doc/start/quick-rbd.rst + - examples/librbd/** + - examples/rbd-replay/** + - qa/rbd/** + - qa/run_xfstests* + - qa/suites/krbd/** - qa/suites/rbd/** + - qa/tasks/ceph_iscsi_client.py + - qa/tasks/metadata.yaml + - qa/tasks/qemu.py + - qa/tasks/rbd* + - qa/tasks/userdata* + - qa/workunits/cls/test_cls_journal.sh + - qa/workunits/cls/test_cls_lock.sh + - qa/workunits/cls/test_cls_rbd.sh - qa/workunits/rbd/** + - qa/workunits/windows/** + - src/ceph-rbdnamer + - src/cls/journal/** + - src/cls/lock/** + - src/cls/rbd/** + - src/common/options/rbd* + - src/etc-rbdmap + - src/include/krbd.h + - src/include/rbd* - src/include/rbd/** + - src/journal/** + - src/krbd.cc - src/librbd/** + - src/ocf/** - src/pybind/mgr/rbd_support/** - src/pybind/rbd/** + - src/rbd* + - src/rbd*/** + - src/test/cli/rbd/** + - src/test/cli-integration/rbd/** + - src/test/cls_journal/** + - src/test/cls_lock/** + - src/test/cls_rbd/** + - src/test/journal/** - src/test/librbd/** - - src/test/rbd_mirror/** - - src/tools/rbd/** - - src/tools/rbd_ggate/** - - src/tools/rbd_mirror/** - - src/tools/rbd_nbd/** - - src/tools/rbd_wnbd/** + - src/test/pybind/test_rbd.py + - src/test/rbd* + - src/test/rbd*/** + - src/test/run-rbd* + - src/test/test_rbd* + - src/tools/rbd*/** + - systemd/ceph-rbd-mirror* + - systemd/rbdmap.service.in + - udev/50-rbd.rules + +nvmeof: + - qa/suites/nvmeof/** + - qa/tasks/nvmeof.py + - qa/workunits/nvmeof/** + - src/ceph_nvmeof_monitor_client.cc + - src/cephadm/cephadmlib/daemons/nvmeof.py + - src/messages/MNVMeofGw* + - src/mon/NVMeofGw* + - src/nvmeof/** + - src/pybind/mgr/cephadm/services/nvmeof.py + - src/pybind/mgr/cephadm/templates/services/nvmeof/** + - src/tools/ceph-dencoder/nvmeof* rgw: - qa/suites/rgw/** @@ -235,6 +288,9 @@ rgw: 
- src/cls/rgw_gc/** - src/cls/timeindex/** - src/mrgw.sh + - src/mrun + - src/mstart.sh + - src/mstop.sh - src/rgw/** - src/test/cls_rgw/** - src/test/librgw_* @@ -248,8 +304,7 @@ ceph-volume: - src/python-common/ceph/deployment/drive_selection/** tests: - - qa/tasks/** - - qa/workunits/** + - qa/** - src/test/** nfs: @@ -284,3 +339,8 @@ telemetry: - qa/workunits/test_telemetry_quincy_x.sh - src/pybind/mgr/telemetry/** - src/telemetry/** + +script: + - src/script/** + - admin/** + - doc/scripts/** diff --git a/.github/milestone.yml b/.github/milestone.yml index 073b7e56b718..036048471209 100644 --- a/.github/milestone.yml +++ b/.github/milestone.yml @@ -5,3 +5,4 @@ base-branch: - "(pacific)" - "(quincy)" - "(reef)" + - "(squid)" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 834ef742832f..494a3f23e06a 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -22,7 +22,9 @@ ## Contribution Guidelines - To sign and title your commits, please refer to [Submitting Patches to Ceph](https://github.com/ceph/ceph/blob/main/SubmittingPatches.rst). -- If you are submitting a fix for a stable branch (e.g. "pacific"), please refer to [Submitting Patches to Ceph - Backports](https://github.com/ceph/ceph/blob/master/SubmittingPatches-backports.rst) for the proper workflow. +- If you are submitting a fix for a stable branch (e.g. "quincy"), please refer to [Submitting Patches to Ceph - Backports](https://github.com/ceph/ceph/blob/master/SubmittingPatches-backports.rst) for the proper workflow. + +- When filling out the below checklist, you may click boxes directly in the GitHub web UI. When entering or editing the entire PR message in the GitHub web UI editor, you may also select a checklist item by adding an `x` between the brackets: `[x]`. Spaces and capitalization matter when checking off items this way. 
## Checklist - Tracker (select at least one) @@ -62,4 +64,5 @@ - `jenkins test ceph-volume all` - `jenkins test ceph-volume tox` - `jenkins test windows` +- `jenkins test rook e2e` diff --git a/.github/workflows/check-license.yml b/.github/workflows/check-license.yml new file mode 100644 index 000000000000..89dcfa292c3c --- /dev/null +++ b/.github/workflows/check-license.yml @@ -0,0 +1,14 @@ +--- +name: "Check for Incompatible Licenses" +on: [pull_request] + +jobs: + pull_request: + name: "Check for Incompatible Licenses" + runs-on: ubuntu-latest + steps: + - name: Check Pull Request + uses: JJ/github-pr-contains-action@526dfe784d8604ea1c39b6c26609074de95b1ffd # releases/v14.1 + with: + github-token: ${{github.token}} + diffDoesNotContain: "GNU General Public License" diff --git a/.github/workflows/create-backport-trackers.yml b/.github/workflows/create-backport-trackers.yml index b3525d9e94e1..79b03f62c1c6 100644 --- a/.github/workflows/create-backport-trackers.yml +++ b/.github/workflows/create-backport-trackers.yml @@ -1,7 +1,8 @@ --- -name: Create backport trackers for trackers in "Pending Backport" state +name: Issue Backporting on: # To manually trigger this: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#workflow_dispatch + # View past runs and output: https://github.com/ceph/ceph/actions/workflows/create-backport-trackers.yml workflow_dispatch: inputs: issues: @@ -36,12 +37,15 @@ jobs: runs-on: ubuntu-latest if: github.ref == 'refs/heads/main' steps: - - uses: Bhacaz/checkout-files@e3e34e7daef91a5f237485bb88a260aee4be29dd + - uses: actions/checkout@v4 with: - files: src/script/backport-create-issue src/script/requirements.backport-create-issue.txt - - uses: actions/setup-python@v4 + sparse-checkout: | + src/script/backport-create-issue + src/script/requirements.backport-create-issue.txt + sparse-checkout-cone-mode: false + - uses: actions/setup-python@v5 with: - python-version: '>=3.6' + python-version: '>=3.6 <3.12' cache: 'pip' cache-dependency-path: src/script/requirements.backport-create-issue.txt - run: pip install -r src/script/requirements.backport-create-issue.txt diff --git a/.github/workflows/pr-check-deps.yml b/.github/workflows/pr-check-deps.yml index 7815b8fe486f..a7258d187f13 100644 --- a/.github/workflows/pr-check-deps.yml +++ b/.github/workflows/pr-check-deps.yml @@ -5,6 +5,6 @@ jobs: runs-on: ubuntu-latest name: Check PR Dependencies steps: - - uses: gregsdennis/dependencies-action@80b5ffec566913b1494d5a8577ab0d60e476271d + - uses: gregsdennis/dependencies-action@f98d55eee1f66e7aaea4a60e71892736ae2548c7 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 31478e8e8f65..510a6bebd4e2 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -9,7 +9,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v8 + - uses: actions/stale@v9 with: # PAT for GitHub API authentication repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.githubmap b/.githubmap index cf5f15c130a7..68c711aa587f 100644 --- a/.githubmap +++ b/.githubmap @@ -12,6 +12,7 @@ aaSharma14 Aashish Sharma aclamk Adam Kupczyk adamemerson Adam C. 
Emerson adk3798 Adam King +afreen23 Afreen Misbah ajarr Ramana Raja alfonsomthd Alfonso Martínez alfredodeza Alfredo Deza @@ -19,6 +20,7 @@ alimaredia Ali Maredia amathuria Aishwarya Mathuria amitkumar50 Amit Kumar andrewschoen Andrew Schoen +anuradhagadge Anuradha Gadge aaryanporwal Aaryan Porwal asettle Alexandra Settle athanatos Samuel Just @@ -27,7 +29,7 @@ b-ranto Boris Ranto badone Brad Hubbard baruza Barbora Ančincová bassamtabbara Bassam Tabbara -batrick Patrick Donnelly +batrick Patrick Donnelly bigjust Justin Caratzas bk201 Kiefer Chang BlaineEXE Blaine Gardner @@ -47,6 +49,7 @@ Devp00l Stephan Müller dillaman Jason Dillaman djgalloway David Galloway dmick Dan Mick +dnyanee1997 Dnyaneshwari talwekar dragonylffly Li Wang dsavineau Dimitri Savineau dvanders Dan van der Ster @@ -96,6 +99,8 @@ mikechristie Mike Christie mogeb Mohamad Gebai MrFreezeex Arthur Outhenin-Chalandre myoungwon Myoungwon Oh +nmunet Naman Munet +Naveenaidu Naveen Naidu neha-ojha Neha Ojha NitzanMordhai Nitzan Mordechai nizamial09 Nizamudeen A @@ -108,6 +113,8 @@ p-se Patrick Seidensal pcuzner Paul Cuzner Pegonzal Pedro Gonzalez Gomez pereman2 Pere Diaz Bou +prgoel-code Prachi prgoel@redhat.com +pujaoshahu Puja Shahu rchagam Anjaneya Chagam renhwztetecs huanwen ren ricardoasmarques Ricardo Marques @@ -121,6 +128,9 @@ s0nea Tatjana Dehler Sarthak0702 Sarthak Gupta saschagrunert Sascha Grunert sebastian-philipp Sebastian Wagner +shraddhaag Shraddha Agrawal +Kushal-deb Kushal Deb +ShwetaBhosale1 Shweta Bhosale ShyamsundarR Shyamsundar R sidharthanup Sidharth Anupkrishnan smithfarm Nathan Cutler @@ -176,3 +186,5 @@ baergj Joshua Baergen zmc Zack Cerza robbat2 Robin H. Johnson leonid-s-usov Leonid Usov +ffilz Frank S. Filz +Jayaprakash-ibm Jaya Prakash Madaka diff --git a/.gitignore b/.gitignore index b01aef839bef..c74ad2efd69b 100644 --- a/.gitignore +++ b/.gitignore @@ -83,3 +83,17 @@ GTAGS # Python building things where it shouldn't /src/python-common/build/ .cache + +# Doc build output +src/pybind/cephfs/build/ +src/pybind/cephfs/cephfs.c +src/pybind/cephfs/cephfs.egg-info/ +src/pybind/rados/build/ +src/pybind/rados/rados.c +src/pybind/rados/rados.egg-info/ +src/pybind/rbd/build/ +src/pybind/rbd/rbd.c +src/pybind/rbd/rbd.egg-info/ +src/pybind/rgw/build/ +src/pybind/rgw/rgw.c +src/pybind/rgw/rgw.egg-info/ diff --git a/.gitmodules b/.gitmodules index 088ae3b577ce..4a20b958b569 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,18 +50,12 @@ [submodule "src/c-ares"] path = src/c-ares url = https://github.com/ceph/c-ares.git -[submodule "src/spawn"] - path = src/spawn - url = https://github.com/ceph/spawn.git [submodule "src/pybind/mgr/rook/rook-client-python"] path = src/pybind/mgr/rook/rook-client-python url = https://github.com/ceph/rook-client-python.git [submodule "s3select"] path = src/s3select url = https://github.com/ceph/s3select.git -[submodule "src/cpp_redis"] - path = src/cpp_redis - url = https://github.com/ceph/cpp_redis.git [submodule "src/libkmip"] path = src/libkmip url = https://github.com/ceph/libkmip @@ -75,3 +69,16 @@ [submodule "src/jaegertracing/opentelemetry-cpp"] path = src/jaegertracing/opentelemetry-cpp url = https://github.com/open-telemetry/opentelemetry-cpp.git +[submodule "src/qatlib"] + path = src/qatlib + url = https://github.com/intel/qatlib.git +[submodule "src/qatzip"] + path = src/qatzip + url = https://github.com/intel/qatzip.git +[submodule "src/BLAKE3"] + path = src/BLAKE3 + url = https://github.com/BLAKE3-team/BLAKE3.git +[submodule "src/nvmeof/gateway"] + path = 
src/nvmeof/gateway + url = https://github.com/ceph/ceph-nvmeof.git + fetchRecurseSubmodules = false diff --git a/.mailmap b/.mailmap index 2450b9043152..6322c4ba5238 100644 --- a/.mailmap +++ b/.mailmap @@ -24,6 +24,7 @@ Adam Kupczyk Adam Kupczyk Adam Twardowski Adir Lev +Afreen Misbah Ahoussi Armand Ailing Zhang Aishwarya Mathuria amathuria @@ -63,6 +64,7 @@ Anthony D Atri Anthony D Atri anthonyeleven Anton Oks Anton Turetckii banuchka +Anuradha Gadge Anurag Bandhu Aravind Ramesh Aravind Aristoteles Neto @@ -168,6 +170,7 @@ Dhairya Parmar dparmar18 Dingdang Zhang Dmitry Smirnov Dmitry Yatsushkevich +Dnyaneshwari talwekar Dominik Hannen Dongdong Tao Dongdong Tao @@ -508,12 +511,14 @@ Myoungwon Oh Myoungwon Oh Na Xie Nag Pavan Chilakam <55574442+nagpavan-chilakam@users.noreply.github.com> +Naman Munet Nancy Su Nathan Cutler Nathan Cutler Nathan Cutler Nathan Cutler Nathan Weinberg +Naveen Naidu Neeraj Pratap Singh Neeraj Pratap Singh neeraj pratap singh Neha Ojha @@ -543,7 +548,8 @@ Pan Liu Parth Arora parth-gr Pascal de Bruijn Patience Warnick -Patrick Donnelly +Patrick Donnelly +Patrick Donnelly Patrick McGarry Patrick McGarry Patrick Seidensal @@ -571,6 +577,8 @@ Pooja Gautam Pritha Srivastava Pritha Srivastava Pritha Srivastava +Prachi prgoel@redhat.com +Puja Shahu Qi Liang Hong Qiankun Zheng Qinfei Liu <18138800392@163.com> @@ -672,12 +680,15 @@ Shiqi Shiqi <1454927420@qq.com> Shishir Gowda Shotaro Kawaguchi +Shraddha Agrawal +Kushal Deb Shreyansh Sancheti shreyanshjain7174 Shu, Xinxin Shuai Yong Shun Song Shun Song Shun Song +Shweta Bhosale Shyamsundar R Shylesh Kumar Sibei Gao diff --git a/.organizationmap b/.organizationmap index d33d2cf54c73..e59e6ae24e1a 100644 --- a/.organizationmap +++ b/.organizationmap @@ -346,17 +346,28 @@ Huayun Zheng Yin Huazhong University of Science and Technology Luo Runbing HXT Semiconductor Jiang Yutang IBM Adam Kupczyk +IBM Afreen Misbah IBM Aliaksei Makarau IBM Andrew Solomon +IBM Anuradha Gadge +IBM Dnyaneshwari talwekar IBM Guillaume Abrioux IBM Jonas Pfefferle IBM Laura Flores IBM Martin Ohmacht IBM Michel Normand +IBM Naman Munet +IBM Naveen Naidu IBM Neeraj Pratap Singh IBM Or Ozeri IBM Paul Cuzner +IBM Prachi Goel +IBM Puja Shahu IBM Samuel Matzek +IBM Shraddha Agrawal +IBM Kushal Deb +IBM Shweta Bhosale +IBM Patrick Donnelly IBM Sunil Angadi IBM Teoman Onay IBM Ulrich Weigand @@ -580,6 +591,7 @@ Red Hat Adam King Red Hat Adam King Red Hat Adam Kupczyk Red Hat Ademar de Souza Reis Jr +Red Hat Afreen Misbah Red Hat Aishwarya Mathuria Red Hat Albin Antony Red Hat Alex Elder @@ -616,6 +628,7 @@ Red Hat Deepika Upadhyay Red Hat Dhairya Parmar Red Hat Dimitri Savineau Red Hat Divyansh Kamboj +Red Hat Dnyaneshwari talwekar Red Hat Douglas Fuller Red Hat Ernesto Puerta Red Hat Erwan Velu @@ -681,6 +694,7 @@ Red Hat Mike Hackett Red Hat Mike Perez Red Hat Milan Broz Red Hat Milind Changire +Red Hat Naman Munet Red Hat Nathan Weinberg Red Hat Neeraj Pratap Singh Red Hat Neha Ojha @@ -704,9 +718,11 @@ Red Hat Pere Diaz Bou Red Hat Pete Zaitcev Red Hat Petr Lautrbach Red Hat Petr Machata +Red Hat Prachi prgoel@redhat.com Red Hat Prasanna Kumar Kalever Red Hat Prashant D Red Hat Pritha Srivastava +Red Hat Puja Shahu Red Hat Radoslaw Zarzynski Red Hat Rafael Quintero Red Hat Ramakrishnan Periyasamy diff --git a/.peoplemap b/.peoplemap index 507f50edb43e..418e8505fb49 100644 --- a/.peoplemap +++ b/.peoplemap @@ -73,5 +73,5 @@ Yehuda Sadeh Yehuda Sadeh Yuri Weinstein Yuri Weinstein Zhi Zhang Zhi (David) Zhang Zheng Yin Zheng Yin -Patrick Donnelly Patrick 
Donnelly +Patrick Donnelly Patrick Donnelly Patrick Donnelly Myoungwon Oh Myoungwon Oh Myoungwon Oh diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e2af5e35634..2db321bed351 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,20 +1,9 @@ cmake_minimum_required(VERSION 3.16) project(ceph - VERSION 18.0.0 + VERSION 19.0.0 LANGUAGES CXX C ASM) -cmake_policy(SET CMP0028 NEW) -cmake_policy(SET CMP0046 NEW) -cmake_policy(SET CMP0048 NEW) -cmake_policy(SET CMP0051 NEW) -cmake_policy(SET CMP0054 NEW) -cmake_policy(SET CMP0056 NEW) -cmake_policy(SET CMP0065 NEW) -cmake_policy(SET CMP0074 NEW) -cmake_policy(SET CMP0075 NEW) -cmake_policy(SET CMP0093 NEW) -cmake_policy(SET CMP0094 NEW) foreach(policy CMP0127 CMP0135) if(POLICY ${policy}) cmake_policy(SET ${policy} NEW) @@ -23,10 +12,15 @@ endforeach() list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/modules/") -if(NOT CMAKE_BUILD_TYPE AND EXISTS "${CMAKE_SOURCE_DIR}/.git") - set(default_build_type "Debug") - set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE - STRING "Default BUILD_TYPE is Debug, other options are: RelWithDebInfo, Release, and MinSizeRel." FORCE) +if(NOT CMAKE_BUILD_TYPE) + if (EXISTS "${CMAKE_SOURCE_DIR}/.git") + message(WARNING "CMAKE_BUILD_TYPE not specified, assuming CMAKE_BUILD_TYPE=Debug because .git exists.") + set(default_build_type "Debug") + set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE + STRING "Default BUILD_TYPE is Debug, other options are: RelWithDebInfo, Release, and MinSizeRel." FORCE) + else() + message(WARNING "CMAKE_BUILD_TYPE not specified, leaving unset because .git does NOT exist.") + endif() endif() if(CMAKE_SYSTEM_NAME MATCHES "Linux") @@ -89,6 +83,40 @@ if(WITH_CCACHE) endif() endif(WITH_CCACHE) +option(WITH_SCCACHE "Build with sccache.") +if(WITH_SCCACHE) + find_program(SCCACHE_EXECUTABLE sccache) + if(NOT SCCACHE_EXECUTABLE) + message(FATAL_ERROR "Can't find sccache. Is it installed?") + endif() + if(NOT NINJA_MAX_COMPILE_JOBS) + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.19") + execute_process( + COMMAND "sccache" "--dist-status" + OUTPUT_VARIABLE sccache_dist_status + ) + string( + JSON sccache_cores + ERROR_VARIABLE sccache_dist_status_error + GET "${sccache_dist_status}" SchedulerStatus 1 num_cpus + ) + string(FIND "${sccache_dist_status}" "disabled" find_result) + if(find_result EQUAL -1) + message(STATUS "Using sccache with distributed compilation. Effective cores: ${sccache_cores}") + set(NINJA_MAX_COMPILE_JOBS ${sccache_cores}) + set(NINJA_MAX_LINK_JOBS ${sccache_cores}) + else() + message(WARNING "Using sccache, but it is not configured for distributed complilation") + endif() + else() + message(WARNING "Using sccache, but cannot determine maximum job value since cmake version is <3.19") + endif() + endif() + message(STATUS "Building with sccache: ${SCCACHE_EXECUTABLE}, SCCACHE_CONF=$ENV{SCCACHE_CONF}") + set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_EXECUTABLE}) + set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_EXECUTABLE}) +endif(WITH_SCCACHE) + option(WITH_MANPAGE "Build man pages." 
ON) if(WITH_MANPAGE) find_program(SPHINX_BUILD @@ -214,12 +242,6 @@ if(WITH_XFS) set(HAVE_LIBXFS ${XFS_FOUND}) endif() -option(WITH_ZFS "enable LibZFS if found" OFF) -if(WITH_ZFS) - find_package(zfs) - set(HAVE_LIBZFS ${ZFS_FOUND}) -endif() - option(WITH_BLUESTORE "Bluestore OSD backend" ON) if(WITH_BLUESTORE) if(LINUX) @@ -237,15 +259,14 @@ if(WITH_RBD AND LINUX) set(HAVE_LIBCRYPTSETUP ${LIBCRYPTSETUP_FOUND}) endif() -include(CMakeDependentOption) - -CMAKE_DEPENDENT_OPTION(WITH_ZBD "Enable libzbd bluestore backend" OFF - "WITH_BLUESTORE" OFF) -if(WITH_ZBD) - find_package(zbd REQUIRED) - set(HAVE_LIBZBD ${ZBD_FOUND}) +# libnbd +if(WITH_RBD AND NOT WIN32) + find_package(libnbd 1.0 REQUIRED) + set(HAVE_LIBNBD ${LIBNBD_FOUND}) endif() +include(CMakeDependentOption) + CMAKE_DEPENDENT_OPTION(WITH_LIBURING "Enable io_uring bluestore backend" ON "WITH_BLUESTORE;HAVE_LIBAIO" OFF) set(HAVE_LIBURING ${WITH_LIBURING}) @@ -253,6 +274,20 @@ set(HAVE_LIBURING ${WITH_LIBURING}) CMAKE_DEPENDENT_OPTION(WITH_SYSTEM_LIBURING "Require and build with system liburing" OFF "HAVE_LIBAIO;WITH_BLUESTORE" OFF) +if(WITH_LIBURING) + if(WITH_SYSTEM_LIBURING) + find_package(uring REQUIRED) + else() + include(Builduring) + build_uring() + endif() + # enable uring in boost::asio + + if(CMAKE_SYSTEM_VERSION VERSION_GREATER_EQUAL "5.10") + add_compile_definitions("BOOST_ASIO_HAS_IO_URING") + endif() +endif() + CMAKE_DEPENDENT_OPTION(WITH_BLUESTORE_PMEM "Enable PMDK libraries" OFF "WITH_BLUESTORE" OFF) if(WITH_BLUESTORE_PMEM) @@ -309,18 +344,56 @@ endif() option(WITH_BLUEFS "libbluefs library" OFF) -option(WITH_QAT "Enable Qat driver" OFF) -if(WITH_QAT) +CMAKE_DEPENDENT_OPTION(WITH_QATLIB "Enable QAT with qatlib" ON + "CMAKE_SYSTEM_PROCESSOR MATCHES amd64|x86_64|AMD64" OFF) +option(WITH_SYSTEM_QATLIB "Use system packages for qatlib" OFF) +option(WITH_QATDRV "Enable QAT with out-of-tree driver" OFF) +CMAKE_DEPENDENT_OPTION(WITH_QATZIP "Enable QATzip" ON + "CMAKE_SYSTEM_PROCESSOR MATCHES amd64|x86_64|AMD64" OFF) +option(WITH_SYSTEM_QATZIP "Use system packages for QATzip" OFF) + +if(WITH_QATDRV) find_package(QatDrv REQUIRED COMPONENTS qat_s usdm_drv_s) - set(HAVE_QATDRV $(QatDrv_FOUND)) + set(HAVE_QAT TRUE) +elseif(WITH_QATLIB) + if(NOT WITH_SYSTEM_QAT) + include(BuildQAT) + build_qat() + endif() + find_package(QAT REQUIRED) + if(NOT WITH_SYSTEM_QAT) + add_dependencies(QAT::qat qatlib_ext) + add_dependencies(QAT::usdm qatlib_ext) + endif() + set(HAVE_QAT TRUE) endif() -option(WITH_QATZIP "Enable QATZIP" OFF) if(WITH_QATZIP) - find_package(qatzip REQUIRED) - set(HAVE_QATZIP ${qatzip_FOUND}) + if(NOT HAVE_QAT) + message(FATAL_ERROR "WITH_QATZIP requires WITH_QATLIB or WITH_QATDRV") + endif() + if(NOT WITH_SYSTEM_QATZIP) + include(BuildQATzip) + build_qatzip() + # qatzip build depends on qatlib + add_dependencies(qatzip_ext QAT::qat) + endif() + find_package(QATzip REQUIRED) + if(NOT WITH_SYSTEM_QATZIP) + add_dependencies(QAT::zip qatzip_ext) + endif() + set(HAVE_QATZIP TRUE) endif(WITH_QATZIP) +CMAKE_DEPENDENT_OPTION(WITH_UADK "Enable UADK" ON + "CMAKE_SYSTEM_PROCESSOR MATCHES aarch64" OFF) +if(WITH_UADK) + include(Builduadk) + build_uadk() + set(HAVE_UADK TRUE) + message("HAVE_UADK " ${HAVE_UADK}) +endif(WITH_UADK) + # needs mds and? XXX option(WITH_LIBCEPHFS "libcephfs client library" ON) @@ -519,10 +592,13 @@ endif (WITH_RADOSGW) option(WITH_CEPHFS "CephFS is enabled" ON) if(NOT WIN32) -# Please specify 3.[0-7] if you want to build with a certain version of python3. 
+# Please specify 3.x if you want to build with a certain version of python3. set(WITH_PYTHON3 "3" CACHE STRING "build with specified python3 version") find_package(Python3 ${WITH_PYTHON3} EXACT REQUIRED COMPONENTS Interpreter Development) +if(Python3_VERSION VERSION_LESS 3.9) + message(FATAL_ERROR "${Python3_VERSION} is not supported, please use Python 3.9 and up") +endif() option(WITH_MGR "ceph-mgr is enabled" ON) if(WITH_MGR) @@ -576,12 +652,11 @@ if(WITH_BABELTRACE) endif(WITH_BABELTRACE) option(DEBUG_GATHER "C_Gather debugging is enabled" ON) -option(ENABLE_COVERAGE "Coverage is enabled" OFF) option(PG_DEBUG_REFS "PG Ref debugging is enabled" OFF) option(WITH_TESTS "enable the build of ceph-test package scripts/binaries" ON) set(UNIT_TESTS_BUILT ${WITH_TESTS}) -set(CEPH_TEST_TIMEOUT 3600 CACHE STRING +set(CEPH_TEST_TIMEOUT 7200 CACHE STRING "Maximum time before a CTest gets killed" ) # fio @@ -639,7 +714,7 @@ option(WITH_SYSTEM_BOOST "require and build with system Boost" OFF) # Boost::thread depends on Boost::atomic, so list it explicitly. set(BOOST_COMPONENTS atomic chrono thread system regex random program_options date_time - iostreams context coroutine) + iostreams context coroutine url) set(BOOST_HEADER_COMPONENTS container) if(WITH_MGR) @@ -668,7 +743,7 @@ if(WITH_SYSTEM_BOOST) if(BOOST_ROOT AND CMAKE_LIBRARY_ARCHITECTURE) set(BOOST_LIBRARYDIR "${BOOST_ROOT}/lib/${CMAKE_LIBRARY_ARCHITECTURE}") endif() - find_package(Boost 1.82 COMPONENTS ${BOOST_COMPONENTS} REQUIRED) + find_package(Boost 1.85 COMPONENTS ${BOOST_COMPONENTS} REQUIRED) if(NOT ENABLE_SHARED) set_property(TARGET Boost::iostreams APPEND PROPERTY INTERFACE_LINK_LIBRARIES ZLIB::ZLIB) @@ -682,16 +757,11 @@ else() set(BOOST_J ${DEFAULT_BOOST_J} CACHE STRING "max jobs for Boost build") # override w/-DBOOST_J= set(Boost_USE_STATIC_LIBS ON) include(BuildBoost) - build_boost(1.82 + build_boost(1.85 COMPONENTS ${BOOST_COMPONENTS} ${BOOST_HEADER_COMPONENTS}) endif() include_directories(BEFORE SYSTEM ${Boost_INCLUDE_DIRS}) -if(Boost_VERSION VERSION_EQUAL 1.81 OR Boost_VERSION VERSION_EQUAL 1.82) - # This is a workaround for https://github.com/boostorg/phoenix/issues/111 - add_compile_options($<$:-DBOOST_PHOENIX_STL_TUPLE_H_>) -endif() - # dashboard angular2 frontend option(WITH_MGR_DASHBOARD_FRONTEND "Build the mgr/dashboard frontend using `npm`" ON) option(WITH_SYSTEM_NPM "Assume that dashboard build tools already installed through packages" OFF) @@ -701,7 +771,7 @@ if(WITH_SYSTEM_NPM) message(FATAL_ERROR "Can't find npm.") endif() endif() -set(DASHBOARD_FRONTEND_LANGS "" CACHE STRING +set(DASHBOARD_FRONTEND_LANGS "ALL" CACHE STRING "List of comma separated ceph-dashboard frontend languages to build. \ Use value `ALL` to build all languages") CMAKE_DEPENDENT_OPTION(WITH_MGR_ROOK_CLIENT "Enable the mgr's Rook support" ON diff --git a/COPYING b/COPYING index bd0b22f6bce4..8bc6b59b1c2f 100644 --- a/COPYING +++ b/COPYING @@ -29,10 +29,6 @@ Files: src/mount/canonicalize.c Copyright: Copyright (C) 1993 Rick Sladkey License: LGPL-2 or later -Files: src/os/btrfs_ioctl.h -Copyright: Copyright (C) 2007 Oracle. All rights reserved. 
-License: GPL2 (see COPYING-GPL2) - Files: src/include/ceph_hash.cc Copyright: None License: Public domain @@ -224,3 +220,7 @@ Files: src/script/backport-resolve-issue Copyright: 2015 Red Hat 2018 SUSE LLC License: GNU Affero General Public License, Version 3 + +Files: src/common/*s390x* +Copyright: 2024 IBM +License: Apache License, version 2.0 diff --git a/CodingStyle b/CodingStyle index 659298f0e5ae..019d23c7703d 100644 --- a/CodingStyle +++ b/CodingStyle @@ -108,6 +108,12 @@ by section. portability since `#pragma once` is widely supported and is known to work on GCC and Clang. +* Header Files -> Forward declarations: + + Forward declarations of structs, unions, classes and enums can be + used to reduce header dependencies. This speeds up compile times + because the compiler has to process less code. + The following guidelines have not been followed in the legacy code, but are worth mentioning and should be followed strictly for new code: diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 1fc9c4510d3e..97a326aa7198 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -1,55 +1,320 @@ +>=20.0.0 + +* RBD: All Python APIs that produce timestamps now return "aware" `datetime` + objects instead of "naive" ones (i.e. those including time zone information + instead of those not including it). All timestamps remain to be in UTC but + including `timezone.utc` makes it explicit and avoids the potential of the + returned timestamp getting misinterpreted -- in Python 3, many `datetime` + methods treat "naive" `datetime` objects as local times. +* RBD: `rbd group info` and `rbd group snap info` commands are introduced to + show information about a group and a group snapshot respectively. +* RBD: `rbd group snap ls` output now includes the group snapshot IDs. The header + of the column showing the state of a group snapshot in the unformatted CLI + output is changed from 'STATUS' to 'STATE'. The state of a group snapshot + that was shown as 'ok' is now shown as 'complete', which is more descriptive. +* Based on tests performed at scale on an HDD based Ceph cluster, it was found + that scheduling with mClock was not optimal with multiple OSD shards. For + example, in the test cluster with multiple OSD node failures, the client + throughput was found to be inconsistent across test runs coupled with multiple + reported slow requests. However, the same test with a single OSD shard and + with multiple worker threads yielded significantly better results in terms of + consistency of client and recovery throughput across multiple test runs. + Therefore, as an interim measure until the issue with multiple OSD shards + (or multiple mClock queues per OSD) is investigated and fixed, the following + changes to the default option values have been made: + - osd_op_num_shards_hdd = 1 (was 5) + - osd_op_num_threads_per_shard_hdd = 5 (was 1) + For more details see https://tracker.ceph.com/issues/66289. +* MGR: The Ceph Manager's always-on modulues/plugins can now be force-disabled. + This can be necessary in cases where we wish to prevent the manager from being + flooded by module commands when Ceph services are down or degraded. + +* CephFS: Modifying the setting "max_mds" when a cluster is + unhealthy now requires users to pass the confirmation flag + (--yes-i-really-mean-it). This has been added as a precaution to tell the + users that modifying "max_mds" may not help with troubleshooting or recovery + effort. Instead, it might further destabilize the cluster. 
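The timezone-aware timestamp change for the RBD Python APIs noted at the top of this release-notes hunk can be illustrated with a short sketch. This is only an example under assumed names (a pool `rbd` holding an image `img`, and a cluster config at `/etc/ceph/ceph.conf`), not content taken from the patch itself:

    # Minimal sketch: timestamps from the RBD Python bindings are now
    # timezone-aware (UTC), so they can be subtracted from other aware
    # datetimes directly. Pool and image names below are assumptions.
    from datetime import datetime, timezone
    import rados
    import rbd

    cluster = rados.Rados(conffile='/etc/ceph/ceph.conf')
    cluster.connect()
    try:
        with cluster.open_ioctx('rbd') as ioctx:
            with rbd.Image(ioctx, 'img') as image:
                created = image.create_timestamp()  # aware datetime, tzinfo=UTC
                age = datetime.now(timezone.utc) - created
                print(f"image created {age} ago (tzinfo={created.tzinfo})")
    finally:
        cluster.shutdown()

With the previous "naive" objects, the subtraction above would have raised a TypeError when mixed with aware datetimes.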
+ +* mgr/restful, mgr/zabbix: both modules, already deprecated since 2020, have been + finally removed. They have not been actively maintenance in the last years, + and started suffering from vulnerabilities in their dependency chain (e.g.: + CVE-2023-46136). As alternatives, for the `restful` module, the `dashboard` module + provides a richer and better maintained RESTful API. Regarding the `zabbix` module, + there are alternative monitoring solutions, like `prometheus`, which is the most + widely adopted among the Ceph user community. + +* CephFS: EOPNOTSUPP (Operation not supported ) is now returned by the CephFS + fuse client for `fallocate` for the default case (i.e. mode == 0) since + CephFS does not support disk space reservation. The only flags supported are + `FALLOC_FL_KEEP_SIZE` and `FALLOC_FL_PUNCH_HOLE`. + >=19.0.0 +* cephx: key rotation is now possible using `ceph auth rotate`. Previously, + this was only possible by deleting and then recreating the key. +* Ceph: a new --daemon-output-file switch is available for `ceph tell` commands + to dump output to a file local to the daemon. For commands which produce + large amounts of output, this avoids a potential spike in memory usage on the + daemon, allows for faster streaming writes to a file local to the daemon, and + reduces time holding any locks required to execute the command. For analysis, + it is necessary to retrieve the file from the host running the daemon + manually. Currently, only --format=json|json-pretty are supported. +* RGW: GetObject and HeadObject requests now return an x-rgw-replicated-at + header for replicated objects. This timestamp can be compared against the + Last-Modified header to determine how long the object took to replicate. +* The cephfs-shell utility is now packaged for RHEL / CentOS / Rocky 9 as required + Python dependencies are now available in EPEL9. * RGW: S3 multipart uploads using Server-Side Encryption now replicate correctly in - multi-site. Previously, the replicas of such objects were corrupted on decryption. + multi-site deployments Previously, replicas of such objects were corrupted on decryption. A new tool, ``radosgw-admin bucket resync encrypted multipart``, can be used to identify these original multipart uploads. The ``LastModified`` timestamp of any - identified object is incremented by 1ns to cause peer zones to replicate it again. - For multi-site deployments that make any use of Server-Side Encryption, we + identified object is incremented by one ns to cause peer zones to replicate it again. + For multi-site deployments that make use of Server-Side Encryption, we recommended running this command against every bucket in every zone after all zones have upgraded. -* CEPHFS: MDS evicts clients which are not advancing their request tids which causes - a large buildup of session metadata resulting in the MDS going read-only due to - the RADOS operation exceeding the size threshold. `mds_session_metadata_threshold` - config controls the maximum size that a (encoded) session metadata can grow. -* CephFS: For clusters with multiple CephFS file systems, all the snap-schedule +* Tracing: The blkin tracing feature (see https://docs.ceph.com/en/reef/dev/blkin/) + is now deprecated in favor of Opentracing (https://docs.ceph.com/en/reef/dev/developer_guide/jaegertracing/) + and will be removed in a later release. 
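The x-rgw-replicated-at note earlier in this hunk can be checked from any S3 client. The sketch below happens to use boto3 with a made-up endpoint, credentials, bucket and key, and assumes the new header carries an HTTP-date value comparable to Last-Modified; treat it as an illustration rather than part of the change:

    # Hedged sketch: read the x-rgw-replicated-at response header from a
    # HeadObject call and compare it with Last-Modified to estimate how long
    # replication took. Endpoint, credentials and object names are invented.
    import boto3
    from email.utils import parsedate_to_datetime

    s3 = boto3.client(
        's3',
        endpoint_url='http://rgw.example.com:8080',
        aws_access_key_id='ACCESS_KEY',
        aws_secret_access_key='SECRET_KEY',
    )
    resp = s3.head_object(Bucket='example-bucket', Key='example-object')
    headers = resp['ResponseMetadata']['HTTPHeaders']
    replicated_at = headers.get('x-rgw-replicated-at')
    if replicated_at:
        lag = parsedate_to_datetime(replicated_at) - resp['LastModified']
        print(f"replication lag: {lag}")
    else:
        print("object not (yet) replicated")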
+* RGW: Introducing a new data layout for the Topic metadata associated with S3 + Bucket Notifications, where each Topic is stored as a separate RADOS object + and the bucket notification configuration is stored in a bucket attribute. + This new representation supports multisite replication via metadata sync and + can scale to many topics. This is on by default for new deployments, but is + is not enabled by default on upgrade. Once all radosgws have upgraded (on all + zones in a multisite configuration), the ``notification_v2`` zone feature can + be enabled to migrate to the new format. See + https://docs.ceph.com/en/squid/radosgw/zone-features for details. The "v1" + format is now considered deprecated and may be removed after 2 major releases. +* CephFS: The MDS evicts clients which are not advancing their request tids, which causes + a large buildup of session metadata, which in turn results in the MDS going read-only + due to RADOS operations exceeding the size threshold. `mds_session_metadata_threshold` + config controls the maximum size to which (encoded) session metadata can grow. +* CephFS: A new "mds last-seen" command is available for querying the last time + an MDS was in the FSMap, subject to a pruning threshold. +* CephFS: For clusters with multiple CephFS file systems, all snap-schedule commands now expect the '--fs' argument. * CephFS: The period specifier ``m`` now implies minutes and the period specifier - ``M`` now implies months. This has been made consistent with the rest - of the system. + ``M`` now implies months. This is consistent with the rest of the system. * RGW: New tools have been added to radosgw-admin for identifying and correcting issues with versioned bucket indexes. Historical bugs with the versioned bucket index transaction workflow made it possible for the index to accumulate extraneous "book-keeping" olh entries and plain placeholder entries. In some specific scenarios where clients made concurrent requests - referencing the same object key, it was likely that a lot of extra index + referencing the same object key, it was likely that extra index entries would accumulate. When a significant number of these entries are present in a single bucket index shard, they can cause high bucket listing - latencies and lifecycle processing failures. To check whether a versioned + latency and lifecycle processing failures. To check whether a versioned bucket has unnecessary olh entries, users can now run ``radosgw-admin bucket check olh``. If the ``--fix`` flag is used, the extra entries will - be safely removed. A distinct issue from the one described thus far, it is - also possible that some versioned buckets are maintaining extra unlinked - objects that are not listable from the S3/ Swift APIs. These extra objects - are typically a result of PUT requests that exited abnormally, in the middle - of a bucket index transaction - so the client would not have received a - successful response. Bugs in prior releases made these unlinked objects easy - to reproduce with any PUT request that was made on a bucket that was actively - resharding. Besides the extra space that these hidden, unlinked objects - consume, there can be another side effect in certain scenarios, caused by - the nature of the failure mode that produced them, where a client of a bucket - that was a victim of this bug may find the object associated with the key to - be in an inconsistent state. 
To check whether a versioned bucket has unlinked - entries, users can now run ``radosgw-admin bucket check unlinked``. If the - ``--fix`` flag is used, the unlinked objects will be safely removed. Finally, - a third issue made it possible for versioned bucket index stats to be - accounted inaccurately. The tooling for recalculating versioned bucket stats - also had a bug, and was not previously capable of fixing these inaccuracies. - This release resolves those issues and users can now expect that the existing - ``radosgw-admin bucket check`` command will produce correct results. We - recommend that users with versioned buckets, especially those that existed - on prior releases, use these new tools to check whether their buckets are - affected and to clean them up accordingly. + be safely removed. An additional issue is that some versioned buckets + may maintain extra unlinked objects that are not listable via the S3/Swift + APIs. These extra objects are typically a result of PUT requests that + exited abnormally in the middle of a bucket index transaction, and thus + the client would not have received a successful response. Bugs in prior + releases made these unlinked objects easy to reproduce with any PUT + request made on a bucket that was actively resharding. In certain + scenarios, a client of a bucket that was a victim of this bug may find + the object associated with the key to be in an inconsistent state. To check + whether a versioned bucket has unlinked entries, users can now run + ``radosgw-admin bucket check unlinked``. If the ``--fix`` flag is used, + the unlinked objects will be safely removed. Finally, a third issue made + it possible for versioned bucket index stats to be accounted inaccurately. + The tooling for recalculating versioned bucket stats also had a bug, and + was not previously capable of fixing these inaccuracies. This release + resolves those issues and users can now expect that the existing + ``radosgw-admin bucket check`` command will produce correct results. + We recommend that users with versioned buckets, especially those that + existed on prior releases, use these new tools to check whether their + buckets are affected and to clean them up accordingly. +* RGW: The "user accounts" feature unlocks several new AWS-compatible IAM APIs + for self-service management of users, keys, groups, roles, policy and + more. Existing users can be adopted into new accounts. This process is optional + but irreversible. See https://docs.ceph.com/en/squid/radosgw/account and + https://docs.ceph.com/en/squid/radosgw/iam for details. +* RGW: On startup, radosgw and radosgw-admin now validate the ``rgw_realm`` + config option. Previously, they would ignore invalid or missing realms and + go on to load a zone/zonegroup in a different realm. If startup fails with + a "failed to load realm" error, fix or remove the ``rgw_realm`` option. +* RGW: The radosgw-admin commands ``realm create`` and ``realm pull`` no + longer set the default realm without ``--default``. +* CephFS: Running the command "ceph fs authorize" for an existing entity now + upgrades the entity's capabilities instead of printing an error. It can now + also change read/write permissions in a capability that the entity already + holds. If the capability passed by user is same as one of the capabilities + that the entity already holds, idempotency is maintained. +* CephFS: Two FS names can now be swapped, optionally along with their IDs, + using "ceph fs swap" command. 
The function of this API is to facilitate + file system swaps for disaster recovery. In particular, it avoids situations + where a named file system is temporarily missing which would prompt a higher + level storage operator (like Rook) to recreate the missing file system. + See https://docs.ceph.com/en/latest/cephfs/administration/#file-systems + docs for more information. +* CephFS: Before running the command "ceph fs rename", the filesystem to be + renamed must be offline and the config "refuse_client_session" must be set + for it. The config "refuse_client_session" can be removed/unset and + filesystem can be online after the rename operation is complete. +* RADOS: A POOL_APP_NOT_ENABLED health warning will now be reported if + the application is not enabled for the pool irrespective of whether + the pool is in use or not. Always tag a pool with an application + using ``ceph osd pool application enable`` command to avoid reporting + of POOL_APP_NOT_ENABLED health warning for that pool. + The user might temporarily mute this warning using + ``ceph health mute POOL_APP_NOT_ENABLED``. +* The `mon_cluster_log_file_level` and `mon_cluster_log_to_syslog_level` options + have been removed. Henceforth, users should use the new generic option + `mon_cluster_log_level` to control the cluster log level verbosity for the cluster + log file as well as for all external entities. +CephFS: Disallow delegating preallocated inode ranges to clients. Config + `mds_client_delegate_inos_pct` defaults to 0 which disables async dirops + in the kclient. +* S3 Get/HeadObject now support query parameter `partNumber` to read a specific + part of a completed multipart upload. +* RGW: Fixed a S3 Object Lock bug with PutObjectRetention requests that specify + a RetainUntilDate after the year 2106. This date was truncated to 32 bits when + stored, so a much earlier date was used for object lock enforcement. This does + not effect PutBucketObjectLockConfiguration where a duration is given in Days. + The RetainUntilDate encoding is fixed for new PutObjectRetention requests, but + cannot repair the dates of existing object locks. Such objects can be identified + with a HeadObject request based on the x-amz-object-lock-retain-until-date + response header. +* RADOS: `get_pool_is_selfmanaged_snaps_mode` C++ API has been deprecated + due to being prone to false negative results. It's safer replacement is + `pool_is_in_selfmanaged_snaps_mode`. +* RADOS: For bug 62338 (https://tracker.ceph.com/issues/62338), in order to simplify + backporting, we choose to not + condition the fix on a server flag. As + a result, in rare cases it may be possible for a PG to flip between two acting + sets while an upgrade to a version with the fix is in progress. If you observe + this behavior, you should be able to work around it by completing the upgrade or + by disabling async recovery by setting osd_async_recovery_min_cost to a very + large value on all OSDs until the upgrade is complete: + ``ceph config set osd osd_async_recovery_min_cost 1099511627776`` +* RADOS: A detailed version of the `balancer status` CLI command in the balancer + module is now available. Users may run `ceph balancer status detail` to see more + details about which PGs were updated in the balancer's last optimization. + See https://docs.ceph.com/en/latest/rados/operations/balancer/ for more information. +* CephFS: Full support for subvolumes and subvolume groups is now available + for snap_schedule Manager module. 
+* RGW: The SNS CreateTopic API now enforces the same topic naming requirements as AWS: + Topic names must be made up of only uppercase and lowercase ASCII letters, numbers, + underscores, and hyphens, and must be between 1 and 256 characters long. +* RBD: When diffing against the beginning of time (`fromsnapname == NULL`) in + fast-diff mode (`whole_object == true` with `fast-diff` image feature enabled + and valid), diff-iterate is now guaranteed to execute locally if exclusive + lock is available. This brings a dramatic performance improvement for QEMU + live disk synchronization and backup use cases. +* RBD: The ``try-netlink`` mapping option for rbd-nbd has become the default + and is now deprecated. If the NBD netlink interface is not supported by the + kernel, then the mapping is retried using the legacy ioctl interface. +* RADOS: Read balancing may now be managed automatically via the balancer + manager module. Users may choose between two new modes: ``upmap-read``, which + offers upmap and read optimization simultaneously, or ``read``, which may be used + to only optimize reads. For more detailed information see https://docs.ceph.com/en/latest/rados/operations/read-balancer/#online-optimization. +* CephFS: MDS log trimming is now driven by a separate thread which tries to + trim the log every second (`mds_log_trim_upkeep_interval` config). Also, + a couple of configs govern how much time the MDS spends in trimming its + logs. These configs are `mds_log_trim_threshold` and `mds_log_trim_decay_rate`. +* RGW: Notification topics are now owned by the user that created them. + By default, only the owner can read/write their topics. Topic policy documents + are now supported to grant these permissions to other users. Preexisting topics + are treated as if they have no owner, and any user can read/write them using the SNS API. + If such a topic is recreated with CreateTopic, the issuing user becomes the new owner. + For backward compatibility, all users still have permission to publish bucket + notifications to topics owned by other users. A new configuration parameter: + ``rgw_topic_require_publish_policy`` can be enabled to deny ``sns:Publish`` + permissions unless explicitly granted by topic policy. +* RGW: Fix issue with persistent notifications where the changes to topic param that + were modified while persistent notifications were in the queue will be reflected in notifications. + So if user sets up topic with incorrect config (password/ssl) causing failure while delivering the + notifications to broker, can now modify the incorrect topic attribute and on retry attempt to delivery + the notifications, new configs will be used. +* RBD: The option ``--image-id`` has been added to `rbd children` CLI command, + so it can be run for images in the trash. +* PG dump: The default output of `ceph pg dump --format json` has changed. The + default json format produces a rather massive output in large clusters and + isn't scalable. So we have removed the 'network_ping_times' section from + the output. Details in the tracker: https://tracker.ceph.com/issues/57460 +* mgr/REST: The REST manager module will trim requests based on the 'max_requests' option. + Without this feature, and in the absence of manual deletion of old requests, + the accumulation of requests in the array can lead to Out Of Memory (OOM) issues, + resulting in the Manager crashing. 
+ +* CephFS: The `subvolume snapshot clone` command now depends on the config option + `snapshot_clone_no_wait` which is used to reject the clone operation when + all the cloner threads are busy. This config option is enabled by default which means + that if no cloner threads are free, the clone request errors out with EAGAIN. + The value of the config option can be fetched by using: + `ceph config get mgr mgr/volumes/snapshot_clone_no_wait` + and it can be disabled by using: + `ceph config set mgr mgr/volumes/snapshot_clone_no_wait false` +* RBD: `RBD_IMAGE_OPTION_CLONE_FORMAT` option has been exposed in Python + bindings via `clone_format` optional parameter to `clone`, `deep_copy` and + `migration_prepare` methods. +* RBD: `RBD_IMAGE_OPTION_FLATTEN` option has been exposed in Python bindings via + `flatten` optional parameter to `deep_copy` and `migration_prepare` methods. + +* CephFS: Command "ceph mds fail" and "ceph fs fail" now requires a + confirmation flag when some MDSs exhibit health warning MDS_TRIM or + MDS_CACHE_OVERSIZED. This is to prevent accidental MDS failover causing + further delays in recovery. +* CephFS: fixes to the implementation of the ``root_squash`` mechanism enabled + via cephx ``mds`` caps on a client credential require a new client feature + bit, ``client_mds_auth_caps``. Clients using credentials with ``root_squash`` + without this feature will trigger the MDS to raise a HEALTH_ERR on the + cluster, MDS_CLIENTS_BROKEN_ROOTSQUASH. See the documentation on this warning + and the new feature bit for more information. +* CephFS: Expanded removexattr support for cephfs virtual extended attributes. + Previously one had to use setxattr to restore the default in order to "remove". + You may now properly use removexattr to remove. You can also now remove layout + on root inode, which then will restore layout to default layout. + +* cls_cxx_gather is marked as deprecated. +* CephFS: cephfs-journal-tool is guarded against running on an online file system. + The 'cephfs-journal-tool --rank : journal reset' and + 'cephfs-journal-tool --rank : journal reset --force' + commands require '--yes-i-really-really-mean-it'. + +* Dashboard: Rearranged Navigation Layout: The navigation layout has been reorganized + for improved usability and easier access to key features. +* Dashboard: CephFS Improvments + * Support for managing CephFS snapshots and clones, as well as snapshot schedule + management + * Manage authorization capabilities for CephFS resources + * Helpers on mounting a CephFS volume +* Dashboard: RGW Improvements + * Support for managing bucket policies + * Add/Remove bucket tags + * ACL Management + * Several UI/UX Improvements to the bucket form +* Monitoring: Grafana dashboards are now loaded into the container at runtime rather than + building a grafana image with the grafana dashboards. Official Ceph grafana images + can be found in quay.io/ceph/grafana +* Monitoring: RGW S3 Analytics: A new Grafana dashboard is now available, enabling you to + visualize per bucket and user analytics data, including total GETs, PUTs, Deletes, + Copies, and list metrics. +* RBD: `Image::access_timestamp` and `Image::modify_timestamp` Python APIs now + return timestamps in UTC. +* RBD: Support for cloning from non-user type snapshots is added. This is + intended primarily as a building block for cloning new groups from group + snapshots created with `rbd group snap create` command, but has also been + exposed via the new `--snap-id` option for `rbd clone` command. 
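As an illustration of the `clone_format` parameter that the RBD Python-bindings note above says is now accepted by `clone()`, `deep_copy()` and `migration_prepare()`, here is a minimal sketch; the pool, parent image, snapshot and clone names are placeholders, and the parent snapshot is assumed to already exist:

    # Minimal sketch: request a v2 clone explicitly via the new clone_format
    # keyword (exposes RBD_IMAGE_OPTION_CLONE_FORMAT). With format 2 the
    # parent snapshot does not need to be protected first.
    import rados
    import rbd

    cluster = rados.Rados(conffile='/etc/ceph/ceph.conf')
    cluster.connect()
    try:
        with cluster.open_ioctx('rbd') as ioctx:
            rbd.RBD().clone(ioctx, 'parent', 'snap1',   # existing parent@snap1
                            ioctx, 'child',             # name of the new clone
                            clone_format=2)
    finally:
        cluster.shutdown()

Passing `clone_format=1` instead would fall back to the older workflow that requires the parent snapshot to be protected.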
+* RBD: The output of `rbd snap ls --all` command now includes the original + type for trashed snapshots. +* CephFS: "ceph fs clone status" command will now print statistics about clone + progress in terms of how much data has been cloned (in both percentage as + well as bytes) and how many files have been cloned. +* CephFS: "ceph status" command will now print a progress bar when cloning is + ongoing. If clone jobs are more than the cloner threads, it will print one + more progress bar that shows total amount of progress made by both ongoing + as well as pending clones. Both progress are accompanied by messages that + show number of clone jobs in the respective categories and the amount of + progress made by each of them. +* RGW: in bucket notifications, the `principalId` inside `ownerIdentity` now contains + complete user id, prefixed with tenant id + +* NFS: The export create/apply of CephFS based exports will now have a additional parameter `cmount_path` under the FSAL block, + which specifies the path within the CephFS to mount this export on. If this and the other + `EXPORT { FSAL {} }` options are the same between multiple exports, those exports will share a single CephFS client. If not specified, the default is `/`. >=18.0.0 @@ -57,6 +322,10 @@ mirroring policies between RGW and AWS, you may wish to set "rgw policy reject invalid principals" to "false". This affects only newly set policies, not policies that are already in place. +* The CephFS automatic metadata load (sometimes called "default") balancer is + now disabled by default. The new file system flag `balance_automate` + can be used to toggle it on or off. It can be enabled or disabled via + `ceph fs set balance_automate `. * RGW's default backend for `rgw_enable_ops_log` changed from RADOS to file. The default value of `rgw_ops_log_rados` is now false, and `rgw_ops_log_file_path` defaults to "/var/log/ceph/ops-log-$cluster-$name.log". @@ -226,16 +495,15 @@ than the number mentioned against the config tunable `mds_max_snaps_per_dir` so that a new snapshot can be created and retained during the next schedule run. -* cephfs: Running the command "ceph fs authorize" for an existing entity now - upgrades the entity's capabilities instead of printing an error. It can now - also change read/write permissions in a capability that the entity already - holds. If the capability passed by user is same as one of the capabilities - that the entity already holds, idempotency is maintained. * `ceph config dump --format ` output will display the localized option names instead of its normalized version. For e.g., "mgr/prometheus/x/server_port" will be displayed instead of "mgr/prometheus/server_port". This matches the output of the non pretty-print formatted version of the command. +* CEPHFS: MDS config option name "mds_kill_skip_replaying_inotable" is a bit + confusing with "mds_inject_skip_replaying_inotable", therefore renaming it to + "mds_kill_after_journal_logs_flushed" + >=17.2.1 @@ -299,3 +567,11 @@ Relevant tracker: https://tracker.ceph.com/issues/57090 set using the `fs set` command. This flag prevents using a standby for another file system (join_fs = X) when standby for the current filesystem is not available. Relevant tracker: https://tracker.ceph.com/issues/61599 +* mon: add NVMe-oF gateway monitor and HA + This PR adds high availability support for the nvmeof Ceph service. 
High availability +means that even in the case that a certain GW is down, there will be another available +path for the initiator to be able to continue the IO through another GW. +It is also adding 2 new mon commands, to notify monitor about the gateway creation/deletion: + - nvme-gw create + - nvme-gw delete +Relevant tracker: https://tracker.ceph.com/issues/64777 diff --git a/README.md b/README.md index 9db4161c793d..56257697e9a1 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,10 @@ See https://ceph.com/ for current information about Ceph. +## Status + +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/2220/badge)](https://www.bestpractices.dev/projects/2220) +[![Issue Backporting](https://github.com/ceph/ceph/actions/workflows/create-backport-trackers.yml/badge.svg)](https://github.com/ceph/ceph/actions/workflows/create-backport-trackers.yml) ## Contributing Code @@ -40,24 +44,26 @@ following commands to move into the cloned `ceph/ceph` repository and to check out the git submodules associated with it: cd ceph - git submodule update --init --recursive + git submodule update --init --recursive --progress ## Build Prerequisites -*section last updated 27 Jul 2023* +*section last updated 06 Sep 2024* + +We provide the Debian and Ubuntu ``apt`` commands in this procedure. If you use +a system with a different package manager, then you will have to use different +commands. -Make sure that ``curl`` is installed. The Debian and Ubuntu ``apt`` command is -provided here, but if you use a system with a different package manager, then -you must use whatever command is the proper counterpart of this one: +#. Install ``curl``: apt install curl -Install Debian or RPM package dependencies by running the following command: +#. Install package dependencies by running the ``install-deps.sh`` script: ./install-deps.sh -Install the ``python3-routes`` package: +#. Install the ``python3-routes`` package: apt install python3-routes @@ -70,44 +76,56 @@ we recommend that you build `.deb` or `.rpm` packages, or refer to ``ceph.spec.in`` or ``debian/rules`` to see which configuration options are specified for production builds. -To build Ceph, make sure that you are in the top-level `ceph` directory that -contains `do_cmake.sh` and `CONTRIBUTING.rst` and run the following commands: +To build Ceph, follow this procedure: - ./do_cmake.sh - cd build - ninja +1. Make sure that you are in the top-level `ceph` directory that + contains `do_cmake.sh` and `CONTRIBUTING.rst`. +2. Run the `do_cmake.sh` script: + + ./do_cmake.sh + + ``do_cmake.sh`` by default creates a "debug build" of Ceph, which can be + up to five times slower than a non-debug build. Pass + ``-DCMAKE_BUILD_TYPE=RelWithDebInfo`` to ``do_cmake.sh`` to create a + non-debug build. +3. Move into the `build` directory: + + cd build +4. Use the `ninja` buildsystem to build the development environment: -``do_cmake.sh`` by default creates a "debug build" of Ceph, which can be up to -five times slower than a non-debug build. Pass -``-DCMAKE_BUILD_TYPE=RelWithDebInfo`` to ``do_cmake.sh`` to create a non-debug -build. + ninja -j3 -[Ninja](https://ninja-build.org/) is the buildsystem used by the Ceph project -to build test builds. The number of jobs used by `ninja` is derived from the -number of CPU cores of the building host if unspecified. Use the `-j` option to -limit the job number if the build jobs are running out of memory. 
If you -attempt to run `ninja` and receive a message that reads `g++: fatal error: -Killed signal terminated program cc1plus`, then you have run out of memory. -Using the `-j` option with an argument appropriate to the hardware on which the -`ninja` command is run is expected to result in a successful build. For example, -to limit the job number to 3, run the command `ninja -j 3`. On average, each -`ninja` job run in parallel needs approximately 2.5 GiB of RAM. + > [IMPORTANT] + > + > [Ninja](https://ninja-build.org/) is the build system used by the Ceph + > project to build test builds. The number of jobs used by `ninja` is + > derived from the number of CPU cores of the building host if unspecified. + > Use the `-j` option to limit the job number if build jobs are running + > out of memory. If you attempt to run `ninja` and receive a message that + > reads `g++: fatal error: Killed signal terminated program cc1plus`, then + > you have run out of memory. + > + > Using the `-j` option with an argument appropriate to the hardware on + > which the `ninja` command is run is expected to result in a successful + > build. For example, to limit the job number to 3, run the command `ninja + > -j3`. On average, each `ninja` job run in parallel needs approximately + > 2.5 GiB of RAM. -This documentation assumes that your build directory is a subdirectory of the -`ceph.git` checkout. If the build directory is located elsewhere, point -`CEPH_GIT_DIR` to the correct path of the checkout. Additional CMake args can -be specified by setting ARGS before invoking ``do_cmake.sh``. See [cmake -options](#cmake-options) for more details. For example: + This documentation assumes that your build directory is a subdirectory of + the `ceph.git` checkout. If the build directory is located elsewhere, point + `CEPH_GIT_DIR` to the correct path of the checkout. Additional CMake args + can be specified by setting ARGS before invoking ``do_cmake.sh``. + See [cmake options](#cmake-options) for more details. For example: - ARGS="-DCMAKE_C_COMPILER=gcc-7" ./do_cmake.sh + ARGS="-DCMAKE_C_COMPILER=gcc-7" ./do_cmake.sh -To build only certain targets, run a command of the following form: + To build only certain targets, run a command of the following form: - ninja [target name] + ninja [target name] -To install: +5. Install the vstart cluster: - ninja install + ninja install ### CMake Options diff --git a/SubmittingPatches-backports.rst b/SubmittingPatches-backports.rst index 0f96aec65c4f..bb55088cb5fa 100644 --- a/SubmittingPatches-backports.rst +++ b/SubmittingPatches-backports.rst @@ -121,14 +121,11 @@ If you do not have sufficient permissions to modify any field of the tracker issue, just add a comment describing what changes you would like to make. Someone with permissions will make the necessary modifications on your behalf. -For straightforward backports, that's all that you (as the developer of the fix) -need to do. Volunteers from the `Stable Releases and Backports team`_ will -proceed to create Backport issues to track the necessary backports and stage the -backports by opening GitHub PRs with the cherry-picks. If you don't want to -wait, and provided you have sufficient permissions at https://tracker.ceph.com, -you can `create Backport tracker issues` and `stage backports`_ yourself. In -that case, read on. - +Authors of pull requests are responsible for creating associated backport pull +requests. 
As long as you have sufficient permissions at +https://tracker.ceph.com, you can `create Backport tracker issues` and `stage +backports`_ yourself. Read these linked sections to learn how to create +backport tracker issues and how to stage backports: .. _`create backport tracker issues`: .. _`backport tracker issue`: @@ -146,10 +143,7 @@ issues can be created in the backport tracker issue for tracking the backporting Under ordinary circumstances, the developer who merges the ``main`` PR will flag the ``main`` branch tracker issue for backport by changing the Status to "Pending -Backport", and volunteers from the `Stable Releases and Backports team`_ -periodically create backport tracker issues by running the -``backport-create-issue`` script. They also do the actual backporting. But that -does take time and you may not want to wait. +Backport". You might be tempted to forge ahead and create the backport issues yourself. Please don't do that - it is difficult (bordering on impossible) to get all the @@ -360,20 +354,11 @@ Once the backport PR is open, the first order of business is to set the Milestone tag to the stable release the backport PR is targeting. For example, if the PR is targeting "nautilus", set the Milestone tag to "nautilus". -If you don't have sufficient GitHub permissions to set the Milestone, don't -worry. Members of the `Stable Releases and Backports team`_ periodically run -a script (``ceph-backport.sh --milestones``) which scans all PRs targetting stable -branches and automatically adds the correct Milestone tag if it is missing. - Next, check which component label was applied to the ``main`` PR corresponding to this backport, and double-check that that label is applied to the backport PR as well. For example, if the ``main`` PR carries the component label "core", the backport PR should also get that label. -In general, it is the responsibility of the `Stable Releases and Backports -team`_ to ensure that backport PRs are properly labelled. If in doubt, just -leave the labelling to them. - .. _`backport PR reviewing`: .. _`backport PR testing`: .. _`backport PR merging`: @@ -381,9 +366,8 @@ leave the labelling to them. Reviewing, testing, and merging of backport PRs ----------------------------------------------- -Once your backport PR is open and the Milestone is set properly, the -`Stable Releases and Backports team` will take care of getting the PR -reviewed and tested. Once the PR is reviewed and tested, it will be merged. +Once your backport PR is open, it will be reviewed and tested. When the PR has +been reviewed and tested, it will be merged. If you would like to facilitate this process, you can solicit reviews and run integration tests on the PR. In this case, add comments to the PR describing the @@ -394,22 +378,3 @@ it will be merged. Even if you have sufficient GitHub permissions to merge the PR, please do *not* merge it yourself. (Uncontrolled merging to stable branches unnecessarily complicates the release preparation process, which is done by volunteers.) - - -Stable Releases and Backports team ----------------------------------- - -Ceph has a `Stable Releases and Backports`_ team, staffed by volunteers, -which is charged with maintaining the stable releases and backporting bugfixes -from the ``main`` branch to them. (That team maintains a wiki, accessible by -clicking the `Stable Releases and Backports`_ link, which describes various -workflows in the backporting lifecycle.) - -.. 
_`Stable Releases and Backports`: http://tracker.ceph.com/projects/ceph-releases/wiki - -Ordinarily, it is enough to fill out the "Backport" field in the bug (tracker -issue). The volunteers from the Stable Releases and Backports team will -backport the fix, run regression tests on it, and include it in one or more -future point releases. - - diff --git a/admin/doc-requirements.txt b/admin/doc-requirements.txt index 2f4970e3fbb3..636f7e138511 100644 --- a/admin/doc-requirements.txt +++ b/admin/doc-requirements.txt @@ -1,4 +1,4 @@ -Sphinx == 4.5.0 +Sphinx == 5.0.2 git+https://github.com/ceph/sphinx-ditaa.git@py3#egg=sphinx-ditaa git+https://github.com/vlasovskikh/funcparserlib.git breathe >= 4.20.0,!=4.33 diff --git a/ceph-menv/README b/ceph-menv/README index badbd3a028f4..91606c48f02e 100644 --- a/ceph-menv/README +++ b/ceph-menv/README @@ -1,6 +1,6 @@ ceph-menv -Environment assistant for use in conjuction with multiple ceph vstart (or more accurately mstart) clusters. Eliminates the need to specify the cluster that is being used with each and every command. Can provide a shell prompt feedback about the currently used cluster. +Environment assistant for use in conjunction with multiple Ceph vstart (or more accurately mstart) clusters. Eliminates the need to specify the cluster that is being used with each and every command. Can provide a shell prompt feedback about the currently used cluster. Usage: diff --git a/ceph-object-corpus b/ceph-object-corpus index 038c72b5acec..84714379121c 160000 --- a/ceph-object-corpus +++ b/ceph-object-corpus @@ -1 +1 @@ -Subproject commit 038c72b5acec667e1aca4c79a8cfcae705d766fe +Subproject commit 84714379121c19f89a8145fee179d6388bf74c1e diff --git a/ceph.spec.in b/ceph.spec.in index 6496a0cec785..ece1ebf2ec85 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -21,7 +21,6 @@ # https://rpm-software-management.github.io/rpm/manual/conditionalbuilds.html ################################################################################# %bcond_with make_check -%bcond_with zbd %bcond_with cmake_verbose_logging %bcond_without ceph_test_package %ifarch s390 @@ -98,7 +97,7 @@ %else %bcond_without jaeger %endif -%if 0%{?fedora} || 0%{?suse_version} >= 1500 +%if 0%{?fedora} || 0%{?suse_version} >= 1500 || 0%{?rhel} >= 9 # distros that ship cmd2 and/or colorama %bcond_without cephfs_shell %else @@ -112,6 +111,18 @@ # this is tracked in https://bugzilla.redhat.com/2152265 %bcond_with system_arrow %endif +# qat only supported for intel devices +%ifarch x86_64 +%if 0%{?fedora} || 0%{?rhel} >= 9 +%bcond_without system_qat +%else +# not fedora/rhel +%bcond_with system_qat +%endif +%else +# not x86_64 +%bcond_with system_qat +%endif %if 0%{?fedora} || 0%{?suse_version} || 0%{?rhel} >= 8 || 0%{?openEuler} %global weak_deps 1 %endif @@ -124,13 +135,41 @@ %{!?_selinux_policy_version: %global _selinux_policy_version 0.0.0} %endif %endif +%bcond_without cephadm_bundling +%bcond_without cephadm_pip_deps +%bcond_without dwz +%if %{with dwz} +%else +# disable dwz for 50% speedup at the cost of ~33% space +%global _find_debuginfo_dwz_opts %{nil} +%endif +%bcond_with sccache %{!?_udevrulesdir: %global _udevrulesdir /lib/udev/rules.d} %{!?tmpfiles_create: %global tmpfiles_create systemd-tmpfiles --create} %{!?python3_pkgversion: %global python3_pkgversion 3} %{!?python3_version_nodots: %global python3_version_nodots 3} %{!?python3_version: %global python3_version 3} -%{!?gts_prefix: %global gts_prefix gcc-toolset-11} +%if 0%{with seastar} +%{!?gts_version: %global gts_version 13} +%else 
+%if 0%{?rhel} == 8 +%{!?gts_version: %global gts_version 11} +%endif +%endif + +# gcc-toolset-13 seems to trigger a linker bug resulting in a segfault in SafeTimer +# and perhaps elsewhere. For now, let's just disable it. See +# ceph bug https://tracker.ceph.com/issues/63867 +# and +# gcc bug https://bugzilla.redhat.com/show_bug.cgi?id=2241339 +# for details. +# +# Also disable lto on systems that do not support symver attribute +# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48200 for details +%if 0%{?gts_version} == 13 || (0%{?rhel} && 0%{?rhel} < 9) || ( 0%{?suse_version} && 0%{?suse_version} <= 1500 ) +%define _lto_cflags %{nil} +%endif %if ! 0%{?suse_version} # use multi-threaded xz compression: xz level 7 using ncpus threads @@ -162,9 +201,17 @@ # do not provide gcc-annobin.so anymore, despite that they provide annobin.so. but # redhat-rpm-config still passes -fplugin=gcc-annobin to the compiler. %undefine _annotated_build -%if 0%{?rhel} == 8 && 0%{?enable_devtoolset11:1} +%if 0%{?gts_version} > 0 +%if 0%{gts_version} == 13 +%if 0%{?enable_devtoolset13:1} +%enable_devtoolset13 +%endif +%else +%if 0%{?enable_devtoolset11:1} %enable_devtoolset11 %endif +%endif +%endif ################################################################################# # main package definition @@ -189,7 +236,7 @@ URL: http://ceph.com/ Source0: %{?_remote_tarball_prefix}@TARBALL_BASENAME@.tar.bz2 %if 0%{?suse_version} # _insert_obs_source_lines_here -ExclusiveArch: x86_64 aarch64 ppc64le s390x +ExclusiveArch: x86_64 aarch64 ppc64le s390x riscv64 %endif ################################################################################# # dependencies that apply across all distro families @@ -211,16 +258,22 @@ BuildRequires: selinux-policy-devel BuildRequires: gperf BuildRequires: cmake > 3.5 BuildRequires: fuse-devel +BuildRequires: git +BuildRequires: grpc-devel %if 0%{?fedora} || 0%{?suse_version} > 1500 || 0%{?rhel} == 9 || 0%{?openEuler} BuildRequires: gcc-c++ >= 11 %endif %if 0%{?suse_version} == 1500 BuildRequires: gcc11-c++ %endif -%if 0%{?rhel} == 8 -BuildRequires: %{gts_prefix}-gcc-c++ -BuildRequires: %{gts_prefix}-build -BuildRequires: %{gts_prefix}-libatomic-devel +%if 0%{?gts_version} > 0 +BuildRequires: gcc-toolset-%{gts_version}-gcc-c++ +%if 0%{?gts_version} >= 12 +BuildRequires: gcc-toolset-%{gts_version}-runtime +%else +BuildRequires: gcc-toolset-%{gts_version}-build +%endif +BuildRequires: gcc-toolset-%{gts_version}-libatomic-devel %endif %if 0%{?fedora} || 0%{?rhel} == 9 || 0%{?openEuler} BuildRequires: libatomic @@ -240,6 +293,7 @@ BuildRequires: gperftools-devel >= 2.4 BuildRequires: libaio-devel BuildRequires: libblkid-devel >= 2.17 BuildRequires: cryptsetup-devel +BuildRequires: libnbd-devel BuildRequires: libcurl-devel BuildRequires: libcap-devel BuildRequires: libcap-ng-devel @@ -281,14 +335,14 @@ BuildRequires: librabbitmq-devel BuildRequires: librdkafka-devel %endif %if 0%{with lua_packages} -BuildRequires: %{luarocks_package_name} +Requires: lua-devel +Requires: %{luarocks_package_name} %endif %if 0%{with make_check} BuildRequires: hostname BuildRequires: jq BuildRequires: libuuid-devel BuildRequires: python%{python3_pkgversion}-bcrypt -BuildRequires: python%{python3_pkgversion}-pecan BuildRequires: python%{python3_pkgversion}-requests BuildRequires: python%{python3_pkgversion}-dateutil BuildRequires: python%{python3_pkgversion}-coverage @@ -297,9 +351,6 @@ BuildRequires: socat BuildRequires: python%{python3_pkgversion}-asyncssh BuildRequires: 
python%{python3_pkgversion}-natsort %endif -%if 0%{with zbd} -BuildRequires: libzbd-devel -%endif %if 0%{?suse_version} BuildRequires: libthrift-devel >= 0.13.0 %else @@ -332,6 +383,10 @@ BuildRequires: libarrow-devel BuildRequires: parquet-libs-devel BuildRequires: utf8proc-devel %endif +%if 0%{with system_qat} +BuildRequires: qatlib-devel +BuildRequires: qatzip-devel +%endif %if 0%{with seastar} BuildRequires: c-ares-devel BuildRequires: gnutls-devel @@ -342,11 +397,17 @@ BuildRequires: ragel BuildRequires: systemtap-sdt-devel BuildRequires: libubsan BuildRequires: libasan -%if 0%{?rhel} == 8 -BuildRequires: %{gts_prefix}-annobin -BuildRequires: %{gts_prefix}-annobin-plugin-gcc -BuildRequires: %{gts_prefix}-libubsan-devel -BuildRequires: %{gts_prefix}-libasan-devel +BuildRequires: protobuf-devel +BuildRequires: protobuf-compiler +%if 0%{?gts_version} > 0 +%if 0%{?gts_version} >= 12 +BuildRequires: gcc-toolset-%{gts_version}-gcc-plugin-annobin +%else +BuildRequires: gcc-toolset-%{gts_version}-annobin +BuildRequires: gcc-toolset-%{gts_version}-annobin-plugin-gcc +%endif +BuildRequires: gcc-toolset-%{gts_version}-libubsan-devel +BuildRequires: gcc-toolset-%{gts_version}-libasan-devel %endif %endif ################################################################################# @@ -388,6 +449,7 @@ BuildRequires: libibverbs-devel BuildRequires: librdmacm-devel BuildRequires: ninja-build BuildRequires: openldap-devel +BuildRequires: numactl-devel #BuildRequires: krb5-devel BuildRequires: openssl-devel BuildRequires: CUnit-devel @@ -411,22 +473,18 @@ BuildRequires: xmlsec1-nss BuildRequires: xmlsec1-openssl BuildRequires: xmlsec1-openssl-devel BuildRequires: python%{python3_pkgversion}-cherrypy -BuildRequires: python%{python3_pkgversion}-jwt BuildRequires: python%{python3_pkgversion}-routes BuildRequires: python%{python3_pkgversion}-scipy -BuildRequires: python%{python3_pkgversion}-werkzeug BuildRequires: python%{python3_pkgversion}-pyOpenSSL %endif +BuildRequires: jsonnet %if 0%{?suse_version} BuildRequires: golang-github-prometheus-prometheus -BuildRequires: jsonnet BuildRequires: libxmlsec1-1 BuildRequires: libxmlsec1-nss1 BuildRequires: libxmlsec1-openssl1 BuildRequires: python%{python3_pkgversion}-CherryPy -BuildRequires: python%{python3_pkgversion}-PyJWT BuildRequires: python%{python3_pkgversion}-Routes -BuildRequires: python%{python3_pkgversion}-Werkzeug BuildRequires: python%{python3_pkgversion}-numpy-devel BuildRequires: xmlsec1-devel BuildRequires: xmlsec1-openssl-devel @@ -459,7 +517,6 @@ BuildRequires: openEuler-rpm-config %if 0%{with seastar} %if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler} BuildRequires: cryptopp-devel -BuildRequires: numactl-devel %endif %if 0%{?suse_version} BuildRequires: libcryptopp-devel @@ -527,6 +584,13 @@ Requires: which %if 0%{?weak_deps} Recommends: podman >= 2.0.2 %endif +%if 0%{with cephadm_bundling} +%if 0%{without cephadm_pip_deps} +BuildRequires: python3-jinja2 >= 2.10 +%endif +%else +Requires: python3-jinja2 >= 2.10 +%endif %description -n cephadm Utility to bootstrap a Ceph cluster and manage Ceph daemons deployed with systemd and podman. @@ -586,6 +650,17 @@ system. One or more instances of ceph-mon form a Paxos part-time parliament cluster that provides extremely reliable and durable storage of cluster membership, configuration, and state. 
+%package mon-client-nvmeof +Summary: Ceph NVMeoF Gateway Monitor Client +%if 0%{?suse_version} +Group: System/Filesystems +%endif +Provides: ceph-test:/usr/bin/ceph-nvmeof-monitor-client +Requires: librados2 = %{_epoch_prefix}%{version}-%{release} +%description mon-client-nvmeof +Ceph NVMeoF Gateway Monitor Client distributes Paxos ANA info +to NVMeoF Gateway and provides beacons to the monitor daemon + %package mgr Summary: Ceph Manager Daemon %if 0%{?suse_version} @@ -616,21 +691,24 @@ Group: System/Filesystems Requires: ceph-mgr = %{_epoch_prefix}%{version}-%{release} Requires: ceph-grafana-dashboards = %{_epoch_prefix}%{version}-%{release} Requires: ceph-prometheus-alerts = %{_epoch_prefix}%{version}-%{release} -Requires: python%{python3_pkgversion}-setuptools +%if 0%{?fedora} || 0%{?rhel} >= 9 +Requires: python%{python3_pkgversion}-grpcio +Requires: python%{python3_pkgversion}-grpcio-tools +%endif %if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler} Requires: python%{python3_pkgversion}-cherrypy -Requires: python%{python3_pkgversion}-jwt Requires: python%{python3_pkgversion}-routes -Requires: python%{python3_pkgversion}-werkzeug %if 0%{?weak_deps} Recommends: python%{python3_pkgversion}-saml +%if 0%{?fedora} || 0%{?rhel} <= 8 +Recommends: python%{python3_pkgversion}-grpcio +Recommends: python%{python3_pkgversion}-grpcio-tools +%endif %endif %endif %if 0%{?suse_version} Requires: python%{python3_pkgversion}-CherryPy -Requires: python%{python3_pkgversion}-PyJWT Requires: python%{python3_pkgversion}-Routes -Requires: python%{python3_pkgversion}-Werkzeug Recommends: python%{python3_pkgversion}-python3-saml %endif %description mgr-dashboard @@ -662,7 +740,7 @@ BuildArch: noarch Group: System/Filesystems %endif Requires: python%{python3_pkgversion}-bcrypt -Requires: python%{python3_pkgversion}-pecan +Requires: python%{python3_pkgversion}-packaging Requires: python%{python3_pkgversion}-pyOpenSSL Requires: python%{python3_pkgversion}-requests Requires: python%{python3_pkgversion}-dateutil @@ -670,12 +748,15 @@ Requires: python%{python3_pkgversion}-setuptools %if 0%{?fedora} || 0%{?rhel} >= 8 || 0%{?openEuler} Requires: python%{python3_pkgversion}-cherrypy Requires: python%{python3_pkgversion}-pyyaml -Requires: python%{python3_pkgversion}-werkzeug %endif %if 0%{?suse_version} Requires: python%{python3_pkgversion}-CherryPy Requires: python%{python3_pkgversion}-PyYAML -Requires: python%{python3_pkgversion}-Werkzeug +%endif +# RHEL8 has python 3.6 and that lacks dataclasses in the stdlib, so pull in the +# backport dataclasses module instead. 
+%if 0%{?rhel} <= 8 +Requires: python%{python3_pkgversion}-dataclasses %endif %if 0%{?weak_deps} Recommends: ceph-mgr-rook = %{_epoch_prefix}%{version}-%{release} @@ -860,6 +941,9 @@ Provides: ceph-test:/usr/bin/ceph-osdomap-tool Requires: ceph-base = %{_epoch_prefix}%{version}-%{release} Requires: sudo Requires: libstoragemgmt +%if 0%{with seastar} +Requires: protobuf +%endif %if 0%{?weak_deps} Recommends: ceph-volume = %{_epoch_prefix}%{version}-%{release} %endif @@ -1100,7 +1184,7 @@ Group: System/Libraries Obsoletes: libcephfs1 < %{_epoch_prefix}%{version}-%{release} %if 0%{?rhel} || 0%{?fedora} || 0%{?openEuler} Obsoletes: ceph-libs < %{_epoch_prefix}%{version}-%{release} -Obsoletes: ceph-libcephfs +Obsoletes: ceph-libcephfs < %{_epoch_prefix}%{version}-%{release} %endif %description -n libcephfs2 Ceph is a distributed network file system designed to provide excellent @@ -1290,6 +1374,15 @@ Group: System/Monitoring %description mib This package provides a Ceph MIB for SNMP traps. +%package node-proxy +Summary: hw monitoring agent for Ceph +BuildArch: noarch +%if 0%{?suse_version} +Group: System/Monitoring +%endif +%description node-proxy +This package provides a Ceph hardware monitoring agent. + ################################################################################# # common ################################################################################# @@ -1297,11 +1390,6 @@ This package provides a Ceph MIB for SNMP traps. %autosetup -p1 -n @TARBALL_BASENAME@ %build -# Disable lto on systems that do not support symver attribute -# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48200 for details -%if ( 0%{?rhel} && 0%{?rhel} < 9 ) || ( 0%{?suse_version} && 0%{?suse_version} <= 1500 ) -%define _lto_cflags %{nil} -%endif %if 0%{with cephfs_java} # Find jni.h @@ -1338,6 +1426,9 @@ cmake .. \ %if 0%{?suse_version} == 1500 -DCMAKE_C_COMPILER=gcc-11 \ -DCMAKE_CXX_COMPILER=g++-11 \ +%endif +%if 0%{?gts_version} == 13 + -DCMAKE_EXE_LINKER_FLAGS=-lstdc++ \ %endif -DCMAKE_INSTALL_PREFIX=%{_prefix} \ -DCMAKE_INSTALL_LIBDIR:PATH=%{_libdir} \ @@ -1397,9 +1488,6 @@ cmake .. \ %if 0%{without lua_packages} -DWITH_RADOSGW_LUA_PACKAGES:BOOL=OFF \ %endif -%if 0%{with zbd} - -DWITH_ZBD:BOOL=ON \ -%endif %if 0%{with cmake_verbose_logging} -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON \ %endif @@ -1427,11 +1515,27 @@ cmake .. 
\ -DWITH_SYSTEM_ARROW:BOOL=ON \ -DWITH_SYSTEM_UTF8PROC:BOOL=ON \ %endif +%if 0%{with system_qat} + -DWITH_SYSTEM_QATLIB:BOOL=ON \ + -DWITH_SYSTEM_QATZIP:BOOL=ON \ +%endif %if 0%{with seastar} -DWITH_SEASTAR:BOOL=ON \ -DWITH_JAEGER:BOOL=OFF \ %endif - -DWITH_GRAFANA:BOOL=ON + -DWITH_GRAFANA:BOOL=ON \ +%if %{with sccache} + -DWITH_SCCACHE=ON \ +%endif +%if 0%{with cephadm_bundling} +%if 0%{with cephadm_pip_deps} + -DCEPHADM_BUNDLED_DEPENDENCIES=pip +%else + -DCEPHADM_BUNDLED_DEPENDENCIES=rpm +%endif +%else + -DCEPHADM_BUNDLED_DEPENDENCIES=none +%endif %if %{with cmake_verbose_logging} cat ./CMakeFiles/CMakeOutput.log @@ -1530,6 +1634,9 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-rbd-mirror # prometheus alerts install -m 644 -D monitoring/ceph-mixin/prometheus_alerts.yml %{buildroot}/etc/prometheus/ceph/ceph_default_alerts.yml +# grafana charts +install -m 644 -D monitoring/ceph-mixin/dashboards_out/* %{buildroot}/etc/grafana/dashboards/ceph-dashboard/ + # SNMP MIB install -m 644 -D -t %{buildroot}%{_datadir}/snmp/mibs monitoring/snmp/CEPH-MIB.txt @@ -1581,6 +1688,7 @@ rm -rf %{_vpath_builddir} %if %{with lttng} %{_libdir}/libos_tp.so* %{_libdir}/libosd_tp.so* +%{_libdir}/libmgr_op_tp.so* %endif %config(noreplace) %{_sysconfdir}/logrotate.d/ceph %if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler} @@ -1891,9 +1999,9 @@ fi %{_datadir}/ceph/mgr/progress %{_datadir}/ceph/mgr/prometheus %{_datadir}/ceph/mgr/rbd_support -%{_datadir}/ceph/mgr/restful %{_datadir}/ceph/mgr/rgw %{_datadir}/ceph/mgr/selftest +%{_datadir}/ceph/mgr/smb %{_datadir}/ceph/mgr/snap_schedule %{_datadir}/ceph/mgr/stats %{_datadir}/ceph/mgr/status @@ -1901,7 +2009,6 @@ fi %{_datadir}/ceph/mgr/telemetry %{_datadir}/ceph/mgr/test_orchestrator %{_datadir}/ceph/mgr/volumes -%{_datadir}/ceph/mgr/zabbix %files mgr-rook %{_datadir}/ceph/mgr/rook @@ -1985,6 +2092,9 @@ if [ $1 -ge 1 ] ; then fi fi +%files mon-client-nvmeof +%{_bindir}/ceph-nvmeof-monitor-client + %files fuse %{_bindir}/ceph-fuse %{_mandir}/man8/ceph-fuse.8* @@ -2036,6 +2146,7 @@ fi %files -n ceph-exporter %{_bindir}/ceph-exporter +%{_unitdir}/ceph-exporter.service %files -n rbd-fuse %{_bindir}/rbd-fuse @@ -2470,6 +2581,7 @@ fi %{_bindir}/ceph-coverage %{_bindir}/ceph-debugpack %{_bindir}/ceph-dedup-tool +%{_bindir}/ceph-dedup-daemon %if 0%{with seastar} %{_bindir}/crimson-store-nbd %endif @@ -2623,4 +2735,10 @@ exit 0 %attr(0755,root,root) %dir %{_datadir}/snmp %{_datadir}/snmp/mibs +%files node-proxy +%{_sbindir}/ceph-node-proxy +%dir %{python3_sitelib}/ceph_node_proxy +%{python3_sitelib}/ceph_node_proxy/* +%{python3_sitelib}/ceph_node_proxy-* + %changelog diff --git a/cmake/modules/AddCephTest.cmake b/cmake/modules/AddCephTest.cmake index 2784567c6871..ab4dc63ca32a 100644 --- a/cmake/modules/AddCephTest.cmake +++ b/cmake/modules/AddCephTest.cmake @@ -19,9 +19,43 @@ function(add_ceph_test test_name test_path) PATH=${CMAKE_RUNTIME_OUTPUT_DIRECTORY}:${CMAKE_SOURCE_DIR}/src:$ENV{PATH} PYTHONPATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/cython_modules/lib.3:${CMAKE_SOURCE_DIR}/src/pybind CEPH_BUILD_VIRTUALENV=${CEPH_BUILD_VIRTUALENV}) - # none of the tests should take more than 1 hour to complete + if(WITH_UBSAN) + set_property(TEST ${test_name} + APPEND + PROPERTY ENVIRONMENT + UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1) + endif() + if(WITH_ASAN) + # AddressSanitizer: odr-violation: global 'ceph::buffer::list::always_empty_bptr' at + # /home/jenkins-build/build/workspace/ceph-pull-requests/src/common/buffer.cc:1267:34 + # see 
https://tracker.ceph.com/issues/65098 + set_property(TEST ${test_name} + APPEND + PROPERTY ENVIRONMENT + ASAN_OPTIONS=detect_odr_violation=0 + LSAN_OPTIONS=suppressions=${CMAKE_SOURCE_DIR}/qa/lsan.supp) + endif() set_property(TEST ${test_name} PROPERTY TIMEOUT ${CEPH_TEST_TIMEOUT}) + # Crimson seastar unittest always run with --smp N to start N threads. By default, crimson seastar unittest + # will take cpu cores[0, N), starting one thread per core. When running many crimson seastar unittests + # parallely, the front N cpu cores are shared, and the left cpu cores are idle. Lots of cpu cores are wasted. + # Using CTest resource allocation feature(https://cmake.org/cmake/help/latest/manual/ctest.1.html#resource-allocation), + # ctest can specify cpu cores resources to crimson seastar unittests. + # 3 steps to enable CTest resource allocation feature: + # Step 1: Generate a resource specification file to describe available resource, $(nproc) CPUs with id 0 to $(nproc) - 1 + # Step 2: Set RESOURCE_GROUPS property to a test with value "${smp_count},cpus:1" + # Step 3: Read a series of environment variables CTEST_RESOURCE_GROUP_* and set seastar smp_opts while running a test + list(FIND ARGV "--smp" smp_pos) + if(smp_pos GREATER -1) + if(smp_pos EQUAL ARGC) + message(FATAL_ERROR "${test_name} --smp requires an argument") + endif() + math(EXPR i "${smp_pos} + 1") + list(GET ARGV ${i} smp_count) + set_property(TEST ${test_name} + PROPERTY RESOURCE_GROUPS "${smp_count},cpus:1") + endif() endfunction() option(WITH_GTEST_PARALLEL "Enable running gtest based tests in parallel" OFF) diff --git a/cmake/modules/BuildArrow.cmake b/cmake/modules/BuildArrow.cmake index 691108a40c55..0ee1d85b49ff 100644 --- a/cmake/modules/BuildArrow.cmake +++ b/cmake/modules/BuildArrow.cmake @@ -69,6 +69,10 @@ function(build_arrow) list(APPEND arrow_DEPENDS Boost) endif() + # since Arrow 15.0.0 needs xsimd>=8.1.0 and since Ubuntu Jammy + # Jellyfish only provides 7.6.0, we'll have arrow build it as source + list(APPEND arrow_CMAKE_ARGS -Dxsimd_SOURCE=BUNDLED) + # cmake doesn't properly handle arguments containing ";", such as # CMAKE_PREFIX_PATH, for which reason we'll have to use some other separator. string(REPLACE ";" "!" CMAKE_PREFIX_PATH_ALT_SEP "${CMAKE_PREFIX_PATH}") @@ -86,6 +90,9 @@ function(build_arrow) else() list(APPEND arrow_CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release) endif() + # don't add -Werror or debug package builds fail with: + #warning _FORTIFY_SOURCE requires compiling with optimization (-O) + list(APPEND arrow_CMAKE_ARGS -DBUILD_WARNING_LEVEL=PRODUCTION) # we use an external project and copy the sources to bin directory to ensure # that object files are built outside of the source tree. 
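A note on the CTest resource-allocation change in AddCephTest.cmake above: the hunk itself only shows step 2 of the three steps listed in its comments (deriving the RESOURCE_GROUPS property from a test's --smp argument). The sketch below illustrates, purely as an example and not as code added by this patch, what steps 1 and 3 could look like; the file name ctest_resources.json and the test name unittest_seastar_example are hypothetical.

    # Step 1 (sketch): write a resource spec file describing N logical CPUs,
    # ids 0..N-1 with one slot each, and hand it to ctest with
    #   ctest --resource-spec-file <build>/ctest_resources.json
    cmake_host_system_information(RESULT num_cores QUERY NUMBER_OF_LOGICAL_CORES)
    math(EXPR last_core "${num_cores} - 1")
    set(cpu_entries "")
    foreach(id RANGE ${last_core})
      list(APPEND cpu_entries "{\"id\": \"${id}\", \"slots\": 1}")
    endforeach()
    list(JOIN cpu_entries ", " cpu_entries)
    file(WRITE "${CMAKE_BINARY_DIR}/ctest_resources.json"
      "{\"version\": {\"major\": 1, \"minor\": 0}, \"local\": [{\"cpus\": [${cpu_entries}]}]}")

    # Step 2 (as in the patch): ask CTest for <smp_count> groups (here 4) of one CPU each,
    # so a test started with "--smp 4" reserves four distinct cores.
    set_property(TEST unittest_seastar_example
      PROPERTY RESOURCE_GROUPS "4,cpus:1")

    # Step 3 (sketch): while the test runs, CTest exports environment variables such as
    #   CTEST_RESOURCE_GROUP_COUNT=4
    #   CTEST_RESOURCE_GROUP_0=cpus
    #   CTEST_RESOURCE_GROUP_0_CPUS=id:2,slots:1
    # which a test wrapper can translate into Seastar smp options (e.g. a --cpuset value),
    # so parallel crimson unittests spread across cores instead of piling onto cores [0, N).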
diff --git a/cmake/modules/BuildBoost.cmake b/cmake/modules/BuildBoost.cmake index a22578795908..380c55445d6f 100644 --- a/cmake/modules/BuildBoost.cmake +++ b/cmake/modules/BuildBoost.cmake @@ -11,6 +11,8 @@ # Boost_USE_STATIC_LIBS : boolean (default: OFF) # Boost_USE_MULTITHREADED : boolean (default: OFF) # BOOST_J: integer (defanult 1) +# +# Note: Remove boost_redis submodule once upgraded to Boost version that includes it function(check_boost_version source_dir expected_version) set(version_hpp "${source_dir}/boost/version.hpp") @@ -47,7 +49,11 @@ endmacro() function(do_build_boost root_dir version) cmake_parse_arguments(Boost_BUILD "" "" COMPONENTS ${ARGN}) - set(boost_features "variant=release") + if(CMAKE_BUILD_TYPE STREQUAL Debug) + set(boost_features "variant=debug") + else() + set(boost_features "variant=release") + endif() if(Boost_USE_MULTITHREADED) list(APPEND boost_features "threading=multi") else() @@ -149,18 +155,19 @@ function(do_build_boost root_dir version) check_boost_version("${PROJECT_SOURCE_DIR}/src/boost" ${version}) set(source_dir SOURCE_DIR "${PROJECT_SOURCE_DIR}/src/boost") - elseif(version VERSION_GREATER 1.82) + elseif(version VERSION_GREATER 1.85) message(FATAL_ERROR "Unknown BOOST_REQUESTED_VERSION: ${version}") else() message(STATUS "boost will be downloaded...") # NOTE: If you change this version number make sure the package is available # at the three URLs below (may involve uploading to download.ceph.com) - set(boost_version 1.82.0) - set(boost_sha256 a6e1ab9b0860e6a2881dd7b21fe9f737a095e5f33a3a874afc6a345228597ee6) + set(boost_version 1.85.0) + set(boost_sha256 7009fe1faa1697476bdc7027703a2badb84e849b7b0baad5086b087b971f8617) string(REPLACE "." "_" boost_version_underscore ${boost_version} ) - string(JOIN " " boost_url - https://boostorg.jfrog.io/artifactory/main/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2 - https://download.ceph.com/qa/boost_${boost_version_underscore}.tar.bz2) + list(APPEND boost_url + https://download.ceph.com/qa/boost_${boost_version_underscore}.tar.bz2 + https://archives.boost.io//release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2 + https://boostorg.jfrog.io/artifactory/main/release/${boost_version}/source/boost_${boost_version_underscore}.tar.bz2) set(source_dir URL ${boost_url} URL_HASH SHA256=${boost_sha256} diff --git a/cmake/modules/BuildFIO.cmake b/cmake/modules/BuildFIO.cmake index 3a0694b543ee..49fcfb31d973 100644 --- a/cmake/modules/BuildFIO.cmake +++ b/cmake/modules/BuildFIO.cmake @@ -37,6 +37,7 @@ function(build_fio) add_library(fio INTERFACE IMPORTED) add_dependencies(fio fio_ext) set_target_properties(fio PROPERTIES + CXX_EXTENSIONS ON INTERFACE_INCLUDE_DIRECTORIES ${source_dir} - INTERFACE_COMPILE_OPTIONS "-include;${source_dir}/config-host.h;$<$:-std=gnu99>$<$:-std=gnu++17>") + INTERFACE_COMPILE_OPTIONS "-include;${source_dir}/config-host.h;$<$:-std=gnu99>") endfunction() diff --git a/cmake/modules/BuildISAL.cmake b/cmake/modules/BuildISAL.cmake new file mode 100644 index 000000000000..6df15bc5bb83 --- /dev/null +++ b/cmake/modules/BuildISAL.cmake @@ -0,0 +1,42 @@ +# use an ExternalProject to build isa-l using its makefile +function(build_isal) + set(isal_BINARY_DIR ${CMAKE_BINARY_DIR}/src/isa-l) + set(isal_INSTALL_DIR ${isal_BINARY_DIR}/install) + set(isal_INCLUDE_DIR "${isal_INSTALL_DIR}/include") + set(isal_LIBRARY "${isal_INSTALL_DIR}/lib/libisal.a") + + # this include directory won't exist until the install step, but the + # imported targets need it early for 
INTERFACE_INCLUDE_DIRECTORIES + file(MAKE_DIRECTORY "${isal_INCLUDE_DIR}") + + set(configure_cmd env CC=${CMAKE_C_COMPILER} ./configure --prefix=${isal_INSTALL_DIR}) + # build a static library with -fPIC that we can link into crypto/compressor plugins + list(APPEND configure_cmd --with-pic --enable-static --disable-shared) + + # clear the DESTDIR environment variable from debian/rules, + # because it messes with the internal install paths of arrow's bundled deps + set(NO_DESTDIR_COMMAND ${CMAKE_COMMAND} -E env --unset=DESTDIR) + + include(ExternalProject) + ExternalProject_Add(isal_ext + SOURCE_DIR "${PROJECT_SOURCE_DIR}/src/isa-l" + CONFIGURE_COMMAND ./autogen.sh COMMAND ${configure_cmd} + BUILD_COMMAND ${NO_DESTDIR_COMMAND} make -j3 + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS ${isal_LIBRARY} + INSTALL_COMMAND ${NO_DESTDIR_COMMAND} make install + UPDATE_COMMAND "" + LOG_CONFIGURE ON + LOG_BUILD ON + LOG_INSTALL ON + LOG_MERGED_STDOUTERR ON + LOG_OUTPUT_ON_FAILURE ON) + + # add imported library target ISAL::Crypto + add_library(ISAL::ISAL STATIC IMPORTED GLOBAL) + add_dependencies(ISAL::ISAL isal_ext) + set_target_properties(ISAL::ISAL PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES ${isal_INCLUDE_DIR} + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION ${isal_LIBRARY}) +endfunction() diff --git a/cmake/modules/BuildISALCrypto.cmake b/cmake/modules/BuildISALCrypto.cmake new file mode 100644 index 000000000000..26fb4a8f9cd5 --- /dev/null +++ b/cmake/modules/BuildISALCrypto.cmake @@ -0,0 +1,31 @@ +# use an ExternalProject to build isa-l_crypto using its makefile +function(build_isal_crypto) + set(ISAL_CRYPTO_SOURCE_DIR ${CMAKE_SOURCE_DIR}/src/crypto/isa-l/isa-l_crypto) + set(ISAL_CRYPTO_INCLUDE_DIR "${ISAL_CRYPTO_SOURCE_DIR}/include") + set(ISAL_CRYPTO_LIBRARY "${ISAL_CRYPTO_SOURCE_DIR}/bin/isa-l_crypto.a") + + include(FindMake) + find_make("MAKE_EXECUTABLE" "make_cmd") + + include(ExternalProject) + ExternalProject_Add(isal_crypto_ext + SOURCE_DIR ${ISAL_CRYPTO_SOURCE_DIR} + CONFIGURE_COMMAND "" + BUILD_COMMAND ${make_cmd} -f /Makefile.unx + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS ${ISAL_CRYPTO_LIBRARY} + INSTALL_COMMAND "" + UPDATE_COMMAND "" + LOG_CONFIGURE ON + LOG_BUILD ON + LOG_MERGED_STDOUTERR ON + LOG_OUTPUT_ON_FAILURE ON) + + # add imported library target ISAL::Crypto + add_library(ISAL::Crypto STATIC IMPORTED GLOBAL) + add_dependencies(ISAL::Crypto isal_crypto_ext) + set_target_properties(ISAL::Crypto PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES ${ISAL_CRYPTO_INCLUDE_DIR} + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION ${ISAL_CRYPTO_LIBRARY}) +endfunction() diff --git a/cmake/modules/BuildOpentelemetry.cmake b/cmake/modules/BuildOpentelemetry.cmake index ba2edaa09329..48b219e9c0fc 100644 --- a/cmake/modules/BuildOpentelemetry.cmake +++ b/cmake/modules/BuildOpentelemetry.cmake @@ -82,4 +82,5 @@ function(build_opentelemetry) PROPERTIES INTERFACE_LINK_LIBRARIES "${opentelemetry_deps}" INTERFACE_INCLUDE_DIRECTORIES "${opentelemetry_include_dir}") + include_directories(SYSTEM "${opentelemetry_include_dir}") endfunction() diff --git a/cmake/modules/BuildQAT.cmake b/cmake/modules/BuildQAT.cmake new file mode 100644 index 000000000000..d65d07639dc1 --- /dev/null +++ b/cmake/modules/BuildQAT.cmake @@ -0,0 +1,44 @@ +function(build_qat) + set(QAT_BINARY_DIR ${CMAKE_BINARY_DIR}/src/qatlib) + set(QAT_INSTALL_DIR ${QAT_BINARY_DIR}/install) + set(QAT_INCLUDE_DIR ${QAT_INSTALL_DIR}/include) + set(QAT_LIBRARY_DIR ${QAT_INSTALL_DIR}/lib) + set(QAT_LIBRARY ${QAT_LIBRARY_DIR}/libqat.a) + 
set(QAT_USDM_LIBRARY ${QAT_LIBRARY_DIR}/libusdm.a) + + # this include directory won't exist until the install step, but the + # imported targets need it early for INTERFACE_INCLUDE_DIRECTORIES + file(MAKE_DIRECTORY "${QAT_INCLUDE_DIR}") + + set(configure_cmd env CC=${CMAKE_C_COMPILER} ./configure --prefix=${QAT_INSTALL_DIR}) + # disable systemd or 'make install' tries to write /usr/lib/systemd/system/qat.service + list(APPEND configure_cmd --disable-systemd) + # build a static library with -fPIC that we can link into crypto/compressor plugins + list(APPEND configure_cmd --with-pic --enable-static --disable-shared) + + # clear the DESTDIR environment variable from debian/rules, + # because it messes with the internal install paths of arrow's bundled deps + set(NO_DESTDIR_COMMAND ${CMAKE_COMMAND} -E env --unset=DESTDIR) + + include(ExternalProject) + ExternalProject_Add(qatlib_ext + SOURCE_DIR "${PROJECT_SOURCE_DIR}/src/qatlib" + CONFIGURE_COMMAND ./autogen.sh COMMAND ${configure_cmd} + BUILD_COMMAND ${NO_DESTDIR_COMMAND} make -j3 + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS ${QAT_LIBRARY} ${QAT_USDM_LIBRARY} + INSTALL_COMMAND ${NO_DESTDIR_COMMAND} make install + UPDATE_COMMAND "" + LOG_CONFIGURE ON + LOG_BUILD ON + LOG_INSTALL ON + LOG_MERGED_STDOUTERR ON + LOG_OUTPUT_ON_FAILURE ON) + + # export vars for find_package(QAT) + set(QAT_LIBRARY ${QAT_LIBRARY} PARENT_SCOPE) + set(QAT_USDM_LIBRARY ${QAT_USDM_LIBRARY} PARENT_SCOPE) + set(QAT_INCLUDE_DIR ${QAT_INCLUDE_DIR} PARENT_SCOPE) + # library dir for BuildQATzip.cmake + set(QAT_LIBRARY_DIR ${QAT_LIBRARY_DIR} PARENT_SCOPE) +endfunction() diff --git a/cmake/modules/BuildQATzip.cmake b/cmake/modules/BuildQATzip.cmake new file mode 100644 index 000000000000..91cb43c822de --- /dev/null +++ b/cmake/modules/BuildQATzip.cmake @@ -0,0 +1,47 @@ +function(build_qatzip) + set(QATzip_BINARY_DIR ${CMAKE_BINARY_DIR}/src/qatzip) + set(QATzip_INSTALL_DIR ${QATzip_BINARY_DIR}/install) + set(QATzip_INCLUDE_DIR ${QATzip_INSTALL_DIR}/include) + set(QATzip_LIBRARY ${QATzip_INSTALL_DIR}/lib/libqatzip.a) + + # this include directory won't exist until the install step, but the + # imported targets need it early for INTERFACE_INCLUDE_DIRECTORIES + file(MAKE_DIRECTORY "${QATzip_INCLUDE_DIR}") + + set(configure_cmd env CC=${CMAKE_C_COMPILER} ./configure --prefix=${QATzip_INSTALL_DIR}) + # build a static library with -fPIC that we can link into crypto/compressor plugins + list(APPEND configure_cmd --with-pic --enable-static --disable-shared) + if(QATDRV_INCLUDE_DIR) + list(APPEND configure_cmd --with-ICP_ROOT=${QATDRV_INCLUDE_DIR}) + endif() + if(QAT_INCLUDE_DIR) + list(APPEND configure_cmd CFLAGS=-I${QAT_INCLUDE_DIR}) + endif() + if(QAT_LIBRARY_DIR) + list(APPEND configure_cmd LDFLAGS=-L${QAT_LIBRARY_DIR}) + endif() + + # clear the DESTDIR environment variable from debian/rules, + # because it messes with the internal install paths of arrow's bundled deps + set(NO_DESTDIR_COMMAND ${CMAKE_COMMAND} -E env --unset=DESTDIR) + + include(ExternalProject) + ExternalProject_Add(qatzip_ext + SOURCE_DIR "${PROJECT_SOURCE_DIR}/src/qatzip" + CONFIGURE_COMMAND ./autogen.sh COMMAND ${configure_cmd} + BUILD_COMMAND ${NO_DESTDIR_COMMAND} make -j3 + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS ${QATzip_LIBRARY} + INSTALL_COMMAND ${NO_DESTDIR_COMMAND} make install + UPDATE_COMMAND "" + LOG_CONFIGURE ON + LOG_BUILD ON + LOG_INSTALL ON + LOG_MERGED_STDOUTERR ON + LOG_OUTPUT_ON_FAILURE ON) + + # export vars for find_package(QATzip) + set(QATzip_LIBRARIES ${QATzip_LIBRARY} PARENT_SCOPE) + 
set(QATzip_INCLUDE_DIR ${QATzip_INCLUDE_DIR} PARENT_SCOPE) + set(QATzip_INTERFACE_LINK_LIBRARIES QAT::qat QAT::usdm LZ4::LZ4 PARENT_SCOPE) +endfunction() diff --git a/cmake/modules/BuildRocksDB.cmake b/cmake/modules/BuildRocksDB.cmake index f71f2bb6cc4d..c1f4823963f2 100644 --- a/cmake/modules/BuildRocksDB.cmake +++ b/cmake/modules/BuildRocksDB.cmake @@ -11,12 +11,20 @@ function(build_rocksdb) -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}) endif() + list(APPEND rocksdb_CMAKE_ARGS -DWITH_LIBURING=${WITH_LIBURING}) + if(WITH_LIBURING) + list(APPEND rocksdb_CMAKE_ARGS -During_INCLUDE_DIR=${URING_INCLUDE_DIR}) + list(APPEND rocksdb_CMAKE_ARGS -During_LIBRARIES=${URING_LIBRARY_DIR}) + list(APPEND rocksdb_INTERFACE_LINK_LIBRARIES uring::uring) + endif() + if(ALLOCATOR STREQUAL "jemalloc") list(APPEND rocksdb_CMAKE_ARGS -DWITH_JEMALLOC=ON) list(APPEND rocksdb_INTERFACE_LINK_LIBRARIES JeMalloc::JeMalloc) endif() list(APPEND rocksdb_CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) + list(APPEND rocksdb_CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) list(APPEND rocksdb_CMAKE_ARGS -DWITH_SNAPPY=${SNAPPY_FOUND}) if(SNAPPY_FOUND) @@ -52,12 +60,13 @@ function(build_rocksdb) endif() include(CheckCXXCompilerFlag) check_cxx_compiler_flag("-Wno-deprecated-copy" HAS_WARNING_DEPRECATED_COPY) + set(rocksdb_CXX_FLAGS "${CMAKE_CXX_FLAGS}") if(HAS_WARNING_DEPRECATED_COPY) - set(rocksdb_CXX_FLAGS -Wno-deprecated-copy) + string(APPEND rocksdb_CXX_FLAGS " -Wno-deprecated-copy") endif() check_cxx_compiler_flag("-Wno-pessimizing-move" HAS_WARNING_PESSIMIZING_MOVE) if(HAS_WARNING_PESSIMIZING_MOVE) - set(rocksdb_CXX_FLAGS "${rocksdb_CXX_FLAGS} -Wno-pessimizing-move") + string(APPEND rocksdb_CXX_FLAGS " -Wno-pessimizing-move") endif() if(rocksdb_CXX_FLAGS) list(APPEND rocksdb_CMAKE_ARGS -DCMAKE_CXX_FLAGS='${rocksdb_CXX_FLAGS}') @@ -84,6 +93,9 @@ function(build_rocksdb) INSTALL_COMMAND "" LIST_SEPARATOR !) 
+ # make sure all the link libraries are built first + add_dependencies(rocksdb_ext ${rocksdb_INTERFACE_LINK_LIBRARIES}) + add_library(RocksDB::RocksDB STATIC IMPORTED) add_dependencies(RocksDB::RocksDB rocksdb_ext) set(rocksdb_INCLUDE_DIR "${rocksdb_SOURCE_DIR}/include") diff --git a/cmake/modules/Builduadk.cmake b/cmake/modules/Builduadk.cmake new file mode 100644 index 000000000000..e3b11f32aaf5 --- /dev/null +++ b/cmake/modules/Builduadk.cmake @@ -0,0 +1,53 @@ +function(build_uadk) + set(UADK_INSTALL_DIR ${CMAKE_BINARY_DIR}/src/uadk/install) + set(UADK_INCLUDE_DIR ${UADK_INSTALL_DIR}/include) + set(UADK_LIBRARY_DIR ${UADK_INSTALL_DIR}/lib) + set(UADK_WD_LIBRARY ${UADK_LIBRARY_DIR}/libwd.a) + set(UADK_WD_COMP_LIBRARY ${UADK_LIBRARY_DIR}/libwd_comp.a) + set(UADK_WD_ZIP_LIBRARY ${UADK_LIBRARY_DIR}/uadk/libhisi_zip.a) + set(configure_cmd env ./configure --prefix=${UADK_INSTALL_DIR}) + list(APPEND configure_cmd --with-pic --enable-static --disable-shared --with-static_drv) + + include(ExternalProject) + ExternalProject_Add(uadk_ext + UPDATE_COMMAND "" # this disables rebuild on each run + GIT_REPOSITORY "https://github.com/Linaro/uadk.git" + GIT_CONFIG advice.detachedHead=false + GIT_SHALLOW 1 + GIT_TAG "master" + SOURCE_DIR "${PROJECT_SOURCE_DIR}/src/uadk" + BUILD_IN_SOURCE 1 + CMAKE_ARGS -DCMAKE_CXX_COMPILER=which g++ + CONFIGURE_COMMAND ./autogen.sh COMMAND ${configure_cmd} + BUILD_COMMAND make + BUILD_BYPRODUCTS ${UADK_WD_LIBRARY} ${UADK_WD_COMP_LIBRARY} ${UADK_WD_ZIP_LIBRARY} + INSTALL_COMMAND make install + LOG_CONFIGURE ON + LOG_BUILD ON + LOG_INSTALL ON + LOG_MERGED_STDOUTERR ON + LOG_OUTPUT_ON_FAILURE ON) + + ExternalProject_Get_Property(uadk_ext source_dir) + set(UADK_INCLUDE_DIR ${UADK_INCLUDE_DIR} PARENT_SCOPE) + + add_library(uadk::uadk UNKNOWN IMPORTED) + add_library(uadk::uadkwd UNKNOWN IMPORTED) + add_library(uadk::uadkzip UNKNOWN IMPORTED) + add_dependencies(uadk::uadk uadk_ext) + add_dependencies(uadk::uadkwd uadk_ext) + add_dependencies(uadk::uadkzip uadk_ext) + file(MAKE_DIRECTORY ${UADK_INCLUDE_DIR}) + set_target_properties(uadk::uadk PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES ${UADK_INCLUDE_DIR} + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${UADK_WD_COMP_LIBRARY}") + set_target_properties(uadk::uadkwd PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES ${UADK_INCLUDE_DIR} + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${UADK_WD_LIBRARY}") + set_target_properties(uadk::uadkzip PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES ${UADK_INCLUDE_DIR} + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${UADK_WD_ZIP_LIBRARY}") +endfunction() diff --git a/cmake/modules/Builduring.cmake b/cmake/modules/Builduring.cmake index 8683880f7116..4e4107fb5ac7 100644 --- a/cmake/modules/Builduring.cmake +++ b/cmake/modules/Builduring.cmake @@ -9,7 +9,7 @@ function(build_uring) set(source_dir_args SOURCE_DIR ${CMAKE_BINARY_DIR}/src/liburing GIT_REPOSITORY https://github.com/axboe/liburing.git - GIT_TAG "liburing-0.7" + GIT_TAG "liburing-2.5" GIT_SHALLOW TRUE GIT_CONFIG advice.detachedHead=false) endif() @@ -17,7 +17,7 @@ function(build_uring) include(ExternalProject) ExternalProject_Add(liburing_ext ${source_dir_args} - CONFIGURE_COMMAND env CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} /configure + CONFIGURE_COMMAND env CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} /configure --use-libc BUILD_COMMAND ${make_cmd} "CFLAGS=${CMAKE_C_FLAGS} -fPIC" -C src -s BUILD_IN_SOURCE 1 BUILD_BYPRODUCTS "/src/liburing.a" @@ -32,6 +32,8 @@ function(build_uring) 
ExternalProject_Get_Property(liburing_ext source_dir) set(URING_INCLUDE_DIR "${source_dir}/src/include") set(URING_LIBRARY_DIR "${source_dir}/src") + set(URING_INCLUDE_DIR ${URING_INCLUDE_DIR} PARENT_SCOPE) + set(URING_LIBRARY_DIR ${URING_LIBRARY_DIR} PARENT_SCOPE) add_library(uring::uring STATIC IMPORTED GLOBAL) add_dependencies(uring::uring liburing_ext) diff --git a/cmake/modules/CTags.cmake b/cmake/modules/CTags.cmake index c3e1b3799b02..13fe9fcb32d4 100644 --- a/cmake/modules/CTags.cmake +++ b/cmake/modules/CTags.cmake @@ -3,23 +3,24 @@ find_program(CTAGS_EXECUTABLE ctags) function(add_tags name) cmake_parse_arguments(TAGS "" "SRC_DIR;TAG_FILE" "EXCLUDE_OPTS;EXCLUDES" ${ARGN}) set(excludes ${TAGS_EXCLUDES}) + find_package(Git) if(TAGS_EXCLUDE_OPTS) # always respect EXCLUDES_OPTS list(APPEND excludes ${TAGS_EXCLUDE_OPTS}) - else() + elseif(Git_FOUND) # exclude the submodules under SRC_DIR by default execute_process( - COMMAND git config --file .gitmodules --get-regexp path + COMMAND ${GIT_EXECUTABLE} config --file .gitmodules --get-regexp path COMMAND awk "/${TAGS_SRC_DIR}/ { print $2 }" WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} RESULT_VARIABLE result_code OUTPUT_VARIABLE submodules OUTPUT_STRIP_TRAILING_WHITESPACE) if(${result_code} EQUAL 0) - string(REPLACE "${TAGS_SRC_DIR}/" "" submodules ${submodules}) + string(REPLACE "${TAGS_SRC_DIR}/" "" submodules "${submodules}") # cmake list uses ";" as the delimiter, so split the string manually # before iterating in it. - string(REPLACE "\n" ";" submodules ${submodules}) + string(REPLACE "\n" ";" submodules "${submodules}") list(APPEND excludes ${submodules}) endif() endif() diff --git a/cmake/modules/Distutils.cmake b/cmake/modules/Distutils.cmake index daaae4ba63fd..f3d6c41e7317 100644 --- a/cmake/modules/Distutils.cmake +++ b/cmake/modules/Distutils.cmake @@ -73,6 +73,8 @@ function(distutils_add_cython_module target name src) set(PY_CC ${compiler_launcher} ${CMAKE_C_COMPILER} ${c_compiler_arg1}) set(PY_CXX ${compiler_launcher} ${CMAKE_CXX_COMPILER} ${cxx_compiler_arg1}) set(PY_LDSHARED ${link_launcher} ${CMAKE_C_COMPILER} ${c_compiler_arg1} "-shared") + string(REPLACE " " ";" PY_LDFLAGS "${CMAKE_SHARED_LINKER_FLAGS}") + list(APPEND PY_LDFLAGS -L${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) execute_process(COMMAND "${Python3_EXECUTABLE}" -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))" @@ -98,7 +100,7 @@ function(distutils_add_cython_module target name src) CXX="${PY_CXX}" LDSHARED="${PY_LDSHARED}" OPT=\"-DNDEBUG -g -fwrapv -O2 -w\" - LDFLAGS=-L${CMAKE_LIBRARY_OUTPUT_DIRECTORY} + LDFLAGS="${PY_LDFLAGS}" CYTHON_BUILD_DIR=${CMAKE_CURRENT_BINARY_DIR} CEPH_LIBDIR=${CMAKE_LIBRARY_OUTPUT_DIRECTORY} ${Python3_EXECUTABLE} ${setup_py} @@ -130,7 +132,7 @@ function(distutils_install_cython_module name) -D'void0=dead_function\(void\)' \ -D'__Pyx_check_single_interpreter\(ARG\)=ARG\#\#0' \ ${CFLAG_DISABLE_VTA}\") - set(ENV{LDFLAGS} \"-L${CMAKE_LIBRARY_OUTPUT_DIRECTORY}\") + set(ENV{LDFLAGS} \"${PY_LDFLAGS}\") set(ENV{CYTHON_BUILD_DIR} \"${CMAKE_CURRENT_BINARY_DIR}\") set(ENV{CEPH_LIBDIR} \"${CMAKE_LIBRARY_OUTPUT_DIRECTORY}\") diff --git a/cmake/modules/FindBoost.cmake b/cmake/modules/FindBoost.cmake index d47c4862dd08..6d9b87f844de 100644 --- a/cmake/modules/FindBoost.cmake +++ b/cmake/modules/FindBoost.cmake @@ -1365,7 +1365,22 @@ function(_Boost_COMPONENT_DEPENDENCIES component _ret) set(_Boost_TIMER_DEPENDENCIES chrono) set(_Boost_WAVE_DEPENDENCIES filesystem serialization thread chrono atomic) set(_Boost_WSERIALIZATION_DEPENDENCIES 
serialization) - else() + elseif(Boost_VERSION_STRING VERSION_LESS 1.83.0) + set(_Boost_CONTRACT_DEPENDENCIES thread chrono) + set(_Boost_COROUTINE_DEPENDENCIES context) + set(_Boost_FIBER_DEPENDENCIES context) + set(_Boost_IOSTREAMS_DEPENDENCIES regex) + set(_Boost_JSON_DEPENDENCIES container) + set(_Boost_LOG_DEPENDENCIES log_setup filesystem thread regex chrono atomic) + set(_Boost_MATH_DEPENDENCIES math_c99 math_c99f math_c99l math_tr1 math_tr1f math_tr1l) + set(_Boost_MPI_DEPENDENCIES serialization) + set(_Boost_MPI_PYTHON_DEPENDENCIES python${component_python_version} mpi serialization) + set(_Boost_NUMPY_DEPENDENCIES python${component_python_version}) + set(_Boost_THREAD_DEPENDENCIES chrono atomic) + set(_Boost_TIMER_DEPENDENCIES chrono) + set(_Boost_WAVE_DEPENDENCIES filesystem serialization thread chrono atomic) + set(_Boost_WSERIALIZATION_DEPENDENCIES serialization) + elseif(Boost_VERSION_STRING VERSION_LESS 1.84.0) set(_Boost_CONTRACT_DEPENDENCIES thread chrono) set(_Boost_COROUTINE_DEPENDENCIES context) set(_Boost_FIBER_DEPENDENCIES context) @@ -1380,7 +1395,21 @@ function(_Boost_COMPONENT_DEPENDENCIES component _ret) set(_Boost_TIMER_DEPENDENCIES chrono) set(_Boost_WAVE_DEPENDENCIES filesystem serialization thread chrono atomic) set(_Boost_WSERIALIZATION_DEPENDENCIES serialization) - if(Boost_VERSION_STRING VERSION_GREATER_EQUAL 1.81.0 AND NOT Boost_NO_WARN_NEW_VERSIONS) + else() + set(_Boost_CONTRACT_DEPENDENCIES thread chrono) + set(_Boost_COROUTINE_DEPENDENCIES context) + set(_Boost_FIBER_DEPENDENCIES context) + set(_Boost_IOSTREAMS_DEPENDENCIES regex) + set(_Boost_JSON_DEPENDENCIES container) + set(_Boost_LOG_DEPENDENCIES log_setup filesystem thread regex chrono atomic) + set(_Boost_MATH_DEPENDENCIES math_c99 math_c99f math_c99l math_tr1 math_tr1f math_tr1l) + set(_Boost_MPI_DEPENDENCIES serialization) + set(_Boost_MPI_PYTHON_DEPENDENCIES python${component_python_version} mpi serialization) + set(_Boost_NUMPY_DEPENDENCIES python${component_python_version}) + set(_Boost_THREAD_DEPENDENCIES chrono atomic) + set(_Boost_WAVE_DEPENDENCIES filesystem serialization thread chrono atomic) + set(_Boost_WSERIALIZATION_DEPENDENCIES serialization) + if(Boost_VERSION_STRING VERSION_GREATER_EQUAL 1.86.0 AND NOT Boost_NO_WARN_NEW_VERSIONS) message(WARNING "New Boost version may have incorrect or missing dependencies and imported targets") endif() endif() @@ -1445,6 +1474,7 @@ function(_Boost_COMPONENT_HEADERS component _hdrs) set(_Boost_MATH_TR1L_HEADERS "boost/math/tr1.hpp") set(_Boost_MPI_HEADERS "boost/mpi.hpp") set(_Boost_MPI_PYTHON_HEADERS "boost/mpi/python/config.hpp") + set(_Boost_MYSQL_HEADERS "boost/mysql.hpp") set(_Boost_NUMPY_HEADERS "boost/python/numpy.hpp") set(_Boost_NOWIDE_HEADERS "boost/nowide/cstdlib.hpp") set(_Boost_PRG_EXEC_MONITOR_HEADERS "boost/test/prg_exec_monitor.hpp") @@ -1466,6 +1496,7 @@ function(_Boost_COMPONENT_HEADERS component _hdrs) set(_Boost_TIMER_HEADERS "boost/timer.hpp") set(_Boost_TYPE_ERASURE_HEADERS "boost/type_erasure/config.hpp") set(_Boost_UNIT_TEST_FRAMEWORK_HEADERS "boost/test/framework.hpp") + set(_Boost_URL_HEADERS "boost/url.hpp") set(_Boost_WAVE_HEADERS "boost/wave.hpp") set(_Boost_WSERIALIZATION_HEADERS "boost/archive/text_wiarchive.hpp") set(_Boost_BZIP2_HEADERS "boost/iostreams/filter/bzip2.hpp") @@ -1653,7 +1684,8 @@ else() # _Boost_COMPONENT_HEADERS. See the instructions at the top of # _Boost_COMPONENT_DEPENDENCIES. 
set(_Boost_KNOWN_VERSIONS ${Boost_ADDITIONAL_VERSIONS} - "1.82.0" "1.82" "1.81.0" "1.81" "1.80.0" "1.80" "1.79.0" "1.79" + "1.85.0" "1.85" "1.84.0" "1.84" + "1.83.0" "1.83" "1.82.0" "1.82" "1.81.0" "1.81" "1.80.0" "1.80" "1.79.0" "1.79" "1.78.0" "1.78" "1.77.0" "1.77" "1.76.0" "1.76" "1.75.0" "1.75" "1.74.0" "1.74" "1.73.0" "1.73" "1.72.0" "1.72" "1.71.0" "1.71" "1.70.0" "1.70" "1.69.0" "1.69" "1.68.0" "1.68" "1.67.0" "1.67" "1.66.0" "1.66" "1.65.1" "1.65.0" "1.65" diff --git a/cmake/modules/FindQAT.cmake b/cmake/modules/FindQAT.cmake new file mode 100644 index 000000000000..9044e5493922 --- /dev/null +++ b/cmake/modules/FindQAT.cmake @@ -0,0 +1,40 @@ +find_package(PkgConfig) +pkg_search_module(PC_QAT libqat qatlib QUIET) + +find_path(QAT_INCLUDE_DIR + NAMES qat/cpa.h + HINTS ${PC_QAT_INCLUDE_DIRS}) + +find_library(QAT_LIBRARY + NAMES qat + HINTS ${PC_QAT_LIBRARY_DIRS}) + +find_library(QAT_USDM_LIBRARY + NAMES usdm + HINTS ${PC_QAT_LIBRARY_DIRS}) + +set(QAT_VERSION ${PC_QAT_VERSION}) +set(QAT_LIBRARIES ${QAT_LIBRARY} ${QAT_USDM_LIBRARY}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(QAT + REQUIRED_VARS QAT_LIBRARY QAT_USDM_LIBRARY QAT_INCLUDE_DIR + VERSION_VAR QAT_VERSION) + +mark_as_advanced(QAT_LIBRARY QAT_USDM_LIBRARY QAT_LIBRARIES QAT_INCLUDE_DIR QAT_VERSION) + +if(QAT_FOUND AND NOT (TARGET QAT::qat)) + add_library(QAT::qat UNKNOWN IMPORTED) + set_target_properties(QAT::qat PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${QAT_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${QAT_LIBRARY}") +endif() + +if(QAT_FOUND AND NOT (TARGET QAT::usdm)) + add_library(QAT::usdm UNKNOWN IMPORTED) + set_target_properties(QAT::usdm PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${QAT_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${QAT_USDM_LIBRARY}") +endif() diff --git a/cmake/modules/FindQATzip.cmake b/cmake/modules/FindQATzip.cmake new file mode 100644 index 000000000000..364cb7359148 --- /dev/null +++ b/cmake/modules/FindQATzip.cmake @@ -0,0 +1,33 @@ +# - Find QATzip +# Find the QATzip compression library and includes +# +# QATzip_INCLUDE_DIR - where to find QATzip.h, etc. +# QATzip_LIBRARIES - List of libraries when using QATzip. +# QATzip_FOUND - True if QATzip found. 
+ +find_package(PkgConfig QUIET) +pkg_search_module(PC_QATzip qatzip QUIET) + +find_path(QATzip_INCLUDE_DIR + NAMES qatzip.h + HINTS ${PC_QATzip_INCLUDE_DIRS}) + +find_library(QATzip_LIBRARIES + NAMES qatzip + HINTS ${PC_QATzip_LIBRARY_DIRS}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(QATzip DEFAULT_MSG QATzip_LIBRARIES QATzip_INCLUDE_DIR) + +mark_as_advanced( + QATzip_LIBRARIES + QATzip_INCLUDE_DIR) + +if(QATzip_FOUND AND NOT TARGET QAT::zip) + add_library(QAT::zip SHARED IMPORTED) + set_target_properties(QAT::zip PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${QATzip_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${QATzip_INTERFACE_LINK_LIBRARIES}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${QATzip_LIBRARIES}") +endif() diff --git a/cmake/modules/FindQatDrv.cmake b/cmake/modules/FindQatDrv.cmake index 3305a38c06cb..e2849d9c89be 100644 --- a/cmake/modules/FindQatDrv.cmake +++ b/cmake/modules/FindQatDrv.cmake @@ -74,7 +74,15 @@ foreach(component ${QatDrv_FIND_COMPONENTS}) add_library(QatDrv::${component} STATIC IMPORTED GLOBAL) set_target_properties(QatDrv::${component} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${QatDrv_INCLUDE_DIRS}" + INTERFACE_COMPILE_OPTIONS "-DHAVE_QATDRV" IMPORTED_LINK_INTERFACE_LANGUAGES "C" IMPORTED_LOCATION "${QatDrv_${component}_LIBRARIES}") endif() + + # add alias targets to match FindQAT.cmake + if(component STREQUAL "qat_s") + add_library(QAT::qat ALIAS QatDrv::qat_s) + elseif(component STREQUAL "usdm_drv_s") + add_library(QAT::usdm ALIAS QatDrv::usdm_drv_s) + endif() endforeach() diff --git a/cmake/modules/FindSanitizers.cmake b/cmake/modules/FindSanitizers.cmake index adafc5ebe3f7..1401ca2442bf 100644 --- a/cmake/modules/FindSanitizers.cmake +++ b/cmake/modules/FindSanitizers.cmake @@ -14,8 +14,8 @@ foreach(component ${Sanitizers_FIND_COMPONENTS}) elseif(component STREQUAL "leak") set(Sanitizers_leak_COMPILE_OPTIONS "-fsanitize=leak") elseif(component STREQUAL "thread") - if ("address" IN_LIST ${Sanitizers_FIND_COMPONENTS} OR - "leak" IN_LIST ${Sanitizers_FIND_COMPONENTS}) + if ("address" IN_LIST "${Sanitizers_FIND_COMPONENTS}" OR + "leak" IN_LIST "${Sanitizers_FIND_COMPONENTS}") message(SEND_ERROR "Cannot combine -fsanitize-leak w/ -fsanitize-thread") elseif(NOT CMAKE_POSITION_INDEPENDENT_CODE) message(SEND_ERROR "TSan requires all code to be position independent") @@ -57,6 +57,9 @@ string (REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${Sanitizers_COMPILE_OPTIONS}") set(CMAKE_REQUIRED_LIBRARIES ${Sanitizers_COMPILE_OPTIONS}) check_cxx_source_compiles("int main() {}" Sanitizers_ARE_SUPPORTED) + +file (READ ${CMAKE_CURRENT_LIST_DIR}/code_tests/Sanitizers_fiber_test.cc _sanitizers_fiber_test_code) +check_cxx_source_compiles ("${_sanitizers_fiber_test_code}" Sanitizers_FIBER_SUPPORT) cmake_pop_check_state() include(FindPackageHandleStandardArgs) diff --git a/cmake/modules/Findcap.cmake b/cmake/modules/Findcap.cmake new file mode 100644 index 000000000000..f33b22d2c29e --- /dev/null +++ b/cmake/modules/Findcap.cmake @@ -0,0 +1,35 @@ +# Try to find libcap +# +find_package(PkgConfig QUIET REQUIRED) + +pkg_check_modules(PC_cap QUIET cap) + +find_library(cap_LIBRARY + NAMES cap + HINTS + ${PC_cap_LIBDIR} + ${PC_cap_LIBRARY_DIRS}) + +find_path(cap_INCLUDE_DIR + NAMES sys/capability.h + HINTS + ${PC_cap_INCLUDEDIR} + ${PC_cap_INCLUDE_DIRS}) + +mark_as_advanced( + cap_LIBRARY + cap_INCLUDE_DIR) + +include (FindPackageHandleStandardArgs) +find_package_handle_standard_args (cap + REQUIRED_VARS + cap_LIBRARY + cap_INCLUDE_DIR) + 
+if(cap_FOUND AND NOT TARGET cap::cap) + add_library(cap::cap UNKNOWN IMPORTED) + set_target_properties(cap::cap + PROPERTIES + IMPORTED_LOCATION ${cap_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${cap_INCLUDE_DIR}) +endif() diff --git a/cmake/modules/Findfmt.cmake b/cmake/modules/Findfmt.cmake deleted file mode 100644 index 734c2b0571c2..000000000000 --- a/cmake/modules/Findfmt.cmake +++ /dev/null @@ -1,61 +0,0 @@ -find_path(fmt_INCLUDE_DIR NAMES fmt/format.h) - -if(fmt_INCLUDE_DIR) - set(_fmt_version_file "${fmt_INCLUDE_DIR}/fmt/core.h") - if(NOT EXISTS "${_fmt_version_file}") - set(_fmt_version_file "${fmt_INCLUDE_DIR}/fmt/format.h") - endif() - if(EXISTS "${_fmt_version_file}") - # parse "#define FMT_VERSION 40100" to 4.1.0 - file(STRINGS "${_fmt_version_file}" fmt_VERSION_LINE - REGEX "^#define[ \t]+FMT_VERSION[ \t]+[0-9]+$") - string(REGEX REPLACE "^#define[ \t]+FMT_VERSION[ \t]+([0-9]+)$" - "\\1" fmt_VERSION "${fmt_VERSION_LINE}") - foreach(ver "fmt_VERSION_PATCH" "fmt_VERSION_MINOR" "fmt_VERSION_MAJOR") - math(EXPR ${ver} "${fmt_VERSION} % 100") - math(EXPR fmt_VERSION "(${fmt_VERSION} - ${${ver}}) / 100") - endforeach() - set(fmt_VERSION - "${fmt_VERSION_MAJOR}.${fmt_VERSION_MINOR}.${fmt_VERSION_PATCH}") - endif() -endif() - -find_library(fmt_LIBRARY NAMES fmt) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(fmt - REQUIRED_VARS fmt_INCLUDE_DIR fmt_LIBRARY - VERSION_VAR fmt_VERSION) -mark_as_advanced( - fmt_INCLUDE_DIR - fmt_LIBRARY - fmt_VERSION_MAJOR - fmt_VERSION_MINOR - fmt_VERSION_PATCH - fmt_VERSION_STRING) - -if(fmt_FOUND AND NOT (TARGET fmt::fmt)) - add_library(fmt-header-only INTERFACE) - set_target_properties(fmt-header-only PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${fmt_INCLUDE_DIR}" - INTERFACE_COMPILE_DEFINITIONS FMT_HEADER_ONLY=1 - INTERFACE_COMPILE_FEATURES cxx_std_11) - - add_library(fmt UNKNOWN IMPORTED GLOBAL) - set_target_properties(fmt PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${fmt_INCLUDE_DIR}" - INTERFACE_COMPILE_FEATURES cxx_std_11 - IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" - IMPORTED_LOCATION "${fmt_LIBRARY}") - - if(WITH_FMT_HEADER_ONLY) - # please note, this is different from how upstream defines fmt::fmt. - # in order to force 3rd party libraries to link against fmt-header-only if - # WITH_FMT_HEADER_ONLY is ON, we have to point fmt::fmt to fmt-header-only - # in this case. 
- add_library(fmt::fmt ALIAS fmt-header-only) - else() - add_library(fmt::fmt ALIAS fmt) - endif() - -endif() diff --git a/cmake/modules/Findlibnbd.cmake b/cmake/modules/Findlibnbd.cmake new file mode 100644 index 000000000000..4a908659a6bd --- /dev/null +++ b/cmake/modules/Findlibnbd.cmake @@ -0,0 +1,33 @@ +# - Find libnbd +# Sets the following: +# +# LIBNBD_INCLUDE_DIR +# LIBNBD_LIBRARIES +# LIBNBD_VERSION +# LIBNBD_FOUND + +find_package(PkgConfig QUIET REQUIRED) +pkg_search_module(PC_libnbd libnbd) + +find_path(LIBNBD_INCLUDE_DIR + NAMES libnbd.h + PATHS ${PC_libnbd_INCLUDE_DIRS}) + +find_library(LIBNBD_LIBRARIES + NAMES libnbd.so + PATHS ${PC_libnbd_LIBRARY_DIRS}) + +set(LIBNBD_VERSION ${PC_libnbd_VERSION}) + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(libnbd + REQUIRED_VARS + LIBNBD_INCLUDE_DIR + LIBNBD_LIBRARIES + VERSION_VAR LIBNBD_VERSION) + +mark_as_advanced( + LIBNBD_LIBRARIES + LIBNBD_INCLUDE_DIR + LIBNBD_VERSION) diff --git a/cmake/modules/Findqatzip.cmake b/cmake/modules/Findqatzip.cmake deleted file mode 100644 index 2d0f2ace3887..000000000000 --- a/cmake/modules/Findqatzip.cmake +++ /dev/null @@ -1,24 +0,0 @@ -# - Find qatzip -# Find the qatzip compression library and includes -# -# qatzip_INCLUDE_DIR - where to find qatzip.h, etc. -# qatzip_LIBRARIES - List of libraries when using qatzip. -# qatzip_FOUND - True if qatzip found. - -find_path(qatzip_INCLUDE_DIR NAMES qatzip.h) -find_library(qatzip_LIBRARIES NAMES qatzip HINTS /usr/local/lib64/) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(qatzip DEFAULT_MSG qatzip_LIBRARIES qatzip_INCLUDE_DIR) - -mark_as_advanced( - qatzip_LIBRARIES - qatzip_INCLUDE_DIR) - -if(qatzip_FOUND AND NOT TARGET qatzip::qatzip) - add_library(qatzip::qatzip SHARED IMPORTED) - set_target_properties(qatzip::qatzip PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${qatzip_INCLUDE_DIR}" - IMPORTED_LINK_INTERFACE_LANGUAGES "C" - IMPORTED_LOCATION "${qatzip_LIBRARIES}") -endif() diff --git a/cmake/modules/Finduring.cmake b/cmake/modules/Finduring.cmake index 10c8de425501..8a6267ef2f31 100644 --- a/cmake/modules/Finduring.cmake +++ b/cmake/modules/Finduring.cmake @@ -5,7 +5,7 @@ # uring_FOUND - True if uring found. 
find_path(URING_INCLUDE_DIR liburing.h) -find_library(URING_LIBRARIES liburing.a liburing) +find_library(URING_LIBRARIES uring) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(uring DEFAULT_MSG URING_LIBRARIES URING_INCLUDE_DIR) diff --git a/cmake/modules/Findzfs.cmake b/cmake/modules/Findzfs.cmake deleted file mode 100644 index d92dd1fb04c3..000000000000 --- a/cmake/modules/Findzfs.cmake +++ /dev/null @@ -1,28 +0,0 @@ -# find libzfs or libzfslinux -# Once done, this will define -# -# ZFS_FOUND - system has libzfs -# ZFS_INCLUDE_DIR - the libzfs include directories -# ZFS_LIBRARIES - link these to use libzfs - -find_package(PkgConfig) -if(PKG_CONFIG_FOUND) - pkg_check_modules(ZFS QUIET libzfs) -else() - find_path(ZFS_INCLUDE_DIR libzfs.h - HINTS - ENV ZFS_DIR - PATH_SUFFIXES libzfs) - - find_library(ZFS_LIBRARIES - NAMES zfs - HINTS - ENV ZFS_DIR) - set(XFS_LIBRARIES ${LIBXFS}) -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(zfs DEFAULT_MSG - ZFS_INCLUDE_DIRS ZFS_LIBRARIES) - -mark_as_advanced(ZFS_INCLUDE_DIRS XFS_LIBRARIES) diff --git a/cmake/modules/LimitJobs.cmake b/cmake/modules/LimitJobs.cmake index 591a9321b668..2dcad24a806a 100644 --- a/cmake/modules/LimitJobs.cmake +++ b/cmake/modules/LimitJobs.cmake @@ -4,16 +4,20 @@ set(MAX_LINK_MEM 4500 CACHE INTERNAL "maximum memory used by each linking job (i cmake_host_system_information(RESULT _num_cores QUERY NUMBER_OF_LOGICAL_CORES) cmake_host_system_information(RESULT _total_mem QUERY TOTAL_PHYSICAL_MEMORY) -math(EXPR _avg_compile_jobs "${_total_mem} / ${MAX_COMPILE_MEM}") -if(_avg_compile_jobs EQUAL 0) - set(_avg_compile_jobs 1) -endif() -if(_num_cores LESS _avg_compile_jobs) - set(_avg_compile_jobs ${_num_cores}) +if(NINJA_MAX_COMPILE_JOBS) + set(_avg_compile_jobs "${NINJA_MAX_COMPILE_JOBS}") +else() + math(EXPR _avg_compile_jobs "${_total_mem} / ${MAX_COMPILE_MEM}") + if(_avg_compile_jobs EQUAL 0) + set(_avg_compile_jobs 1) + endif() + if(_num_cores LESS _avg_compile_jobs) + set(_avg_compile_jobs "${_num_cores}") + endif() + set(NINJA_MAX_COMPILE_JOBS "${_avg_compile_jobs}" CACHE STRING + "The maximum number of concurrent compilation jobs, for Ninja build system." FORCE) + mark_as_advanced(NINJA_MAX_COMPILE_JOBS) endif() -set(NINJA_MAX_COMPILE_JOBS "${_avg_compile_jobs}" CACHE STRING - "The maximum number of concurrent compilation jobs, for Ninja build system." FORCE) -mark_as_advanced(NINJA_MAX_COMPILE_JOBS) if(NINJA_MAX_COMPILE_JOBS) math(EXPR _heavy_compile_jobs "${_avg_compile_jobs} / 2") if(_heavy_compile_jobs EQUAL 0) @@ -25,16 +29,20 @@ if(NINJA_MAX_COMPILE_JOBS) set(CMAKE_JOB_POOL_COMPILE avg_compile_job_pool) endif() -math(EXPR _avg_link_jobs "${_total_mem} / ${MAX_LINK_MEM}") -if(_avg_link_jobs EQUAL 0) - set(_avg_link_jobs 1) -endif() -if(_num_cores LESS _avg_link_jobs) - set(_avg_link_jobs ${_num_cores}) +if(NINJA_MAX_LINK_JOBS) + set(_avg_link_jobs "${NINJA_MAX_LINK_JOBS}") +else() + math(EXPR _avg_link_jobs "${_total_mem} / ${MAX_LINK_MEM}") + if(_avg_link_jobs EQUAL 0) + set(_avg_link_jobs 1) + endif() + if(_num_cores LESS _avg_link_jobs) + set(_avg_link_jobs "${_num_cores}") + endif() + set(NINJA_MAX_LINK_JOBS "${_avg_link_jobs}" CACHE STRING + "The maximum number of concurrent link jobs, for Ninja build system." FORCE) + mark_as_advanced(NINJA_MAX_LINK_JOBS) endif() -set(NINJA_MAX_LINK_JOBS "${_avg_link_jobs}" CACHE STRING - "The maximum number of concurrent link jobs, for Ninja build system." 
FORCE) -mark_as_advanced(NINJA_MAX_LINK_JOBS) if(NINJA_MAX_LINK_JOBS) math(EXPR _heavy_link_jobs "${_avg_link_jobs} / 2") if(_heavy_link_jobs EQUAL 0) diff --git a/cmake/modules/SIMDExt.cmake b/cmake/modules/SIMDExt.cmake index 84818617ac32..35b52e64200b 100644 --- a/cmake/modules/SIMDExt.cmake +++ b/cmake/modules/SIMDExt.cmake @@ -20,6 +20,8 @@ # HAVE_PPC64 # HAVE_PPC # +# HAVE_S390X +# # SIMD_COMPILE_FLAGS # @@ -107,4 +109,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(powerpc|ppc)") if(HAVE_POWER8) message(STATUS " HAVE_POWER8 yes") endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(s390x|S390X|s390|S390)") + set(HAVE_S390X 1) + message(STATUS " we are s390x") endif() diff --git a/cmake/modules/code_tests/Sanitizers_fiber_test.cc b/cmake/modules/code_tests/Sanitizers_fiber_test.cc new file mode 100644 index 000000000000..9df531f2675f --- /dev/null +++ b/cmake/modules/code_tests/Sanitizers_fiber_test.cc @@ -0,0 +1,11 @@ +#include + +extern "C" { + void __sanitizer_start_switch_fiber(void**, const void*, size_t); + void __sanitizer_finish_switch_fiber(void*, const void**, size_t*); +} + +int main() { + __sanitizer_start_switch_fiber(nullptr, nullptr, 0); + __sanitizer_finish_switch_fiber(nullptr, nullptr, nullptr); +} diff --git a/container/Containerfile b/container/Containerfile new file mode 100644 index 000000000000..2f75c8c6ce62 --- /dev/null +++ b/container/Containerfile @@ -0,0 +1,209 @@ +ARG FROM_IMAGE="quay.io/centos/centos:stream9" +FROM $FROM_IMAGE + +# allow FROM_IMAGE to be visible inside this stage +ARG FROM_IMAGE + +# Ceph branch name +ARG CEPH_REF="main" + +# Ceph SHA1 +ARG CEPH_SHA1 + +# Ceph git repo (ceph-ci.git or ceph.git) +ARG CEPH_GIT_REPO + +# (optional) Define the baseurl= for the ganesha.repo +ARG GANESHA_REPO_BASEURL="https://buildlogs.centos.org/centos/\$releasever-stream/storage/\$basearch/nfsganesha-5/" + +# (optional) Set to "crimson" to install crimson packages. +ARG OSD_FLAVOR="default" + +# (optional) Should be 'true' for CI builds (pull from shaman, etc.) 
+ARG CI_CONTAINER="true" + +RUN /bin/echo -e "\ +FROM_IMAGE: ${FROM_IMAGE}\n\ +CEPH_REF: ${CEPH_REF}\n\ +GANESHA_REPO_BASEURL: ${GANESHA_REPO_BASEURL} \n\ +OSD_FLAVOR: ${OSD_FLAVOR} \n\ +CI_CONTAINER: ${CI_CONTAINER}" + +# Other labels are set automatically by container/build github action +# See: https://github.com/opencontainers/image-spec/blob/main/annotations.md +LABEL org.opencontainers.image.authors="Ceph Release Team " \ + org.opencontainers.image.documentation="https://docs.ceph.com/" + +LABEL \ +FROM_IMAGE=${FROM_IMAGE} \ +CEPH_REF=${CEPH_REF} \ +CEPH_SHA1=${CEPH_SHA1} \ +CEPH_GIT_REPO=${CEPH_GIT_REPO} \ +GANESHA_REPO_BASEURL=${GANESHA_REPO_BASEURL} \ +OSD_FLAVOR=${OSD_FLAVOR} + + +#=================================================================================================== +# Install ceph and dependencies, and clean up +# IMPORTANT: in official builds, use '--squash' build option to keep image as small as possible +# keeping run steps separate makes local rebuilds quick, but images are big without squash option +#=================================================================================================== + +# Pre-reqs +RUN dnf install -y --setopt=install_weak_deps=False epel-release jq + +# Add NFS-Ganesha repo +RUN \ + echo "[ganesha]" > /etc/yum.repos.d/ganesha.repo && \ + echo "name=ganesha" >> /etc/yum.repos.d/ganesha.repo && \ + echo "baseurl=${GANESHA_REPO_BASEURL}" >> /etc/yum.repos.d/ganesha.repo && \ + echo "gpgcheck=0" >> /etc/yum.repos.d/ganesha.repo && \ + echo "enabled=1" >> /etc/yum.repos.d/ganesha.repo + +# ISCSI repo +RUN set -x && \ + curl -s -L https://shaman.ceph.com/api/repos/tcmu-runner/main/latest/centos/9/repo?arch=$(arch) -o /etc/yum.repos.d/tcmu-runner.repo && \ + case "${CEPH_REF}" in \ + quincy|reef) \ + curl -s -L https://download.ceph.com/ceph-iscsi/3/rpm/el9/ceph-iscsi.repo -o /etc/yum.repos.d/ceph-iscsi.repo ;\ + ;;\ + main|*) \ + curl -s -L https://shaman.ceph.com/api/repos/ceph-iscsi/main/latest/centos/9/repo -o /etc/yum.repos.d/ceph-iscsi.repo ;\ + ;;\ + esac + +# Ceph repo +RUN set -x && \ + rpm --import 'https://download.ceph.com/keys/release.asc' && \ + ARCH=$(arch); if [ "${ARCH}" == "aarch64" ]; then ARCH="arm64"; fi ;\ + IS_RELEASE=0 ;\ + if [[ "${CI_CONTAINER}" == "true" ]] ; then \ + # TODO: this can return different ceph builds (SHA1) for x86 vs. arm runs. is it important to fix? 
+ REPO_URL=$(curl -s "https://shaman.ceph.com/api/search/?project=ceph&distros=centos/9/${ARCH}&flavor=${OSD_FLAVOR}&ref=${CEPH_REF}&sha1=latest" | jq -r .[0].url) ;\ + else \ + IS_RELEASE=1 ;\ + REPO_URL="http://download.ceph.com/rpm-${CEPH_REF}/el9/" ;\ + fi && \ + rpm -Uvh "$REPO_URL/noarch/ceph-release-1-${IS_RELEASE}.el9.noarch.rpm" + +# Copr repos +# scikit for mgr-diskprediction-local +# ref: https://github.com/ceph/ceph-container/pull/1821 +RUN \ + dnf install -y --setopt=install_weak_deps=False dnf-plugins-core && \ + dnf copr enable -y tchaikov/python-scikit-learn + +# Update package mgr +RUN dnf update -y --setopt=install_weak_deps=False + +# Define and install packages +# General +RUN echo "ca-certificates" > packages.txt +# Ceph +# TODO: remove lua-devel and luarocks once they are present in ceph.spec.in +# ref: https://github.com/ceph/ceph/pull/54575#discussion_r1401199635 +RUN echo \ +"ceph-common \ +ceph-exporter \ +ceph-grafana-dashboards \ +ceph-immutable-object-cache \ +ceph-mds \ +ceph-mgr-cephadm \ +ceph-mgr-dashboard \ +ceph-mgr-diskprediction-local \ +ceph-mgr-k8sevents \ +ceph-mgr-rook \ +ceph-mgr \ +ceph-mon \ +ceph-osd \ +ceph-radosgw lua-devel luarocks \ +ceph-volume \ +cephfs-mirror \ +cephfs-top \ +kmod \ +libradosstriper1 \ +rbd-mirror" \ +>> packages.txt + +# Optional crimson package(s) +RUN if [ "${OSD_FLAVOR}" == "crimson" ]; then \ + echo "ceph-crimson-osd" >> packages.txt ; \ +fi + +# Ceph "Recommends" +RUN echo "nvme-cli python3-saml smartmontools" >> packages.txt +# NFS-Ganesha +RUN echo "\ +dbus-daemon \ +nfs-ganesha-ceph \ +nfs-ganesha-rados-grace \ +nfs-ganesha-rados-urls \ +nfs-ganesha-rgw \ +nfs-ganesha \ +rpcbind \ +sssd-client" >> packages.txt + +# ISCSI +RUN echo "ceph-iscsi tcmu-runner python3-rtslib" >> packages.txt + +# Ceph-CSI +# TODO: coordinate with @Madhu-1 to have Ceph-CSI install these itself if unused by ceph +# @adk3798 does cephadm use these? +RUN echo "attr ceph-fuse rbd-nbd" >> packages.txt + +# Rook (only if packages must be in ceph container image) +RUN echo "systemd-udev" >> packages.txt + +# Util packages (should be kept to only utils that are truly very useful) +# 'sgdisk' (from gdisk) is used in docs and scripts for clearing disks (could be a risk? @travisn @guits @ktdreyer ?) +# 'ps' (from procps-ng) and 'hostname' are very valuable for debugging and CI +# TODO: remove sg3_utils once they are moved to ceph.spec.in with libstoragemgmt +# ref: https://github.com/ceph/ceph-container/pull/2013#issuecomment-1248606472 +RUN echo "gdisk hostname procps-ng sg3_utils e2fsprogs lvm2 gcc" >> packages.txt + +# scikit +RUN echo "python3-scikit-learn" >> packages.txt + +# ceph-node-proxy +RUN echo "ceph-node-proxy" >> packages.txt + +RUN echo "=== PACKAGES TO BE INSTALLED ==="; cat packages.txt +RUN echo "=== INSTALLING ===" ; \ +dnf install -y --setopt=install_weak_deps=False --setopt=skip_missing_names_on_install=False --enablerepo=crb $(cat packages.txt) + +# XXX why isn't this done in the ganesha package? +RUN mkdir -p /var/run/ganesha + +# Disable sync with udev since the container can not contact udev +RUN \ + sed -i -e 's/udev_rules = 1/udev_rules = 0/' \ + -e 's/udev_sync = 1/udev_sync = 0/' \ + -e 's/obtain_device_list_from_udev = 1/obtain_device_list_from_udev = 0/' \ + /etc/lvm/lvm.conf && \ + # validate the sed command worked as expected + grep -sqo "udev_sync = 0" /etc/lvm/lvm.conf && \ + grep -sqo "udev_rules = 0" /etc/lvm/lvm.conf && \ + grep -sqo "obtain_device_list_from_udev = 0" /etc/lvm/lvm.conf + +# CLEAN UP! 
+RUN set -x && \ + dnf clean all && \ + rm -rf /var/cache/dnf/* && \ + rm -rf /var/lib/dnf/* && \ + rm -f /var/lib/rpm/__db* && \ + # remove unnecessary files with big impact + rm -rf /etc/selinux /usr/share/{doc,man,selinux} && \ + # don't keep compiled python binaries + find / -xdev \( -name "*.pyc" -o -name "*.pyo" \) -delete + +# Verify that the packages installed haven't been accidentally cleaned, then +# clean the package list and re-clean unnecessary RPM database files +RUN rpm -q $(cat packages.txt) && rm -f /var/lib/rpm/__db* && rm -f *packages.txt + +# +# Set some envs in the container for quickly inspecting details about the build at runtime +ENV CEPH_IS_DEVEL="${CI_CONTAINER}" \ + CEPH_REF="${CEPH_REF}" \ + CEPH_OSD_FLAVOR="${OSD_FLAVOR}" \ + FROM_IMAGE="${FROM_IMAGE}" + diff --git a/container/build.sh b/container/build.sh new file mode 100755 index 000000000000..5edf469d2d2e --- /dev/null +++ b/container/build.sh @@ -0,0 +1,175 @@ +#!/bin/bash -ex +# vim: ts=4 sw=4 expandtab + +# repo auth with write perms must be present (this script does not log into +# CONTAINER_REPO_HOSTNAME and CONTAINER_REPO_ORGANIZATION). +# If NO_PUSH is set, no login is necessary + + +CFILE=${1:-Containerfile} +shift || true + +usage() { + cat << EOF +$0 [containerfile] (defaults to 'Containerfile') +For a CI build (from ceph-ci.git, built and pushed to shaman): +CI_CONTAINER: must be 'true' +FLAVOR (OSD flavor, default or crimson) +BRANCH (of Ceph. /) +CEPH_SHA1 (of Ceph) +ARCH (of build host, and resulting container) +CONTAINER_REPO_HOSTNAME (quay.ceph.io, for CI, for instance) +CONTAINER_REPO_ORGANIZATION (ceph-ci, for CI, for instance) +CONTAINER_REPO_USERNAME +CONTAINER_REPO_PASSWORD + +For a release build: (from ceph.git, built and pushed to download.ceph.com) +CI_CONTAINER: must be 'false' +and you must also add +VERSION (for instance, 19.1.0) for tagging the image + +You can avoid the push step (for testing) by setting NO_PUSH to anything +EOF +} + +CI_CONTAINER=${CI_CONTAINER:-false} +FLAVOR=${FLAVOR:-default} +# default: current checked-out branch +BRANCH=${BRANCH:-$(git rev-parse --abbrev-ref HEAD)} +# default: current checked-out branch +CEPH_SHA1=${CEPH_SHA1:-$(git rev-parse HEAD)} +# default: build host arch +ARCH=${ARCH:-$(arch)} +if [[ "${ARCH}" == "aarch64" ]] ; then ARCH=arm64; fi +if [[ ${CI_CONTAINER} == "true" ]] ; then + CONTAINER_REPO_HOSTNAME=${CONTAINER_REPO_HOSTNAME:-quay.ceph.io} + CONTAINER_REPO_ORGANIZATION=${CONTAINER_REPO_ORGANIZATION:-ceph/ceph-${ARCH}} +else + CONTAINER_REPO_HOSTNAME=${CONTAINER_REPO_HOSTNAME:-quay.io} + CONTAINER_REPO_ORGANIZATION=${CONTAINER_REPO_ORGANIZATION:-ceph/ceph} + # default: most-recent annotated tag + VERSION=${VERSION:-$(git describe --abbrev=0)} +fi + +# check for existence of all required variables +: "${CI_CONTAINER:?}" +: "${FLAVOR:?}" +: "${BRANCH:?}" +: "${CEPH_SHA1:?}" +: "${ARCH:?}" +: "${CONTAINER_REPO_HOSTNAME:?}" +: "${CONTAINER_REPO_ORGANIZATION:?}" +: "${CONTAINER_REPO_USERNAME:?}" +: "${CONTAINER_REPO_PASSWORD:?}" +if [[ ${CI_CONTAINER} != "true" ]] ; then ${VERSION:?}; fi + +# check for valid repo auth (if pushing) +ORGURL=${CONTAINER_REPO_HOSTNAME}/${CONTAINER_REPO_ORGANIZATION} +MINIMAL_IMAGE=${ORGURL}/ceph:minimal-test +if [[ ${NO_PUSH} != "true" ]] ; then + podman rmi ${MINIMAL_IMAGE} || true + echo "FROM scratch" | podman build -f - -t ${MINIMAL_IMAGE} + if ! podman push ${MINIMAL_IMAGE} ; then + echo "Not authenticated to ${ORGURL}; need docker/podman login?" 
+ exit 1 + fi + podman rmi ${MINIMAL_IMAGE} | true +fi + +if [[ -z "${CEPH_GIT_REPO}" ]] ; then + if [[ ${CI_CONTAINER} == "true" ]]; then + CEPH_GIT_REPO=https://github.com/ceph/ceph-ci.git + else + CEPH_GIT_REPO=https://github.com/ceph/ceph.git + fi +fi + +# BRANCH will be, say, origin/main. remove / +BRANCH=${BRANCH##*/} + +podman build --pull=newer --squash -f $CFILE -t build.sh.output \ + --build-arg FROM_IMAGE=${FROM_IMAGE:-quay.io/centos/centos:stream9} \ + --build-arg CEPH_SHA1=${CEPH_SHA1} \ + --build-arg CEPH_GIT_REPO=${CEPH_GIT_REPO} \ + --build-arg CEPH_REF=${BRANCH:-main} \ + --build-arg OSD_FLAVOR=${FLAVOR:-default} \ + --build-arg CI_CONTAINER=${CI_CONTAINER:-default} \ + 2>&1 + +image_id=$(podman image ls localhost/build.sh.output --format '{{.ID}}') + +# grab useful image attributes for building the tag +# +# the variable settings are prefixed with "export CEPH_CONTAINER_" so that +# an eval or . can be used to put them into the environment +# +# PATH is removed from the output as it would cause problems for this +# parent script and its children +# +# notes: +# +# we want .Architecture and everything in .Config.Env +# +# printf will not accept "\n" (is this a podman bug?) +# so construct vars with two calls to podman inspect, joined by a newline, +# so that vars will get the output of the first command, newline, output +# of the second command +# +vars="$(podman inspect -f '{{printf "export CEPH_CONTAINER_ARCH=%v" .Architecture}}' ${image_id}) +$(podman inspect -f '{{range $index, $value := .Config.Env}}export CEPH_CONTAINER_{{$value}}{{println}}{{end}}' ${image_id})" +vars="$(echo "${vars}" | grep -v PATH)" +eval ${vars} + +# remove everything up to and including the last slash +fromtag=${CEPH_CONTAINER_FROM_IMAGE##*/} +# translate : to - +fromtag=${fromtag/:/-} +builddate=$(date +%Y%m%d) +local_tag=${fromtag}-${CEPH_CONTAINER_CEPH_REF}-${CEPH_CONTAINER_ARCH}-${builddate} + +repopath=${CONTAINER_REPO_HOSTNAME}/${CONTAINER_REPO_ORGANIZATION} + +if [[ ${CI_CONTAINER} == "true" ]] ; then + # ceph-ci conventions for remote tags: + # requires ARCH, BRANCH, CEPH_SHA1, FLAVOR + full_repo_tag=$repopath/ceph:${BRANCH}-${fromtag}-${ARCH}-devel + branch_repo_tag=$repopath/ceph:${BRANCH} + sha1_repo_tag=$repopath/ceph:${CEPH_SHA1} + + if [[ "${ARCH}" == "arm64" ]] ; then + branch_repo_tag=${branch_repo_tag}-arm64 + sha1_repo_tag=${sha1_repo_tag}-arm64 + fi + + podman tag ${image_id} ${full_repo_tag} + podman tag ${image_id} ${branch_repo_tag} + podman tag ${image_id} ${sha1_repo_tag} + + if [[ ${FLAVOR} == "crimson" && ${ARCH} == "x86_64" ]] ; then + sha1_flavor_repo_tag=${sha1_repo_tag}-${FLAVOR} + podman tag ${image_id} ${sha1_flavor_repo_tag} + if [[ -z "${NO_PUSH}" ]] ; then + podman push ${sha1_flavor_repo_tag} + fi + exit + fi + + if [[ -z "${NO_PUSH}" ]] ; then + podman push ${full_repo_tag} + podman push ${branch_repo_tag} + podman push ${sha1_repo_tag} + fi +else + # + # non-CI build. 
Tags are like v19.1.0-20240701 + # push to quay.ceph.io/ceph/prerelease + # + version_tag=${repopath}/prerelease/ceph-${ARCH}:${VERSION}-${builddate} + + podman tag ${image_id} ${version_tag} + if [[ -z "${NO_PUSH}" ]] ; then + podman push ${image_id} ${version_tag} + fi +fi + + diff --git a/container/make-manifest-list.py b/container/make-manifest-list.py new file mode 100755 index 000000000000..010dcaed2b72 --- /dev/null +++ b/container/make-manifest-list.py @@ -0,0 +1,164 @@ +#!/usr/bin/python3 +# +# make a combined "manifest-list" container out of two arch-specific containers +# searches for latest tags on HOST/{AMD,ARM}64_REPO, makes sure they refer +# to the same Ceph SHA1, and creates a manifest-list ("fat") image on +# MANIFEST_HOST/MANIFEST_REPO with the 'standard' set of tags. +# +# uses scratch local manifest LOCALMANIFEST, will be destroyed if present + +from datetime import datetime +import functools +import json +import os +import re +import subprocess +import sys + +# optional env vars (will default if not set) + +OPTIONAL_VARS = ( + 'HOST', + 'AMD64_REPO', + 'ARM64_REPO', + 'MANIFEST_HOST', + 'MANIFEST_REPO', +) + +# Manifest image. Will be destroyed if already present. +LOCALMANIFEST = 'localhost/m' + + +def dump_vars(names, vardict): + for name in names: + print(f'{name}: {vardict[name]}', file=sys.stderr) + + +def run_command(args): + print(f'running {args}', file=sys.stderr) + if not isinstance(args, list): + args = args.split() + try: + result = subprocess.run( + args, + capture_output=True, + text=True, + check=True) + return True, result.stdout, result.stderr + + except subprocess.CalledProcessError as e: + print(f"Command '{e.cmd}' returned {e.returncode}") + print("Error output:") + print(e.stderr) + return False, result.stdout, result.stderr + + +def get_command_output(args): + success, stdout, stderr = run_command(args) + return (stdout if success else None) + + +def run_command_show_failure(args): + success, stdout, stderr = run_command(args) + if not success: + print(f'{args} failed:', file=sys.stderr) + print(f'stdout:\n{stdout}') + print(f'stderr:\n{stderr}') + return success + + +@functools.lru_cache +def get_latest_tag(path): + latest_tag = json.loads( + get_command_output(f'skopeo list-tags docker://{path}') + )['Tags'][-1] + return latest_tag + + +@functools.lru_cache +def get_image_inspect(path): + info = json.loads( + get_command_output(f'skopeo inspect docker://{path}') + ) + return info + + +def get_sha1(info): + return info['Labels']['GIT_COMMIT'] + + +def main(): + host = os.environ.get('HOST', 'quay.io') + amd64_repo = os.environ.get('AMD64_REPO', 'ceph/ceph-amd64') + arm64_repo = os.environ.get('ARM64_REPO', 'ceph/ceph-arm64') + manifest_host = os.environ.get('MANIFEST_HOST', host) + manifest_repo = os.environ.get('MANIFEST_REPO', 'ceph/ceph') + dump_vars( + ('host', + 'amd64_repo', + 'arm64_repo', + 'manifest_host', + 'manifest_repo', + ), + locals()) + + repopaths = ( + f'{host}/{amd64_repo}', + f'{host}/{arm64_repo}', + ) + tags = [get_latest_tag(p) for p in repopaths] + print(f'latest tags: amd64:{tags[0]} arm64:{tags[1]}') + + # check that version of latest tag matches + version_re = \ + r'v(?P\d+)\.(?P\d+)\.(?P\d+)-(?P\d+)' + versions = list() + for tag in tags: + mo = re.match(version_re, tag) + ver = f'{mo.group("major")}.{mo.group("minor")}.{mo.group("micro")}' + versions.append(ver) + if versions[0] != versions[1]: + print( + f'version mismatch: amd64:{versions[0]} arm64:{versions[1]}', + file=sys.stderr, + ) + return(1) + + major, 
minor, micro = mo.group(1), mo.group(2), mo.group(3) + print(f'Ceph version: {major}.{minor}.{micro}', file=sys.stderr) + + # check that ceph sha1 of two arch images matches + paths_with_tags = [f'{p}:{t}' for (p, t) in zip(repopaths, tags)] + info = [get_image_inspect(p) for p in paths_with_tags] + sha1s = [get_sha1(i) for i in info] + if sha1s[0] != sha1s[1]: + print( + f'sha1 mismatch: amd64: {sha1s[0]} arm64: {sha1s[1]}', + file=sys.stderr, + ) + builddate = [i['Created'] for i in info] + print( + f'Build dates: amd64: {builddate[0]} arm64: {builddate[1]}', + file=sys.stderr, + ) + return(1) + + # create manifest list image with the standard list of tags + # ignore failure on manifest rm + run_command(f'podman manifest rm localhost/m') + run_command_show_failure(f'podman manifest create localhost/m') + for p in paths_with_tags: + run_command_show_failure(f'podman manifest add m {p}') + base = f'{manifest_host}/{manifest_repo}' + for t in ( + f'v{major}', + f'v{major}.{minor}', + f'v{major}.{minor}.{micro}', + f'v{major}.{minor}.{micro}-{datetime.today().strftime("%Y%m%d")}', + ): + run_command_show_failure( + f'podman manifest push localhost/m {base}:{t}') + + +if (__name__ == '__main__'): + sys.exit(main()) diff --git a/debian/ceph-base.postinst b/debian/ceph-base.postinst index 75eeb59c6246..35c88a0921b1 100644 --- a/debian/ceph-base.postinst +++ b/debian/ceph-base.postinst @@ -31,15 +31,16 @@ set -e case "$1" in configure) rm -f /etc/init/ceph.conf - [ -x /sbin/start ] && start ceph-all || : # adjust file and directory permissions for DIR in /var/lib/ceph/* ; do - if ! dpkg-statoverride --list $DIR >/dev/null + if ! dpkg-statoverride --list "${DIR}" >/dev/null then - chown $SERVER_USER:$SERVER_GROUP $DIR + chown "${SERVER_USER}:${SERVER_GROUP}" "${DIR}" fi done + + chown "${SERVER_USER}:${SERVER_GROUP}" -R /var/lib/ceph/crash/*; ;; abort-upgrade|abort-remove|abort-deconfigure) : diff --git a/debian/ceph-base.prerm b/debian/ceph-base.prerm index bfd7d3d6fb27..12e5da7d6331 100644 --- a/debian/ceph-base.prerm +++ b/debian/ceph-base.prerm @@ -5,7 +5,6 @@ set -e case "$1" in remove) - [ -x /sbin/stop ] && stop ceph-all || true invoke-rc.d ceph stop || { RESULT=$? if [ $RESULT != 100 ]; then diff --git a/debian/ceph-common.postinst b/debian/ceph-common.postinst index d147de5386b2..e058d096ee1a 100644 --- a/debian/ceph-common.postinst +++ b/debian/ceph-common.postinst @@ -52,16 +52,20 @@ case "$1" in --system \ --no-create-home \ --disabled-password \ + --home $SERVER_HOME \ --uid $SERVER_UID \ --gid $SERVER_GID \ $SERVER_USER 2>/dev/null || true echo "..done" fi # 3. adjust passwd entry + # NOTE: we should use "adduser --comment" if we don't need to + # support adduser <3.136. "adduser --gecos" is deprecated, + # and will be removed, so we don't use it. the first distro + # using --comment is debian/trixie or ubuntu/mantic. echo -n "Setting system user $SERVER_USER properties.." 
- usermod -c "$SERVER_NAME" \ - -d $SERVER_HOME \ - -g $SERVER_GROUP \ + usermod --comment "$SERVER_NAME" \ + --gid $SERVER_GROUP \ $SERVER_USER # Unlock $SERVER_USER in case it is locked from an uninstall if [ -f /etc/shadow ]; then diff --git a/debian/ceph-exporter.install b/debian/ceph-exporter.install new file mode 100644 index 000000000000..1ac0edcd2a18 --- /dev/null +++ b/debian/ceph-exporter.install @@ -0,0 +1,2 @@ +lib/systemd/system/ceph-exporter* +usr/bin/ceph-exporter diff --git a/debian/ceph-mds.postinst b/debian/ceph-mds.postinst index b69efedaafb0..2fad7537b94b 100644 --- a/debian/ceph-mds.postinst +++ b/debian/ceph-mds.postinst @@ -24,8 +24,6 @@ set -e case "$1" in configure) - [ -x /sbin/start ] && start ceph-mds-all || : - if ! dpkg-statoverride --list /var/lib/ceph/mds >/dev/null then chown $SERVER_USER:$SERVER_GROUP /var/lib/ceph/mds diff --git a/debian/ceph-mds.prerm b/debian/ceph-mds.prerm index 654518a7d552..51f30d7f98e1 100644 --- a/debian/ceph-mds.prerm +++ b/debian/ceph-mds.prerm @@ -5,7 +5,6 @@ set -e case "$1" in remove) - [ -x /sbin/stop ] && stop ceph-mds-all || : invoke-rc.d ceph stop mds || { RESULT=$? if [ $RESULT != 100 ]; then diff --git a/debian/ceph-mgr-modules-core.install b/debian/ceph-mgr-modules-core.install index e99f78efb9fc..5d1e35204fc2 100644 --- a/debian/ceph-mgr-modules-core.install +++ b/debian/ceph-mgr-modules-core.install @@ -15,7 +15,7 @@ usr/share/ceph/mgr/pg_autoscaler usr/share/ceph/mgr/progress usr/share/ceph/mgr/prometheus usr/share/ceph/mgr/rbd_support -usr/share/ceph/mgr/restful +usr/share/ceph/mgr/rgw usr/share/ceph/mgr/selftest usr/share/ceph/mgr/snap_schedule usr/share/ceph/mgr/stats @@ -24,4 +24,3 @@ usr/share/ceph/mgr/telegraf usr/share/ceph/mgr/telemetry usr/share/ceph/mgr/test_orchestrator usr/share/ceph/mgr/volumes -usr/share/ceph/mgr/zabbix diff --git a/debian/ceph-mgr-modules-core.requires b/debian/ceph-mgr-modules-core.requires index 9814e67b7108..07769e866f88 100644 --- a/debian/ceph-mgr-modules-core.requires +++ b/debian/ceph-mgr-modules-core.requires @@ -1,7 +1,5 @@ natsort CherryPy -pecan -werkzeug +packaging requests -pkg-resources python-dateutil diff --git a/debian/ceph-mgr.postinst b/debian/ceph-mgr.postinst index 6d38ccf09feb..5223a8a83ad2 100644 --- a/debian/ceph-mgr.postinst +++ b/debian/ceph-mgr.postinst @@ -24,8 +24,6 @@ set -e case "$1" in configure) - [ -x /sbin/start ] && start ceph-mgr-all || : - if ! dpkg-statoverride --list /var/lib/ceph/mgr >/dev/null then chown $SERVER_USER:$SERVER_GROUP /var/lib/ceph/mgr diff --git a/debian/ceph-mgr.prerm b/debian/ceph-mgr.prerm index 6fb7b245a78e..5e4bf42c2dda 100644 --- a/debian/ceph-mgr.prerm +++ b/debian/ceph-mgr.prerm @@ -5,7 +5,6 @@ set -e case "$1" in remove) - [ -x /sbin/stop ] && stop ceph-mgr-all || : invoke-rc.d ceph stop mgr || { RESULT=$? 
if [ $RESULT != 100 ]; then diff --git a/debian/ceph-mgr.requires b/debian/ceph-mgr.requires index bf334fb9bd29..39336330c939 100644 --- a/debian/ceph-mgr.requires +++ b/debian/ceph-mgr.requires @@ -1,3 +1,4 @@ +bcrypt pyOpenSSL cephfs ceph-argparse diff --git a/debian/ceph-mon.postinst b/debian/ceph-mon.postinst index 688d8141d39a..935a0ca55b28 100644 --- a/debian/ceph-mon.postinst +++ b/debian/ceph-mon.postinst @@ -24,7 +24,7 @@ set -e case "$1" in configure) - [ -x /sbin/start ] && start ceph-mon-all || : + : ;; abort-upgrade|abort-remove|abort-deconfigure) : diff --git a/debian/ceph-mon.prerm b/debian/ceph-mon.prerm index 5f64058a5da1..a31fc3c21842 100644 --- a/debian/ceph-mon.prerm +++ b/debian/ceph-mon.prerm @@ -5,7 +5,6 @@ set -e case "$1" in remove) - [ -x /sbin/stop ] && stop ceph-mon-all || true invoke-rc.d ceph stop mon || { RESULT=$? if [ $RESULT != 100 ]; then diff --git a/debian/ceph-osd.postinst b/debian/ceph-osd.postinst index 04e33b8601f9..be99d1d26513 100644 --- a/debian/ceph-osd.postinst +++ b/debian/ceph-osd.postinst @@ -25,7 +25,6 @@ set -e case "$1" in configure) [ -x /etc/init.d/procps ] && invoke-rc.d procps restart || : - [ -x /sbin/start ] && start ceph-osd-all || : ;; abort-upgrade|abort-remove|abort-deconfigure) : diff --git a/debian/ceph-osd.prerm b/debian/ceph-osd.prerm index 40f07b62152e..93c459614e44 100644 --- a/debian/ceph-osd.prerm +++ b/debian/ceph-osd.prerm @@ -5,7 +5,6 @@ set -e case "$1" in remove) - [ -x /sbin/stop ] && stop ceph-osd-all || true invoke-rc.d ceph stop osd || { RESULT=$? if [ $RESULT != 100 ]; then diff --git a/debian/ceph-test.install b/debian/ceph-test.install index aeab6fac3d96..fb2847ccb1e6 100644 --- a/debian/ceph-test.install +++ b/debian/ceph-test.install @@ -16,5 +16,6 @@ usr/bin/ceph_scratchtool usr/bin/ceph_scratchtoolpp usr/bin/ceph_test_* usr/bin/ceph-dedup-tool +usr/bin/ceph-dedup-daemon usr/lib/ceph/ceph-monstore-update-crush.sh usr/share/java/libcephfs-test.jar diff --git a/debian/cephadm.postinst b/debian/cephadm.postinst index 53d503e1eaa0..50aa6f8dd510 100644 --- a/debian/cephadm.postinst +++ b/debian/cephadm.postinst @@ -25,7 +25,12 @@ case "$1" in # 1. create user if not existing if ! getent passwd | grep -q "^cephadm:"; then echo -n "Adding system user cephadm.." - adduser --quiet --system --disabled-password --gecos 'cephadm user for mgr/cephadm' --shell /bin/bash cephadm 2>/dev/null || true + adduser --quiet \ + --system \ + --disabled-password \ + --home /home/cephadm \ + --shell /bin/bash cephadm 2>/dev/null || true + usermod --comment "cephadm user for mgr/cephadm" cephadm echo "..done" fi @@ -38,19 +43,19 @@ case "$1" in # set up (initially empty) .ssh/authorized_keys file if ! test -d /home/cephadm/.ssh; then - mkdir /home/cephadm/.ssh - chown --reference /home/cephadm /home/cephadm/.ssh - chmod 0700 /home/cephadm/.ssh + mkdir /home/cephadm/.ssh + chown --reference /home/cephadm /home/cephadm/.ssh + chmod 0700 /home/cephadm/.ssh fi if ! 
test -e /home/cephadm/.ssh/authorized_keys; then - touch /home/cephadm/.ssh/authorized_keys - chown --reference /home/cephadm /home/cephadm/.ssh/authorized_keys - chmod 0600 /home/cephadm/.ssh/authorized_keys + touch /home/cephadm/.ssh/authorized_keys + chown --reference /home/cephadm /home/cephadm/.ssh/authorized_keys + chmod 0600 /home/cephadm/.ssh/authorized_keys fi ;; abort-upgrade|abort-remove|abort-deconfigure) - : + : ;; *) diff --git a/debian/control b/debian/control index 8263e3251835..d31a82bbc759 100644 --- a/debian/control +++ b/debian/control @@ -54,6 +54,7 @@ Build-Depends: automake, liblttng-ust-dev, liblua5.3-dev, liblz4-dev (>= 0.0~r131), + libnbd-dev, libncurses-dev, libnss3-dev, liboath-dev, @@ -72,7 +73,6 @@ Build-Depends: automake, libre2-dev, libutf8proc-dev (>= 2.2.0), librdkafka-dev, - luarocks, libthrift-dev (>= 0.13.0), libyaml-cpp-dev (>= 0.6), libzstd-dev , @@ -84,20 +84,21 @@ Build-Depends: automake, libndctl-dev (>= 63) , libpmem-dev , libpmemobj-dev (>= 1.8) , + libprotobuf-dev , ninja-build, nlohmann-json3-dev, patch, pkg-config, prometheus , + protobuf-compiler , python3-all-dev, python3-cherrypy3, python3-natsort, - python3-jwt , - python3-pecan , python3-bcrypt , tox , python3-coverage , python3-dateutil , + python3-grpcio , python3-openssl , python3-prettytable , python3-requests , @@ -105,7 +106,6 @@ Build-Depends: automake, python3-setuptools, python3-sphinx, python3-venv, - python3-werkzeug , python3-yaml, ragel , socat , @@ -185,7 +185,8 @@ Description: debugging symbols for ceph-base Package: cephadm Architecture: linux-any Recommends: podman (>= 2.0.2) | docker.io | docker-ce -Depends: lvm2, +Depends: adduser (>= 3.11), + lvm2, python3, ${python3:Depends}, Description: cephadm utility to bootstrap ceph daemons with systemd and containers @@ -353,6 +354,30 @@ Description: debugging symbols for ceph-mgr . This package contains the debugging symbols for ceph-mgr. +Package: ceph-exporter +Architecture: linux-any +Depends: ceph-base (= ${binary:Version}), +Description: metrics exporter for the ceph distributed storage system + Ceph is a massively scalable, open-source, distributed + storage system that runs on commodity hardware and delivers object, + block and file system storage. + . + This package contains the metrics exporter daemon, which is used to expose + the performance metrics. + +Package: ceph-exporter-dbg +Architecture: linux-any +Section: debug +Priority: extra +Depends: ceph-exporter (= ${binary:Version}), + ${misc:Depends}, +Description: debugging symbols for ceph-exporter + Ceph is a massively scalable, open-source, distributed + storage system that runs on commodity hardware and delivers object, + block and file system storage. + . + This package contains the debugging symbols for ceph-exporter. 
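Editor's note on the new ceph-exporter packaging above: debian/ceph-exporter.install (added earlier in this patch) ships only the systemd units and the /usr/bin/ceph-exporter binary, so after a local package build a quick sanity check is to list the package contents. This is a minimal sketch; the .deb file name pattern is illustrative and will differ per build/arch.

    # Confirm the freshly built package carries the expected payload
    dpkg-deb -c ceph-exporter_*.deb | grep -E 'usr/bin/ceph-exporter|lib/systemd/system/ceph-exporter'
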
+ Package: ceph-mon Architecture: linux-any Depends: ceph-base (= ${binary:Version}), @@ -390,6 +415,7 @@ Depends: ceph-base (= ${binary:Version}), ${misc:Depends}, ${python3:Depends}, ${shlibs:Depends}, + libprotobuf23 , Replaces: ceph (<< 10), ceph-test (<< 12.2.2-14), ceph-osd (<< 17.0.0) @@ -611,7 +637,8 @@ Description: debugging symbols for rbd-nbd Package: ceph-common Architecture: linux-any -Depends: librbd1 (= ${binary:Version}), +Depends: adduser (>= 3.11), + librbd1 (= ${binary:Version}), python3-cephfs (= ${binary:Version}), python3-ceph-argparse (= ${binary:Version}), python3-ceph-common (= ${binary:Version}), @@ -919,6 +946,8 @@ Section: libs Depends: librados2 (= ${binary:Version}), ${misc:Depends}, ${shlibs:Depends}, + liblua5.3-dev, + luarocks, Description: RADOS Gateway client library RADOS is a distributed object store used by the Ceph distributed storage system. This package provides a REST gateway to the diff --git a/debian/radosgw.postinst b/debian/radosgw.postinst index 07e3ec30b6d3..95af1c030ad9 100644 --- a/debian/radosgw.postinst +++ b/debian/radosgw.postinst @@ -30,8 +30,6 @@ set -e case "$1" in configure) - [ -x /sbin/start ] && start radosgw-all || : - if ! dpkg-statoverride --list /var/lib/ceph/radosgw >/dev/null then chown $SERVER_USER:$SERVER_GROUP /var/lib/ceph/radosgw diff --git a/debian/radosgw.prerm b/debian/radosgw.prerm index 4120fb6272c1..0288ab77b3a3 100644 --- a/debian/radosgw.prerm +++ b/debian/radosgw.prerm @@ -5,7 +5,6 @@ set -e case "$1" in remove) - [ -x /sbin/stop ] && stop radosgw-all || true invoke-rc.d radosgw stop || { RESULT=$? if [ $RESULT != 100 ]; then diff --git a/debian/rules b/debian/rules index ed7f4a255ed4..3fbed3f3a2e8 100755 --- a/debian/rules +++ b/debian/rules @@ -77,6 +77,7 @@ override_dh_auto_install: install -D -m 755 src/tools/rbd_nbd/rbd-nbd_quiesce $(DESTDIR)/usr/libexec/rbd-nbd/rbd-nbd_quiesce install -m 644 -D monitoring/ceph-mixin/prometheus_alerts.yml $(DESTDIR)/etc/prometheus/ceph/ceph_default_alerts.yml + install -m 644 -D monitoring/ceph-mixin/dashboards_out/* ${DESTDIR}/etc/grafana/dashboards/ceph-dashboard # doc/changelog is a directory, which confuses dh_installchangelogs override_dh_installchangelogs: @@ -105,6 +106,7 @@ override_dh_strip: dh_strip -pceph-mds --dbg-package=ceph-mds-dbg dh_strip -pceph-fuse --dbg-package=ceph-fuse-dbg dh_strip -pceph-mgr --dbg-package=ceph-mgr-dbg + dh_strip -pceph-exporter --dbg-package=ceph-exporter-dbg dh_strip -pceph-mon --dbg-package=ceph-mon-dbg dh_strip -pceph-osd --dbg-package=ceph-osd-dbg dh_strip -pceph-base --dbg-package=ceph-base-dbg diff --git a/do_cmake.sh b/do_cmake.sh index 6936a5596ebd..50befc81a49c 100755 --- a/do_cmake.sh +++ b/do_cmake.sh @@ -2,7 +2,7 @@ set -ex if [ -d .git ]; then - git submodule update --init --recursive + git submodule update --init --recursive --progress --recommend-shallow fi : ${BUILD_DIR:=build} @@ -14,24 +14,19 @@ if [ -e $BUILD_DIR ]; then fi PYBUILD="3" -ARGS="-GNinja" +ARGS="${ARGS} -GNinja" if [ -r /etc/os-release ]; then source /etc/os-release case "$ID" in fedora) - if [ "$VERSION_ID" -ge "37" ] ; then - PYBUILD="3.11" - elif [ "$VERSION_ID" -ge "35" ] ; then - PYBUILD="3.10" - elif [ "$VERSION_ID" -ge "33" ] ; then - PYBUILD="3.9" - elif [ "$VERSION_ID" -ge "32" ] ; then - PYBUILD="3.8" + if [ "$VERSION_ID" -ge "39" ] ; then + PYBUILD="3.12" else - PYBUILD="3.7" + # Fedora 37 and above + PYBUILD="3.11" fi ;; - rocky|rhel|centos) + almalinux|rocky|rhel|centos) MAJOR_VER=$(echo "$VERSION_ID" | sed -e 's/\..*$//') if [ 
"$MAJOR_VER" -ge "9" ] ; then PYBUILD="3.9" @@ -63,7 +58,10 @@ fi ARGS+=" -DWITH_PYTHON3=${PYBUILD}" -if type ccache > /dev/null 2>&1 ; then +if type sccache > /dev/null 2>&1 ; then + echo "enabling sccache" + ARGS+=" -DWITH_SCCACHE=ON" +elif type ccache > /dev/null 2>&1 ; then echo "enabling ccache" ARGS+=" -DWITH_CCACHE=ON" fi diff --git a/doc/.gitignore b/doc/.gitignore index 0c7c74746ae9..9ee3c337d0ab 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -1,2 +1,3 @@ /overview.png /object_store.png +_build/ diff --git a/doc/_ext/ceph_commands.py b/doc/_ext/ceph_commands.py index 861a013ea5a4..0697c71f0e1c 100644 --- a/doc/_ext/ceph_commands.py +++ b/doc/_ext/ceph_commands.py @@ -177,7 +177,7 @@ def parse_cmd(cmd): @staticmethod def parse_args(args): - return [Sig._parse_arg_desc(arg) for arg in args.split()] + return [Sig._parse_arg_desc(arg) for arg in args] TEMPLATE = ''' @@ -285,12 +285,6 @@ def mocked_modules(self): # make diskprediction_local happy mock_imports += ['numpy', 'scipy'] - # make restful happy - mock_imports += ['pecan', - 'pecan.rest', - 'pecan.hooks', - 'werkzeug', - 'werkzeug.serving'] for m in mock_imports: args = {} @@ -358,8 +352,9 @@ def run(self): cmds = sorted(cmds, key=lambda cmd: cmd.prefix) self._render_cmds(cmds) - orig_rgw_mod = sys.modules['pybind_rgw_mod'] - sys.modules['rgw'] = orig_rgw_mod + if 'pybind_rgw_mod' in sys.modules: + orig_rgw_mod = sys.modules['pybind_rgw_mod'] + sys.modules['rgw'] = orig_rgw_mod return [] diff --git a/doc/_ext/ceph_confval.py b/doc/_ext/ceph_confval.py index cde538b45c9a..da93d1b415c4 100644 --- a/doc/_ext/ceph_confval.py +++ b/doc/_ext/ceph_confval.py @@ -289,12 +289,6 @@ def mocked_modules(self): # make diskprediction_local happy mock_imports += ['numpy', 'scipy'] - # make restful happy - mock_imports += ['pecan', - 'pecan.rest', - 'pecan.hooks', - 'werkzeug', - 'werkzeug.serving'] for m in mock_imports: args = {} diff --git a/doc/_ext/ceph_releases.py b/doc/_ext/ceph_releases.py index 94e92ffdd6ac..481c2a1b6194 100644 --- a/doc/_ext/ceph_releases.py +++ b/doc/_ext/ceph_releases.py @@ -191,7 +191,7 @@ def run(self): class CephTimeline(Directive): has_content = False - required_arguments = 3 + required_arguments = 4 optional_arguments = 0 option_spec = {} diff --git a/doc/_static/js/pgcalc.js b/doc/_static/js/pgcalc.js new file mode 100644 index 000000000000..e13c30895fcf --- /dev/null +++ b/doc/_static/js/pgcalc.js @@ -0,0 +1,357 @@ +var _____WB$wombat$assign$function_____ = function(name) {return (self._wb_wombat && self._wb_wombat.local_init && self._wb_wombat.local_init(name)) || self[name]; }; +if (!self.__WB_pmw) { self.__WB_pmw = function(obj) { this.__WB_source = obj; return this; } } +{ + let window = _____WB$wombat$assign$function_____("window"); + let self = _____WB$wombat$assign$function_____("self"); + let document = _____WB$wombat$assign$function_____("document"); + let location = _____WB$wombat$assign$function_____("location"); + let top = _____WB$wombat$assign$function_____("top"); + let parent = _____WB$wombat$assign$function_____("parent"); + let frames = _____WB$wombat$assign$function_____("frames"); + let opener = _____WB$wombat$assign$function_____("opener"); + +var pow2belowThreshold = 0.25 +var key_values={}; +key_values['poolName'] ={'name':'Pool Name','default':'newPool','description': 'Name of the pool in question. Typical pool names are included below.', 'width':'30%; text-align: left'}; +key_values['size'] ={'name':'Size','default': 3, 'description': 'Number of replicas the pool will have. 
Default value of 3 is pre-filled.', 'width':'10%', 'global':1}; +key_values['osdNum'] ={'name':'OSD #','default': 100, 'description': 'Number of OSDs which this Pool will have PGs in. Typically, this is the entire Cluster OSD count, but could be less based on CRUSH rules. (e.g. Separate SSD and SATA disk sets)', 'width':'10%', 'global':1}; +key_values['percData'] ={'name':'%Data', 'default': 5, 'description': 'This value represents the approximate percentage of data which will be contained in this pool for that specific OSD set. Examples are pre-filled below for guidance.','width':'10%'}; +key_values['targPGsPerOSD'] ={'name':'Target PGs per OSD', 'default':100, 'description': 'This value should be populated based on the following guidance:', 'width':'10%', 'global':1, 'options': [ ['100','If the cluster OSD count is not expected to increase in the foreseeable future.'], ['200', 'If the cluster OSD count is expected to increase (up to double the size) in the foreseeable future.']]} + +var notes ={ + 'totalPerc':'"Total Data Percentage" below table should be a multiple of 100%.', + 'totalPGs':'"Total PG Count" below table will be the count of Primary PG copies. However, when calculating total PGs per OSD average, you must include all copies.', + 'noDecrease':'It\'s also important to know that the PG count can be increased, but NEVER decreased without destroying / recreating the pool. However, increasing the PG Count of a pool is one of the most impactful events in a Ceph Cluster, and should be avoided for production clusters if possible.', +}; + +var presetTables={}; +presetTables['All-in-One']=[ + { 'poolName' : 'rbd', 'size' : '3', 'osdNum' : '100', 'percData' : '100', 'targPGsPerOSD' : '100'}, +]; +presetTables['OpenStack']=[ + { 'poolName' : 'cinder-backup', 'size' : '3', 'osdNum' : '100', 'percData' : '25', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'cinder-volumes', 'size' : '3', 'osdNum' : '100', 'percData' : '53', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'ephemeral-vms', 'size' : '3', 'osdNum' : '100', 'percData' : '15', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'glance-images', 'size' : '3', 'osdNum' : '100', 'percData' : '7', 'targPGsPerOSD' : '100'}, +]; +presetTables['OpenStack w RGW - Jewel and later']=[ + { 'poolName' : '.rgw.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.control', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.data.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.gc', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.intent-log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.meta', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.usage', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.users.keys', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.users.email', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.users.swift', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 
'poolName' : 'default.rgw.users.uid', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.buckets.extra', 'size' : '3', 'osdNum' : '100', 'percData' : '1.0', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.buckets.index', 'size' : '3', 'osdNum' : '100', 'percData' : '3.0', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.buckets.data', 'size' : '3', 'osdNum' : '100', 'percData' : '19', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'cinder-backup', 'size' : '3', 'osdNum' : '100', 'percData' : '18', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'cinder-volumes', 'size' : '3', 'osdNum' : '100', 'percData' : '42.8', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'ephemeral-vms', 'size' : '3', 'osdNum' : '100', 'percData' : '10', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'glance-images', 'size' : '3', 'osdNum' : '100', 'percData' : '5', 'targPGsPerOSD' : '100'}, +]; + +presetTables['Rados Gateway Only - Jewel and later']=[ + { 'poolName' : '.rgw.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.control', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.data.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.gc', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.intent-log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.meta', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.usage', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.users.keys', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.users.email', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.users.swift', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.users.uid', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.buckets.extra', 'size' : '3', 'osdNum' : '100', 'percData' : '1.0', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.buckets.index', 'size' : '3', 'osdNum' : '100', 'percData' : '3.0', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'default.rgw.buckets.data', 'size' : '3', 'osdNum' : '100', 'percData' : '94.8', 'targPGsPerOSD' : '100'}, +]; + +presetTables['OpenStack w RGW - Infernalis and earlier']=[ + { 'poolName' : '.intent-log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.buckets', 'size' : '3', 'osdNum' : '100', 'percData' : '18', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.buckets.extra', 'size' : '3', 'osdNum' : '100', 'percData' : '1.0', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.buckets.index', 'size' : '3', 'osdNum' : '100', 'percData' : '3.0', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.control', 'size' : '3', 
'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.gc', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.usage', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.users', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.users.email', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.users.swift', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.users.uid', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'cinder-backup', 'size' : '3', 'osdNum' : '100', 'percData' : '19', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'cinder-volumes', 'size' : '3', 'osdNum' : '100', 'percData' : '42.9', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'ephemeral-vms', 'size' : '3', 'osdNum' : '100', 'percData' : '10', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'glance-images', 'size' : '3', 'osdNum' : '100', 'percData' : '5', 'targPGsPerOSD' : '100'}, +]; + +presetTables['Rados Gateway Only - Infernalis and earlier']=[ + { 'poolName' : '.intent-log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.log', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.buckets', 'size' : '3', 'osdNum' : '100', 'percData' : '94.9', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.buckets.extra', 'size' : '3', 'osdNum' : '100', 'percData' : '1.0', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.buckets.index', 'size' : '3', 'osdNum' : '100', 'percData' : '3.0', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.control', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.gc', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.rgw.root', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.usage', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.users', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.users.email', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.users.swift', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, + { 'poolName' : '.users.uid', 'size' : '3', 'osdNum' : '100', 'percData' : '0.1', 'targPGsPerOSD' : '100'}, +]; +presetTables['RBD and libRados']=[ + { 'poolName' : 'rbd', 'size' : '3', 'osdNum' : '100', 'percData' : '75', 'targPGsPerOSD' : '100'}, + { 'poolName' : 'myObjects', 'size' : '3', 'osdNum' : '100', 'percData' : '25', 'targPGsPerOSD' : '100'}, +]; + +$(function() { + $("#presetType").on("change",changePreset); + $("#btnAddPool").on("click",addPool); + $("#btnGenCommands").on("click",generateCommands); + $.each(presetTables,function(index,value) { + selIndex=''; + if ( index == 'OpenStack w RGW - Jewel and later' ) + selIndex=' selected'; + $("#presetType").append(""); + }); + changePreset(); + $("#beforeTable").html("
Key
"); + $.each(key_values, function(index, value) { + pre=''; + post=''; + if ('global' in value) { + pre=''; + post='' + } + + var dlAdd="
"+pre+value['name']+post+"
"+value['description']; + if ( 'options' in value ) { + dlAdd+="
"; + $.each(value['options'], function (subIndex, subValue) { + dlAdd+="
"+subValue[0]+"
"+subValue[1]+"
"; + }); + dlAdd+="
"; + } + dlAdd+="
"; + $("#keyDL").append(dlAdd); + }); + $("#afterTable").html("
Notes
    \n
    "); + $.each(notes,function(index, value) { + $("#notesUL").append("\t
  • "+value+"
  • \n"); + }); + +}); + +function changePreset() { + resetTable(); + fillTable($("#presetType").val()); +} + +function resetTable() { + $("#pgsperpool").html(""); + $("#pgsperpool").append("\n\n"); + $("#headerRow").append("\t \n"); + var fieldCount=0; + var percDataIndex=0; + $.each(key_values, function(index, value) { + fieldCount++; + pre=''; + post=''; + var widthAdd=''; + if ( index == 'percData' ) + percDataIndex=fieldCount; + if ('width' in value) + widthAdd=' style=\'width: '+value['width']+'\''; + if ('global' in value) { + pre=''; + post='' + } + $("#headerRow").append("\t"+pre+value['name']+post+"\n"); + }); + percDataIndex++; + $("#headerRow").append("\tSuggested PG Count\n"); + $("#pgsperpool").append("Total Data Percentage: 0% PG Total Count: 0"); +} + +function nearestPow2( aSize ){ + var tmp=Math.pow(2, Math.round(Math.log(aSize)/Math.log(2))); + if(tmp<(aSize*(1-pow2belowThreshold))) + tmp*=2; + return tmp; +} + +function globalChange(field) { + dialogHTML='
    '; + dialogHTML+='
    \n'; + dialogHTML+=''; + dialogHTML+=''; + dialogHTML+=''; + dialogHTML+='
    '; + globalDialog=$(dialogHTML).dialog({ + autoOpen: true, + width: 350, + show: 'fold', + hide: 'fold', + modal: true, + buttons: { + "Update Value": function() { massUpdate($("#globalField").val(),$("#globalValue").val()); globalDialog.dialog("close"); setTimeout(function() { globalDialog.dialog("destroy"); }, 1000); }, + "Cancel": function() { globalDialog.dialog("close"); setTimeout(function() { globalDialog.dialog("destroy"); }, 1000); } + } + }); +} + +var rowCount=0; +function fillTable(presetType) { + rowCount=0; + $.each(presetTables[presetType], function(index,value) { + addTableRow(value); + }); +} + +function addPool() { + dialogHTML='
    '; + $.each(key_values, function(index,value) { + dialogHTML+='

    \n'; + classAdd='right'; + if ( index == 'poolName' ) + classAdd='left'; + dialogHTML+='
    '; + }); + dialogHTML+=''; + dialogHTML+='
    '; + addPoolDialog=$(dialogHTML).dialog({ + autoOpen: true, + width: 350, + show: 'fold', + hide: 'fold', + modal: true, + buttons: { + "Add Pool": function() { + var newPoolValues={}; + $.each(key_values,function(index,value) { + newPoolValues[index]=$("#new"+index).val(); + }); + addTableRow(newPoolValues); + addPoolDialog.dialog("close"); + setTimeout(function() { addPoolDialog.dialog("destroy"); }, 1000); }, + "Cancel": function() { addPoolDialog.dialog("close"); setTimeout(function() { addPoolDialog.dialog("destroy"); }, 1000); } + } + }); + +// addTableRow({'poolName':'newPool','size':3, 'osdNum':100,'targPGsPerOSD': 100, 'percData':0}); +} + +function addTableRow(rowValues) { + rowAdd="\n"; + rowAdd+="\t\n"; + $.each(key_values, function(index,value) { + classAdd=' center'; + modifier=''; + if ( index == 'percData' ) { + classAdd='" style="text-align: right;'; + // modifier=' %'; + } else if ( index == 'poolName' ) + classAdd=' left'; + rowAdd+="\t"+modifier+"\n"; + }); + rowAdd+="\t0"; + $("#totalRow").before(rowAdd); + updatePGCount(rowCount); + $("[id$='percData_input']").each(function() { var fieldVal=parseFloat($(this).val()); $(this).val(fieldVal.toFixed(2)); }); + rowCount++; +} + +function updatePGCount(rowID) { + if(rowID==-1) { + for(var i=0;icalcValue) + $("#row"+rowID+"_pgCount").html(minValue); + else + $("#row"+rowID+"_pgCount").html(calcValue); + } + updateTotals(); +} + +function focusMe(rowID,field) { + $("#row"+rowID+"_"+field+"_input").toggleClass('inputColor'); + $("#row"+rowID+"_"+field+"_input").toggleClass('highlightColor'); + $("#dt_"+field).toggleClass('highlightColor'); + $("#dd_"+field).toggleClass('highlightColor'); + updatePGCount(rowID); +} + +function blurMe(rowID,field) { + focusMe(rowID,field); + $("[id$='percData_input']").each(function() { var fieldVal=parseFloat($(this).val()); $(this).val(fieldVal.toFixed(2)); }); +} + +function keyMe(rowID,field) { + updatePGCount(rowID); +} + +function massUpdate(field,value) { + $("[id$='_"+field+"_input']").val(value); + key_values[field]['default']=value; + updatePGCount(-1); +} + +function updateTotals() { + var totalPerc=0; + var totalPGs=0; + $("[id$='percData_input']").each(function() { + totalPerc+=parseFloat($(this).val()); + if ( parseFloat($(this).val()) > 100 ) + $(this).addClass('ui-state-error'); + else + $(this).removeClass('ui-state-error'); + }); + $("[id$='_pgCount']").each(function() { + totalPGs+=parseInt($(this).html()); + }); + $("#percTotalValue").html(totalPerc.toFixed(2)); + $("#pgTotalValue").html(totalPGs); + if(parseFloat(totalPerc.toFixed(2)) % 100 != 0) { + $("#percTotalValue").addClass('ui-state-error'); + $("#li_totalPerc").addClass('ui-state-error'); + } else { + $("#percTotalValue").removeClass('ui-state-error'); + $("#li_totalPerc").removeClass('ui-state-error'); + } + $("#commandCode").html(""); +} + +function generateCommands() { + outputCommands="## Note: The 'while' loops below pause between pools to allow all\n\ +## PGs to be created. This is a safety mechanism to prevent\n\ +## saturating the Monitor nodes.\n\ +## -------------------------------------------------------------------\n\n"; + for(i=0;i`_" blog +post for a brief explanation of RADOS and see `RADOS - A Scalable, Reliable +Storage Service for Petabyte-scale Storage Clusters`_ for an exhaustive +explanation of :term:`RADOS`. 
A Ceph Storage Cluster consists of multiple types of daemons: @@ -30,12 +35,13 @@ A Ceph Storage Cluster consists of multiple types of daemons: - :term:`Ceph Manager` - :term:`Ceph Metadata Server` +.. _arch_monitor: + Ceph Monitors maintain the master copy of the cluster map, which they provide -to Ceph clients. Provisioning multiple monitors within the Ceph cluster ensures -availability in the event that one of the monitor daemons or its host fails. -The Ceph monitor provides copies of the cluster map to storage cluster clients. +to Ceph clients. The existence of multiple monitors in the Ceph cluster ensures +availability if one of the monitor daemons or its host fails. -A Ceph OSD Daemon checks its own state and the state of other OSDs and reports +A Ceph OSD Daemon checks its own state and the state of other OSDs and reports back to monitors. A Ceph Manager serves as an endpoint for monitoring, orchestration, and plug-in @@ -45,10 +51,11 @@ A Ceph Metadata Server (MDS) manages file metadata when CephFS is used to provide file services. Storage cluster clients and :term:`Ceph OSD Daemon`\s use the CRUSH algorithm -to compute information about data location. This means that clients and OSDs -are not bottlenecked by a central lookup table. Ceph's high-level features -include a native interface to the Ceph Storage Cluster via ``librados``, and a -number of service interfaces built on top of ``librados``. +to compute information about the location of data. By using the CRUSH +algorithm, clients and OSDs avoid being bottlenecked by a central lookup table. +Ceph's high-level features include a native interface to the Ceph Storage +Cluster via ``librados`` and a number of service interfaces built on top of +``librados``. Storing Data ------------ @@ -59,7 +66,7 @@ comes through a :term:`Ceph Block Device`, :term:`Ceph Object Storage`, the ``librados``. The data received by the Ceph Storage Cluster is stored as RADOS objects. Each object is stored on an :term:`Object Storage Device` (this is also called an "OSD"). Ceph OSDs control read, write, and replication -operations on storage drives. The default BlueStore back end stores objects +operations on storage drives. The default BlueStore back end stores objects in a monolithic, database-like fashion. .. ditaa:: @@ -67,7 +74,7 @@ in a monolithic, database-like fashion. /------\ +-----+ +-----+ | obj |------>| {d} |------>| {s} | \------/ +-----+ +-----+ - + Object OSD Drive Ceph OSD Daemons store data as objects in a flat namespace. This means that @@ -83,10 +90,10 @@ created date, and the last modified date. /------+------------------------------+----------------\ | ID | Binary Data | Metadata | +------+------------------------------+----------------+ - | 1234 | 0101010101010100110101010010 | name1 = value1 | + | 1234 | 0101010101010100110101010010 | name1 = value1 | | | 0101100001010100110101010010 | name2 = value2 | | | 0101100001010100110101010010 | nameN = valueN | - \------+------------------------------+----------------/ + \------+------------------------------+----------------/ .. note:: An object ID is unique across the entire cluster, not just the local filesystem. @@ -126,8 +133,8 @@ massive scale by distributing the work to all the OSD daemons in the cluster and all the clients that communicate with them. CRUSH uses intelligent data replication to ensure resiliency, which is better suited to hyper-scale storage. The following sections provide additional details on how CRUSH works. 
-For a detailed discussion of CRUSH, see `CRUSH - Controlled, Scalable, -Decentralized Placement of Replicated Data`_. +For an in-depth, academic discussion of CRUSH, see `CRUSH - Controlled, +Scalable, Decentralized Placement of Replicated Data`_. .. index:: architecture; cluster map @@ -145,14 +152,14 @@ five maps that constitute the cluster map are: the address, and the TCP port of each monitor. The monitor map specifies the current epoch, the time of the monitor map's creation, and the time of the monitor map's last modification. To view a monitor map, run ``ceph mon - dump``. - + dump``. + #. **The OSD Map:** Contains the cluster ``fsid``, the time of the OSD map's creation, the time of the OSD map's last modification, a list of pools, a list of replica sizes, a list of PG numbers, and a list of OSDs and their statuses (for example, ``up``, ``in``). To view an OSD map, run ``ceph - osd dump``. - + osd dump``. + #. **The PG Map:** Contains the PG version, its time stamp, the last OSD map epoch, the full ratios, and the details of each placement group. This includes the PG ID, the `Up Set`, the `Acting Set`, the state of the PG (for @@ -166,8 +173,8 @@ five maps that constitute the cluster map are: {decomp-crushmap-filename}``. Use a text editor or ``cat`` to view the decompiled map. -#. **The MDS Map:** Contains the current MDS map epoch, when the map was - created, and the last time it changed. It also contains the pool for +#. **The MDS Map:** Contains the current MDS map epoch, when the map was + created, and the last time it changed. It also contains the pool for storing metadata, a list of metadata servers, and which metadata servers are ``up`` and ``in``. To view an MDS map, execute ``ceph fs dump``. @@ -210,13 +217,13 @@ High Availability Authentication ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``cephx`` authentication system is used by Ceph to authenticate users and -daemons and to protect against man-in-the-middle attacks. +daemons and to protect against man-in-the-middle attacks. -.. note:: The ``cephx`` protocol does not address data encryption in transport +.. note:: The ``cephx`` protocol does not address data encryption in transport (for example, SSL/TLS) or encryption at rest. ``cephx`` uses shared secret keys for authentication. This means that both the -client and the monitor cluster keep a copy of the client's secret key. +client and the monitor cluster keep a copy of the client's secret key. The ``cephx`` protocol makes it possible for each party to prove to the other that it has a copy of the key without revealing it. This provides mutual @@ -233,7 +240,7 @@ Direct interactions between Ceph clients and OSDs require authenticated connections. The ``cephx`` authentication system establishes and sustains these authenticated connections. -The ``cephx`` protocol operates in a manner similar to `Kerberos`_. +The ``cephx`` protocol operates in a manner similar to `Kerberos`_. A user invokes a Ceph client to contact a monitor. Unlike Kerberos, each monitor can authenticate users and distribute keys, which means that there is @@ -246,7 +253,7 @@ Monitors. The client then uses the session key to request services from the monitors, and the monitors provide the client with a ticket that authenticates the client against the OSDs that actually handle data. Ceph Monitors and OSDs share a secret, which means that the clients can use the ticket provided by the -monitors to authenticate against any OSD or metadata server in the cluster. 
+monitors to authenticate against any OSD or metadata server in the cluster. Like Kerberos tickets, ``cephx`` tickets expire. An attacker cannot use an expired ticket or session key that has been obtained surreptitiously. This form @@ -262,8 +269,8 @@ subsystem generates the username and key, stores a copy on the monitor(s), and transmits the user's secret back to the ``client.admin`` user. This means that the client and the monitor share a secret key. -.. note:: The ``client.admin`` user must provide the user ID and - secret key to the user in a secure manner. +.. note:: The ``client.admin`` user must provide the user ID and + secret key to the user in a secure manner. .. ditaa:: @@ -273,7 +280,7 @@ the client and the monitor share a secret key. | request to | | create a user | |-------------->|----------+ create user - | | | and + | | | and |<--------------|<---------+ store key | transmit key | | | @@ -296,25 +303,25 @@ and uses it to sign requests to OSDs and to metadata servers in the cluster. +---------+ +---------+ | authenticate | |-------------->|----------+ generate and - | | | encrypt + | | | encrypt |<--------------|<---------+ session key | transmit | | encrypted | | session key | - | | + | | |-----+ decrypt | - | | session | - |<----+ key | + | | session | + |<----+ key | | | | req. ticket | |-------------->|----------+ generate and - | | | encrypt + | | | encrypt |<--------------|<---------+ ticket | recv. ticket | - | | + | | |-----+ decrypt | - | | ticket | - |<----+ | + | | ticket | + |<----+ | The ``cephx`` protocol authenticates ongoing communications between the clients @@ -329,7 +336,7 @@ between the client and the daemon. | Client | | Monitor | | MDS | | OSD | +---------+ +---------+ +-------+ +-------+ | request to | | | - | create a user | | | + | create a user | | | |-------------->| mon and | | |<--------------| client share | | | receive | a secret. | | @@ -337,7 +344,7 @@ between the client and the daemon. | |<------------>| | | |<-------------+------------>| | | mon, mds, | | - | authenticate | and osd | | + | authenticate | and osd | | |-------------->| share | | |<--------------| a secret | | | session key | | | @@ -353,7 +360,7 @@ between the client and the daemon. | receive response (CephFS only) | | | | make request | - |------------------------------------------->| + |------------------------------------------->| |<-------------------------------------------| receive response @@ -362,7 +369,7 @@ daemons. The authentication is not extended beyond the Ceph client. If a user accesses the Ceph client from a remote host, cephx authentication will not be applied to the connection between the user's host and the client host. -See `Cephx Config Guide`_ for more on configuration details. +See `Cephx Config Guide`_ for more on configuration details. See `User Management`_ for more on user management. @@ -416,7 +423,7 @@ the greater cluster provides several benefits: Monitors receive no such message after a configurable period of time, then they mark the OSD ``down``. This mechanism is a failsafe, however. Normally, Ceph OSD Daemons determine if a neighboring OSD is ``down`` and - report it to the Ceph Monitors. This contributes to making Ceph Monitors + report it to the Ceph Monitors. This contributes to making Ceph Monitors lightweight processes. See `Monitoring OSDs`_ and `Heartbeats`_ for additional details. 
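The authentication discussion above hinges on one property of ``cephx``: the client and the monitors hold the same secret key, and each side can prove that it holds the key without ever sending it. The snippet below is a generic HMAC challenge/response sketch of that shared-secret idea. It is a toy illustration only, not the actual cephx wire protocol, and the helper names are invented for this example.

.. code-block:: python

    import hashlib
    import hmac
    import os

    shared_secret = os.urandom(32)   # held by both the client and the monitors

    def prove(secret: bytes, challenge: bytes) -> bytes:
        """Answer a challenge in a way that only a key holder can."""
        return hmac.new(secret, challenge, hashlib.sha256).digest()

    def verify(secret: bytes, challenge: bytes, proof: bytes) -> bool:
        """Check the answer without the key ever crossing the wire."""
        return hmac.compare_digest(prove(secret, challenge), proof)

    challenge = os.urandom(16)   # sent by the verifying party
    assert verify(shared_secret, challenge, prove(shared_secret, challenge))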
@@ -463,7 +470,7 @@ the greater cluster provides several benefits: Write (2) | | | | Write (3) +------+ | | +------+ | +------+ +------+ | - | | Ack (4) Ack (5)| | + | | Ack (4) Ack (5)| | v * * v +---------------+ +---------------+ | Secondary OSD | | Tertiary OSD | @@ -490,7 +497,7 @@ About Pools The Ceph storage system supports the notion of 'Pools', which are logical partitions for storing objects. - + Ceph Clients retrieve a `Cluster Map`_ from a Ceph Monitor, and write RADOS objects to pools. The way that Ceph places the data in the pools is determined by the pool's ``size`` or number of replicas, the CRUSH rule, and the number of @@ -511,12 +518,12 @@ placement groups in the pool. +--------+ +---------------+ | Pool |---------->| CRUSH Rule | +--------+ Selects +---------------+ - + Pools set at least the following parameters: - Ownership/Access to Objects -- The Number of Placement Groups, and +- The Number of Placement Groups, and - The CRUSH Rule to Use. See `Set Pool Values`_ for details. @@ -529,12 +536,12 @@ Mapping PGs to OSDs Each pool has a number of placement groups (PGs) within it. CRUSH dynamically maps PGs to OSDs. When a Ceph Client stores objects, CRUSH maps each RADOS -object to a PG. +object to a PG. This mapping of RADOS objects to PGs implements an abstraction and indirection layer between Ceph OSD Daemons and Ceph Clients. The Ceph Storage Cluster must be able to grow (or shrink) and redistribute data adaptively when the internal -topology changes. +topology changes. If the Ceph Client "knew" which Ceph OSD Daemons were storing which objects, a tight coupling would exist between the Ceph Client and the Ceph OSD Daemon. @@ -563,11 +570,11 @@ placement groups, and how it maps placement groups to OSDs. +------+------+-------------+ | | | | | v v v v - /----------\ /----------\ /----------\ /----------\ + /----------\ /----------\ /----------\ /----------\ | | | | | | | | | OSD #1 | | OSD #2 | | OSD #3 | | OSD #4 | | | | | | | | | - \----------/ \----------/ \----------/ \----------/ + \----------/ \----------/ \----------/ \----------/ The client uses its copy of the cluster map and the CRUSH algorithm to compute precisely which OSD it will use when reading or writing a particular object. @@ -581,11 +588,11 @@ When a Ceph Client binds to a Ceph Monitor, it retrieves the latest version of the `Cluster Map`_. When a client has been equipped with a copy of the cluster map, it is aware of all the monitors, OSDs, and metadata servers in the cluster. **However, even equipped with a copy of the latest version of the -cluster map, the client doesn't know anything about object locations.** +cluster map, the client doesn't know anything about object locations.** **Object locations must be computed.** -The client requies only the object ID and the name of the pool in order to +The client requires only the object ID and the name of the pool in order to compute the object location. Ceph stores data in named pools (for example, "liverpool"). When a client @@ -624,7 +631,7 @@ persists, you may need to refer to the `Troubleshooting Peering Failure`_ section. .. Note:: PGs that agree on the state of the cluster do not necessarily have - the current data yet. + the current data yet. The Ceph Storage Cluster was designed to store at least two copies of an object (that is, ``size = 2``), which is the minimum requirement for data safety. For @@ -654,7 +661,7 @@ epoch. The Ceph OSD daemons that are part of an *Acting Set* might not always be ``up``. 
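The object-location computation described above, which needs only the pool and the object ID, can be sketched in a few lines of Python. This is a deliberately simplified stand-in: real Ceph clients use Ceph's own rjenkins hash and a "stable" modulo so that existing mappings move as little as possible, and CRUSH, not a table, then maps the resulting placement group to its OSDs.

.. code-block:: python

    import zlib

    def object_to_pg(pool_id: int, object_name: str, pg_num: int) -> str:
        """Fold a hash of the object name into one of the pool's PGs.
        The result depends only on the inputs, so no central lookup is needed."""
        h = zlib.crc32(object_name.encode())   # stand-in for Ceph's object-name hash
        return "{}.{:x}".format(pool_id, h % pg_num)

    # The same (pool, object) pair always computes to the same placement group;
    # CRUSH then maps that PG to an ordered set of OSDs using the cluster map.
    print(object_to_pg(4, "john", 64))

On a live cluster, ``ceph osd map <pool> <object>`` shows the real result of this calculation, including the acting set that CRUSH selects.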
When an OSD in the *Acting Set* is ``up``, it is part of the *Up Set*. The *Up Set* is an important distinction, because Ceph can remap PGs to other -Ceph OSD Daemons when an OSD fails. +Ceph OSD Daemons when an OSD fails. .. note:: Consider a hypothetical *Acting Set* for a PG that contains ``osd.25``, ``osd.32`` and ``osd.61``. The first OSD (``osd.25``), is the @@ -674,7 +681,7 @@ process (albeit rather crudely, since it is substantially less impactful with large clusters) where some, but not all of the PGs migrate from existing OSDs (OSD 1, and OSD 2) to the new OSD (OSD 3). Even when rebalancing, CRUSH is stable. Many of the placement groups remain in their original configuration, -and each OSD gets some added capacity, so there are no load spikes on the +and each OSD gets some added capacity, so there are no load spikes on the new OSD after rebalancing is complete. @@ -732,7 +739,8 @@ of ``K+M`` so that each chunk is stored in an OSD in the acting set. The rank of the chunk is stored as an attribute of the object. For instance an erasure coded pool can be created to use five OSDs (``K+M = 5``) and -sustain the loss of two of them (``M = 2``). +sustain the loss of two of them (``M = 2``). Data may be unavailable until (``K+1``) +shards are restored. Reading and Writing Encoded Chunks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -821,7 +829,7 @@ account. | | | | | +-------+-------+ | | ^ | - | | | + | | | | | | +--+---+ +------+ +---+--+ +---+--+ name | NYAN | | NYAN | | NYAN | | NYAN | @@ -874,7 +882,7 @@ version 1). .. ditaa:: Primary OSD - + +-------------+ | OSD 1 | +-------------+ | log | Write Full | | @@ -919,7 +927,7 @@ as ``D2v2`` ) while others are acknowledged and persisted to storage drives .. ditaa:: Primary OSD - + +-------------+ | OSD 1 | | log | @@ -928,11 +936,11 @@ as ``D2v2`` ) while others are acknowledged and persisted to storage drives | +----+ +<------------+ Ceph Client | | | v2 | | | +----+ | +-------------+ - | |D1v1| 1,1 | - | +----+ | - +------+------+ - | - | + | |D1v1| 1,1 | + | +----+ | + +------+------+ + | + | | +------+------+ | | OSD 2 | | +------+ | log | @@ -960,7 +968,7 @@ the logs' ``last_complete`` pointer can move from ``1,1`` to ``1,2``. .. ditaa:: Primary OSD - + +-------------+ | OSD 1 | | log | @@ -969,10 +977,10 @@ the logs' ``last_complete`` pointer can move from ``1,1`` to ``1,2``. | +----+ +<------------+ Ceph Client | | | v2 | | | +----+ | +-------------+ - | |D1v1| 1,1 | - | +----+ | - +------+------+ - | + | |D1v1| 1,1 | + | +----+ | + +------+------+ + | | +-------------+ | | OSD 2 | | | log | @@ -984,7 +992,7 @@ the logs' ``last_complete`` pointer can move from ``1,1`` to ``1,2``. | | |D2v1| 1,1 | | | +----+ | | +-------------+ - | + | | +-------------+ | | OSD 3 | | | log | @@ -1005,7 +1013,7 @@ on **OSD 3**. .. ditaa:: Primary OSD - + +-------------+ | OSD 1 | | log | @@ -1048,7 +1056,7 @@ will be the head of the new authoritative log. | (down) | | c333 | +------+------+ - | + | | +-------------+ | | OSD 2 | | | log | @@ -1057,7 +1065,7 @@ will be the head of the new authoritative log. | | +----+ | | | | | +-------------+ - | + | | +-------------+ | | OSD 3 | | | log | @@ -1077,20 +1085,20 @@ will be the head of the new authoritative log. | 1,1 | | | +------+------+ - + The log entry 1,2 found on **OSD 3** is divergent from the new authoritative log provided by **OSD 4**: it is discarded and the file containing the ``C1v2`` chunk is removed. 
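The recovery walkthrough above and below rests on the basic erasure-coding property already introduced: the object is cut into ``K`` data chunks plus ``M`` coding chunks, and any ``K`` surviving chunks are enough to rebuild the rest, which is exactly how ``D1v1`` is rebuilt on the new primary in the next step. The toy Python sketch below illustrates that property for ``K = 2``, ``M = 1`` using XOR parity. It is an illustration only; real pools use an erasure-code plugin such as jerasure or ISA-L, not this.

.. code-block:: python

    def encode(payload: bytes):
        """Cut the payload into K=2 data chunks and add M=1 XOR coding chunk."""
        half = (len(payload) + 1) // 2
        d1 = payload[:half]
        d2 = payload[half:].ljust(half, b"\x00")       # pad the last chunk
        coding = bytes(a ^ b for a, b in zip(d1, d2))  # parity chunk
        return d1, d2, coding                          # any 2 of the 3 recover the data

    def rebuild_d2(d1: bytes, coding: bytes) -> bytes:
        """Rebuild a lost data chunk from a surviving data chunk and the coding chunk."""
        return bytes(a ^ b for a, b in zip(d1, coding))

    d1, d2, coding = encode(b"ABCDEFGHIJ")
    assert rebuild_d2(d1, coding) == d2                # the lost chunk is recovered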
The ``D1v1`` chunk is rebuilt with the ``decode`` function of -the erasure coding library during scrubbing and stored on the new primary +the erasure coding library during scrubbing and stored on the new primary **OSD 4**. .. ditaa:: Primary OSD - + +-------------+ | OSD 4 | | log | @@ -1138,7 +1146,7 @@ configured to act as a cache tier, and a backing pool of either erasure-coded or relatively slower/cheaper devices configured to act as an economical storage tier. The Ceph objecter handles where to place the objects and the tiering agent determines when to flush objects from the cache to the backing storage -tier. So the cache tier and the backing storage tier are completely transparent +tier. So the cache tier and the backing storage tier are completely transparent to Ceph clients. @@ -1148,14 +1156,14 @@ to Ceph clients. | Ceph Client | +------+------+ ^ - Tiering is | + Tiering is | Transparent | Faster I/O to Ceph | +---------------+ - Client Ops | | | + Client Ops | | | | +----->+ Cache Tier | | | | | | | +-----+---+-----+ - | | | ^ + | | | ^ v v | | Active Data in Cache Tier +------+----+--+ | | | Objecter | | | @@ -1196,18 +1204,18 @@ operations on the outbound data and return the data to the client. A Ceph class for a content management system that presents pictures of a particular size and aspect ratio could take an inbound bitmap image, crop it - to a particular aspect ratio, resize it and embed an invisible copyright or - watermark to help protect the intellectual property; then, save the + to a particular aspect ratio, resize it and embed an invisible copyright or + watermark to help protect the intellectual property; then, save the resulting bitmap image to the object store. -See ``src/objclass/objclass.h``, ``src/fooclass.cc`` and ``src/barclass`` for +See ``src/objclass/objclass.h``, ``src/fooclass.cc`` and ``src/barclass`` for exemplary implementations. Summary ------- -Ceph Storage Clusters are dynamic--like a living organism. Whereas, many storage +Ceph Storage Clusters are dynamic--like a living organism. Although many storage appliances do not fully utilize the CPU and RAM of a typical commodity server, Ceph does. From heartbeats, to peering, to rebalancing the cluster or recovering from faults, Ceph offloads work from clients (and from a centralized @@ -1277,7 +1285,7 @@ synchronization/communication channel. +----------+ +----------+ +----------+ +---------------+ | | | | | | | | - | | Watch Object | | + | | Watch Object | | |--------------------------------------------------->| | | | | |<---------------------------------------------------| @@ -1293,7 +1301,7 @@ synchronization/communication channel. | | | | | | |<-----------------| | | | Ack/Commit | - | | Notify | | + | | Notify | | |--------------------------------------------------->| | | | | |<---------------------------------------------------| @@ -1303,7 +1311,7 @@ synchronization/communication channel. | | Notify | | | | |<-----------------| | | | Notify | - | | Ack | | + | | Ack | | |----------------+---------------------------------->| | | | | | | Ack | | @@ -1311,7 +1319,7 @@ synchronization/communication channel. | | | | | | | Ack | | | |----------------->| - | | | | + | | | | |<---------------+----------------+------------------| | Complete @@ -1329,13 +1337,13 @@ volume'. Ceph's striping offers the throughput of RAID 0 striping, the reliability of n-way RAID mirroring and faster recovery. Ceph provides three types of clients: Ceph Block Device, Ceph File System, and -Ceph Object Storage. 
A Ceph Client converts its data from the representation +Ceph Object Storage. A Ceph Client converts its data from the representation format it provides to its users (a block device image, RESTful objects, CephFS -filesystem directories) into objects for storage in the Ceph Storage Cluster. +filesystem directories) into objects for storage in the Ceph Storage Cluster. -.. tip:: The objects Ceph stores in the Ceph Storage Cluster are not striped. - Ceph Object Storage, Ceph Block Device, and the Ceph File System stripe their - data over multiple Ceph Storage Cluster objects. Ceph Clients that write +.. tip:: The objects Ceph stores in the Ceph Storage Cluster are not striped. + Ceph Object Storage, Ceph Block Device, and the Ceph File System stripe their + data over multiple Ceph Storage Cluster objects. Ceph Clients that write directly to the Ceph Storage Cluster via ``librados`` must perform the striping (and parallel I/O) for themselves to obtain these benefits. @@ -1378,7 +1386,7 @@ diagram depicts the simplest form of striping: | End cCCC | | End cCCC | | Object 0 | | Object 1 | \-----------/ \-----------/ - + If you anticipate large images sizes, large S3 or Swift objects (e.g., video), or large CephFS directories, you may see considerable read/write performance @@ -1418,16 +1426,16 @@ stripe (``stripe unit 16``) in the first object in the new object set (``object +-----------------+--------+--------+-----------------+ | | | | +--\ v v v v | - /-----------\ /-----------\ /-----------\ /-----------\ | + /-----------\ /-----------\ /-----------\ /-----------\ | | Begin cCCC| | Begin cCCC| | Begin cCCC| | Begin cCCC| | | Object 0 | | Object 1 | | Object 2 | | Object 3 | | +-----------+ +-----------+ +-----------+ +-----------+ | | stripe | | stripe | | stripe | | stripe | | | unit 0 | | unit 1 | | unit 2 | | unit 3 | | +-----------+ +-----------+ +-----------+ +-----------+ | - | stripe | | stripe | | stripe | | stripe | +-\ + | stripe | | stripe | | stripe | | stripe | +-\ | unit 4 | | unit 5 | | unit 6 | | unit 7 | | Object - +-----------+ +-----------+ +-----------+ +-----------+ +- Set + +-----------+ +-----------+ +-----------+ +-----------+ +- Set | stripe | | stripe | | stripe | | stripe | | 1 | unit 8 | | unit 9 | | unit 10 | | unit 11 | +-/ +-----------+ +-----------+ +-----------+ +-----------+ | @@ -1435,36 +1443,36 @@ stripe (``stripe unit 16``) in the first object in the new object set (``object | unit 12 | | unit 13 | | unit 14 | | unit 15 | | +-----------+ +-----------+ +-----------+ +-----------+ | | End cCCC | | End cCCC | | End cCCC | | End cCCC | | - | Object 0 | | Object 1 | | Object 2 | | Object 3 | | + | Object 0 | | Object 1 | | Object 2 | | Object 3 | | \-----------/ \-----------/ \-----------/ \-----------/ | | +--/ - + +--\ | - /-----------\ /-----------\ /-----------\ /-----------\ | + /-----------\ /-----------\ /-----------\ /-----------\ | | Begin cCCC| | Begin cCCC| | Begin cCCC| | Begin cCCC| | - | Object 4 | | Object 5 | | Object 6 | | Object 7 | | + | Object 4 | | Object 5 | | Object 6 | | Object 7 | | +-----------+ +-----------+ +-----------+ +-----------+ | | stripe | | stripe | | stripe | | stripe | | | unit 16 | | unit 17 | | unit 18 | | unit 19 | | +-----------+ +-----------+ +-----------+ +-----------+ | - | stripe | | stripe | | stripe | | stripe | +-\ + | stripe | | stripe | | stripe | | stripe | +-\ | unit 20 | | unit 21 | | unit 22 | | unit 23 | | Object +-----------+ +-----------+ +-----------+ +-----------+ +- Set - | stripe | | stripe | | 
stripe | | stripe | | 2 + | stripe | | stripe | | stripe | | stripe | | 2 | unit 24 | | unit 25 | | unit 26 | | unit 27 | +-/ +-----------+ +-----------+ +-----------+ +-----------+ | | stripe | | stripe | | stripe | | stripe | | | unit 28 | | unit 29 | | unit 30 | | unit 31 | | +-----------+ +-----------+ +-----------+ +-----------+ | | End cCCC | | End cCCC | | End cCCC | | End cCCC | | - | Object 4 | | Object 5 | | Object 6 | | Object 7 | | + | Object 4 | | Object 5 | | Object 6 | | Object 7 | | \-----------/ \-----------/ \-----------/ \-----------/ | | +--/ -Three important variables determine how Ceph stripes data: +Three important variables determine how Ceph stripes data: - **Object Size:** Objects in the Ceph Storage Cluster have a maximum configurable size (e.g., 2MB, 4MB, etc.). The object size should be large @@ -1472,24 +1480,24 @@ Three important variables determine how Ceph stripes data: the stripe unit. - **Stripe Width:** Stripes have a configurable unit size (e.g., 64kb). - The Ceph Client divides the data it will write to objects into equally - sized stripe units, except for the last stripe unit. A stripe width, - should be a fraction of the Object Size so that an object may contain + The Ceph Client divides the data it will write to objects into equally + sized stripe units, except for the last stripe unit. A stripe width, + should be a fraction of the Object Size so that an object may contain many stripe units. - **Stripe Count:** The Ceph Client writes a sequence of stripe units - over a series of objects determined by the stripe count. The series - of objects is called an object set. After the Ceph Client writes to + over a series of objects determined by the stripe count. The series + of objects is called an object set. After the Ceph Client writes to the last object in the object set, it returns to the first object in the object set. - + .. important:: Test the performance of your striping configuration before putting your cluster into production. You CANNOT change these striping parameters after you stripe the data and write it to objects. Once the Ceph Client has striped data to stripe units and mapped the stripe units to objects, Ceph's CRUSH algorithm maps the objects to placement groups, -and the placement groups to Ceph OSD Daemons before the objects are stored as +and the placement groups to Ceph OSD Daemons before the objects are stored as files on a storage drive. .. note:: Since a client writes to a single pool, all data striped into objects @@ -1513,23 +1521,23 @@ Ceph Clients include a number of service interfaces. These include: that uses ``librbd`` directly--avoiding the kernel object overhead for virtualized systems. -- **Object Storage:** The :term:`Ceph Object Storage` (a.k.a., RGW) service +- **Object Storage:** The :term:`Ceph Object Storage` (a.k.a., RGW) service provides RESTful APIs with interfaces that are compatible with Amazon S3 - and OpenStack Swift. - -- **Filesystem**: The :term:`Ceph File System` (CephFS) service provides - a POSIX compliant filesystem usable with ``mount`` or as + and OpenStack Swift. + +- **Filesystem**: The :term:`Ceph File System` (CephFS) service provides + a POSIX compliant filesystem usable with ``mount`` or as a filesystem in user space (FUSE). Ceph can run additional instances of OSDs, MDSs, and monitors for scalability and high availability. The following diagram depicts the high-level -architecture. +architecture. .. 
ditaa:: +--------------+ +----------------+ +-------------+ | Block Device | | Object Storage | | CephFS | - +--------------+ +----------------+ +-------------+ + +--------------+ +----------------+ +-------------+ +--------------+ +----------------+ +-------------+ | librbd | | librgw | | libcephfs | @@ -1561,10 +1569,10 @@ another application. .. topic:: S3/Swift Objects and Store Cluster Objects Compared Ceph's Object Storage uses the term *object* to describe the data it stores. - S3 and Swift objects are not the same as the objects that Ceph writes to the + S3 and Swift objects are not the same as the objects that Ceph writes to the Ceph Storage Cluster. Ceph Object Storage objects are mapped to Ceph Storage - Cluster objects. The S3 and Swift objects do not necessarily - correspond in a 1:1 manner with an object stored in the storage cluster. It + Cluster objects. The S3 and Swift objects do not necessarily + correspond in a 1:1 manner with an object stored in the storage cluster. It is possible for an S3 or Swift object to map to multiple Ceph objects. See `Ceph Object Storage`_ for details. @@ -1580,7 +1588,7 @@ Ceph Storage Cluster, where each object gets mapped to a placement group and distributed, and the placement groups are spread across separate ``ceph-osd`` daemons throughout the cluster. -.. important:: Striping allows RBD block devices to perform better than a single +.. important:: Striping allows RBD block devices to perform better than a single server could! Thin-provisioned snapshottable Ceph Block Devices are an attractive option for @@ -1589,7 +1597,8 @@ typically deploy a Ceph Block Device with the ``rbd`` network storage driver in QEMU/KVM, where the host machine uses ``librbd`` to provide a block device service to the guest. Many cloud computing stacks use ``libvirt`` to integrate with hypervisors. You can use thin-provisioned Ceph Block Devices with QEMU and -``libvirt`` to support OpenStack and CloudStack among other solutions. +``libvirt`` to support OpenStack, OpenNebula and CloudStack +among other solutions. While we do not provide ``librbd`` support with other hypervisors at this time, you may also use Ceph Block Device kernel objects to provide a block device to a @@ -1614,7 +1623,7 @@ a Filesystem in User Space (FUSE). +-----------------------+ +------------------------+ | CephFS Kernel Object | | CephFS FUSE | - +-----------------------+ +------------------------+ + +-----------------------+ +------------------------+ +---------------------------------------------------+ | CephFS Library (libcephfs) | @@ -1643,9 +1652,9 @@ CephFS separates the metadata from the data, storing the metadata in the MDS, and storing the file data in one or more objects in the Ceph Storage Cluster. The Ceph filesystem aims for POSIX compatibility. ``ceph-mds`` can run as a single process, or it can be distributed out to multiple physical machines, -either for high availability or for scalability. +either for high availability or for scalability. -- **High Availability**: The extra ``ceph-mds`` instances can be `standby`, +- **High Availability**: The extra ``ceph-mds`` instances can be `standby`, ready to take over the duties of any failed ``ceph-mds`` that was `active`. This is easy because all the data, including the journal, is stored on RADOS. The transition is triggered automatically by ``ceph-mon``. 
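The striping behaviour described in this file is fully determined by the three variables listed earlier: object size, stripe unit (width), and stripe count. The Python sketch below maps a logical byte offset in a client image or file to the object set, the object within that set, and the stripe unit that would hold it, mirroring the layout shown in the striping diagrams above. The function name and structure are invented for illustration; this is not librbd or libcephfs code.

.. code-block:: python

    def locate(offset: int, object_size: int, stripe_unit: int, stripe_count: int):
        """Map a logical byte offset to its place in the striped layout."""
        units_per_object = object_size // stripe_unit
        unit_no = offset // stripe_unit                        # global stripe unit number
        object_set = unit_no // (units_per_object * stripe_count)
        index_in_set = unit_no % stripe_count                  # which object of the set
        object_no = object_set * stripe_count + index_in_set   # overall object number
        unit_in_object = (unit_no // stripe_count) % units_per_object
        byte_in_object = unit_in_object * stripe_unit + offset % stripe_unit
        return object_set, object_no, unit_no, byte_in_object

    # 4 MiB objects, 64 KiB stripe units, 4 objects per object set:
    # stripe unit 5 lands in object 1 of object set 0.
    print(locate(5 * 64 * 1024, 4 * 2**20, 64 * 1024, 4))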
diff --git a/doc/ceph-volume/lvm/activate.rst b/doc/ceph-volume/lvm/activate.rst index d5129def11d5..fe34ecb713a9 100644 --- a/doc/ceph-volume/lvm/activate.rst +++ b/doc/ceph-volume/lvm/activate.rst @@ -3,18 +3,20 @@ ``activate`` ============ -Once :ref:`ceph-volume-lvm-prepare` is completed, and all the various steps -that entails are done, the volume is ready to get "activated". +After :ref:`ceph-volume-lvm-prepare` has completed its run, the volume can be +activated. -This activation process enables a systemd unit that persists the OSD ID and its -UUID (also called ``fsid`` in Ceph CLI tools), so that at boot time it can -understand what OSD is enabled and needs to be mounted. +Activating the volume involves enabling a ``systemd`` unit that persists the +``OSD ID`` and its ``UUID`` (which is also called the ``fsid`` in the Ceph CLI +tools). After this information has been persisted, the cluster can determine +which OSD is enabled and must be mounted. -.. note:: The execution of this call is fully idempotent, and there is no - side-effects when running multiple times +.. note:: The execution of this call is fully idempotent. This means that the + call can be executed multiple times without changing the result of its first + successful execution. -For OSDs deployed by cephadm, please refer to :ref:`cephadm-osd-activate` -instead. +For information about OSDs deployed by cephadm, refer to +:ref:`cephadm-osd-activate`. New OSDs -------- diff --git a/doc/ceph-volume/lvm/newdb.rst b/doc/ceph-volume/lvm/newdb.rst index dcc87fc8a740..a8136c9886bb 100644 --- a/doc/ceph-volume/lvm/newdb.rst +++ b/doc/ceph-volume/lvm/newdb.rst @@ -9,3 +9,48 @@ Logical volume name format is vg/lv. Fails if OSD has already got attached DB. Attach vgname/lvname as a DB volume to OSD 1:: ceph-volume lvm new-db --osd-id 1 --osd-fsid 55BD4219-16A7-4037-BC20-0F158EFCC83D --target vgname/new_db + +Reversing BlueFS Spillover to Slow Devices +------------------------------------------ + +Under certain circumstances, OSD RocksDB databases spill onto slow storage and +the Ceph cluster returns specifics regarding BlueFS spillover warnings. ``ceph +health detail`` returns these spillover warnings. Here is an example of a +spillover warning:: + + osd.76 spilled over 128 KiB metadata from 'db' device (56 GiB used of 60 GiB) to slow device + +To move this DB metadata from the slower device to the faster device, take the +following steps: + +#. Expand the database's logical volume (LV): + + .. prompt:: bash # + + lvextend -l ${size} ${lv}/${db} ${ssd_dev} + +#. Stop the OSD: + + .. prompt:: bash # + + cephadm unit --fsid $cid --name osd.${osd} stop + +#. Run the ``bluefs-bdev-expand`` command: + + .. prompt:: bash # + + cephadm shell --fsid $cid --name osd.${osd} -- ceph-bluestore-tool bluefs-bdev-expand --path /var/lib/ceph/osd/ceph-${osd} + +#. Run the ``bluefs-bdev-migrate`` command: + + .. prompt:: bash # + + cephadm shell --fsid $cid --name osd.${osd} -- ceph-bluestore-tool bluefs-bdev-migrate --path /var/lib/ceph/osd/ceph-${osd} --devs-source /var/lib/ceph/osd/ceph-${osd}/block --dev-target /var/lib/ceph/osd/ceph-${osd}/block.db + +#. Restart the OSD: + + .. prompt:: bash # + + cephadm unit --fsid $cid --name osd.${osd} start + +.. 
note:: *The above procedure was developed by Chris Dunlop on the [ceph-users] mailing list, and can be seen in its original context here:* `[ceph-users] Re: Fixing BlueFS spillover (pacific 16.2.14) `_ diff --git a/doc/ceph-volume/lvm/prepare.rst b/doc/ceph-volume/lvm/prepare.rst index 2faf12a4e1fe..c7dae83d0627 100644 --- a/doc/ceph-volume/lvm/prepare.rst +++ b/doc/ceph-volume/lvm/prepare.rst @@ -61,6 +61,12 @@ For enabling :ref:`encryption `, the ``--dmcrypt`` f ceph-volume lvm prepare --bluestore --dmcrypt --data vg/lv +Starting with Ceph Squid, you can opt for TPM2 token enrollment for the created LUKS2 devices with the ``--with-tpm`` flag: + +.. prompt:: bash # + + ceph-volume lvm prepare --bluestore --dmcrypt --with-tpm --data vg/lv + If a ``block.db`` device or a ``block.wal`` device is needed, it can be specified with ``--block.db`` or ``--block.wal``. These can be physical devices, partitions, or logical volumes. ``block.db`` and ``block.wal`` are diff --git a/doc/cephadm/adoption.rst b/doc/cephadm/adoption.rst index 86254a16cd41..2ebce606c4f0 100644 --- a/doc/cephadm/adoption.rst +++ b/doc/cephadm/adoption.rst @@ -22,20 +22,20 @@ Preparation #. Make sure that the ``cephadm`` command line tool is available on each host in the existing cluster. See :ref:`get-cephadm` to learn how. -#. Prepare each host for use by ``cephadm`` by running this command: +#. Prepare each host for use by ``cephadm`` by running this command on that host: .. prompt:: bash # cephadm prepare-host #. Choose a version of Ceph to use for the conversion. This procedure will work - with any release of Ceph that is Octopus (15.2.z) or later, inclusive. The + with any release of Ceph that is Octopus (15.2.z) or later. The latest stable release of Ceph is the default. You might be upgrading from an earlier Ceph release at the same time that you're performing this - conversion; if you are upgrading from an earlier release, make sure to + conversion. If you are upgrading from an earlier release, make sure to follow any upgrade-related instructions for that release. - Pass the image to cephadm with the following command: + Pass the Ceph container image to cephadm with the following command: .. prompt:: bash # @@ -50,25 +50,27 @@ Preparation cephadm ls - Before starting the conversion process, ``cephadm ls`` shows all existing - daemons to have a style of ``legacy``. As the adoption process progresses, - adopted daemons will appear with a style of ``cephadm:v1``. + Before starting the conversion process, ``cephadm ls`` reports all existing + daemons with the style ``legacy``. As the adoption process progresses, + adopted daemons will appear with the style ``cephadm:v1``. Adoption process ---------------- -#. Make sure that the ceph configuration has been migrated to use the cluster - config database. If the ``/etc/ceph/ceph.conf`` is identical on each host, - then the following command can be run on one single host and will affect all - hosts: +#. Make sure that the ceph configuration has been migrated to use the cluster's + central config database. If ``/etc/ceph/ceph.conf`` is identical on all + hosts, then the following command can be run on one host and will take + effect for all hosts: .. prompt:: bash # ceph config assimilate-conf -i /etc/ceph/ceph.conf If there are configuration variations between hosts, you will need to repeat - this command on each host. 
During this adoption process, view the cluster's + this command on each host, taking care that if there are conflicting option + settings across hosts, the values from the last host will be used. During this + adoption process, view the cluster's central configuration to confirm that it is complete by running the following command: @@ -76,36 +78,36 @@ Adoption process ceph config dump -#. Adopt each monitor: +#. Adopt each Monitor: .. prompt:: bash # cephadm adopt --style legacy --name mon. - Each legacy monitor should stop, quickly restart as a cephadm + Each legacy Monitor will stop, quickly restart as a cephadm container, and rejoin the quorum. -#. Adopt each manager: +#. Adopt each Manager: .. prompt:: bash # cephadm adopt --style legacy --name mgr. -#. Enable cephadm: +#. Enable cephadm orchestration: .. prompt:: bash # ceph mgr module enable cephadm ceph orch set backend cephadm -#. Generate an SSH key: +#. Generate an SSH key for cephadm: .. prompt:: bash # ceph cephadm generate-key ceph cephadm get-pub-key > ~/ceph.pub -#. Install the cluster SSH key on each host in the cluster: +#. Install the cephadm SSH key on each host in the cluster: .. prompt:: bash # @@ -118,9 +120,10 @@ Adoption process SSH keys. .. note:: - It is also possible to have cephadm use a non-root user to SSH + It is also possible to arrange for cephadm to use a non-root user to SSH into cluster hosts. This user needs to have passwordless sudo access. - Use ``ceph cephadm set-user `` and copy the SSH key to that user. + Use ``ceph cephadm set-user `` and copy the SSH key to that user's + home directory on each host. See :ref:`cephadm-ssh-user` #. Tell cephadm which hosts to manage: @@ -129,10 +132,10 @@ Adoption process ceph orch host add [ip-address] - This will perform a ``cephadm check-host`` on each host before adding it; - this check ensures that the host is functioning properly. The IP address - argument is recommended; if not provided, then the host name will be resolved - via DNS. + This will run ``cephadm check-host`` on each host before adding it. + This check ensures that the host is functioning properly. The IP address + argument is recommended. If the address is not provided, then the host name + will be resolved via DNS. #. Verify that the adopted monitor and manager daemons are visible: @@ -153,8 +156,8 @@ Adoption process cephadm adopt --style legacy --name osd.1 cephadm adopt --style legacy --name osd.2 -#. Redeploy MDS daemons by telling cephadm how many daemons to run for - each file system. List file systems by name with the command ``ceph fs +#. Redeploy CephFS MDS daemons (if deployed) by telling cephadm how many daemons to run for + each file system. List CephFS file systems by name with the command ``ceph fs ls``. Run the following command on the master nodes to redeploy the MDS daemons: @@ -189,19 +192,19 @@ Adoption process systemctl stop ceph-mds.target rm -rf /var/lib/ceph/mds/ceph-* -#. Redeploy RGW daemons. Cephadm manages RGW daemons by zone. For each - zone, deploy new RGW daemons with cephadm: +#. Redeploy Ceph Object Gateway RGW daemons if deployed. Cephadm manages RGW + daemons by zone. For each zone, deploy new RGW daemons with cephadm: .. prompt:: bash # ceph orch apply rgw [--realm=] [--zone=] [--port=] [--ssl] [--placement=] where ** can be a simple daemon count, or a list of - specific hosts (see :ref:`orchestrator-cli-placement-spec`), and the + specific hosts (see :ref:`orchestrator-cli-placement-spec`). The zone and realm arguments are needed only for a multisite setup. 
After the daemons have started and you have confirmed that they are - functioning, stop and remove the old, legacy daemons: + functioning, stop and remove the legacy daemons: .. prompt:: bash # diff --git a/doc/cephadm/client-setup.rst b/doc/cephadm/client-setup.rst index f98ba798b5fd..0f38773b12bd 100644 --- a/doc/cephadm/client-setup.rst +++ b/doc/cephadm/client-setup.rst @@ -1,36 +1,36 @@ ======================= Basic Ceph Client Setup ======================= -Client machines require some basic configuration to interact with -Ceph clusters. This section describes how to configure a client machine -so that it can interact with a Ceph cluster. +Client hosts require basic configuration to interact with +Ceph clusters. This section describes how to perform this configuration. .. note:: - Most client machines need to install only the `ceph-common` package - and its dependencies. Such a setup supplies the basic `ceph` and - `rados` commands, as well as other commands including `mount.ceph` - and `rbd`. + Most client hosts need to install only the ``ceph-common`` package + and its dependencies. Such an installation supplies the basic ``ceph`` and + ``rados`` commands, as well as other commands including ``mount.ceph`` + and ``rbd``. Config File Setup ================= -Client machines usually require smaller configuration files (here -sometimes called "config files") than do full-fledged cluster members. +Client hosts usually require smaller configuration files (here +sometimes called "config files") than do back-end cluster hosts. To generate a minimal config file, log into a host that has been -configured as a client or that is running a cluster daemon, and then run the following command: +configured as a client or that is running a cluster daemon, then +run the following command: .. prompt:: bash # ceph config generate-minimal-conf This command generates a minimal config file that tells the client how -to reach the Ceph monitors. The contents of this file should usually -be installed in ``/etc/ceph/ceph.conf``. +to reach the Ceph Monitors. This file should usually +be copied to ``/etc/ceph/ceph.conf`` on each client host. Keyring Setup ============= Most Ceph clusters run with authentication enabled. This means that -the client needs keys in order to communicate with the machines in the -cluster. To generate a keyring file with credentials for `client.fs`, +the client needs keys in order to communicate with Ceph daemons. +To generate a keyring file with credentials for ``client.fs``, log into an running cluster member and run the following command: .. prompt:: bash $ @@ -40,6 +40,10 @@ log into an running cluster member and run the following command: The resulting output is directed into a keyring file, typically ``/etc/ceph/ceph.keyring``. -To gain a broader understanding of client keyring distribution and administration, you should read :ref:`client_keyrings_and_configs`. +To gain a broader understanding of client keyring distribution and administration, +you should read :ref:`client_keyrings_and_configs`. -To see an example that explains how to distribute ``ceph.conf`` configuration files to hosts that are tagged with the ``bare_config`` label, you should read the section called "Distributing ceph.conf to hosts tagged with bare_config" in the section called :ref:`etc_ceph_conf_distribution`. 
+To see an example that explains how to distribute ``ceph.conf`` configuration +files to hosts that are tagged with the ``bare_config`` label, you should read +the subsection named "Distributing ceph.conf to hosts tagged with bare_config" +under the heading :ref:`etc_ceph_conf_distribution`. diff --git a/doc/cephadm/compatibility.rst b/doc/cephadm/compatibility.rst index 46ab62a62726..8dd301f1a222 100644 --- a/doc/cephadm/compatibility.rst +++ b/doc/cephadm/compatibility.rst @@ -30,8 +30,8 @@ This table shows which version pairs are expected to work or not work together: .. note:: - While not all podman versions have been actively tested against - all Ceph versions, there are no known issues with using podman + While not all Podman versions have been actively tested against + all Ceph versions, there are no known issues with using Podman version 3.0 or greater with Ceph Quincy and later releases. .. warning:: diff --git a/doc/cephadm/host-management.rst b/doc/cephadm/host-management.rst index 4b964c5f455a..197647b608e3 100644 --- a/doc/cephadm/host-management.rst +++ b/doc/cephadm/host-management.rst @@ -74,9 +74,9 @@ To add each new host to the cluster, perform two steps: ceph orch host add host2 10.10.0.102 ceph orch host add host3 10.10.0.103 - It is best to explicitly provide the host IP address. If an IP is + It is best to explicitly provide the host IP address. If an address is not provided, then the host name will be immediately resolved via - DNS and that IP will be used. + DNS and the result will be used. One or more labels can also be included to immediately label the new host. For example, by default the ``_admin`` label will make @@ -104,7 +104,7 @@ To drain all daemons from a host, run a command of the following form: The ``_no_schedule`` and ``_no_conf_keyring`` labels will be applied to the host. See :ref:`cephadm-special-host-labels`. -If you only want to drain daemons but leave managed ceph conf and keyring +If you want to drain daemons but leave managed `ceph.conf` and keyring files on the host, you may pass the ``--keep-conf-keyring`` flag to the drain command. @@ -115,7 +115,8 @@ drain command. This will apply the ``_no_schedule`` label to the host but not the ``_no_conf_keyring`` label. -All OSDs on the host will be scheduled to be removed. You can check the progress of the OSD removal operation with the following command: +All OSDs on the host will be scheduled to be removed. You can check +progress of the OSD removal operation with the following command: .. prompt:: bash # @@ -148,7 +149,7 @@ cluster by running the following command: Offline host removal -------------------- -Even if a host is offline and can not be recovered, it can be removed from the +If a host is offline and can not be recovered, it can be removed from the cluster by running a command of the following form: .. prompt:: bash # @@ -232,11 +233,16 @@ Place a host in and out of maintenance mode (stops all Ceph daemons on host): .. prompt:: bash # ceph orch host maintenance enter [--force] [--yes-i-really-mean-it] - ceph orch host maintenance exit + ceph orch host maintenance exit [--force] [--offline] -The ``--force`` flag allows the user to bypass warnings (but not alerts). The ``--yes-i-really-mean-it`` -flag bypasses all safety checks and will attempt to force the host into maintenance mode no -matter what. +The ``--force`` flag on the ``enter`` command allows the user to bypass warnings (but not alerts). 
+The ``--yes-i-really-mean-it`` flag bypasses all safety checks and will attempt to force the +host into maintenance mode no matter what. The ``--force`` and ``--offline`` flags to the ``exit`` command +can be used to to have cephadm mark a host that is in maintenance mode and offline as no longer +in maintenance mode. Note in this case if the host comes online, the Ceph daemons +on the host will remain in the stopped state. The ``--force`` and ``--offline`` flags to the ``exit`` +command are intended to be run for hosts in maintenance mode that are permanently offline +before removing the host entirely from cephadm management using the ``ceph orch host rm`` command. .. warning:: Using the --yes-i-really-mean-it flag to force the host to enter maintenance mode can potentially cause loss of data availability, the mon quorum to break down due @@ -250,8 +256,8 @@ Rescanning Host Devices ======================= Some servers and external enclosures may not register device removal or insertion with the -kernel. In these scenarios, you'll need to perform a host rescan. A rescan is typically -non-disruptive, and can be performed with the following CLI command: +kernel. In these scenarios, you'll need to perform a device rescan on the appropriate host. +A rescan is typically non-disruptive, and can be performed with the following CLI command: .. prompt:: bash # @@ -302,7 +308,10 @@ Setting the initial CRUSH location of host ========================================== Hosts can contain a ``location`` identifier which will instruct cephadm to -create a new CRUSH host located in the specified hierarchy. +create a new CRUSH host bucket located in the specified hierarchy. +You can specify more than one element of the tree when doing so (for +instance if you want to ensure that the rack that a host is being +added to is also added to the default bucket), for example: .. code-block:: yaml @@ -310,23 +319,47 @@ create a new CRUSH host located in the specified hierarchy. hostname: node-00 addr: 192.168.0.10 location: + root: default rack: rack1 .. note:: The ``location`` attribute will be only affect the initial CRUSH location. Subsequent changes of the ``location`` property will be ignored. Also, removing a host will not remove - any CRUSH buckets. + an associated CRUSH bucket unless the ``--rm-crush-entry`` flag is provided to the ``orch host rm`` command See also :ref:`crush_map_default_types`. +Removing a host from the CRUSH map +================================== + +The ``ceph orch host rm`` command has support for removing the associated host bucket +from the CRUSH map. This is done by providing the ``--rm-crush-entry`` flag. + +.. prompt:: bash [ceph:root@host1/]# + + ceph orch host rm host1 --rm-crush-entry + +When this flag is specified, cephadm will attempt to remove the host bucket +from the CRUSH map as part of the host removal process. Note that if +it fails to do so, cephadm will report the failure and the host will remain under +cephadm control. + +.. note:: + + Removal from the CRUSH map will fail if there are OSDs deployed on the + host. If you would like to remove all the host's OSDs as well, please start + by using the ``ceph orch host drain`` command to do so. Once the OSDs + have been removed, then you may direct cephadm remove the CRUSH bucket + along with the host using the ``--rm-crush-entry`` flag. + OS Tuning Profiles ================== -Cephadm can be used to manage operating-system-tuning profiles that apply sets -of sysctl settings to sets of hosts. 
+Cephadm can be used to manage operating system tuning profiles that apply +``sysctl`` settings to sets of hosts. -Create a YAML spec file in the following format: +To do so, create a YAML spec file in the following format: .. code-block:: yaml @@ -345,18 +378,21 @@ Apply the tuning profile with the following command: ceph orch tuned-profile apply -i -This profile is written to ``/etc/sysctl.d/`` on each host that matches the -hosts specified in the placement block of the yaml, and ``sysctl --system`` is +This profile is written to a file under ``/etc/sysctl.d/`` on each host +specified in the ``placement`` block, then ``sysctl --system`` is run on the host. .. note:: The exact filename that the profile is written to within ``/etc/sysctl.d/`` is ``-cephadm-tuned-profile.conf``, where ```` is - the ``profile_name`` setting that you specify in the YAML spec. Because + the ``profile_name`` setting that you specify in the YAML spec. We suggest + naming these profiles following the usual ``sysctl.d`` `NN-xxxxx` convention. Because sysctl settings are applied in lexicographical order (sorted by the filename - in which the setting is specified), you may want to set the ``profile_name`` - in your spec so that it is applied before or after other conf files. + in which the setting is specified), you may want to carefully choose + the ``profile_name`` in your spec so that it is applied before or after other + conf files. Careful selection ensures that values supplied here override or + do not override those in other ``sysctl.d`` files as desired. .. note:: @@ -365,7 +401,7 @@ run on the host. .. note:: - Applying tuned profiles is idempotent when the ``--no-overwrite`` option is + Applying tuning profiles is idempotent when the ``--no-overwrite`` option is passed. Moreover, if the ``--no-overwrite`` option is passed, existing profiles with the same name are not overwritten. @@ -525,7 +561,7 @@ There are two ways to customize this configuration for your environment: We do *not recommend* this approach. The path name must be visible to *any* mgr daemon, and cephadm runs all daemons as - containers. That means that the file either need to be placed + containers. That means that the file must either be placed inside a customized container image for your deployment, or manually distributed to the mgr data directory (``/var/lib/ceph//mgr.`` on the host, visible at @@ -578,8 +614,8 @@ Note that ``man hostname`` recommends ``hostname`` to return the bare host name: The FQDN (Fully Qualified Domain Name) of the system is the - name that the resolver(3) returns for the host name, such as, - ursula.example.com. It is usually the hostname followed by the DNS + name that the resolver(3) returns for the host name, for example + ``ursula.example.com``. It is usually the short hostname followed by the DNS domain name (the part after the first dot). You can check the FQDN using ``hostname --fqdn`` or the domain name using ``dnsdomainname``. diff --git a/doc/cephadm/install.rst b/doc/cephadm/install.rst index 52023ae83514..88a170fe6a3f 100644 --- a/doc/cephadm/install.rst +++ b/doc/cephadm/install.rst @@ -1,10 +1,10 @@ .. 
_cephadm_deploying_new_cluster: -============================ -Deploying a new Ceph cluster -============================ +========================================== +Using cephadm to Deploy a New Ceph Cluster +========================================== -Cephadm creates a new Ceph cluster by "bootstrapping" on a single +Cephadm creates a new Ceph cluster by bootstrapping a single host, expanding the cluster to encompass any additional hosts, and then deploying the needed services. @@ -18,12 +18,16 @@ Requirements - Python 3 - Systemd - Podman or Docker for running containers -- Time synchronization (such as chrony or NTP) +- Time synchronization (such as Chrony or the legacy ``ntpd``) - LVM2 for provisioning storage devices Any modern Linux distribution should be sufficient. Dependencies are installed automatically by the bootstrap process below. +See `Docker Live Restore `_ +for an optional feature that allows restarting Docker Engine without restarting +all running containers. + See the section :ref:`Compatibility With Podman Versions` for a table of Ceph versions that are compatible with Podman. Not every version of Podman is compatible with @@ -47,9 +51,9 @@ up-to-date cephadm. There are two ways to get the initial ``cephadm``: Choose either the distribution-specific method or the curl-based method. Do not attempt to use both these methods on one system. -.. note:: Recent versions of cephadm are based on a compilation of source files. +.. note:: Recent versions of cephadm are distributed as an executable compiled from source code. Unlike for earlier versions of Ceph it is no longer sufficient to copy a - single source file from Ceph's git tree and run it. If you wish to run + single script from Ceph's git tree and run it. If you wish to run cephadm using a development version you should create your own build of cephadm. See :ref:`compiling-cephadm` for details on how to create your own standalone cephadm executable. @@ -91,67 +95,80 @@ that case, you can install cephadm directly. For example: .. _cephadm_install_curl: -curl-based installation ------------------------ +Using curl to install cephadm +----------------------------- -* First, determine what version of Ceph you will need. You can use the releases - page to find the `latest active releases `_. - For example, we might look at that page and find that ``18.2.0`` is the latest - active release. +#. Determine which version of Ceph you will install. Use the releases page to + find the `latest active releases + `_. For example, + you might find that ``18.2.1`` is the latest active release. -* Use ``curl`` to fetch a build of cephadm for that release. +#. Use ``curl`` to fetch a build of cephadm for that release. - .. prompt:: bash # - :substitutions: + .. prompt:: bash # + :substitutions: - CEPH_RELEASE=18.2.0 # replace this with the active release - curl --silent --remote-name --location https://download.ceph.com/rpm-${CEPH_RELEASE}/el9/noarch/cephadm + CEPH_RELEASE=18.2.0 # replace this with the active release + curl --silent --remote-name --location https://download.ceph.com/rpm-${CEPH_RELEASE}/el9/noarch/cephadm - Ensure the ``cephadm`` file is executable: +#. Use ``chmod`` to make the ``cephadm`` file executable: - .. prompt:: bash # + .. prompt:: bash # - chmod +x cephadm + chmod +x cephadm - This file can be run directly from the current directory: + After ``chmod`` has been run on cephadm, it can be run from the current + directory: - .. prompt:: bash # + .. 
prompt:: bash # - ./cephadm + ./cephadm -* If you encounter any issues with running cephadm due to errors including - the message ``bad interpreter``, then you may not have Python or - the correct version of Python installed. The cephadm tool requires Python 3.6 - and above. You can manually run cephadm with a particular version of Python by - prefixing the command with your installed Python version. For example: +cephadm Requires Python 3.6 or Later +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``cephadm`` requires Python 3.6 or later. If you encounter difficulties + running ``cephadm``, then you may not have Python or the correct version of + Python installed. This includes any errors that include the message ``bad + interpreter``. + + You can manually run cephadm with a particular version of Python by prefixing + the command with your installed Python version. For example: .. prompt:: bash # - :substitutions: python3.8 ./cephadm -* Although the standalone cephadm is sufficient to get a cluster started, it is - convenient to have the ``cephadm`` command installed on the host. To install - the packages that provide the ``cephadm`` command, run the following - commands: +Installing cephadm on the Host +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - .. prompt:: bash # - :substitutions: +Although the standalone ``cephadm`` is sufficient to bootstrap a cluster, it is +best to have the ``cephadm`` command installed on the host. To install the +packages that provide the ``cephadm`` command, run the following commands: - ./cephadm add-repo --release |stable-release| - ./cephadm install +#. Add the repository: - Confirm that ``cephadm`` is now in your PATH by running ``which``: + .. prompt:: bash # - .. prompt:: bash # + ./cephadm add-repo --release |stable-release| - which cephadm +#. Run ``cephadm install``: - A successful ``which cephadm`` command will return this: + .. prompt:: bash # + + ./cephadm install - .. code-block:: bash +#. Confirm that ``cephadm`` is now in your PATH by running ``which``: - /usr/sbin/cephadm + .. prompt:: bash # + + which cephadm + + A successful ``which cephadm`` command will return this: + + .. code-block:: bash + + /usr/sbin/cephadm Bootstrap a new cluster ======================= @@ -162,7 +179,7 @@ What to know before you bootstrap The first step in creating a new Ceph cluster is running the ``cephadm bootstrap`` command on the Ceph cluster's first host. The act of running the ``cephadm bootstrap`` command on the Ceph cluster's first host creates the Ceph -cluster's first "monitor daemon", and that monitor daemon needs an IP address. +cluster's first Monitor daemon. You must pass the IP address of the Ceph cluster's first host to the ``ceph bootstrap`` command, so you'll need to know the IP address of that host. @@ -183,13 +200,13 @@ Run the ``ceph bootstrap`` command: This command will: -* Create a monitor and manager daemon for the new cluster on the local +* Create a Monitor and a Manager daemon for the new cluster on the local host. * Generate a new SSH key for the Ceph cluster and add it to the root user's ``/root/.ssh/authorized_keys`` file. * Write a copy of the public key to ``/etc/ceph/ceph.pub``. * Write a minimal configuration file to ``/etc/ceph/ceph.conf``. This - file is needed to communicate with the new cluster. + file is needed to communicate with Ceph daemons. * Write a copy of the ``client.admin`` administrative (privileged!) secret key to ``/etc/ceph/ceph.client.admin.keyring``. * Add the ``_admin`` label to the bootstrap host. 
By default, any host @@ -201,7 +218,7 @@ This command will: Further information about cephadm bootstrap ------------------------------------------- -The default bootstrap behavior will work for most users. But if you'd like +The default bootstrap process will work for most users. But if you'd like immediately to know more about ``cephadm bootstrap``, read the list below. Also, you can run ``cephadm bootstrap -h`` to see all of ``cephadm``'s @@ -212,15 +229,15 @@ available options. journald. If you want Ceph to write traditional log files to ``/var/log/ceph/$fsid``, use the ``--log-to-file`` option during bootstrap. -* Larger Ceph clusters perform better when (external to the Ceph cluster) +* Larger Ceph clusters perform best when (external to the Ceph cluster) public network traffic is separated from (internal to the Ceph cluster) cluster traffic. The internal cluster traffic handles replication, recovery, and heartbeats between OSD daemons. You can define the :ref:`cluster network` by supplying the ``--cluster-network`` option to the ``bootstrap`` - subcommand. This parameter must define a subnet in CIDR notation (for example + subcommand. This parameter must be a subnet in CIDR notation (for example ``10.90.90.0/24`` or ``fe80::/64``). -* ``cephadm bootstrap`` writes to ``/etc/ceph`` the files needed to access +* ``cephadm bootstrap`` writes to ``/etc/ceph`` files needed to access the new cluster. This central location makes it possible for Ceph packages installed on the host (e.g., packages that give access to the cephadm command line interface) to find these files. @@ -241,12 +258,12 @@ available options. EOF $ ./cephadm bootstrap --config initial-ceph.conf ... -* The ``--ssh-user **`` option makes it possible to choose which SSH +* The ``--ssh-user **`` option makes it possible to designate which SSH user cephadm will use to connect to hosts. The associated SSH key will be added to ``/home/**/.ssh/authorized_keys``. The user that you designate with this option must have passwordless sudo access. -* If you are using a container on an authenticated registry that requires +* If you are using a container image from a registry that requires login, you may add the argument: * ``--registry-json `` @@ -257,7 +274,7 @@ available options. Cephadm will attempt to log in to this registry so it can pull your container and then store the login info in its config database. Other hosts added to - the cluster will then also be able to make use of the authenticated registry. + the cluster will then also be able to make use of the authenticated container registry. * See :ref:`cephadm-deployment-scenarios` for additional examples for using ``cephadm bootstrap``. @@ -322,7 +339,7 @@ Add all hosts to the cluster by following the instructions in By default, a ``ceph.conf`` file and a copy of the ``client.admin`` keyring are maintained in ``/etc/ceph`` on all hosts that have the ``_admin`` label. This -label is initially applied only to the bootstrap host. We usually recommend +label is initially applied only to the bootstrap host. We recommend that one or more other hosts be given the ``_admin`` label so that the Ceph CLI (for example, via ``cephadm shell``) is easily accessible on multiple hosts. 
To add the ``_admin`` label to additional host(s), run a command of the following form: @@ -335,9 +352,10 @@ the ``_admin`` label to additional host(s), run a command of the following form: Adding additional MONs ====================== -A typical Ceph cluster has three or five monitor daemons spread +A typical Ceph cluster has three or five Monitor daemons spread across different hosts. We recommend deploying five -monitors if there are five or more nodes in your cluster. +Monitors if there are five or more nodes in your cluster. Most clusters do not +benefit from seven or more Monitors. Please follow :ref:`deploy_additional_monitors` to deploy additional MONs. @@ -362,12 +380,12 @@ See :ref:`osd_autotune`. To deploy hyperconverged Ceph with TripleO, please refer to the TripleO documentation: `Scenario: Deploy Hyperconverged Ceph `_ -In other cases where the cluster hardware is not exclusively used by Ceph (hyperconverged), +In other cases where the cluster hardware is not exclusively used by Ceph (converged infrastructure), reduce the memory consumption of Ceph like so: .. prompt:: bash # - # hyperconverged only: + # converged only: ceph config set mgr mgr/cephadm/autotune_memory_target_ratio 0.2 Then enable memory autotuning: @@ -396,9 +414,11 @@ Different deployment scenarios Single host ----------- -To configure a Ceph cluster to run on a single host, use the -``--single-host-defaults`` flag when bootstrapping. For use cases of this, see -:ref:`one-node-cluster`. +To deploy a Ceph cluster running on a single host, use the +``--single-host-defaults`` flag when bootstrapping. For use cases, see +:ref:`one-node-cluster`. Such clusters are generally not suitable for +production. + The ``--single-host-defaults`` flag sets the following configuration options:: @@ -415,8 +435,8 @@ Deployment in an isolated environment ------------------------------------- You might need to install cephadm in an environment that is not connected -directly to the internet (such an environment is also called an "isolated -environment"). This can be done if a custom container registry is used. Either +directly to the Internet (an "isolated" or "airgapped" +environment). This requires the use of a custom container registry. Either of two kinds of custom container registry can be used in this scenario: (1) a Podman-based or Docker-based insecure registry, or (2) a secure registry. @@ -565,9 +585,9 @@ in order to have cephadm use them for SSHing between cluster hosts Note that this setup does not require installing the corresponding public key from the private key passed to bootstrap on other nodes. In fact, cephadm will reject the ``--ssh-public-key`` argument when passed along with ``--ssh-signed-cert``. -Not because having the public key breaks anything, but because it is not at all needed -for this setup and it helps bootstrap differentiate if the user wants the CA signed -keys setup or standard pubkey encryption. What this means is, SSH key rotation +This is not because having the public key breaks anything, but rather because it is not at all needed +and helps the bootstrap command differentiate if the user wants the CA signed +keys setup or standard pubkey encryption. What this means is that SSH key rotation would simply be a matter of getting another key signed by the same CA and providing cephadm with the new private key and signed cert. 
No additional distribution of keys to cluster nodes is needed after the initial setup of the CA key as a trusted key, diff --git a/doc/cephadm/operations.rst b/doc/cephadm/operations.rst index d6323c04e0c3..22d91c39b062 100644 --- a/doc/cephadm/operations.rst +++ b/doc/cephadm/operations.rst @@ -375,7 +375,7 @@ One or more hosts have failed the basic cephadm host check, which verifies that (1) the host is reachable and cephadm can be executed there, and (2) that the host satisfies basic prerequisites, like a working container runtime (podman or docker) and working time synchronization. -If this test fails, cephadm will no be able to manage services on that host. +If this test fails, cephadm will not be able to manage services on that host. You can manually run this check by running the following command: @@ -397,15 +397,15 @@ You can disable this health warning by running the following command: Cluster Configuration Checks ---------------------------- -Cephadm periodically scans each of the hosts in the cluster in order -to understand the state of the OS, disks, NICs etc. These facts can -then be analysed for consistency across the hosts in the cluster to +Cephadm periodically scans each host in the cluster in order +to understand the state of the OS, disks, network interfaces, etc. This information can +then be analyzed for consistency across the hosts in the cluster to identify any configuration anomalies. Enabling Cluster Configuration Checks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The configuration checks are an **optional** feature, and are enabled +These configuration checks are an **optional** feature, and are enabled by running the following command: .. prompt:: bash # @@ -415,7 +415,7 @@ by running the following command: States Returned by Cluster Configuration Checks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The configuration checks are triggered after each host scan (1m). The +Configuration checks are triggered after each host scan. 
The cephadm log entries will show the current state and outcome of the configuration checks as follows: @@ -452,14 +452,14 @@ To list all the configuration checks and their current states, run the following # ceph cephadm config-check ls NAME HEALTHCHECK STATUS DESCRIPTION - kernel_security CEPHADM_CHECK_KERNEL_LSM enabled checks SELINUX/Apparmor profiles are consistent across cluster hosts - os_subscription CEPHADM_CHECK_SUBSCRIPTION enabled checks subscription states are consistent for all cluster hosts - public_network CEPHADM_CHECK_PUBLIC_MEMBERSHIP enabled check that all hosts have a NIC on the Ceph public_network + kernel_security CEPHADM_CHECK_KERNEL_LSM enabled check that SELINUX/Apparmor profiles are consistent across cluster hosts + os_subscription CEPHADM_CHECK_SUBSCRIPTION enabled check that subscription states are consistent for all cluster hosts + public_network CEPHADM_CHECK_PUBLIC_MEMBERSHIP enabled check that all hosts have a network interface on the Ceph public_network osd_mtu_size CEPHADM_CHECK_MTU enabled check that OSD hosts share a common MTU setting - osd_linkspeed CEPHADM_CHECK_LINKSPEED enabled check that OSD hosts share a common linkspeed - network_missing CEPHADM_CHECK_NETWORK_MISSING enabled checks that the cluster/public networks defined exist on the Ceph hosts - ceph_release CEPHADM_CHECK_CEPH_RELEASE enabled check for Ceph version consistency - ceph daemons should be on the same release (unless upgrade is active) - kernel_version CEPHADM_CHECK_KERNEL_VERSION enabled checks that the MAJ.MIN of the kernel on Ceph hosts is consistent + osd_linkspeed CEPHADM_CHECK_LINKSPEED enabled check that OSD hosts share a common network link speed + network_missing CEPHADM_CHECK_NETWORK_MISSING enabled check that the cluster/public networks as defined exist on the Ceph hosts + ceph_release CEPHADM_CHECK_CEPH_RELEASE enabled check for Ceph version consistency: all Ceph daemons should be the same release unless upgrade is in progress + kernel_version CEPHADM_CHECK_KERNEL_VERSION enabled checks that the maj.min version of the kernel is consistent across Ceph hosts The name of each configuration check can be used to enable or disable a specific check by running a command of the following form: : @@ -483,31 +483,31 @@ flagged as an anomaly and a healthcheck (WARNING) state raised. CEPHADM_CHECK_SUBSCRIPTION ~~~~~~~~~~~~~~~~~~~~~~~~~~ -This check relates to the status of vendor subscription. This check is -performed only for hosts using RHEL, but helps to confirm that all hosts are +This check relates to the status of OS vendor subscription. This check is +performed only for hosts using RHEL and helps to confirm that all hosts are covered by an active subscription, which ensures that patches and updates are available. CEPHADM_CHECK_PUBLIC_MEMBERSHIP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -All members of the cluster should have NICs configured on at least one of the +All members of the cluster should have a network interface configured on at least one of the public network subnets. Hosts that are not on the public network will rely on routing, which may affect performance. CEPHADM_CHECK_MTU ~~~~~~~~~~~~~~~~~ -The MTU of the NICs on OSDs can be a key factor in consistent performance. This +The MTU of the network interfaces on OSD hosts can be a key factor in consistent performance. This check examines hosts that are running OSD services to ensure that the MTU is -configured consistently within the cluster. This is determined by establishing +configured consistently within the cluster. 
This is determined by determining the MTU setting that the majority of hosts is using. Any anomalies result in a -Ceph health check. +health check. CEPHADM_CHECK_LINKSPEED ~~~~~~~~~~~~~~~~~~~~~~~ -This check is similar to the MTU check. Linkspeed consistency is a factor in -consistent cluster performance, just as the MTU of the NICs on the OSDs is. -This check determines the linkspeed shared by the majority of OSD hosts, and a -health check is run for any hosts that are set at a lower linkspeed rate. +This check is similar to the MTU check. Link speed consistency is a factor in +consistent cluster performance, as is the MTU of the OSD node network interfaces. +This check determines the link speed shared by the majority of OSD hosts, and a +health check is run for any hosts that are set at a lower link speed rate. CEPHADM_CHECK_NETWORK_MISSING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -517,15 +517,14 @@ a health check is raised. CEPHADM_CHECK_CEPH_RELEASE ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Under normal operations, the Ceph cluster runs daemons under the same ceph -release (that is, the Ceph cluster runs all daemons under (for example) -Octopus). This check determines the active release for each daemon, and +Under normal operations, the Ceph cluster runs daemons that are of the same Ceph +release (for example, Reef). This check determines the active release for each daemon, and reports any anomalies as a healthcheck. *This check is bypassed if an upgrade -process is active within the cluster.* +is in process.* CEPHADM_CHECK_KERNEL_VERSION ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The OS kernel version (maj.min) is checked for consistency across the hosts. +The OS kernel version (maj.min) is checked for consistency across hosts. The kernel version of the majority of the hosts is used as the basis for identifying anomalies. @@ -602,6 +601,13 @@ The resulting keyring file is: -rw-r-----. 1 qemu qemu 156 Apr 21 08:47 /etc/ceph/client.client.rbd.keyring +By default, cephadm will also manage ``/etc/ceph/ceph.conf`` on hosts where it writes the keyrings. +This feature can be suppressed by passing ``--no-ceph-conf`` when setting the keyring. + +.. prompt:: bash # + + ceph orch client-keyring set client.foo label:foo 0:0 --no-ceph-conf + Disabling Management of a Keyring File -------------------------------------- @@ -659,6 +665,51 @@ For example, to distribute configs to hosts with the ``bare_config`` label, run (See :ref:`orchestrator-cli-placement-spec` for more information about placement specs.) + +Limiting Password-less sudo Access +================================== + +By default, the cephadm install guide recommends enabling password-less +``sudo`` for the cephadm user. This option is the most flexible and +future-proof but may not be preferred in all environments. An administrator can +restrict ``sudo`` to only running an exact list of commands without password +access. Note that this list may change between Ceph versions and +administrators choosing this option should read the release notes and review +this list in the destination version of the Ceph documentation. If the list +differs one must extend the list of password-less ``sudo`` commands prior to +upgrade. + +Commands requiring password-less sudo support: + + - ``chmod`` + - ``chown`` + - ``ls`` + - ``mkdir`` + - ``mv`` + - ``rm`` + - ``sysctl`` + - ``touch`` + - ``true`` + - ``which`` (see note) + - ``/usr/bin/cephadm`` or python executable (see note) + +.. 
note:: Typically cephadm will execute ``which`` to determine what python3 + command is available and then use the command returned by ``which`` in + subsequent commands. + Before configuring ``sudo`` run ``which python3`` to determine what + python command to add to the ``sudo`` configuration. + In some rare configurations ``/usr/bin/cephadm`` will be used instead. + + +Configuring the ``sudoers`` file can be performed using a tool like ``visudo`` +and adding or replacing a user configuration line such as the following: + +.. code-block:: + + # assuming the cephadm user is named "ceph" + ceph ALL=(ALL) NOPASSWD:/usr/bin/chmod,/usr/bin/chown,/usr/bin/ls,/usr/bin/mkdir,/usr/bin/mv,/usr/bin/rm,/usr/sbin/sysctl,/usr/bin/touch,/usr/bin/true,/usr/bin/which,/usr/bin/cephadm,/usr/bin/python3 + + Purging a cluster ================= @@ -683,3 +734,72 @@ Purge ceph daemons from all hosts in the cluster # For each host: cephadm rm-cluster --force --zap-osds --fsid + + +Replacing a device ================== + +The ``ceph orch device replace`` command automates the process of replacing the underlying device of an OSD. +Previously, this process required manual intervention at various stages. +With this new command, all necessary operations are performed automatically, streamlining the replacement process +and improving the overall user experience. + +.. note:: This command supports only OSDs deployed with LVM + +.. prompt:: bash # + + ceph orch device replace + +If the device being replaced is shared by multiple OSDs (for example, a DB/WAL device shared by several OSDs), the orchestrator will warn you. + +.. prompt:: bash # + + [ceph: root@ceph /]# ceph orch device replace osd-1 /dev/vdd + + Error EINVAL: /dev/vdd is a shared device. + Replacing /dev/vdd implies destroying OSD(s): ['0', '1']. + Please, *be very careful*, this can be a very dangerous operation. + If you know what you are doing, pass --yes-i-really-mean-it + +If you know what you are doing, you can go ahead and pass ``--yes-i-really-mean-it``. + +.. prompt:: bash # + + [ceph: root@ceph /]# ceph orch device replace osd-1 /dev/vdd --yes-i-really-mean-it + Scheduled to destroy osds: ['0', '1'] and mark /dev/vdd as being replaced. + +``cephadm`` will have ``ceph-volume`` zap and destroy all related devices and mark the corresponding OSDs as ``destroyed`` so that the +OSD IDs will be preserved: + +.. prompt:: bash # + + [ceph: root@ceph-1 /]# ceph osd tree + ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF + -1 0.97659 root default + -3 0.97659 host devel-1 + 0 hdd 0.29300 osd.0 destroyed 1.00000 1.00000 + 1 hdd 0.29300 osd.1 destroyed 1.00000 1.00000 + 2 hdd 0.19530 osd.2 up 1.00000 1.00000 + 3 hdd 0.19530 osd.3 up 1.00000 1.00000 + +The device being replaced is then reported as ``being replaced``, which prevents ``cephadm`` from redeploying the OSDs too soon: + +.. prompt:: bash # + + [ceph: root@ceph-1 /]# ceph orch device ls + HOST PATH TYPE DEVICE ID SIZE AVAILABLE REFRESHED REJECT REASONS + osd-1 /dev/vdb hdd 200G Yes 13s ago + osd-1 /dev/vdc hdd 200G Yes 13s ago + osd-1 /dev/vdd hdd 200G Yes 13s ago Is being replaced + osd-1 /dev/vde hdd 200G No 13s ago Has a FileSystem, Insufficient space (<10 extents) on vgs, LVM detected + osd-1 /dev/vdf hdd 200G No 13s ago Has a FileSystem, Insufficient space (<10 extents) on vgs, LVM detected + +If for any reason you need to clear the 'device replace header' on a device, then you can use ``ceph orch device replace --clear``: + +.. 
prompt:: bash # + + [ceph: root@devel-1 /]# ceph orch device replace devel-1 /dev/vdk --clear + Replacement header cleared on /dev/vdk + [ceph: root@devel-1 /]# + +After that, ``cephadm`` will redeploy the OSD service spec within a few minutes (unless the service is set to ``unmanaged``). diff --git a/doc/cephadm/services/index.rst b/doc/cephadm/services/index.rst index 82f83bfac8e7..4df9933f8e74 100644 --- a/doc/cephadm/services/index.rst +++ b/doc/cephadm/services/index.rst @@ -19,6 +19,9 @@ for details on individual services: monitoring snmp-gateway tracing + smb + mgmt-gateway + oauth2-proxy Service Status ============== @@ -354,10 +357,14 @@ Or in YAML: * See :ref:`orchestrator-host-labels` +.. _cephadm-services-placement-by-pattern-matching: + Placement by pattern matching ----------------------------- -Daemons can be placed on hosts as well: +Daemons can be placed on hosts using a host pattern as well. +By default, the host pattern is matched using fnmatch which supports +UNIX shell-style wildcards (see https://docs.python.org/3/library/fnmatch.html): .. prompt:: bash # @@ -385,6 +392,26 @@ Or in YAML: placement: host_pattern: "*" +The host pattern also has support for using a regex. To use a regex, you +must either add "regex: " to the start of the pattern when using the +command line, or specify a ``pattern_type`` field to be "regex" +when using YAML. + +On the command line: + +.. prompt:: bash # + + ceph orch apply prometheus --placement='regex:FOO[0-9]|BAR[0-9]' + +In YAML: + +.. code-block:: yaml + + service_type: prometheus + placement: + host_pattern: + pattern: 'FOO[0-9]|BAR[0-9]' + pattern_type: regex Changing the number of daemons ------------------------------ diff --git a/doc/cephadm/services/mgmt-gateway.rst b/doc/cephadm/services/mgmt-gateway.rst new file mode 100644 index 000000000000..2b88d55952e9 --- /dev/null +++ b/doc/cephadm/services/mgmt-gateway.rst @@ -0,0 +1,196 @@ +.. _deploy-cephadm-mgmt-gateway: + +================== +Management Gateway +================== + +Deploying mgmt-gateway +====================== + +In Ceph releases beginning with Squid, the `mgmt-gateway` service introduces a new design for Ceph applications +based on a modular, service-based architecture. This service, managed by cephadm and built on top of nginx +(an open-source, high-performance web server), acts as the new front-end and single entry point to the +Ceph cluster. The `mgmt-gateway` provides unified access to all Ceph applications, including the Ceph dashboard +and monitoring stack. Employing nginx enhances security and simplifies access management due to its robust +community support and high-security standards. The `mgmt-gateway` service acts as a reverse proxy that routes +requests to the appropriate Ceph application instances. + +In order to deploy the mgmt-gateway service, use the following command: + +.. prompt:: bash # + + ceph orch apply mgmt-gateway [--placement ...] ... + +Once applied, cephadm will reconfigure specific running daemons (such as monitoring) to run behind the +newly created service. External access to those services will not be possible anymore. Access will be +consolidated behind the new service endpoint: `https://:`. + + +Benefits of the mgmt-gateway service +==================================== +* ``Unified Access``: Consolidated access through nginx improves security and provides a single entry point to services. +* ``Improved user experience``: Users no longer need to know where each application is running (IP/host). 
+* ``High Availability for dashboard``: nginx HA mechanisms are used to provide high availability for the Ceph dashboard. +* ``High Availability for monitoring``: nginx HA mechanisms are used to provide high availability for monitoring. + +Security enhancements +===================== + +Once the `mgmt-gateway` service is deployed, users cannot access monitoring services without authenticating through the +Ceph dashboard. + + +High availability enhancements +============================== +nginx HA mechanisms are used to provide high availability for all the Ceph management applications including the Ceph dashboard +and monitoring stack. In the case of the Ceph dashboard, users no longer need to know where the active manager is running. +`mgmt-gateway` handles manager failover transparently and redirects the user to the active manager. In the case of +monitoring, `mgmt-gateway` takes care of handling HA when several instances of Prometheus, Alertmanager or Grafana are +available. The reverse proxy will automatically detect healthy instances and use them to process user requests. + + +High Availability for mgmt-gateway service +========================================== + +In addition to providing high availability for the underlying backend services, the mgmt-gateway +service itself can be configured for high availability, ensuring that the system remains resilient +even if certain core components for the service fail. + +Multiple mgmt-gateway instances can be deployed in an active/standby configuration using keepalived +for seamless failover. The `oauth2-proxy` service can be deployed as multiple stateless instances, +with nginx acting as a load balancer across them using a round-robin strategy. This setup removes +single points of failure and enhances the resilience of the entire system. + +In this setup, the underlying internal services follow the same high availability mechanism. Instead of +directly accessing the `mgmt-gateway` internal endpoint, services use the virtual IP specified in the spec. +This ensures that the high availability mechanism for `mgmt-gateway` is transparent to other services. + +Example Configuration for High Availability + +To deploy the mgmt-gateway in a high availability setup, here is an example of the specification files required: + +`mgmt-gateway` Configuration: + +.. code-block:: yaml + + service_type: mgmt-gateway + placement: + label: mgmt + spec: + enable_auth: true + virtual_ip: 192.168.100.220 + +`Ingress` Configuration for Keepalived: + +.. code-block:: yaml + + service_type: ingress + service_id: ingress-mgmt-gw + placement: + label: mgmt + virtual_ip: 192.168.100.220 + backend_service: mgmt-gateway + keepalive_only: true + +The number of deployed instances is determined by the number of hosts with the mgmt label. +The ingress is configured in `keepalive_only` mode, with labels ensuring that any changes to +the mgmt-gateway daemons are replicated to the corresponding keepalived instances. Additionally, +the `virtual_ip` parameter must be identical in both specifications. + + +Accessing services with mgmt-gateway +==================================== + +Once the `mgmt-gateway` service is deployed, direct access to the monitoring services will not be allowed anymore. +Applications including Prometheus, Grafana and Alertmanager are now accessible through links +from `Administration > Services`. + + +Service Specification +===================== + +A mgmt-gateway service can be applied using a specification. An example in YAML follows: + +.. 
code-block:: yaml + + service_type: mgmt-gateway + service_id: gateway + placement: + hosts: + - ceph0 + spec: + port: 5000 + ssl_protocols: + - TLSv1.2 + - TLSv1.3 + - ... + ssl_ciphers: + - AES128-SHA + - AES256-SHA + - ... + ssl_certificate: | + -----BEGIN CERTIFICATE----- + MIIDtTCCAp2gAwIBAgIYMC4xNzc1NDQxNjEzMzc2MjMyXzxvQ7EcMA0GCSqGSIb3 + DQEBCwUAMG0xCzAJBgNVBAYTAlVTMQ0wCwYDVQQIDARVdGFoMRcwFQYDVQQHDA5T + [...] + -----END CERTIFICATE----- + ssl_certificate_key: | + -----BEGIN PRIVATE KEY----- + MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC5jdYbjtNTAKW4 + /CwQr/7wOiLGzVxChn3mmCIF3DwbL/qvTFTX2d8bDf6LjGwLYloXHscRfxszX/4h + [...] + -----END PRIVATE KEY----- + +Fields specific to the ``spec`` section of the mgmt-gateway service are described below. + +.. py:currentmodule:: ceph.deployment.service_spec + +.. autoclass:: MgmtGatewaySpec + :members: + +.. warning:: + + TLSv1.3 is considered safe at this moment and includes a set of secure ciphers by default. + When configuring SSL/TLS ciphers for older versions, especially TLSv1.2, it is crucial to + use only a subset of secure ciphers. Using weak or outdated ciphers can significantly + compromise the security of your system. + + Any alteration of the cipher list for SSL/TLS configurations is the responsibility of the + system administrator. Avoid modifying these lists without a thorough understanding of the + implications. Incorrect configurations can lead to vulnerabilities such as weak encryption, + lack of forward secrecy, and susceptibility to various attacks. Always refer to up-to-date + security guidelines and best practices when configuring SSL/TLS settings. + + +The specification can then be applied by running the following command: + +.. prompt:: bash # + + ceph orch apply -i mgmt-gateway.yaml + + +Limitations +=========== + +* Services must bind to the appropriate ports based on the applications being proxied. Ensure that there + are no port conflicts that might disrupt service availability. + + +Default images +~~~~~~~~~~~~~~ + +The `mgmt-gateway` service internally makes use of nginx reverse proxy. The following container image is used by default: + +:: + + DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:1.26.1' + +Admins can specify the image to be used by changing the `container_image_nginx` cephadm module option. If there were already +running daemon(s) you must redeploy the daemon(s) in order to have them actually use the new image. + +For example: + +.. code-block:: bash + + ceph config set mgr mgr/cephadm/container_image_nginx + ceph orch redeploy mgmt-gateway diff --git a/doc/cephadm/services/monitoring.rst b/doc/cephadm/services/monitoring.rst index 33bffdc0157e..0e2c62105fac 100644 --- a/doc/cephadm/services/monitoring.rst +++ b/doc/cephadm/services/monitoring.rst @@ -83,6 +83,37 @@ steps below: ceph orch apply grafana +Enabling security for the monitoring stack +---------------------------------------------- + +By default, in a cephadm-managed cluster, the monitoring components are set up and configured without enabling security measures. +While this suffices for certain deployments, others with strict security needs may find it necessary to protect the +monitoring stack against unauthorized access. In such cases, cephadm relies on a specific configuration parameter, +`mgr/cephadm/secure_monitoring_stack`, which toggles the security settings for all monitoring components. To activate security +measures, set this option to ``true`` with a command of the following form: + + .. 
prompt:: bash # + + ceph config set mgr mgr/cephadm/secure_monitoring_stack true + +This change will trigger a sequence of reconfigurations across all monitoring daemons, typically requiring +a few minutes until all components are fully operational. The updated secure configuration includes the following modifications: + +#. Prometheus: basic authentication is required to access the web portal and TLS is enabled for secure communication. +#. Alertmanager: basic authentication is required to access the web portal and TLS is enabled for secure communication. +#. Node Exporter: TLS is enabled for secure communication. +#. Grafana: TLS is enabled and authentication is required to access the datasource information. + +In this secure setup, users will need to set up authentication +(username/password) for both Prometheus and Alertmanager. By default, the +username and password are set to ``admin``/``admin``. The user can change these +values with the commands ``ceph orch prometheus set-credentials`` and ``ceph +orch alertmanager set-credentials`` respectively. These commands offer the +flexibility to input the username/password either as parameters or via a JSON +file, which enhances security. Additionally, Cephadm provides the commands +`orch prometheus get-credentials` and `orch alertmanager get-credentials` to +retrieve the current credentials. + .. _cephadm-monitoring-centralized-logs: Centralized Logging in Ceph @@ -129,12 +160,44 @@ example spec file: .. _cephadm_monitoring-images: +.. _cephadm_default_images: + +Default images +~~~~~~~~~~~~~~ + +*The information in this section was developed by Eugen Block in a thread on +the [ceph-users] mailing list in April of 2024. The thread can be viewed here: +``https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/QGC66QIFBKRTPZAQMQEYFXOGZJ7RLWBN/``.* + +``cephadm`` stores a local copy of the ``cephadm`` binary in +``/var/lib/ceph/{FSID}/cephadm.{DIGEST}``, where ``{DIGEST}`` is an alphanumeric +string representing the currently-running version of Ceph. + +To see the default container images, run a command of the following form: + +.. prompt:: bash # + + grep -E "DEFAULT.*IMAGE" /var/lib/ceph/{FSID}/cephadm.{DIGEST} + +:: + + DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.51.0' + DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.9.5' + DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.9.5' + DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.7.0' + DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.27.0' + DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/grafana:10.4.0' + +Default monitoring images are specified in +``/src/cephadm/cephadmlib/constants.py`` and in +``/src/pybind/mgr/cephadm/module.py``. + Using custom images ~~~~~~~~~~~~~~~~~~~ It is possible to install or upgrade monitoring components based on other -images. To do so, the name of the image to be used needs to be stored in the -configuration first. The following configuration options are available. +images. The ID of the image that you plan to use must be stored in the +configuration. The following configuration options are available: - ``container_image_prometheus`` - ``container_image_grafana`` @@ -150,51 +213,53 @@ configuration first. The following configuration options are available. - ``container_image_jaeger_collector`` - ``container_image_jaeger_query`` -Custom images can be set with the ``ceph config`` command - -.. code-block:: bash +Custom images can be set with the ``ceph config`` command. 
To set custom images, run a command of the following form: + +.. prompt:: bash # - ceph config set mgr mgr/cephadm/ + ceph config set mgr mgr/cephadm/ -For example +For example: -.. code-block:: bash +.. prompt:: bash # - ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1 + ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1 -If there were already running monitoring stack daemon(s) of the type whose -image you've changed, you must redeploy the daemon(s) in order to have them -actually use the new image. +If you were already running monitoring stack daemon(s) of the same image type +that you changed, then you must redeploy the daemon(s) in order to make them +use the new image. -For example, if you had changed the prometheus image +For example, if you changed the Prometheus image, you would have to run the +following command in order to pick up the changes: .. prompt:: bash # - ceph orch redeploy prometheus + ceph orch redeploy prometheus .. note:: By setting a custom image, the default value will be overridden (but not - overwritten). The default value changes when updates become available. - By setting a custom image, you will not be able to update the component - you have set the custom image for automatically. You will need to - manually update the configuration (image name and tag) to be able to - install updates. + overwritten). The default value will change when an update becomes + available. If you set a custom image, you will not be able automatically + to update the component you have modified with the custom image. You will + need to manually update the configuration (that includes the image name + and the tag) to be able to install updates. - If you choose to go with the recommendations instead, you can reset the - custom image you have set before. After that, the default value will be - used again. Use ``ceph config rm`` to reset the configuration option + If you choose to accept the recommendations, you can reset the custom + image that you have set before. If you do this, the default value will be + used again. Use ``ceph config rm`` to reset the configuration option, in + a command of the following form: - .. code-block:: bash + .. prompt:: bash # - ceph config rm mgr mgr/cephadm/ + ceph config rm mgr mgr/cephadm/ - For example + For example: - .. code-block:: bash + .. prompt:: bash # - ceph config rm mgr mgr/cephadm/container_image_prometheus + ceph config rm mgr mgr/cephadm/container_image_prometheus See also :ref:`cephadm-airgap`. @@ -214,7 +279,7 @@ definition and management of the embedded Prometheus service. The endpoint liste ``https://:8765/sd/`` (the port is configurable through the variable ``service_discovery_port``) and returns scrape target information in `http_sd_config format -`_ +`_ Customers with external monitoring stack can use `ceph-mgr` service discovery endpoint to get scraping configuration. Root certificate of the server can be obtained by the @@ -239,14 +304,24 @@ Option names """""""""""" The following templates for files that will be generated by cephadm can be -overridden. These are the names to be used when storing with ``ceph config-key -set``: +overridden. 
These are the names to be used when storing with ``ceph config-key set``: - ``services/alertmanager/alertmanager.yml`` +- ``services/alertmanager/web.yml`` - ``services/grafana/ceph-dashboard.yml`` - ``services/grafana/grafana.ini`` +- ``services/ingress/haproxy.cfg`` +- ``services/ingress/keepalived.conf`` +- ``services/iscsi/iscsi-gateway.cfg`` +- ``services/mgmt-gateway/external_server.conf`` +- ``services/mgmt-gateway/internal_server.conf`` +- ``services/mgmt-gateway/nginx.conf`` +- ``services/nfs/ganesha.conf`` +- ``services/node-exporter/web.yml`` +- ``services/nvmeof/ceph-nvmeof.conf`` +- ``services/oauth2-proxy/oauth2-proxy.conf`` - ``services/prometheus/prometheus.yml`` -- ``services/prometheus/alerting/custom_alerts.yml`` +- ``services/prometheus/web.yml`` - ``services/loki.yml`` - ``services/promtail.yml`` @@ -254,9 +329,21 @@ You can look up the file templates that are currently used by cephadm in ``src/pybind/mgr/cephadm/templates``: - ``services/alertmanager/alertmanager.yml.j2`` +- ``services/alertmanager/web.yml.j2`` - ``services/grafana/ceph-dashboard.yml.j2`` - ``services/grafana/grafana.ini.j2`` +- ``services/ingress/haproxy.cfg.j2`` +- ``services/ingress/keepalived.conf.j2`` +- ``services/iscsi/iscsi-gateway.cfg.j2`` +- ``services/mgmt-gateway/external_server.conf.j2`` +- ``services/mgmt-gateway/internal_server.conf.j2`` +- ``services/mgmt-gateway/nginx.conf.j2`` +- ``services/nfs/ganesha.conf.j2`` +- ``services/node-exporter/web.yml.j2`` +- ``services/nvmeof/ceph-nvmeof.conf.j2`` +- ``services/oauth2-proxy/oauth2-proxy.conf.j2`` - ``services/prometheus/prometheus.yml.j2`` +- ``services/prometheus/web.yml.j2`` - ``services/loki.yml.j2`` - ``services/promtail.yml.j2`` diff --git a/doc/cephadm/services/nfs.rst b/doc/cephadm/services/nfs.rst index 2f12c591631f..ab616ddcb130 100644 --- a/doc/cephadm/services/nfs.rst +++ b/doc/cephadm/services/nfs.rst @@ -15,7 +15,7 @@ Deploying NFS ganesha ===================== Cephadm deploys NFS Ganesha daemon (or set of daemons). The configuration for -NFS is stored in the ``nfs-ganesha`` pool and exports are managed via the +NFS is stored in the ``.nfs`` pool and exports are managed via the ``ceph nfs export ...`` commands and via the dashboard. To deploy a NFS Ganesha gateway, run the following command: diff --git a/doc/cephadm/services/oauth2-proxy.rst b/doc/cephadm/services/oauth2-proxy.rst new file mode 100644 index 000000000000..a941b11e555a --- /dev/null +++ b/doc/cephadm/services/oauth2-proxy.rst @@ -0,0 +1,140 @@ +.. _deploy-cephadm-oauth2-proxy: + +================== +OAuth2 Proxy +================== + +Deploying oauth2-proxy +====================== + +In Ceph releases starting from Squid, the `oauth2-proxy` service introduces an advanced method +for managing authentication and access control for Ceph applications. This service integrates +with external Identity Providers (IDPs) to provide secure, flexible authentication via the +OIDC (OpenID Connect) protocol. `oauth2-proxy` acts as an authentication gateway, ensuring that +access to Ceph applications including the Ceph Dashboard and monitoring stack is tightly controlled. + +To deploy the `oauth2-proxy` service, use the following command: + +.. prompt:: bash # + + ceph orch apply oauth2-proxy [--placement ...] ... + +Once applied, `cephadm` will re-configure the necessary components to use `oauth2-proxy` for authentication, +thereby securing access to all Ceph applications. 
The service will handle login flows, redirect users +to the appropriate IDP for authentication, and manage session tokens to facilitate seamless user access. + + +Benefits of the oauth2-proxy service +==================================== +* ``Enhanced Security``: Provides robust authentication through integration with external IDPs using the OIDC protocol. +* ``Seamless SSO``: Enables seamless single sign-on (SSO) across all Ceph applications, improving user access control. +* ``Centralized Authentication``: Centralizes authentication management, reducing complexity and improving control over access. + + +Security enhancements +===================== + +The `oauth2-proxy` service ensures that all access to Ceph applications is authenticated, preventing unauthorized users from +accessing sensitive information. Since it makes use of the `oauth2-proxy` open source project, this service integrates +easily with a variety of `external IDPs `_ to provide +a secure and flexible authentication mechanism. + + +High availability +============================== +In general, `oauth2-proxy` is used in conjunction with the `mgmt-gateway`. The `oauth2-proxy` service can be deployed as multiple +stateless instances, with the `mgmt-gateway` (nginx reverse-proxy) handling load balancing across these instances using a round-robin strategy. +Since oauth2-proxy integrates with an external identity provider (IDP), ensuring high availability for login is managed externally +and not the responsibility of this service. + + +Accessing services with oauth2-proxy +==================================== + +After deploying `oauth2-proxy`, access to Ceph applications will require authentication through the configured IDP. Users will +be redirected to the IDP for login and then returned to the requested application. This setup ensures secure access and integrates +seamlessly with the Ceph management stack. + + +Service Specification +===================== + +Before deploying `oauth2-proxy` service please remember to deploy the `mgmt-gateway` service by turning on the `--enable_auth` flag. i.e: + +.. prompt:: bash # + + ceph orch apply mgmt-gateway --enable_auth=true + +An `oauth2-proxy` service can be applied using a specification. An example in YAML follows: + +.. code-block:: yaml + + service_type: oauth2-proxy + service_id: auth-proxy + placement: + label: mgmt + spec: + https_address: "0.0.0.0:4180" + provider_display_name: "My OIDC Provider" + client_id: "your-client-id" + oidc_issuer_url: "http://192.168.100.1:5556/dex" + client_secret: "your-client-secret" + cookie_secret: "your-cookie-secret" + ssl_certificate: | + -----BEGIN CERTIFICATE----- + MIIDtTCCAp2gAwIBAgIYMC4xNzc1NDQxNjEzMzc2MjMyXzxvQ7EcMA0GCSqGSIb3 + DQEBCwUAMG0xCzAJBgNVBAYTAlVTMQ0wCwYDVQQIDARVdGFoMRcwFQYDVQQHDA5T + [...] + -----END CERTIFICATE----- + ssl_certificate_key: | + -----BEGIN PRIVATE KEY----- + MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC5jdYbjtNTAKW4 + /CwQr/7wOiLGzVxChn3mmCIF3DwbL/qvTFTX2d8bDf6LjGwLYloXHscRfxszX/4h + [...] + -----END PRIVATE KEY----- + +Fields specific to the ``spec`` section of the `oauth2-proxy` service are described below. More detailed +description of the fields can be found on `oauth2-proxy `_ +project documentation. + + +.. py:currentmodule:: ceph.deployment.service_spec + +.. autoclass:: OAuth2ProxySpec + :members: + +The specification can then be applied by running the below command. 
Once the spec is applied, cephadm will automatically redeploy +the `mgmt-gateway` service and adapt its configuration to redirect authentication to the newly deployed `oauth2-proxy` service. + +.. prompt:: bash # + + ceph orch apply -i oauth2-proxy.yaml + + +Limitations +=========== + +A non-exhaustive list of important limitations for the `oauth2-proxy` service follows: + +* High-availability configurations for `oauth2-proxy` itself are not supported. +* Proper configuration of the IDP and OAuth2 parameters is crucial to avoid authentication failures. Misconfigurations can lead to access issues. + + +Container images +~~~~~~~~~~~~~~~~ + +The container image the `oauth2-proxy` service will use can be found by running: + +:: + + ceph config get mgr mgr/cephadm/container_image_oauth2_proxy + +Admins can specify a custom image to be used by changing the `container_image_oauth2_proxy` cephadm module option. +If there were already running daemon(s), you must also redeploy the daemon(s) for them to use the new image. + +For example: + +.. code-block:: bash + + ceph config set mgr mgr/cephadm/container_image_oauth2_proxy + ceph orch redeploy oauth2-proxy diff --git a/doc/cephadm/services/osd.rst b/doc/cephadm/services/osd.rst index 4031257bf582..831bd238c796 100644 --- a/doc/cephadm/services/osd.rst +++ b/doc/cephadm/services/osd.rst @@ -1,7 +1,6 @@ *********** OSD Service *********** -.. _device management: ../rados/operations/devices .. _libstoragemgmt: https://github.com/libstorage/libstoragemgmt List Devices ============ To print a list of devices discovered by ``cephadm``, run this command: .. prompt:: bash # - ceph orch device ls [--hostname=...] [--wide] [--refresh] + ceph orch device ls [--hostname=...] [--wide] [--refresh] -Example -:: +Example:: Hostname Path Type Serial Size Health Ident Fault Available srv-01 /dev/sdb hdd 15P0A0YFFRD6 300G Unknown N/A N/A No @@ -44,7 +42,7 @@ enable cephadm's "enhanced device scan" option as follows; .. prompt:: bash # - ceph config set mgr mgr/cephadm/device_enhanced_scan true + ceph config set mgr mgr/cephadm/device_enhanced_scan true .. warning:: Although the libstoragemgmt library performs standard SCSI inquiry calls, @@ -80,12 +78,45 @@ like this: In this example, libstoragemgmt has confirmed the health of the drives and the ability to interact with the Identification and Fault LEDs on the drive enclosures. For further -information about interacting with these LEDs, refer to `device management`_. +information about interacting with these LEDs, refer to :ref:`devices`. .. note:: The current release of `libstoragemgmt`_ (1.8.8) supports SCSI, SAS, and SATA based local disks only. There is no official support for NVMe devices (PCIe) +Retrieve Exact Size of Block Devices +==================================== + +Run a command of the following form to discover the exact size of a block +device. The value returned here is used by the orchestrator when comparing high +and low values: + +.. prompt:: bash # + + cephadm shell ceph-volume inventory --format json | jq .sys_api.human_readable_size + +The exact size in GB is the size reported in TB, multiplied by 1000. + +Example +------- +The following provides a specific example of this command based upon the +general form of the command above: + +.. prompt:: bash # + + cephadm shell ceph-volume inventory /dev/sdc --format json | jq .sys_api.human_readable_size + +:: + + "3.64 TB" + +This means that the exact device size is 3.64 * 1000, or 3640GB. + +This procedure was developed by Frédéric Nass. 
See `this thread on the +[ceph-users] mailing list +`_ +for discussion of this matter. + .. _cephadm-deploy-osds: Deploy OSDs @@ -175,16 +206,16 @@ will happen without actually creating the OSDs. For example: - .. prompt:: bash # +.. prompt:: bash # - ceph orch apply osd --all-available-devices --dry-run + ceph orch apply osd --all-available-devices --dry-run - :: +:: - NAME HOST DATA DB WAL - all-available-devices node1 /dev/vdb - - - all-available-devices node2 /dev/vdc - - - all-available-devices node3 /dev/vdd - - + NAME HOST DATA DB WAL + all-available-devices node1 /dev/vdb - - + all-available-devices node2 /dev/vdc - - + all-available-devices node3 /dev/vdd - - .. _cephadm-osd-declarative: @@ -199,9 +230,9 @@ command completes will be automatically found and added to the cluster. We will examine the effects of the following command: - .. prompt:: bash # +.. prompt:: bash # - ceph orch apply osd --all-available-devices + ceph orch apply osd --all-available-devices After running the above command: @@ -214,17 +245,17 @@ If you want to avoid this behavior (disable automatic creation of OSD on availab .. prompt:: bash # - ceph orch apply osd --all-available-devices --unmanaged=true + ceph orch apply osd --all-available-devices --unmanaged=true .. note:: - Keep these three facts in mind: + Keep these three facts in mind: - - The default behavior of ``ceph orch apply`` causes cephadm constantly to reconcile. This means that cephadm creates OSDs as soon as new drives are detected. + - The default behavior of ``ceph orch apply`` causes cephadm constantly to reconcile. This means that cephadm creates OSDs as soon as new drives are detected. - - Setting ``unmanaged: True`` disables the creation of OSDs. If ``unmanaged: True`` is set, nothing will happen even if you apply a new OSD service. + - Setting ``unmanaged: True`` disables the creation of OSDs. If ``unmanaged: True`` is set, nothing will happen even if you apply a new OSD service. - - ``ceph orch daemon add`` creates OSDs, but does not add an OSD service. + - ``ceph orch daemon add`` creates OSDs, but does not add an OSD service. * For cephadm, see also :ref:`cephadm-spec-unmanaged`. @@ -235,7 +266,7 @@ Remove an OSD Removing an OSD from a cluster involves two steps: -#. evacuating all placement groups (PGs) from the cluster +#. evacuating all placement groups (PGs) from the OSD #. removing the PG-free OSD from the cluster The following command performs these two steps: @@ -252,7 +283,7 @@ Example: Expected output:: - Scheduled OSD(s) for removal + Scheduled OSD(s) for removal OSDs that are not safe to destroy will be rejected. @@ -275,14 +306,14 @@ You can query the state of OSD operation with the following command: .. prompt:: bash # - ceph orch osd rm status + ceph orch osd rm status Expected output:: - OSD_ID HOST STATE PG_COUNT REPLACE FORCE STARTED_AT - 2 cephadm-dev done, waiting for purge 0 True False 2020-07-17 13:01:43.147684 - 3 cephadm-dev draining 17 False True 2020-07-17 13:01:45.162158 - 4 cephadm-dev started 42 False True 2020-07-17 13:01:45.162158 + OSD_ID HOST STATE PG_COUNT REPLACE FORCE STARTED_AT + 2 cephadm-dev done, waiting for purge 0 True False 2020-07-17 13:01:43.147684 + 3 cephadm-dev draining 17 False True 2020-07-17 13:01:45.162158 + 4 cephadm-dev started 42 False True 2020-07-17 13:01:45.162158 When no PGs are left on the OSD, it will be decommissioned and removed from the cluster. @@ -304,11 +335,11 @@ Example: .. 
prompt:: bash # - ceph orch osd rm stop 4 + ceph orch osd rm stop 4 Expected output:: - Stopped OSD(s) removal + Stopped OSD(s) removal This resets the initial state of the OSD and takes it off the removal queue. @@ -329,7 +360,7 @@ Example: Expected output:: - Scheduled OSD(s) for replacement + Scheduled OSD(s) for replacement This follows the same procedure as the procedure in the "Remove OSD" section, with one exception: the OSD is not permanently removed from the CRUSH hierarchy, but is @@ -436,10 +467,10 @@ the ``ceph orch ps`` output in the ``MEM LIMIT`` column:: To exclude an OSD from memory autotuning, disable the autotune option for that OSD and also set a specific memory target. For example, - .. prompt:: bash # +.. prompt:: bash # - ceph config set osd.123 osd_memory_target_autotune false - ceph config set osd.123 osd_memory_target 16G + ceph config set osd.123 osd_memory_target_autotune false + ceph config set osd.123 osd_memory_target 16G .. _drivegroups: @@ -447,13 +478,27 @@ for that OSD and also set a specific memory target. For example, Advanced OSD Service Specifications =================================== -:ref:`orchestrator-cli-service-spec`\s of type ``osd`` are a way to describe a -cluster layout, using the properties of disks. Service specifications give the -user an abstract way to tell Ceph which disks should turn into OSDs with which -configurations, without knowing the specifics of device names and paths. +:ref:`orchestrator-cli-service-spec`\s of type ``osd`` provide a way to use the +properties of disks to describe a Ceph cluster's layout. Service specifications +are an abstraction used to tell Ceph which disks it should transform into OSDs +and which configurations to apply to those OSDs. +:ref:`orchestrator-cli-service-spec`\s make it possible to target these disks +for transformation into OSDs even when the Ceph cluster operator does not know +the specific device names and paths associated with those disks. -Service specifications make it possible to define a yaml or json file that can -be used to reduce the amount of manual work involved in creating OSDs. +:ref:`orchestrator-cli-service-spec`\s make it possible to define a ``.yaml`` +or ``.json`` file that can be used to reduce the amount of manual work involved +in creating OSDs. + +.. note:: + We recommend that advanced OSD specs include the ``service_id`` field set. + OSDs created using ``ceph orch daemon add`` or ``ceph orch apply osd + --all-available-devices`` are placed in the plain ``osd`` service. Failing + to include a ``service_id`` in your OSD spec causes the Ceph cluster to mix + the OSDs from your spec with those OSDs, which can potentially result in the + overwriting of service specs created by ``cephadm`` to track them. Newer + versions of ``cephadm`` will even block creation of advanced OSD specs that + do not include the ``service_id``. For example, instead of running the following command: @@ -461,8 +506,8 @@ For example, instead of running the following command: ceph orch daemon add osd **:** -for each device and each host, we can define a yaml or json file that allows us -to describe the layout. Here's the most basic example. +for each device and each host, we can define a ``.yaml`` or ``.json`` file that +allows us to describe the layout. Here is the most basic example: Create a file called (for example) ``osd_spec.yml``: @@ -480,17 +525,18 @@ This means : #. Turn any available device (ceph-volume decides what 'available' is) into an OSD on all hosts that match the glob pattern '*'. 
(The glob pattern matches - against the registered hosts from `host ls`) A more detailed section on - host_pattern is available below. + against the registered hosts from `ceph orch host ls`) See + :ref:`cephadm-services-placement-by-pattern-matching` for more on using + ``host_pattern``-matching to turn devices into OSDs. -#. Then pass it to `osd create` like this: +#. Pass ``osd_spec.yml`` to ``osd create`` by using the following command: .. prompt:: bash [monitor.1]# ceph orch apply -i /path/to/osd_spec.yml - This instruction will be issued to all the matching hosts, and will deploy - these OSDs. + This instruction is issued to all the matching hosts, and will deploy these + OSDs. Setups more complex than the one specified by the ``all`` filter are possible. See :ref:`osd_filters` for details. @@ -502,7 +548,7 @@ Example .. prompt:: bash [monitor.1]# - ceph orch apply -i /path/to/osd_spec.yml --dry-run + ceph orch apply -i /path/to/osd_spec.yml --dry-run @@ -512,9 +558,9 @@ Filters ------- .. note:: - Filters are applied using an `AND` gate by default. This means that a drive - must fulfill all filter criteria in order to get selected. This behavior can - be adjusted by setting ``filter_logic: OR`` in the OSD specification. + Filters are applied using an `AND` gate by default. This means that a drive + must fulfill all filter criteria in order to get selected. This behavior can + be adjusted by setting ``filter_logic: OR`` in the OSD specification. Filters are used to assign disks to groups, using their attributes to group them. @@ -524,7 +570,7 @@ information about the attributes with this command: .. code-block:: bash - ceph-volume inventory + ceph-volume inventory Vendor or Model ^^^^^^^^^^^^^^^ @@ -633,9 +679,9 @@ but want to use only the first two, you could use `limit`: .. code-block:: yaml - data_devices: - vendor: VendorA - limit: 2 + data_devices: + vendor: VendorA + limit: 2 .. note:: `limit` is a last resort and shouldn't be used if it can be avoided. @@ -659,6 +705,21 @@ This example would deploy all OSDs with encryption enabled. all: true encrypted: true +Ceph Squid onwards support tpm2 token enrollment to LUKS2 devices. +You can add the `tpm2` to your OSD spec: + +.. code-block:: yaml + + service_type: osd + service_id: example_osd_spec_with_tpm2 + placement: + host_pattern: '*' + spec: + data_devices: + all: true + encrypted: true + tpm2: true + See a full list in the DriveGroupSpecs .. py:currentmodule:: ceph.deployment.drive_group @@ -858,8 +919,8 @@ See :ref:`orchestrator-cli-placement-spec` .. note:: - Assuming each host has a unique disk layout, each OSD - spec needs to have a different service id + Assuming each host has a unique disk layout, each OSD + spec needs to have a different service id Dedicated wal + db @@ -989,7 +1050,7 @@ activates all existing OSDs on a host. .. prompt:: bash # - ceph cephadm osd activate ... + ceph cephadm osd activate ... This will scan all existing disks for OSDs and deploy corresponding daemons. diff --git a/doc/cephadm/services/rgw.rst b/doc/cephadm/services/rgw.rst index 20ec39a88dd1..ed0b149365a5 100644 --- a/doc/cephadm/services/rgw.rst +++ b/doc/cephadm/services/rgw.rst @@ -246,6 +246,7 @@ It is a yaml format file with the following properties: virtual_interface_networks: [ ... ] # optional: list of CIDR networks use_keepalived_multicast: # optional: Default is False. vrrp_interface_network: / # optional: ex: 192.168.20.0/24 + health_check_interval: # optional: Default is 2s. 
ssl_cert: | # optional: SSL certificate and key -----BEGIN CERTIFICATE----- ... @@ -273,6 +274,7 @@ It is a yaml format file with the following properties: monitor_port: # ex: 1967, used by haproxy for load balancer status virtual_interface_networks: [ ... ] # optional: list of CIDR networks first_virtual_router_id: # optional: default 50 + health_check_interval: # optional: Default is 2s. ssl_cert: | # optional: SSL certificate and key -----BEGIN CERTIFICATE----- ... @@ -321,6 +323,9 @@ where the properties of this service specification are: keepalived will have different virtual_router_id. In the case of using ``virtual_ips_list``, each IP will create its own virtual router. So the first one will have ``first_virtual_router_id``, second one will have ``first_virtual_router_id`` + 1, etc. Valid values go from 1 to 255. +* ``health_check_interval`` + Default is 2 seconds. This parameter can be used to set the interval between health checks + for the haproxy with the backend servers. .. _ingress-virtual-ip: diff --git a/doc/cephadm/services/smb.rst b/doc/cephadm/services/smb.rst new file mode 100644 index 000000000000..cc36a61b9d5b --- /dev/null +++ b/doc/cephadm/services/smb.rst @@ -0,0 +1,251 @@ +.. _deploy-cephadm-smb-samba: + +=========== +SMB Service +=========== + +.. warning:: + + SMB support is under active development and many features may be + missing or immature. A Ceph MGR module, named smb, is available to help + organize and manage SMB related featues. Unless the smb module + has been determined to be unsuitable for your needs we recommend using that + module over directly using the smb service spec. + + +Deploying Samba Containers +========================== + +Cephadm deploys `Samba `_ servers using container images +built by the `samba-container project `_. + +In order to host SMB Shares with access to CephFS file systems, deploy +Samba Containers with the following command: + +.. prompt:: bash # + + ceph orch apply smb [--features ...] [--placement ...] ... + +There are a number of additional parameters that the command accepts. See +the Service Specification for a description of these options. + +Service Specification +===================== + +An SMB Service can be applied using a specification. An example in YAML follows: + +.. code-block:: yaml + + service_type: smb + service_id: tango + placement: + hosts: + - ceph0 + spec: + cluster_id: tango + features: + - domain + config_uri: rados://.smb/tango/scc.toml + custom_dns: + - "192.168.76.204" + join_sources: + - "rados:mon-config-key:smb/config/tango/join1.json" + include_ceph_users: + - client.smb.fs.cluster.tango + +The specification can then be applied by running the following command: + +.. prompt:: bash # + + ceph orch apply -i smb.yaml + + +Service Spec Options +-------------------- + +Fields specific to the ``spec`` section of the SMB Service are described below. + +cluster_id + A short name identifying the SMB "cluster". In this case a cluster is + simply a management unit of one or more Samba services sharing a common + configuration, and may not provide actual clustering or availability + mechanisms. + +features + A list of pre-defined terms enabling specific deployment characteristics. + An empty list is valid. Supported terms: + + * ``domain``: Enable domain member mode + * ``clustered``: Enable Samba native cluster mode + +config_uri + A string containing a (standard or de-facto) URI that identifies a + configuration source that should be loaded by the samba-container as the + primary configuration file. 
+ Supported URI schemes include ``http:``, ``https:``, ``rados:``, and + ``rados:mon-config-key:``. + +user_sources + A list of strings with (standard or de-facto) URI values that will + be used to identify where credentials for authentication are located. + See ``config_uri`` for the supported list of URI schemes. + +join_sources + A list of strings with (standard or de-facto) URI values that will + be used to identify where authentication data that will be used to + perform domain joins are located. Each join source is tried in sequence + until one succeeds. + See ``config_uri`` for the supported list of URI schemes. + +custom_dns + A list of IP addresses that will be used as the DNS servers for a Samba + container. This features allows Samba Containers to integrate with + Active Directory even if the Ceph host nodes are not tied into the Active + Directory DNS domain(s). + +include_ceph_users + A list of cephx user (aka entity) names that the Samba Containers may use. + The cephx keys for each user in the list will automatically be added to + the keyring in the container. + +cluster_meta_uri + A string containing a URI that identifies where the cluster structure + metadata will be stored. Required if ``clustered`` feature is set. Must be + a RADOS pseudo-URI. + +cluster_lock_uri + A string containing a URI that identifies where Samba/CTDB will store a + cluster lock. Required if ``clustered`` feature is set. Must be a RADOS + pseudo-URI. + +cluster_public_addrs + List of objects; optional. Supported only when using Samba's clustering. + Assign "virtual" IP addresses that will be managed by the clustering + subsystem and may automatically move between nodes running Samba + containers. + Fields: + + address + Required string. An IP address with a required prefix length (example: + ``192.168.4.51/24``). This address will be assigned to one of the + host's network devices and managed automatically. + destination + Optional. String or list of strings. A ``destination`` defines where + the system will assign the managed IPs. Each string value must be a + network address (example ``192.168.4.0/24``). One or more destinations + may be supplied. The typical case is to use exactly one destination and + so the value may be supplied as a string, rather than a list with a + single item. Each destination network will be mapped to a device on a + host. Run ``cephadm list-networks`` for an example of these mappings. + If destination is not supplied the network is automatically determined + using the address value supplied and taken as the destination. + + +.. note:: + + If one desires clustering between smbd instances (also known as + High-Availability or "transparent state migration") the feature flag + ``clustered`` is needed. If this flag is not specified cephadm may deploy + multiple smb servers but they will lack the coordination needed of an actual + Highly-Avaiable cluster. When the ``clustered`` flag is specified cephadm + will deploy additional containers that manage this coordination. + Additionally, the cluster_meta_uri and cluster_lock_uri values must be + specified. The former is used by cephadm to describe the smb cluster layout + to the samba containers. The latter is used by Samba's CTDB component to + manage an internal cluster lock. + + +Configuring an SMB Service +-------------------------- + +.. warning:: + + A Manager module for SMB is under active development. Once that module + is available it will be the preferred method for managing Samba on Ceph + in an end-to-end manner. 
The following discussion is provided for the sake + of completeness and to explain how the software layers interact. + +Creating an SMB Service spec is not sufficient for complete operation of a +Samba Container on Ceph. It is important to create valid configurations and +place them in locations that the container can read. The complete specification +of these configurations is out of scope for this document. You can refer to the +`documentation for Samba `_ as +well as the `samba server container +`_ +and the `configuation file +`_ +it accepts. + +When one has composed a configuration it should be stored in a location +that the Samba Container can access. The recommended approach for running +Samba Containers within Ceph orchestration is to store the configuration +in the Ceph cluster. There are a few ways to store the configuration +in ceph: + +RADOS +~~~~~ + +A configuration file can be stored as a RADOS object in a pool +named ``.smb``. Within the pool there should be a namespace named after the +``cluster_id`` value. The URI used to identify this resource should be +constructed like ``rados://.smb//``. Example: +``rados://.smb/tango/config.json``. + +The containers are automatically deployed with cephx keys allowing access to +resources in these pools and namespaces. As long as this scheme is used +no additional configuration to read the object is needed. + +To copy a configuration file to a RADOS pool, use the ``rados`` command line +tool. For example: + +.. prompt:: bash # + + # assuming your config file is /tmp/config.json + rados --pool=.smb --namespace=tango put config.json /tmp/config.json + +MON Key/Value Store +~~~~~~~~~~~~~~~~~~~ + +A configuration file can be stored as a value in the Ceph Monitor Key/Value +store. The key must be named after the cluster like so: +``smb/config//``. This results in a URI that can be used to +identify this configuration constructed like +``rados:mon-config-key:smb/config//``. +Example: ``rados:mon-config-key:smb/config/tango/config.json``. + +The containers are automatically deployed with cephx keys allowing access to +resources with the key-prefix ``smb/config//``. As long as this +scheme is used no additional configuration to read the value is needed. + +To copy a configuration file into the Key/Value store use the ``ceph config-key +put ...`` tool. For example: + +.. prompt:: bash # + + # assuming your config file is /tmp/config.json + ceph config-key set smb/config/tango/config.json -i /tmp/config.json + + +HTTP/HTTPS +~~~~~~~~~~ + +A configuration file can be stored on an HTTP(S) server and automatically read +by the Samba Container. Managing a configuration file on HTTP(S) is left as an +exercise for the reader. + +.. note:: All URI schemes are supported by parameters that accept URIs. Each + scheme has different performance and security characteristics. + + +Limitations +=========== + +A non-exhaustive list of important limitations for the SMB service follows: + +* DNS is a critical component of Active Directory. If one is configuring the + SMB service for domain membership, either the Ceph host node must be + configured so that it can resolve the Active Directory (AD) domain or the + ``custom_dns`` option may be used. In both cases DNS hosts for the AD domain + must still be reachable from whatever network segment the ceph cluster is on. +* Services must bind to TCP port 445. Running multiple SMB services on the same + node is not yet supported and will trigger a port-in-use conflict. 
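As a concrete illustration of these constraints, the following sketch
(hypothetical cluster name, host, and addresses) shows a domain-member SMB
spec that pins the service to a single host and supplies Active Directory DNS
servers through ``custom_dns`` instead of reconfiguring host-level DNS:

.. code-block:: yaml

   service_type: smb
   service_id: foxtrot                  # hypothetical cluster_id / service_id
   placement:
     hosts:
       - ceph0                          # one Samba container per node (binds TCP port 445)
   spec:
     cluster_id: foxtrot
     features:
       - domain
     config_uri: rados://.smb/foxtrot/config.json
     custom_dns:
       - "192.168.76.204"               # AD DNS server reachable from the Ceph hosts
     join_sources:
       - "rados:mon-config-key:smb/config/foxtrot/join1.json"

The placement and URI values above are placeholders; adapt them to the pool,
namespace, and join data actually present in your cluster.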
diff --git a/doc/cephadm/troubleshooting.rst b/doc/cephadm/troubleshooting.rst index 5ec692881661..a7afaa108c84 100644 --- a/doc/cephadm/troubleshooting.rst +++ b/doc/cephadm/troubleshooting.rst @@ -1,66 +1,62 @@ Troubleshooting =============== -You may wish to investigate why a cephadm command failed -or why a certain service no longer runs properly. +This section explains how to investigate why a cephadm command failed or why a +certain service no longer runs properly. -Cephadm deploys daemons within containers. This means that -troubleshooting those containerized daemons will require -a different process than traditional package-install daemons. +Cephadm deploys daemons within containers. Troubleshooting containerized +daemons requires a different process than does troubleshooting traditional +daemons that were installed by means of packages. -Here are some tools and commands to help you troubleshoot -your Ceph environment. +Here are some tools and commands to help you troubleshoot your Ceph +environment. .. _cephadm-pause: Pausing or Disabling cephadm ---------------------------- -If something goes wrong and cephadm is behaving badly, you can -pause most of the Ceph cluster's background activity by running -the following command: +If something goes wrong and cephadm is behaving badly, pause most of the Ceph +cluster's background activity by running the following command: .. prompt:: bash # ceph orch pause -This stops all changes in the Ceph cluster, but cephadm will -still periodically check hosts to refresh its inventory of -daemons and devices. You can disable cephadm completely by -running the following commands: +This stops all changes in the Ceph cluster, but cephadm will still periodically +check hosts to refresh its inventory of daemons and devices. Disable cephadm +completely by running the following commands: .. prompt:: bash # ceph orch set backend '' ceph mgr module disable cephadm -These commands disable all of the ``ceph orch ...`` CLI commands. -All previously deployed daemon containers continue to exist and -will start as they did before you ran these commands. +These commands disable all ``ceph orch ...`` CLI commands. All +previously deployed daemon containers continue to run and will start just as +they were before you ran these commands. -See :ref:`cephadm-spec-unmanaged` for information on disabling -individual services. +See :ref:`cephadm-spec-unmanaged` for more on disabling individual services. Per-service and Per-daemon Events --------------------------------- -In order to facilitate debugging failed daemons, -cephadm stores events per service and per daemon. -These events often contain information relevant to -troubleshooting your Ceph cluster. +To make it easier to debug failed daemons, cephadm stores events per service +and per daemon. These events often contain information relevant to +the troubleshooting of your Ceph cluster. Listing Service Events ~~~~~~~~~~~~~~~~~~~~~~ -To see the events associated with a certain service, run a -command of the and following form: +To see the events associated with a certain service, run a command of the +following form: .. prompt:: bash # ceph orch ls --service_name= --format yaml -This will return something in the following form: +This will return information in the following form: .. 
code-block:: yaml @@ -81,8 +77,8 @@ This will return something in the following form: Listing Daemon Events ~~~~~~~~~~~~~~~~~~~~~ -To see the events associated with a certain daemon, run a -command of the and following form: +To see the events associated with a certain daemon, run a command of the +following form: .. prompt:: bash # @@ -105,32 +101,41 @@ This will return something in the following form: Checking Cephadm Logs --------------------- -To learn how to monitor cephadm logs as they are generated, read :ref:`watching_cephadm_logs`. +To learn how to monitor cephadm logs as they are generated, read +:ref:`watching_cephadm_logs`. -If your Ceph cluster has been configured to log events to files, there will be a -``ceph.cephadm.log`` file on all monitor hosts (see -:ref:`cephadm-logs` for a more complete explanation). +If your Ceph cluster has been configured to log events to files, there will be +a ``ceph.cephadm.log`` file on all monitor hosts. See :ref:`cephadm-logs` for a +more complete explanation. Gathering Log Files ------------------- -Use journalctl to gather the log files of all daemons: +Use ``journalctl`` to gather the log files of all daemons: .. note:: By default cephadm now stores logs in journald. This means that you will no longer find daemon logs in ``/var/log/ceph/``. -To read the log file of one specific daemon, run:: +To read the log file of one specific daemon, run a command of the following +form: + +.. prompt:: bash - cephadm logs --name + cephadm logs --name -Note: this only works when run on the same host where the daemon is running. To -get logs of a daemon running on a different host, give the ``--fsid`` option:: +.. Note:: This works only when run on the same host that is running the daemon. + To get the logs of a daemon that is running on a different host, add the + ``--fsid`` option to the command, as in the following example: - cephadm logs --fsid --name + .. prompt:: bash -where the ```` corresponds to the cluster ID printed by ``ceph status``. + cephadm logs --fsid --name -To fetch all log files of all daemons on a given host, run:: + In this example, ```` corresponds to the cluster ID returned by the + ``ceph status`` command. + +To fetch all log files of all daemons on a given host, run the following +for-loop:: for name in $(cephadm ls | jq -r '.[].name') ; do cephadm logs --fsid --name "$name" > $name; @@ -139,39 +144,41 @@ To fetch all log files of all daemons on a given host, run:: Collecting Systemd Status ------------------------- -To print the state of a systemd unit, run:: +To print the state of a systemd unit, run a command of the following form: - systemctl status "ceph-$(cephadm shell ceph fsid)@.service"; +.. prompt:: bash + systemctl status "ceph-$(cephadm shell ceph fsid)@.service"; -To fetch all state of all daemons of a given host, run:: - fsid="$(cephadm shell ceph fsid)" - for name in $(cephadm ls | jq -r '.[].name') ; do - systemctl status "ceph-$fsid@$name.service" > $name; - done +To fetch the state of all daemons of a given host, run the following shell +script:: + + fsid="$(cephadm shell ceph fsid)" + for name in $(cephadm ls | jq -r '.[].name') ; do + systemctl status "ceph-$fsid@$name.service" > $name; + done List all Downloaded Container Images ------------------------------------ -To list all container images that are downloaded on a host: +To list all container images that are downloaded on a host, run the following +commands: -.. note:: ``Image`` might also be called `ImageID` +.. 
prompt:: bash # -:: + podman ps -a --format json | jq '.[].Image' "docker.io/library/centos:8" "registry.opensuse.org/opensuse/leap:15.2" - podman ps -a --format json | jq '.[].Image' - "docker.io/library/centos:8" - "registry.opensuse.org/opensuse/leap:15.2" +.. note:: ``Image`` might also be called ``ImageID``. Manually Running Containers --------------------------- Cephadm uses small wrappers when running containers. Refer to -``/var/lib/ceph///unit.run`` for the -container execution command. +``/var/lib/ceph///unit.run`` for the container +execution command. .. _cephadm-ssh-errors: @@ -187,9 +194,10 @@ Error message:: Please make sure that the host is reachable and accepts connections using the cephadm SSH key ... -Things Ceph administrators can do: +If you receive the above error message, try the following things to +troubleshoot the SSH connection between ``cephadm`` and the monitor: -1. Ensure cephadm has an SSH identity key:: +1. Ensure that ``cephadm`` has an SSH identity key:: [root@mon1~]# cephadm shell -- ceph config-key get mgr/cephadm/ssh_identity_key > ~/cephadm_private_key INFO:cephadm:Inferring fsid f8edc08a-7f17-11ea-8707-000c2915dd98 @@ -202,20 +210,21 @@ Things Ceph administrators can do: or:: - [root@mon1 ~]# cat ~/cephadm_private_key | cephadm shell -- ceph cephadm set-ssk-key -i - + [root@mon1 ~]# cat ~/cephadm_private_key | cephadm shell -- ceph cephadm set-ssh-key -i - 2. Ensure that the SSH config is correct:: [root@mon1 ~]# cephadm shell -- ceph cephadm get-ssh-config > config -3. Verify that we can connect to the host:: +3. Verify that it is possible to connect to the host:: [root@mon1 ~]# ssh -F config -i ~/cephadm_private_key root@mon1 Verifying that the Public Key is Listed in the authorized_keys file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To verify that the public key is in the authorized_keys file, run the following commands:: +To verify that the public key is in the ``authorized_keys`` file, run the +following commands:: [root@mon1 ~]# cephadm shell -- ceph cephadm get-pub-key > ~/ceph.pub [root@mon1 ~]# grep "`cat ~/ceph.pub`" /root/.ssh/authorized_keys @@ -231,27 +240,34 @@ Or this error:: Must set public_network config option or specify a CIDR network, ceph addrvec, or plain IP -This means that you must run a command of this form:: +This means that you must run a command of this form: - ceph config set mon public_network +.. prompt:: bash -For more detail on operations of this kind, see :ref:`deploy_additional_monitors` + ceph config set mon public_network + +For more detail on operations of this kind, see +:ref:`deploy_additional_monitors`. Accessing the Admin Socket -------------------------- -Each Ceph daemon provides an admin socket that bypasses the -MONs (See :ref:`rados-monitoring-using-admin-socket`). +Each Ceph daemon provides an admin socket that allows runtime option setting and statistic reading. See +:ref:`rados-monitoring-using-admin-socket`. + +#. To access the admin socket, enter the daemon container on the host:: -To access the admin socket, first enter the daemon container on the host:: + [root@mon1 ~]# cephadm enter --name - [root@mon1 ~]# cephadm enter --name - [ceph: root@mon1 /]# ceph --admin-daemon /var/run/ceph/ceph-.asok config show +#. 
Run a command of the following forms to see the admin socket's configuration and other available actions:: + + [ceph: root@mon1 /]# ceph --admin-daemon /var/run/ceph/ceph-.asok config show + [ceph: root@mon1 /]# ceph --admin-daemon /var/run/ceph/ceph-.asok help Running Various Ceph Tools -------------------------------- -To run Ceph tools like ``ceph-objectstore-tool`` or +To run Ceph tools such as ``ceph-objectstore-tool`` or ``ceph-monstore-tool``, invoke the cephadm CLI with ``cephadm shell --name ``. For example:: @@ -268,100 +284,232 @@ To run Ceph tools like ``ceph-objectstore-tool`` or election_strategy: 1 0: [v2:127.0.0.1:3300/0,v1:127.0.0.1:6789/0] mon.myhostname -The cephadm shell sets up the environment in a way that is suitable -for extended daemon maintenance and running daemons interactively. +The cephadm shell sets up the environment in a way that is suitable for +extended daemon maintenance and for the interactive running of daemons. .. _cephadm-restore-quorum: Restoring the Monitor Quorum ---------------------------- -If the Ceph monitor daemons (mons) cannot form a quorum, cephadm will not be -able to manage the cluster until quorum is restored. +If the Ceph Monitor daemons (mons) cannot form a quorum, ``cephadm`` will not +be able to manage the cluster until quorum is restored. In order to restore the quorum, remove unhealthy monitors form the monmap by following these steps: -1. Stop all mons. For each mon host:: +1. Stop all Monitors. Use ``ssh`` to connect to each Monitor's host, and then + while connected to the Monitor's host use ``cephadm`` to stop the Monitor + daemon: + + .. prompt:: bash + + ssh {mon-host} + cephadm unit --name {mon.hostname} stop - ssh {mon-host} - cephadm unit --name mon.`hostname` stop +2. Identify a surviving Monitor and log in to its host: -2. Identify a surviving monitor and log in to that host:: + .. prompt:: bash - ssh {mon-host} - cephadm enter --name mon.`hostname` + ssh {mon-host} + cephadm enter --name {mon.hostname} -3. Follow the steps in :ref:`rados-mon-remove-from-unhealthy` +3. Follow the steps in :ref:`rados-mon-remove-from-unhealthy`. .. _cephadm-manually-deploy-mgr: Manually Deploying a Manager Daemon ----------------------------------- -At least one manager (mgr) daemon is required by cephadm in order to manage the -cluster. If the last mgr in a cluster has been removed, follow these steps in -order to deploy a manager called (for example) -``mgr.hostname.smfvfd`` on a random host of your cluster manually. +At least one Manager (``mgr``) daemon is required by cephadm in order to manage +the cluster. If the last remaining Manager has been removed from the Ceph +cluster, follow these steps in order to deploy a fresh Manager on an arbitrary +host in your cluster. In this example, the freshly-deployed Manager daemon is +called ``mgr.hostname.smfvfd``. + +#. Disable the cephadm scheduler, in order to prevent ``cephadm`` from removing + the new Manager. See :ref:`cephadm-enable-cli`: + + .. prompt:: bash # + + ceph config-key set mgr/cephadm/pause true -Disable the cephadm scheduler, in order to prevent cephadm from removing the new -manager. See :ref:`cephadm-enable-cli`:: +#. Retrieve or create the "auth entry" for the new Manager: - ceph config-key set mgr/cephadm/pause true + .. 
prompt:: bash # -Then get or create the auth entry for the new manager:: + ceph auth get-or-create mgr.hostname.smfvfd mon "profile mgr" osd "allow *" mds "allow *" - ceph auth get-or-create mgr.hostname.smfvfd mon "profile mgr" osd "allow *" mds "allow *" +#. Retrieve the Monitor's configuration: -Get the ceph.conf:: + .. prompt:: bash # - ceph config generate-minimal-conf + ceph config generate-minimal-conf -Get the container image:: +#. Retrieve the container image: - ceph config get "mgr.hostname.smfvfd" container_image + .. prompt:: bash # -Create a file ``config-json.json`` which contains the information necessary to deploy -the daemon: + ceph config get "mgr.hostname.smfvfd" container_image -.. code-block:: json +#. Create a file called ``config-json.json``, which contains the information + necessary to deploy the daemon: - { - "config": "# minimal ceph.conf for 8255263a-a97e-4934-822c-00bfe029b28f\n[global]\n\tfsid = 8255263a-a97e-4934-822c-00bfe029b28f\n\tmon_host = [v2:192.168.0.1:40483/0,v1:192.168.0.1:40484/0]\n", - "keyring": "[mgr.hostname.smfvfd]\n\tkey = V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4=\n" - } + .. code-block:: json -Deploy the daemon:: + { + "config": "# minimal ceph.conf for 8255263a-a97e-4934-822c-00bfe029b28f\n[global]\n\tfsid = 8255263a-a97e-4934-822c-00bfe029b28f\n\tmon_host = [v2:192.168.0.1:40483/0,v1:192.168.0.1:40484/0]\n", + "keyring": "[mgr.hostname.smfvfd]\n\tkey = V2VyIGRhcyBsaWVzdCBpc3QgZG9vZi4=\n" + } - cephadm --image deploy --fsid --name mgr.hostname.smfvfd --config-json config-json.json +#. Deploy the Manager daemon: -Analyzing Core Dumps + .. prompt:: bash # + + cephadm --image deploy --fsid --name mgr.hostname.smfvfd --config-json config-json.json + +Capturing Core Dumps --------------------- -When a Ceph daemon crashes, cephadm supports analyzing core dumps. To enable core dumps, run +A Ceph cluster that uses ``cephadm`` can be configured to capture core dumps. +The initial capture and processing of the coredump is performed by +`systemd-coredump +`_. + + +To enable coredump handling, run the following command .. prompt:: bash # - ulimit -c unlimited + ulimit -c unlimited -Core dumps will now be written to ``/var/lib/systemd/coredump``. .. note:: - Core dumps are not namespaced by the kernel, which means - they will be written to ``/var/lib/systemd/coredump`` on - the container host. + Core dumps are not namespaced by the kernel. This means that core dumps are + written to ``/var/lib/systemd/coredump`` on the container host. The ``ulimit + -c unlimited`` setting will persist only until the system is rebooted. + +Wait for the crash to happen again. To simulate the crash of a daemon, run for +example ``killall -3 ceph-mon``. + + +Running the Debugger with cephadm +---------------------------------- + +Running a single debugging session +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Initiate a debugging session by using the ``cephadm shell`` command. +From within the shell container we need to install the debugger and debuginfo +packages. To debug a core file captured by systemd, run the following: + + +#. Start the shell session: + + .. prompt:: bash # + + cephadm shell --mount /var/lib/system/coredump + +#. From within the shell session, run the following commands: + + .. prompt:: bash # + + dnf install ceph-debuginfo gdb zstd + + .. prompt:: bash # + + unzstd /var/lib/systemd/coredump/core.ceph-*.zst + + .. prompt:: bash # + + gdb /usr/bin/ceph-mon /mnt/coredump/core.ceph-*.zst + +#. Run debugger commands at gdb's prompt: + + .. 
prompt:: bash (gdb) + + bt + + :: + + #0 0x00007fa9117383fc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 + #1 0x00007fa910d7f8f0 in std::condition_variable::wait(std::unique_lock&) () from /lib64/libstdc++.so.6 + #2 0x00007fa913d3f48f in AsyncMessenger::wait() () from /usr/lib64/ceph/libceph-common.so.2 + #3 0x0000563085ca3d7e in main () + + +Running repeated debugging sessions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When using ``cephadm shell``, as in the example above, any changes made to the +container that is spawned by the shell command are ephemeral. After the shell +session exits, the files that were downloaded and installed cease to be +available. You can simply re-run the same commands every time ``cephadm shell`` +is invoked, but to save time and resources you can create a new container image +and use it for repeated debugging sessions. + +In the following example, we create a simple file that constructs the +container image. The command below uses podman but it is expected to work +correctly even if ``podman`` is replaced with ``docker``:: + + cat >Containerfile < to customize the base image + +The above file creates a new local image named ``ceph:debugging``. This image +can be used on the same machine that built it. The image can also be pushed to +a container repository or saved and copied to a node that is running other Ceph +containers. See the ``podman`` or ``docker`` documentation for more +information about the container workflow. + +After the image has been built, it can be used to initiate repeat debugging +sessions. By using an image in this way, you avoid the trouble of having to +re-install the debug tools and the debuginfo packages every time you need to +run a debug session. To debug a core file using this image, in the same way as +previously described, run: + +.. prompt:: bash # + + cephadm --image ceph:debugging shell --mount /var/lib/system/coredump + + +Debugging live processes +~~~~~~~~~~~~~~~~~~~~~~~~ + +The gdb debugger can attach to running processes to debug them. This can be +achieved with a containerized process by using the debug image and attaching it +to the same PID namespace in which the process to be debugged resides. + +This requires running a container command with some custom arguments. We can +generate a script that can debug a process in a running container. + +.. prompt:: bash # + + cephadm --image ceph:debugging shell --dry-run > /tmp/debug.sh + +This creates a script that includes the container command that ``cephadm`` +would use to create a shell. Modify the script by removing the ``--init`` +argument and replace it with the argument that joins to the namespace used for +a running running container. For example, assume we want to debug the Manager +and have determnined that the Manager is running in a container named +``ceph-bc615290-685b-11ee-84a6-525400220000-mgr-ceph0-sluwsk``. In this case, +the argument +``--pid=container:ceph-bc615290-685b-11ee-84a6-525400220000-mgr-ceph0-sluwsk`` +should be used. -Now, wait for the crash to happen again. To simulate the crash of a daemon, run e.g. ``killall -3 ceph-mon``. +We can run our debugging container with ``sh /tmp/debug.sh``. Within the shell, +we can run commands such as ``ps`` to get the PID of the Manager process. In +the following example this is ``2``. While running gdb, we can attach to the +running process: -Install debug packages including ``ceph-debuginfo`` by entering the cephadm shelll:: +.. 
prompt:: bash (gdb) - # cephadm shell --mount /var/lib/systemd/coredump - [ceph: root@host1 /]# dnf install ceph-debuginfo gdb zstd - [ceph: root@host1 /]# unzstd /mnt/coredump/core.ceph-*.zst - [ceph: root@host1 /]# gdb /usr/bin/ceph-mon /mnt/coredump/core.ceph-... - (gdb) bt - #0 0x00007fa9117383fc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0 - #1 0x00007fa910d7f8f0 in std::condition_variable::wait(std::unique_lock&) () from /lib64/libstdc++.so.6 - #2 0x00007fa913d3f48f in AsyncMessenger::wait() () from /usr/lib64/ceph/libceph-common.so.2 - #3 0x0000563085ca3d7e in main () + attach 2 + info threads + bt diff --git a/doc/cephadm/upgrade.rst b/doc/cephadm/upgrade.rst index e0a9f610ae2a..3a15c3ac6231 100644 --- a/doc/cephadm/upgrade.rst +++ b/doc/cephadm/upgrade.rst @@ -2,7 +2,7 @@ Upgrading Ceph ============== -Cephadm can safely upgrade Ceph from one bugfix release to the next. For +Cephadm can safely upgrade Ceph from one point release to the next. For example, you can upgrade from v15.2.0 (the first Octopus release) to the next point release, v15.2.1. @@ -56,13 +56,13 @@ Before you use cephadm to upgrade Ceph, verify that all hosts are currently onli ceph -s -To upgrade (or downgrade) to a specific release, run the following command: +To upgrade to a specific release, run a command of the following form: .. prompt:: bash # ceph orch upgrade start --ceph-version -For example, to upgrade to v16.2.6, run the following command: +For example, to upgrade to v16.2.6, run a command of the following form: .. prompt:: bash # @@ -131,31 +131,45 @@ doesn't use ``cephadm shell``) to a version compatible with the new version. Potential problems ================== -There are a few health alerts that can arise during the upgrade process. + +Error: ENOENT: Module not found +------------------------------- + +The message ``Error ENOENT: Module not found`` appears in response to the command ``ceph orch upgrade status`` if the orchestrator has crashed: + +.. prompt:: bash # + + ceph orch upgrade status + +:: + + Error ENOENT: Module not found + +This is possibly caused by invalid JSON in a mgr config-key. See `Redmine tracker Issue #67329 `_ and `the discussion on the [ceph-users] mailing list `_. UPGRADE_NO_STANDBY_MGR ---------------------- This alert (``UPGRADE_NO_STANDBY_MGR``) means that Ceph does not detect an -active standby manager daemon. In order to proceed with the upgrade, Ceph -requires an active standby manager daemon (which you can think of in this +active standby Manager daemon. In order to proceed with the upgrade, Ceph +requires an active standby Manager daemon (which you can think of in this context as "a second manager"). -You can ensure that Cephadm is configured to run 2 (or more) managers by +You can ensure that Cephadm is configured to run two (or more) Managers by running the following command: .. prompt:: bash # ceph orch apply mgr 2 # or more -You can check the status of existing mgr daemons by running the following +You can check the status of existing Manager daemons by running the following command: .. prompt:: bash # ceph orch ps --daemon-type mgr -If an existing mgr daemon has stopped, you can try to restart it by running the +If an existing Manager daemon has stopped, you can try to restart it by running the following command: .. prompt:: bash # @@ -183,7 +197,7 @@ Using customized container images ================================= For most users, upgrading requires nothing more complicated than specifying the -Ceph version number to upgrade to. 
In such cases, cephadm locates the specific +Ceph version to which to upgrade. In such cases, cephadm locates the specific Ceph container image to use by combining the ``container_image_base`` configuration option (default: ``docker.io/ceph/ceph``) with a tag of ``vX.Y.Z``. @@ -193,7 +207,7 @@ you need. For example, the following command upgrades to a development build: .. prompt:: bash # - ceph orch upgrade start --image quay.io/ceph-ci/ceph:recent-git-branch-name + ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:recent-git-branch-name For more information about available container images, see :ref:`containers`. diff --git a/doc/cephfs/add-remove-mds.rst b/doc/cephfs/add-remove-mds.rst index 4f5ee06aa8b7..010326d9d6b4 100644 --- a/doc/cephfs/add-remove-mds.rst +++ b/doc/cephfs/add-remove-mds.rst @@ -1,11 +1,13 @@ .. _cephfs_add_remote_mds: -.. note:: - It is highly recommended to use :doc:`/cephadm/index` or another Ceph - orchestrator for setting up the ceph cluster. Use this approach only if you - are setting up the ceph cluster manually. If one still intends to use the - manual way for deploying MDS daemons, :doc:`/cephadm/services/mds/` can - also be used. +.. warning:: The material on this page is to be used only for manually setting + up a Ceph cluster. If you intend to use an automated tool such as + :doc:`/cephadm/index` to set up a Ceph cluster, do not use the + instructions on this page. + +.. note:: If you are certain that you know what you are doing and you intend to + manually deploy MDS daemons, see :doc:`/cephadm/services/mds/` before + proceeding. ============================ Deploying Metadata Servers @@ -53,8 +55,7 @@ the MDS server. Even if a single MDS daemon is unable to fully utilize the hardware, it may be desirable later on to start more active MDS daemons on the same node to fully utilize the available cores and memory. Additionally, it may become clear with workloads on the cluster that performance improves with -multiple active MDS on the same node rather than over-provisioning a single -MDS. +multiple active MDS on the same node rather than a single overloaded MDS. Finally, be aware that CephFS is a highly-available file system by supporting standby MDS (see also :ref:`mds-standby`) for rapid failover. To get a real @@ -115,4 +116,11 @@ the following method. $ sudo rm -rf /var/lib/ceph/mds/ceph-${id} + +.. note:: When an active MDS either has health warning MDS_TRIM or + MDS_CACHE_OVERSIZED, confirmation flag (--yes-i-really-mean-it) + needs to be passed, else the command will fail. It is not recommended to + restart an MDS which has these warnings since slow recovery at restart may + lead to more problems. + .. _MDS Config Reference: ../mds-config-ref diff --git a/doc/cephfs/administration.rst b/doc/cephfs/administration.rst index cd912b42aeaf..07646bff0678 100644 --- a/doc/cephfs/administration.rst +++ b/doc/cephfs/administration.rst @@ -61,10 +61,17 @@ is a subset of the same information from the ``ceph fs dump`` command. :: - ceph fs set + ceph fs set [--yes-i-really-mean-it] Change a setting on a file system. These settings are specific to the named -file system and do not affect other file systems. +file system and do not affect other file systems. Confirmation flag is only +needed for changing ``max_mds`` when cluster is unhealthy. + +.. note:: It is mandatory to pass confirmation flag (--yes--i-really-mean-it) + for modifying FS setting variable ``max_mds`` when cluster is unhealthy. 
+ It has been added a precaution to tell users that modifying ``max_mds`` + during troubleshooting or recovery might not help. Instead, it might + further destabilize the cluster. :: @@ -92,6 +99,46 @@ The CephX IDs authorized to the old file system name need to be reauthorized to the new name. Any on-going operations of the clients using these IDs may be disrupted. Mirroring is expected to be disabled on the file system. +:: + + fs swap [--swap-fscids=yes|no] [--yes-i-really-mean-it] + +Swaps names of two Ceph file sytems and updates the application tags on all +pools of both FSs accordingly. Certain tools that track FSCIDs of the file +systems, besides the FS names, might get confused due to this operation. For +this reason, mandatory option ``--swap-fscids`` has been provided that must be +used to indicate whether or not FSCIDs must be swapped. + +.. note:: FSCID stands for "File System Cluster ID". + +Before the swap, mirroring should be disabled on both the CephFSs +(because the cephfs-mirror daemon uses the fscid internally and changing it +while the daemon is running could result in undefined behaviour), both the +CephFSs should be offline and the file system flag ``refuse_client_sessions`` +must be set for both the CephFS. + +The function of this API is to facilitate disaster recovery where a new file +system reconstructed from the previous one is ready to take over for the +possibly damaged file system. Instead of two ``fs rename`` operations, the +operator can use a swap so there is no FSMap epoch where the primary (or +production) named file system does not exist. This is important when Ceph is +monitored by automatic storage operators like (Rook) which try to reconcile +the storage system continuously. That operator may attempt to recreate the +file system as soon as it is seen to not exist. + +After the swap, CephX credentials may need to be reauthorized if the existing +mounts should "follow" the old file system to its new name. Generally, for +disaster recovery, its desirable for the existing mounts to continue using +the same file system name. Any active file system mounts for either CephFSs +must remount. Existing unflushed operations will be lost. When it is judged +that one of the swapped file systems is ready for clients, run:: + + ceph fs set joinable true + ceph fs set refuse_client_sessions false + +Keep in mind that one of the swapped file systems may be left offline for +future analysis if doing a disaster recovery swap. + Settings -------- @@ -153,7 +200,11 @@ file system and MDS daemons down, use the ``ceph fs fail`` command: :: - ceph fs fail + ceph fs fail {--yes-i-really-mean-it} + +.. note:: Note that confirmation flag is optional because it is only required + when the MDS is active and has health warning MDS_TRIM or + MDS_CACHE_OVERSIZED. This command sets a file system flag to prevent standbys from activating on the file system (the ``joinable`` flag). @@ -170,7 +221,11 @@ respawn as standbys. The file system will be left in a degraded state. :: # For all ranks, 0-N: - ceph mds fail : + ceph mds fail : {--yes-i-really-mean-it} + +.. note:: Note that confirmation flag is optional because it is only required + when the MDS is active and has health warning MDS_TRIM or + MDS_CACHE_OVERSIZED. Once all ranks are inactive, the file system may also be deleted or left in this state for other purposes (perhaps disaster recovery). @@ -232,6 +287,17 @@ Mark the file system rank as repaired. 
Unlike the name suggests, this command does not change a MDS; it manipulates the file system rank which has been marked damaged. +:: + + ceph mds last-seen + +Learn the when the MDS named ``name`` was last in the FSMap. The JSON output +includes the epoch the MDS was last seen. Historically information is limited by +the following ``mon`` configuration: + + +.. confval:: mon_fsmap_prune_threshold + Required Client Features ------------------------ @@ -258,31 +324,47 @@ Clients that are missing newly added features will be evicted automatically. Here are the current CephFS features and first release they came out: -+------------------+--------------+-----------------+ -| Feature | Ceph release | Upstream Kernel | -+==================+==============+=================+ -| jewel | jewel | 4.5 | -+------------------+--------------+-----------------+ -| kraken | kraken | 4.13 | -+------------------+--------------+-----------------+ -| luminous | luminous | 4.13 | -+------------------+--------------+-----------------+ -| mimic | mimic | 4.19 | -+------------------+--------------+-----------------+ -| reply_encoding | nautilus | 5.1 | -+------------------+--------------+-----------------+ -| reclaim_client | nautilus | N/A | -+------------------+--------------+-----------------+ -| lazy_caps_wanted | nautilus | 5.1 | -+------------------+--------------+-----------------+ -| multi_reconnect | nautilus | 5.1 | -+------------------+--------------+-----------------+ -| deleg_ino | octopus | 5.6 | -+------------------+--------------+-----------------+ -| metric_collect | pacific | N/A | -+------------------+--------------+-----------------+ -| alternate_name | pacific | PLANNED | -+------------------+--------------+-----------------+ ++----------------------------+--------------+-----------------+ +| Feature | Ceph release | Upstream Kernel | ++============================+==============+=================+ +| jewel | jewel | 4.5 | ++----------------------------+--------------+-----------------+ +| kraken | kraken | 4.13 | ++----------------------------+--------------+-----------------+ +| luminous | luminous | 4.13 | ++----------------------------+--------------+-----------------+ +| mimic | mimic | 4.19 | ++----------------------------+--------------+-----------------+ +| reply_encoding | nautilus | 5.1 | ++----------------------------+--------------+-----------------+ +| reclaim_client | nautilus | N/A | ++----------------------------+--------------+-----------------+ +| lazy_caps_wanted | nautilus | 5.1 | ++----------------------------+--------------+-----------------+ +| multi_reconnect | nautilus | 5.1 | ++----------------------------+--------------+-----------------+ +| deleg_ino | octopus | 5.6 | ++----------------------------+--------------+-----------------+ +| metric_collect | pacific | N/A | ++----------------------------+--------------+-----------------+ +| alternate_name | pacific | 6.5 | ++----------------------------+--------------+-----------------+ +| notify_session_state | quincy | 5.19 | ++----------------------------+--------------+-----------------+ +| op_getvxattr | quincy | 6.0 | ++----------------------------+--------------+-----------------+ +| 32bits_retry_fwd | reef | 6.6 | ++----------------------------+--------------+-----------------+ +| new_snaprealm_info | reef | UNKNOWN | ++----------------------------+--------------+-----------------+ +| has_owner_uidgid | reef | 6.6 | ++----------------------------+--------------+-----------------+ +| client_mds_auth_caps | squid+bp | 
PLANNED | ++----------------------------+--------------+-----------------+ + +.. + Comment: use `git describe --tags --abbrev=0 ` to lookup release + CephFS Feature Descriptions @@ -340,6 +422,15 @@ Clients can send performance metric to MDS if MDS support this feature. Clients can set and understand "alternate names" for directory entries. This is to be used for encrypted file name support. +:: + + client_mds_auth_caps + +To effectively implement ``root_squash`` in a client's ``mds`` caps, the client +must understand that it is enforcing ``root_squash`` and other cap metadata. +Clients without this feature are in danger of dropping updates to files. It is +recommend to set this feature bit. + Global settings --------------- diff --git a/doc/cephfs/cache-configuration.rst b/doc/cephfs/cache-configuration.rst index 3fc757005d1a..ecdedea1d6d7 100644 --- a/doc/cephfs/cache-configuration.rst +++ b/doc/cephfs/cache-configuration.rst @@ -209,3 +209,70 @@ cache. The limit is configured via: It is not recommended to set this value above 5M but it may be helpful with some workloads. + + +Dealing with "clients failing to respond to cache pressure" messages +-------------------------------------------------------------------- + +Every second (or every interval set by the ``mds_cache_trim_interval`` +configuration paramater), the MDS runs the "cache trim" procedure. One of the +steps of this procedure is "recall client state". During this step, the MDS +checks every client (session) to determine whether it needs to recall caps. +If any of the following are true, then the MDS needs to recall caps: + +1. the cache is full (the ``mds_cache_memory_limit`` has been exceeded) and + needs some inodes to be released +2. the client exceeds ``mds_max_caps_per_client`` (1M by default) +3. the client is inactive + +To determine whether a client (a session) is inactive, the session's +``cache_liveness`` parameters is checked and compared with the value:: + + (num_caps >> mds_session_cache_liveness_magnitude) + +where ``mds_session_cache_liveness_magnitude`` is a config param (``10`` by +default). If ``cache_liveness`` is smaller than this calculated value, the +session is considered inactive and the MDS sends a "recall caps" request for +all cached caps (the actual recall value is ``num_caps - +mds_min_caps_per_client(100)``). + +Under certain circumstances, many "recall caps" requests can be sent so quickly +that the health warning is generated: "clients failing to respond to cache +pressure". If the client does not release the caps fast enough, the MDS repeats +the "recall caps" request one second later. This means that the MDS will send +"recall caps" again and again. The "total" counter of "recall caps" for the +session will grow and grow, and will eventually exceed the "mon warning limit". + +A throttling mechanism, controlled by the ``mds_recall_max_decay_threshold`` +parameter (126K by default), is available for reducing the rate of "recall +caps" counter growth, but sometimes it is not enough to slow the "recall caps" +counter's growth rate. If altering the ``mds_recall_max_decay_threshold`` value +does not sufficiently reduce the rate of the "recall caps" counter's growth, +decrease ``mds_recall_max_caps`` incrementally until the "clients failing to +respond to cache pressure" messages no longer appear in the logs. + +Example Scenario +~~~~~~~~~~~~~~~~ + +Here is an example. A client is having 20k caps cached. 
At some moment the +server decides the client is inactive (because the session's ``cache_liveness`` +value is low). It starts to ask the client to release caps down to +``mds_min_caps_per_client`` value (100 by default). Every second, it +sends recall_caps asking to release ``caps_num - mds_min_caps_per_client`` caps +(but not more than ``mds_recall_max_caps``, which is 30k by default). A client +is starting to release, but is releasing with a rate of (for example) only 100 +caps per second. + +So in the first second of time, the mds sends recall_caps = 20k - 100 the +second second recall_caps = (20k - 100) - 100 the third second recall_caps = +(20k - 200) - 100 and so on. And every time it sends recall_caps it updates the +session's recall_caps value, which is calculated how many recall_caps sent in +the last minute. I.e. the counter is growing quickly, eventually exceeding +mds_recall_warning_threshold, which is 128K by default, and ceph starts to +report "failing to respond to cache pressure" warning in the status. Now, +after we set mds_recall_max_caps to 3K, in this situation the mds server sends +only 3K recall_caps per second, and the maximum value the session's recall_caps +value may have (if the mds is sending 3K every second for at least one minute) +is 60 * 3K = 180K. This means that it is still possible to achieve +``mds_recall_warning_threshold`` but only if a client does not "respond" for a +long time, and as your experiments show it is not the case. diff --git a/doc/cephfs/ceph-dokan.rst b/doc/cephfs/ceph-dokan.rst index b9fb6c59287b..4146761869cc 100644 --- a/doc/cephfs/ceph-dokan.rst +++ b/doc/cephfs/ceph-dokan.rst @@ -24,7 +24,7 @@ This will mount the default ceph filesystem using the drive letter ``x``. If ``ceph.conf`` is placed at the default location, which is ``%ProgramData%\ceph\ceph.conf``, then this argument becomes optional. -The ``-l`` argument also allows using an empty folder as a mountpoint +The ``-l`` argument also allows using an empty folder as a mount point instead of a drive letter. The uid and gid used for mounting the filesystem default to 0 and may be @@ -75,7 +75,7 @@ like so:: ceph-dokan.exe unmap -l x -Note that when unmapping Ceph filesystems, the exact same mountpoint argument +Note that when unmapping Ceph filesystems, the exact same mount point argument must be used as when the mapping was created. Limitations diff --git a/doc/cephfs/cephfs-io-path.rst b/doc/cephfs/cephfs-io-path.rst index 8c7810ba0a4e..d5ae17197039 100644 --- a/doc/cephfs/cephfs-io-path.rst +++ b/doc/cephfs/cephfs-io-path.rst @@ -47,4 +47,4 @@ client cache. | MDSs | -=-------> | OSDs | +---------------------+ +--------------------+ -.. _Architecture: ../architecture +.. _Architecture: ../../architecture diff --git a/doc/cephfs/cephfs-journal-tool.rst b/doc/cephfs/cephfs-journal-tool.rst index 64a113091182..3ae1139ceac2 100644 --- a/doc/cephfs/cephfs-journal-tool.rst +++ b/doc/cephfs/cephfs-journal-tool.rst @@ -15,7 +15,8 @@ examining, modifying, and extracting data from journals. This tool is **dangerous** because it directly modifies internal data structures of the file system. Make backups, be careful, and - seek expert advice. If you are unsure, do not run this tool. + seek expert advice. If you are unsure, do not run this tool. As a + precaution, cephfs-journal-tool doesn't work on an active filesystem. 
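For example, a typical workflow (a sketch; the file system name ``cephfs`` and
rank ``0`` are placeholders) is to take the file system offline, run the tool
against a rank, and then allow MDS daemons to rejoin::

    # ceph fs fail cephfs
    # cephfs-journal-tool --rank=cephfs:0 journal inspect
    # ceph fs set cephfs joinable true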
Syntax ------ @@ -104,12 +105,12 @@ Example: header get/set "write_pos": 4274947, "expire_pos": 4194304, "trimmed_pos": 4194303, + "stream_format": 1, "layout": { "stripe_unit": 4194304, - "stripe_count": 4194304, + "stripe_count": 1, "object_size": 4194304, - "cas_hash": 4194304, - "object_stripe_unit": 4194304, - "pg_pool": 4194304}} + "pool_id": 2, + "pool_ns": ""}} # cephfs-journal-tool header set trimmed_pos 4194303 Updating trimmed_pos 0x400000 -> 0x3fffff diff --git a/doc/cephfs/cephfs-mirroring.rst b/doc/cephfs/cephfs-mirroring.rst index fd00a1eef2e3..35cef840558d 100644 --- a/doc/cephfs/cephfs-mirroring.rst +++ b/doc/cephfs/cephfs-mirroring.rst @@ -93,6 +93,15 @@ providing high-availability. .. note:: Deploying a single mirror daemon is recommended. Running multiple daemons is untested. +The following file types are supported by the mirroring: + +- Regular files (-) +- Directory files (d) +- Symbolic link file (l) + +The other file types are ignored by the mirroring. So they won't be +available on a successfully synchronized peer. + The mirroring module is disabled by default. To enable the mirroring module, run the following command: @@ -111,7 +120,9 @@ system, run a command of the following form: .. note:: "Mirroring module" commands are prefixed with ``fs snapshot mirror``. This distinguishes them from "monitor commands", which are prefixed with ``fs - mirror``. Be sure (in this context) to use module commands. + mirror``. Enabling mirroring by using monitor commands will result in the mirror daemon + entering the "failed" state due to the absence of the `cephfs_mirror` index object. + So be sure (in this context) to use module commands. To disable mirroring for a given file system, run a command of the following form: @@ -180,6 +191,12 @@ To configure a directory for mirroring, run a command of the following form: ceph fs snapshot mirror add +To list the configured directories, run a command of the following form: + +.. prompt:: bash $ + + ceph fs snapshot mirror ls + To stop mirroring directory snapshots, run a command of the following form: .. prompt:: bash $ @@ -243,6 +260,13 @@ e.g.:: .. 
_cephfs_mirroring_mirroring_status: +Snapshot Mirroring +------------------ + +To initiate snapshot mirroring, create a snapshot of the configured directory in the primary cluster:: + + $ mkdir -p /d0/d1/d2/.snap/snap1 + Mirroring Status ---------------- @@ -331,8 +355,9 @@ command is of format `filesystem-name@filesystem-id peer-uuid`:: "last_synced_snap": { "id": 120, "name": "snap1", - "sync_duration": 0.079997898999999997, - "sync_time_stamp": "274900.558797s" + "sync_duration": 3, + "sync_time_stamp": "274900.558797s", + "sync_bytes": 52428800 }, "snaps_synced": 2, "snaps_deleted": 0, @@ -350,6 +375,32 @@ A directory can be in one of the following states:: - `syncing`: The directory is currently being synchronized - `failed`: The directory has hit upper limit of consecutive failures +When a directory is currently being synchronized, the mirror daemon marks it as `syncing` and +`fs mirror peer status` shows the snapshot being synchronized under the `current_syncing_snap`:: + + $ ceph --admin-daemon /var/run/ceph/cephfs-mirror.asok fs mirror peer status cephfs@360 a2dc7784-e7a1-4723-b103-03ee8d8768f8 + { + "/d0": { + "state": "syncing", + "current_syncing_snap": { + "id": 121, + "name": "snap2" + }, + "last_synced_snap": { + "id": 120, + "name": "snap1", + "sync_duration": 3, + "sync_time_stamp": "274900.558797s", + "sync_bytes": 52428800 + }, + "snaps_synced": 2, + "snaps_deleted": 0, + "snaps_renamed": 0 + } + } + +The mirror daemon marks it back to `idle`, when the syncing completes. + When a directory experiences a configured number of consecutive synchronization failures, the mirror daemon marks it as `failed`. Synchronization for these directories is retried. By default, the number of consecutive failures before a directory is marked as failed @@ -364,10 +415,46 @@ E.g., adding a regular file for synchronization would result in failed status:: { "/d0": { "state": "idle", + "last_synced_snap": { + "id": 121, + "name": "snap2", + "sync_duration": 5, + "sync_time_stamp": "500900.600797s", + "sync_bytes": 78643200 + }, + "snaps_synced": 3, + "snaps_deleted": 0, + "snaps_renamed": 0 + }, + "/f0": { + "state": "failed", + "snaps_synced": 0, + "snaps_deleted": 0, + "snaps_renamed": 0 + } + } + +This allows a user to add a non-existent directory for synchronization. The mirror daemon +will mark such a directory as failed and retry (less frequently). When the directory is +created, the mirror daemon will clear the failed state upon successful synchronization. + +Adding a new snapshot or a new directory manually in the .snap directory of the +remote filesystem will result in failed status of the corresponding configured directory. +In the remote filesystem:: + + $ ceph fs subvolume snapshot create cephfs subvol1 snap2 group1 + or + $ mkdir /d0/.snap/snap2 + + $ ceph --admin-daemon /var/run/ceph/cephfs-mirror.asok fs mirror peer status cephfs@360 a2dc7784-e7a1-4723-b103-03ee8d8768f8 + { + "/d0": { + "state": "failed", + "failure_reason": "snapshot 'snap2' has invalid metadata", "last_synced_snap": { "id": 120, "name": "snap1", - "sync_duration": 0.079997898999999997, + "sync_duration": 3, "sync_time_stamp": "274900.558797s" }, "snaps_synced": 2, @@ -382,13 +469,79 @@ E.g., adding a regular file for synchronization would result in failed status:: } } -This allows a user to add a non-existent directory for synchronization. The mirror daemon -will mark such a directory as failed and retry (less frequently). 
When the directory is -created, the mirror daemon will clear the failed state upon successful synchronization. +When the snapshot or the directory is removed from the remote filesystem, the mirror daemon will +clear the failed state upon successful synchronization of the pending snapshots, if any. + +.. note:: Treat the remote filesystem as read-only. Nothing is inherently enforced by CephFS. + But with the right mds caps, users would not be able to snapshot directories in the + remote file system. When mirroring is disabled, the respective `fs mirror status` command for the file system will not show up in command help. +Metrics +------- + +CephFS exports mirroring metrics as :ref:`Labeled Perf Counters` which will be consumed by the OCP/ODF Dashboard to provide monitoring of the Geo Replication. These metrics can be used to measure the progress of cephfs_mirror syncing and thus provide the monitoring capability. CephFS exports the following mirroring metrics, which are displayed using the ``counter dump`` command. + +.. list-table:: Mirror Status Metrics + :widths: 25 25 75 + :header-rows: 1 + + * - Name + - Type + - Description + * - mirroring_peers + - Gauge + - The number of peers involved in mirroring + * - directory_count + - Gauge + - The total number of directories being synchronized + * - mirrored_filesystems + - Gauge + - The total number of filesystems which are mirrored + * - mirror_enable_failures + - Counter + - Enable mirroring failures + +.. list-table:: Replication Metrics + :widths: 25 25 75 + :header-rows: 1 + + * - Name + - Type + - Description + * - snaps_synced + - Counter + - The total number of snapshots successfully synchronized + * - sync_bytes + - Counter + - The total bytes being synchronized + * - sync_failures + - Counter + - The total number of failed snapshot synchronizations + * - snaps_deleted + - Counter + - The total number of snapshots deleted + * - snaps_renamed + - Counter + - The total number of snapshots renamed + * - avg_sync_time + - Gauge + - The average time taken by all snapshot synchronizations + * - last_synced_start + - Gauge + - The sync start time of the last synced snapshot + * - last_synced_end + - Gauge + - The sync end time of the last synced snapshot + * - last_synced_duration + - Gauge + - The time duration of the last synchronization + * - last_synced_bytes + - counter + - The total bytes being synchronized for the last synced snapshot + Configuration Options --------------------- @@ -401,6 +554,7 @@ Configuration Options .. confval:: cephfs_mirror_retry_failed_directories_interval .. confval:: cephfs_mirror_restart_mirror_on_failure_interval .. confval:: cephfs_mirror_mount_timeout +.. confval:: cephfs_mirror_perf_stats_prio Re-adding Peers --------------- diff --git a/doc/cephfs/cephfs-top.rst b/doc/cephfs/cephfs-top.rst index 49439a4bd18b..1588c4f5ced7 100644 --- a/doc/cephfs/cephfs-top.rst +++ b/doc/cephfs/cephfs-top.rst @@ -63,6 +63,62 @@ By default, `cephfs-top` uses `client.fstop` user to connect to a Ceph cluster:: $ ceph auth get-or-create client.fstop mon 'allow r' mds 'allow r' osd 'allow r' mgr 'allow r' $ cephfs-top +Description of Fields +--------------------- + +1. chit : Cap hit + Percentage of file capability hits over total number of caps + +2. dlease : Dentry lease + Percentage of dentry leases handed out over the total dentry lease requests + +3. ofiles : Opened files + Number of opened files + +4. oicaps : Pinned caps + Number of pinned caps + +5. oinodes : Opened inodes + Number of opened inodes + +6. 
rtio : Total size of read IOs + Number of bytes read in input/output operations generated by all process + +7. wtio : Total size of write IOs + Number of bytes written in input/output operations generated by all processes + +8. raio : Average size of read IOs + Mean of number of bytes read in input/output operations generated by all + process over total IO done + +9. waio : Average size of write IOs + Mean of number of bytes written in input/output operations generated by all + process over total IO done + +10. rsp : Read speed + Speed of read IOs with respect to the duration since the last refresh of clients + +11. wsp : Write speed + Speed of write IOs with respect to the duration since the last refresh of clients + +12. rlatavg : Average read latency + Mean value of the read latencies + +13. rlatsd : Standard deviation (variance) for read latency + Dispersion of the metric for the read latency relative to its mean + +14. wlatavg : Average write latency + Mean value of the write latencies + +15. wlatsd : Standard deviation (variance) for write latency + Dispersion of the metric for the write latency relative to its mean + +16. mlatavg : Average metadata latency + Mean value of the metadata latencies + +17. mlatsd : Standard deviation (variance) for metadata latency + Dispersion of the metric for the metadata latency relative to its mean + Command-Line Options -------------------- diff --git a/doc/cephfs/client-auth.rst b/doc/cephfs/client-auth.rst index 5a642e4f8ae9..61305e42212b 100644 --- a/doc/cephfs/client-auth.rst +++ b/doc/cephfs/client-auth.rst @@ -2,52 +2,55 @@ CephFS Client Capabilities ================================ -Use Ceph authentication capabilities to restrict your file system clients -to the lowest possible level of authority needed. +Ceph authentication capabilities are used to restrict CephFS clients to +the lowest level of authority necessary. -.. note:: Path restriction and layout modification restriction are new features - in the Jewel release of Ceph. +.. note:: Path restriction and layout-modification restriction were introduced + in the Jewel release of Ceph. -.. note:: Using Erasure Coded(EC) pools with CephFS is supported only with the - BlueStore Backend. They cannot be used as metadata pools and overwrites must - be enabled on the data pools. +.. note:: Using Erasure Coded (EC) pools with CephFS is supported only with + :term:`BlueStore`. Erasure-coded pools cannot be used as metadata pools. + Overwrites must be enabled on erasure-coded data pools. Path restriction ================ -By default, clients are not restricted in what paths they are allowed to -mount. Further, when clients mount a subdirectory, e.g., ``/home/user``, the -MDS does not by default verify that subsequent operations are ‘locked’ within -that directory. +By default, clients are not restricted in the paths that they are allowed to +mount. When clients mount a subdirectory (for example ``/home/user``), the MDS +does not by default verify that subsequent operations are "locked" within that +directory. -To restrict clients to only mount and work within a certain directory, use -path-based MDS authentication capabilities. +To restrict clients so that they mount and work only within a certain +directory, use path-based MDS authentication capabilities. -Note that this restriction *only* impacts the filesystem hierarchy -- the metadata -tree managed by the MDS. Clients will still be able to access the underlying -file data in RADOS directly. 
To segregate clients fully, you must also isolate -untrusted clients in their own RADOS namespace. You can place a client's -filesystem subtree in a particular namespace using `file layouts`_ and then -restrict their RADOS access to that namespace using `OSD capabilities`_ +This restriction impacts *only* the filesystem hierarchy, or, in other words, +the metadata tree that is managed by the MDS. Clients will still be able to +access the underlying file data in RADOS directly. To segregate clients fully, +isolate untrusted clients in their own RADOS namespace. You can place a +client's filesystem subtree in a particular namespace using :ref:`file +layouts` and then restrict their RADOS access to that namespace +using :ref:`OSD capabilities`. -.. _file layouts: ./file-layouts -.. _OSD capabilities: ../rados/operations/user-management/#authorization-capabilities Syntax ------ -To grant rw access to the specified directory only, we mention the specified -directory while creating key for a client using the following syntax:: +To grant ``rw`` access to the specified directory only, mention the specified +directory while creating key for a client. Use a command of the following form: - ceph fs authorize client. rw +.. prompt:: bash # -For example, to restrict client ``foo`` to writing only in the ``bar`` -directory of file system ``cephfs_a``, use :: + ceph fs authorize client. rw - ceph fs authorize cephfs_a client.foo / r /bar rw +For example, to restrict a client named ``foo`` so that it can write only in +the ``bar`` directory of file system ``cephfs_a``, run the following command: - results in: +.. prompt:: bash # + + ceph fs authorize cephfs_a client.foo / r /bar rw + +This results in:: client.foo key: *key* @@ -56,59 +59,65 @@ directory of file system ``cephfs_a``, use :: caps: [osd] allow rw tag cephfs data=cephfs_a To completely restrict the client to the ``bar`` directory, omit the -root directory :: +root directory : - ceph fs authorize cephfs_a client.foo /bar rw +.. prompt:: bash # -Note that if a client's read access is restricted to a path, they will only -be able to mount the file system when specifying a readable path in the -mount command (see below). + ceph fs authorize cephfs_a client.foo /bar rw -Supplying ``all`` or ``*`` as the file system name will grant access to every -file system. Note that it is usually necessary to quote ``*`` to protect it -from the shell. +If a client's read access is restricted to a path, the client will be able to +mount the file system only by specifying a readable path in the mount command +(see below). -See `User Management - Add a User to a Keyring`_. for additional details on -user management +Supplying ``all`` or ``*`` as the file system name grants access to every file +system. It is usually necessary to quote ``*`` to protect it from the +shell. -To restrict a client to the specified sub-directory only, we mention the -specified directory while mounting using the following syntax:: +See `User Management - Add a User to a Keyring`_ for more on user management. - ceph-fuse -n client. -r *directory_to_be_mounted* +To restrict a client to only the specified sub-directory, mention the specified +directory while mounting. Use a command of the following form: -For example, to restrict client ``foo`` to ``mnt/bar`` directory, we will -use:: +.. prompt:: bash # - ceph-fuse -n client.foo mnt -r /bar + ceph-fuse -n client. 
-r *directory_to_be_mounted* -Free space reporting --------------------- +For example, to restrict client ``foo`` to ``mnt/bar`` directory, use the +following command: -By default, when a client is mounting a sub-directory, the used space (``df``) -will be calculated from the quota on that sub-directory, rather than reporting -the overall amount of space used on the cluster. +.. prompt:: bash # -If you would like the client to report the overall usage of the file system, -and not just the quota usage on the sub-directory mounted, then set the -following config option on the client:: + ceph-fuse -n client.foo mnt -r /bar + +Reporting free space +-------------------- +When a client has mounted a sub-directory, the used space (``df``) is +calculated from the quota on that sub-directory rather than from the overall +amount of space used on the CephFS file system. + +To make the client report the overall usage of the file system and not only the +quota usage on the mounted sub-directory, set the following config option on +the client:: client quota df = false -If quotas are not enabled, or no quota is set on the sub-directory mounted, -then the overall usage of the file system will be reported irrespective of -the value of this setting. +If quotas are not enabled or if no quota is set on the mounted sub-directory, +then the overall usage of the file system will be reported irrespective of the +value of this setting. + +.. _cephfs-layout-and-quota-restriction: Layout and Quota restriction (the 'p' flag) =========================================== -To set layouts or quotas, clients require the 'p' flag in addition to 'rw'. -This restricts all the attributes that are set by special extended attributes -with a "ceph." prefix, as well as restricting other means of setting -these fields (such as openc operations with layouts). +To set layouts or quotas, clients require the ``p`` flag in addition to ``rw``. +Using the ``p`` flag with ``rw`` restricts all the attributes that are set by +special extended attributes by using a ``ceph.`` prefix, and restricts +other means of setting these fields (such as ``openc`` operations with layouts). -For example, in the following snippet client.0 can modify layouts and quotas -on the file system cephfs_a, but client.1 cannot:: +For example, in the following snippet ``client.0`` can modify layouts and +quotas on the file system ``cephfs_a``, but ``client.1`` cannot:: client.0 key: AQAz7EVWygILFRAAdIcuJ12opU/JKyfFmxhuaw== @@ -126,12 +135,12 @@ on the file system cephfs_a, but client.1 cannot:: Snapshot restriction (the 's' flag) =========================================== -To create or delete snapshots, clients require the 's' flag in addition to -'rw'. Note that when capability string also contains the 'p' flag, the 's' -flag must appear after it (all flags except 'rw' must be specified in +To create or delete snapshots, clients require the ``s`` flag in addition to +``rw``. Note that when capability string also contains the ``p`` flag, the +``s`` flag must appear after it (all flags except ``rw`` must be specified in alphabetical order). 
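+Such capabilities can be granted with ``fs authorize``; the following is only a
+sketch, and the file system, client, and path names are illustrative:
+
+.. prompt:: bash #
+
+   # illustrative names; adjust to your cluster
+   ceph fs authorize cephfs_a client.0 / rw /bar rws
+
+To grant the ``p`` flag as well, specify ``rwps`` (note the alphabetical
+ordering of the flags that follow ``rw``).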
-For example, in the following snippet client.0 can create or delete snapshots +For example, in the following snippet ``client.0`` can create or delete snapshots in the ``bar`` directory of file system ``cephfs_a``:: client.0 @@ -154,9 +163,9 @@ Network restriction caps: [mon] allow r network 10.0.0.0/8 caps: [osd] allow rw tag cephfs data=cephfs_a network 10.0.0.0/8 -The optional ``{network/prefix}`` is a standard network name and -prefix length in CIDR notation (e.g., ``10.3.0.0/16``). If present, -the use of this capability is restricted to clients connecting from +The optional ``{network/prefix}`` is a standard network-name-and-prefix length +in CIDR notation (for example, ``10.3.0.0/16``). If ``{network/prefix}}`` is +present, the use of this capability is restricted to clients connecting from this network. .. _fs-authorize-multifs: @@ -164,96 +173,164 @@ this network. File system Information Restriction =================================== -If desired, the monitor cluster can present a limited view of the file systems -available. In this case, the monitor cluster will only inform clients about -file systems specified by the administrator. Other file systems will not be -reported and commands affecting them will fail as if the file systems do -not exist. +The monitor cluster can present a limited view of the available file systems. +In this case, the monitor cluster informs clients only about file systems +specified by the administrator. Other file systems are not reported and +commands affecting them fail as though the file systems do not exist. + +Consider following example. The Ceph cluster has 2 file systems: + +.. prompt:: bash # -Consider following example. The Ceph cluster has 2 FSs:: + ceph fs ls + +:: - $ ceph fs ls name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ] name: cephfs2, metadata pool: cephfs2_metadata, data pools: [cephfs2_data ] -But we authorize client ``someuser`` for only one FS:: +We authorize client ``someuser`` for only one file system: + +.. prompt:: bash # + + ceph fs authorize cephfs client.someuser / rw + +:: - $ ceph fs authorize cephfs client.someuser / rw [client.someuser] key = AQAmthpf89M+JhAAiHDYQkMiCq3x+J0n9e8REQ== - $ cat ceph.client.someuser.keyring + +.. prompt:: bash # + + cat ceph.client.someuser.keyring + +:: + [client.someuser] key = AQAmthpf89M+JhAAiHDYQkMiCq3x+J0n9e8REQ== caps mds = "allow rw fsname=cephfs" caps mon = "allow r fsname=cephfs" caps osd = "allow rw tag cephfs data=cephfs" -And the client can only see the FS that it has authorization for:: +The client can see only the file system that it is authorized to see: - $ ceph fs ls -n client.someuser -k ceph.client.someuser.keyring - name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ] +.. prompt:: bash # + + ceph fs ls -n client.someuser -k ceph.client.someuser.keyring + +:: + + name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ] -Standby MDS daemons will always be displayed. Note that the information about -restricted MDS daemons and file systems may become available by other means, -such as ``ceph health detail``. +Standby MDS daemons are always displayed. Information about restricted MDS +daemons and file systems may become available by other means, such as by +running ``ceph health detail``. MDS communication restriction ============================= -By default, user applications may communicate with any MDS, whether or not -they are allowed to modify data on an associated file system (see -`Path restriction` above). 
Client's communication can be restricted to MDS -daemons associated with particular file system(s) by adding MDS caps for that +By default, user applications may communicate with any MDS, regardless of +whether they are allowed to modify data on an associated file system (see `Path +restriction` above). Client communication can be restricted to MDS daemons +associated with particular file system(s) by adding MDS caps for that particular file system. Consider the following example where the Ceph cluster -has 2 FSs:: +has two file systems: + +.. prompt:: bash # + + ceph fs ls + +:: - $ ceph fs ls name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ] name: cephfs2, metadata pool: cephfs2_metadata, data pools: [cephfs2_data ] -Client ``someuser`` is authorized only for one FS:: +Client ``someuser`` is authorized for only one file system: + +.. prompt:: bash # + + ceph fs authorize cephfs client.someuser / rw + +:: - $ ceph fs authorize cephfs client.someuser / rw [client.someuser] key = AQBPSARfg8hCJRAAEegIxjlm7VkHuiuntm6wsA== - $ ceph auth get client.someuser > ceph.client.someuser.keyring + +.. prompt:: bash # + + ceph auth get client.someuser > ceph.client.someuser.keyring + +:: + exported keyring for client.someuser - $ cat ceph.client.someuser.keyring + +.. prompt:: bash # + + cat ceph.client.someuser.keyring + +:: + [client.someuser] key = AQBPSARfg8hCJRAAEegIxjlm7VkHuiuntm6wsA== caps mds = "allow rw fsname=cephfs" caps mon = "allow r" caps osd = "allow rw tag cephfs data=cephfs" -Mounting ``cephfs1`` with ``someuser`` works:: +Mounting ``cephfs1`` on the already-created mount point ``/mnt/cephfs1`` with +``someuser`` works: + +.. prompt:: bash # + + sudo ceph-fuse /mnt/cephfs1 -n client.someuser -k ceph.client.someuser.keyring --client-fs=cephfs + +.. note:: If ``/mnt/cephfs`` does not exist prior to running the above command, + create it by running ``mkdir /mnt/cephfs1``. + +:: - $ sudo ceph-fuse /mnt/cephfs1 -n client.someuser -k ceph.client.someuser.keyring --client-fs=cephfs ceph-fuse[96634]: starting ceph client ceph-fuse[96634]: starting fuse - $ mount | grep ceph-fuse + +.. prompt:: bash # + + mount | grep ceph-fuse + +:: + ceph-fuse on /mnt/cephfs1 type fuse.ceph-fuse (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) -But mounting ``cephfs2`` does not:: +Mounting ``cephfs2`` with ``someuser`` does not work: + +.. prompt:: bash # - $ sudo ceph-fuse /mnt/cephfs2 -n client.someuser -k ceph.client.someuser.keyring --client-fs=cephfs2 - ceph-fuse[96599]: starting ceph client - ceph-fuse[96599]: ceph mount failed with (1) Operation not permitted + sudo ceph-fuse /mnt/cephfs2 -n client.someuser -k ceph.client.someuser.keyring --client-fs=cephfs2 + +:: + + ceph-fuse[96599]: starting ceph client + ceph-fuse[96599]: ceph mount failed with (1) Operation not permitted Root squash =========== The ``root squash`` feature is implemented as a safety measure to prevent -scenarios such as accidental ``sudo rm -rf /path``. You can enable -``root_squash`` mode in MDS caps to disallow clients with uid=0 or gid=0 to -perform write access operations -- e.g., rm, rmdir, rmsnap, mkdir, mksnap. -However, the mode allows the read operations of a root client unlike in -other file systems. +scenarios such as an accidental forced removal of a path (for example, ``sudo +rm -rf /path``). Enable ``root_squash`` mode in MDS caps to disallow clients +with ``uid=0`` or ``gid=0`` to perform write access operations (for example +``rm``, ``rmdir``, ``rmsnap``, ``mkdir``, and ``mksnap``). 
This mode permits +the read operations on a root client, unlike the behavior of other file +systems. + +Here is an example of enabling ``root_squash`` in a filesystem, except within +the ``/volumes`` directory tree in the filesystem: -Following is an example of enabling root_squash in a filesystem except within -'/volumes' directory tree in the filesystem:: +.. prompt:: bash # + + ceph fs authorize a client.test_a / rw root_squash /volumes rw + ceph auth get client.test_a + +:: - $ ceph fs authorize a client.test_a / rw root_squash /volumes rw - $ ceph auth get client.test_a [client.test_a] key = AQBZcDpfEbEUKxAADk14VflBXt71rL9D966mYA== caps mds = "allow rw fsname=a root_squash, allow rw fsname=a path=/volumes" @@ -262,73 +339,124 @@ Following is an example of enabling root_squash in a filesystem except within Updating Capabilities using ``fs authorize`` ============================================ -After Ceph's Reef version, ``fs authorize`` can not only be used to create a -new client with caps for a CephFS but it can also be used to add new caps -(for a another CephFS or another path in same FS) to an already existing -client. -Let's say we run following and create a new client:: +Beginning with the Reef release of Ceph, ``fs authorize`` can be used to add +new caps to an existing client (for another CephFS or another path in the same +file system). - $ ceph fs authorize a client.x / rw - [client.x] - key = AQAOtSVk9WWtIhAAJ3gSpsjwfIQ0gQ6vfSx/0w== - $ ceph auth get client.x - [client.x] +The following example demonstrates the behavior that results from running the command ``ceph fs authorize a client.x / rw`` twice. + +#. Create a new client: + + .. prompt:: bash # + + ceph fs authorize a client.x / rw + + :: + + [client.x] + key = AQAOtSVk9WWtIhAAJ3gSpsjwfIQ0gQ6vfSx/0w== + +#. Get the client capabilities: + + .. prompt:: bash # + + ceph auth get client.x + + :: + + [client.x] key = AQAOtSVk9WWtIhAAJ3gSpsjwfIQ0gQ6vfSx/0w== caps mds = "allow rw fsname=a" caps mon = "allow r fsname=a" caps osd = "allow rw tag cephfs data=a" -Previously, running ``fs authorize a client.x / rw`` a second time used to -print an error message. But after Reef, it instead prints message that -there's not update:: +#. Previously, running ``fs authorize a client.x / rw`` a second time printed + an error message. In the Reef release and in later releases, this command + prints a message reporting that the capabilities did not get updated: + + .. prompt:: bash # + + ./bin/ceph fs authorize a client.x / rw - $ ./bin/ceph fs authorize a client.x / rw - no update for caps of client.x + :: + + no update for caps of client.x Adding New Caps Using ``fs authorize`` -------------------------------------- -Users can now add caps for another path in same CephFS:: - $ ceph fs authorize a client.x /dir1 rw +Add capabilities for another path in same CephFS: + +.. prompt:: bash # + + ceph fs authorize a client.x /dir1 rw + +:: + updated caps for client.x - $ ceph auth get client.x - [client.x] - key = AQAOtSVk9WWtIhAAJ3gSpsjwfIQ0gQ6vfSx/0w== - caps mds = "allow r fsname=a, allow rw fsname=a path=some/dir" - caps mon = "allow r fsname=a" - caps osd = "allow rw tag cephfs data=a" -And even add caps for another CephFS on Ceph cluster:: +.. 
prompt:: bash # + + ceph auth get client.x + +:: + + [client.x] + key = AQAOtSVk9WWtIhAAJ3gSpsjwfIQ0gQ6vfSx/0w== + caps mds = "allow r fsname=a, allow rw fsname=a path=some/dir" + caps mon = "allow r fsname=a" + caps osd = "allow rw tag cephfs data=a" + +Add capabilities for another CephFS on the Ceph cluster: + +.. prompt:: bash # + + ceph fs authorize b client.x / rw + +:: - $ ceph fs authorize b client.x / rw updated caps for client.x - $ ceph auth get client.x - [client.x] - key = AQD6tiVk0uJdARAABMaQuLRotxTi3Qdj47FkBA== - caps mds = "allow rw fsname=a, allow rw fsname=b" - caps mon = "allow r fsname=a, allow r fsname=b" - caps osd = "allow rw tag cephfs data=a, allow rw tag cephfs data=b" + +.. prompt:: bash # + + ceph auth get client.x + +:: + + [client.x] + key = AQD6tiVk0uJdARAABMaQuLRotxTi3Qdj47FkBA== + caps mds = "allow rw fsname=a, allow rw fsname=b" + caps mon = "allow r fsname=a, allow r fsname=b" + caps osd = "allow rw tag cephfs data=a, allow rw tag cephfs data=b" Changing rw permissions in caps ------------------------------- -It's not possible to modify caps by running ``fs authorize`` except for the -case when read/write permissions have to be changed. This so because the -``fs authorize`` becomes ambiguous. For example, user runs ``fs authorize -cephfs1 /dir1 client.x rw`` to create a client and then runs ``fs authorize -cephfs1 /dir2 client.x rw`` (notice ``/dir1`` is changed to ``/dir2``). -Running second command can be interpreted as changing ``/dir1`` to ``/dir2`` -in current cap or can also be interpreted as authorizing the client with a -new cap for path ``/dir2``. As seen in previous sections, second -interpretation is chosen and therefore it's impossible to update a part of -capability granted except rw permissions. Following is how read/write -permissions for ``client.x`` (that was created above) can be changed:: - - $ ceph fs authorize a client.x / r +Capabilities can be modified by running ``fs authorize`` only in the case when +read/write permissions must be changed. This is because the command ``fs +authorize`` becomes ambiguous. For example, a user runs ``fs authorize cephfs1 +client.x /dir1 rw`` to create a client and then runs ``fs authorize cephfs1 +client.x /dir2 rw`` (notice that ``/dir1`` has been changed to ``/dir2``). +Running the second command could be interpreted to change ``/dir1`` to +``/dir2`` with current capabilities or could be interpreted to authorize the +client with a new capability for the path ``/dir2``. As shown previously, the +second interpretation is chosen and it is therefore impossible to update a part +of the capabilities granted except ``rw`` permissions. The following shows how +read/write permissions for ``client.x`` can be changed: + +.. prompt:: bash # + + ceph fs authorize a client.x / r [client.x] key = AQBBKjBkIFhBDBAA6q5PmDDWaZtYjd+jafeVUQ== - $ ceph auth get client.x + +.. prompt:: bash # + + ceph auth get client.x + +:: + [client.x] key = AQBBKjBkIFhBDBAA6q5PmDDWaZtYjd+jafeVUQ== caps mds = "allow r fsname=a" @@ -337,41 +465,75 @@ permissions for ``client.x`` (that was created above) can be changed:: ``fs authorize`` never deducts any part of caps ----------------------------------------------- -It's not possible to remove caps issued to a client by running ``fs -authorize`` again. 
For example, if a client cap has ``root_squash`` applied -on a certain CephFS, running ``fs authorize`` again for the same CephFS but -without ``root_squash`` will not lead to any update, the client caps will -remain unchanged:: +Capabilities that have been issued to a client can not be removed by running +``fs authorize`` again. For example, if a client capability has ``root_squash`` +applied on a certain CephFS, running ``fs authorize`` again for the same CephFS +but without ``root_squash`` will not lead to any update and the client caps will +remain unchanged: + +.. prompt:: bash # + + ceph fs authorize a client.x / rw root_squash + +:: - $ ceph fs authorize a client.x / rw root_squash [client.x] key = AQD61CVkcA1QCRAAd0XYqPbHvcc+lpUAuc6Vcw== - $ ceph auth get client.x + +.. prompt:: bash # + + ceph auth get client.x + +:: + [client.x] key = AQD61CVkcA1QCRAAd0XYqPbHvcc+lpUAuc6Vcw== caps mds = "allow rw fsname=a root_squash" caps mon = "allow r fsname=a" caps osd = "allow rw tag cephfs data=a" - $ ceph fs authorize a client.x / rw + +.. prompt:: bash # + + ceph fs authorize a client.x / rw + +:: + [client.x] key = AQD61CVkcA1QCRAAd0XYqPbHvcc+lpUAuc6Vcw== no update was performed for caps of client.x. caps of client.x remains unchanged. -And if a client already has a caps for FS name ``a`` and path ``dir1``, -running ``fs authorize`` again for FS name ``a`` but path ``dir2``, instead -of modifying the caps client already holds, a new cap for ``dir2`` will be -granted:: +If a client already has a capability for file-system name ``a`` and path +``dir1``, running ``fs authorize`` again for FS name ``a`` but path ``dir2``, +instead of modifying the capabilities client already holds, a new cap for +``dir2`` will be granted: + +.. prompt:: bash # + + ceph fs authorize a client.x /dir1 rw + ceph auth get client.x + +:: - $ ceph fs authorize a client.x /dir1 rw - $ ceph auth get client.x [client.x] key = AQC1tyVknMt+JxAAp0pVnbZGbSr/nJrmkMNKqA== caps mds = "allow rw fsname=a path=/dir1" caps mon = "allow r fsname=a" caps osd = "allow rw tag cephfs data=a" - $ ceph fs authorize a client.x /dir2 rw + +.. prompt:: bash # + + ceph fs authorize a client.x /dir2 rw + +:: + updated caps for client.x - $ ceph auth get client.x + +.. prompt:: bash # + + ceph auth get client.x + +:: + [client.x] key = AQC1tyVknMt+JxAAp0pVnbZGbSr/nJrmkMNKqA== caps mds = "allow rw fsname=a path=dir1, allow rw fsname=a path=dir2" diff --git a/doc/cephfs/createfs.rst b/doc/cephfs/createfs.rst index 4a282e562fe3..ce91660c2ef2 100644 --- a/doc/cephfs/createfs.rst +++ b/doc/cephfs/createfs.rst @@ -52,13 +52,16 @@ Once the pools are created, you may enable the file system using the ``fs new`` .. code:: bash - $ ceph fs new [--force] [--allow-dangerous-metadata-overlay] [] [--recover] + $ ceph fs new [--force] [--allow-dangerous-metadata-overlay] [] [--recover] [--yes-i-really-really-mean-it] [...] This command creates a new file system with specified metadata and data pool. The specified data pool is the default data pool and cannot be changed once set. Each file system has its own set of MDS daemons assigned to ranks so ensure that you have sufficient standby daemons available to accommodate the new file system. +.. note:: + ``--yes-i-really-really-mean-it`` may be used for some ``fs set`` commands + The ``--force`` option is used to achieve any of the following: - To set an erasure-coded pool for the default data pool. Use of an EC pool for the @@ -82,11 +85,14 @@ failed. 
So when a MDS daemon eventually picks up rank 0, the daemon reads the existing in-RADOS metadata and doesn't overwrite it. The flag also prevents the standby MDS daemons to join the file system. +The ``set`` option allows to set multiple options supported by ``fs set`` +atomically with the creation of the file system. + For example: .. code:: bash - $ ceph fs new cephfs cephfs_metadata cephfs_data + $ ceph fs new cephfs cephfs_metadata cephfs_data set max_mds 2 allow_standby_replay true $ ceph fs ls name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ] diff --git a/doc/cephfs/disaster-recovery-experts.rst b/doc/cephfs/disaster-recovery-experts.rst index c881c24239b6..7677b42f47e1 100644 --- a/doc/cephfs/disaster-recovery-experts.rst +++ b/doc/cephfs/disaster-recovery-experts.rst @@ -15,7 +15,7 @@ Advanced: Metadata repair tools file system before attempting to repair it. If you do not have access to professional support for your cluster, - consult the ceph-users mailing list or the #ceph IRC channel. + consult the ceph-users mailing list or the #ceph IRC/Slack channel. Journal export @@ -68,9 +68,9 @@ truncate it like so: :: - cephfs-journal-tool [--rank=N] journal reset + cephfs-journal-tool [--rank=:{mds-rank|all}] journal reset --yes-i-really-really-mean-it -Specify the MDS rank using the ``--rank`` option when the file system has/had +Specify the filesystem and the MDS rank using the ``--rank`` option when the file system has/had multiple active MDS. .. warning:: @@ -135,7 +135,7 @@ objects. # InoTable cephfs-table-tool 0 reset inode # Journal - cephfs-journal-tool --rank=0 journal reset + cephfs-journal-tool --rank=:0 journal reset --yes-i-really-really-mean-it # Root inodes ("/" and MDS directory) cephfs-data-scan init @@ -253,7 +253,7 @@ Next, we will create the intial metadata for the fs: cephfs-table-tool cephfs_recovery:0 reset session cephfs-table-tool cephfs_recovery:0 reset snap cephfs-table-tool cephfs_recovery:0 reset inode - cephfs-journal-tool --rank cephfs_recovery:0 journal reset --force + cephfs-journal-tool --rank cephfs_recovery:0 journal reset --force --yes-i-really-really-mean-it Now perform the recovery of the metadata pool from the data pool: diff --git a/doc/cephfs/file-layouts.rst b/doc/cephfs/file-layouts.rst index 2cdb26efc1e1..306bbc6eb089 100644 --- a/doc/cephfs/file-layouts.rst +++ b/doc/cephfs/file-layouts.rst @@ -6,6 +6,9 @@ File layouts The layout of a file controls how its contents are mapped to Ceph RADOS objects. You can read and write a file's layout using *virtual extended attributes* or xattrs. +Clients must use the ``p`` flag when writing a file's layout. See :ref:`Layout +and Quota restriction (the 'p' flag) `. + The name of the layout xattrs depends on whether a file is a regular file or a directory. Regular files' layout xattrs are called ``ceph.file.layout``, whereas directories' layout xattrs are called ``ceph.dir.layout``. Where subsequent examples refer to ``ceph.file.layout``, substitute ``dir`` as appropriate @@ -20,26 +23,38 @@ Layout fields ------------- pool - String, giving ID or name. String can only have characters in the set [a-zA-Z0-9\_-.]. Which RADOS pool a file's data objects will be stored in. + This is a string and contains either an ID or a name. Strings may contain + only characters in the set ``[a-zA-Z0-9\_-.]``. This determines the RADOS + pool that stores a file's data objects. pool_id - String of digits. This is the system assigned pool id for the RADOS pool whenever it is created. 
+ This is a string of digits. This is the pool ID that was assigned by Ceph + at the time of the creation of the RADOS pool. pool_name - String, given name. This is the user defined name for the RADOS pool whenever user creates it. + This is a string. This is the name of the RADOS pool as defined by the user + when the pool was created. pool_namespace - String with only characters in the set [a-zA-Z0-9\_-.]. Within the data pool, which RADOS namespace the objects will - be written to. Empty by default (i.e. default namespace). + This is a string containing only characters in the set ``[a-zA-Z0-9\_-.]``. + This determines which RADOS namespace within the data pool that the objects + will be written to. + Empty by default (i.e. default namespace). stripe_unit - Integer in bytes. The size (in bytes) of a block of data used in the RAID 0 distribution of a file. All stripe units for a file have equal size. The last stripe unit is typically incomplete–i.e. it represents the data at the end of the file as well as unused “space” beyond it up to the end of the fixed stripe unit size. + This is an integer. The size (in bytes) of a block of data used in the + distribution of a file. All stripe units for a file have equal size. The + last stripe unit is typically only partly full of data: it holds file data + through EOF as well as padding that fills the balance of the fixed stripe + unit size. stripe_count - Integer. The number of consecutive stripe units that constitute a RAID 0 “stripe” of file data. + Integer. The number of consecutive stripe units that constitute a RAID 0 + “stripe” of file data. object_size - Integer in bytes. File data is chunked into RADOS objects of this size. + Integer. The size of the object in bytes. File data is chunked into RADOS + objects of this size. .. tip:: diff --git a/doc/cephfs/fs-volumes.rst b/doc/cephfs/fs-volumes.rst index 1e7adf3a03d4..6a4e5fb1b8f0 100644 --- a/doc/cephfs/fs-volumes.rst +++ b/doc/cephfs/fs-volumes.rst @@ -14,17 +14,17 @@ abstractions: * FS volumes, an abstraction for CephFS file systems -* FS subvolumes, an abstraction for independent CephFS directory trees - * FS subvolume groups, an abstraction for a directory level higher than FS subvolumes. Used to effect policies (e.g., :doc:`/cephfs/file-layouts`) across a set of subvolumes -Some possible use-cases for the export abstractions: +* FS subvolumes, an abstraction for independent CephFS directory trees + +Possible use-cases for the export abstractions: * FS subvolumes used as Manila shares or CSI volumes -* FS subvolume groups used as Manila share groups +* FS-subvolume groups used as Manila share groups Requirements ------------ @@ -46,9 +46,9 @@ Create a volume by running the following command: ceph fs volume create [placement] -This creates a CephFS file system and its data and metadata pools. It can also -deploy MDS daemons for the filesystem using a ceph-mgr orchestrator module (for -example Rook). See :doc:`/mgr/orchestrator`. +This creates a CephFS file system and its data and metadata pools. This command +can also deploy MDS daemons for the filesystem using a Ceph Manager orchestrator +module (for example Rook). See :doc:`/mgr/orchestrator`. ```` is the volume name (an arbitrary string). ``[placement]`` is an optional string that specifies the :ref:`orchestrator-cli-placement-spec` for @@ -64,13 +64,21 @@ To remove a volume, run the following command: ceph fs volume rm [--yes-i-really-mean-it] -This removes a file system and its data and metadata pools. 
It also tries to -remove MDS daemons using the enabled ceph-mgr orchestrator module. +This command removes a file system and its data and metadata pools. It also +tries to remove MDS daemons using the enabled Ceph Manager orchestrator module. + +.. note:: After volume deletion, we recommend restarting `ceph-mgr` if a new + file system is created on the same cluster and the subvolume interface is + being used. See https://tracker.ceph.com/issues/49605#note-5 for more + details. -.. note:: After volume deletion, it is recommended to restart `ceph-mgr` - if a new file system is created on the same cluster and subvolume interface - is being used. Please see https://tracker.ceph.com/issues/49605#note-5 - for more details. +.. note:: If the snap-schedule Ceph Manager module is being used for a volume + and the volume is deleted, then the snap-schedule Ceph Manager module will + continue to hold references to the old pools. This will lead to the + snap-schedule Ceph Manager module faulting and logging errors. To remedy + this scenario, we recommend that the snap-schedule Ceph Manager module + be restarted after volume deletion. If the faults still persist, then we + recommend restarting `ceph-mgr`. List volumes by running the following command: @@ -86,17 +94,17 @@ Rename a volume by running the following command: Renaming a volume can be an expensive operation that requires the following: -- Renaming the orchestrator-managed MDS service to match the . - This involves launching a MDS service with ```` and bringing - down the MDS service with ````. -- Renaming the file system matching ```` to ````. -- Changing the application tags on the data and metadata pools of the file system - to ````. +- Renaming the orchestrator-managed MDS service to match the + ````. This involves launching a MDS service with + ```` and bringing down the MDS service with ````. +- Renaming the file system from ```` to ````. +- Changing the application tags on the data and metadata pools of the file + system to ````. - Renaming the metadata and data pools of the file system. The CephX IDs that are authorized for ```` must be reauthorized for -````. Any ongoing operations of the clients using these IDs may -be disrupted. Ensure that mirroring is disabled on the volume. +````. Any ongoing operations of the clients that are using these +IDs may be disrupted. Ensure that mirroring is disabled on the volume. To fetch the information of a CephFS volume, run the following command: @@ -104,7 +112,8 @@ To fetch the information of a CephFS volume, run the following command: ceph fs volume info vol_name [--human_readable] -The ``--human_readable`` flag shows used and available pool capacities in KB/MB/GB. +The ``--human_readable`` flag shows used and available pool capacities in +KB/MB/GB. The output format is JSON and contains fields as follows: @@ -159,7 +168,7 @@ Create a subvolume group by running the following command: The command succeeds even if the subvolume group already exists. -When creating a subvolume group you can specify its data pool layout (see +When you create a subvolume group, you can specify its data pool layout (see :doc:`/cephfs/file-layouts`), uid, gid, file mode in octal numerals, and size in bytes. The size of the subvolume group is specified by setting a quota on it (see :doc:`/cephfs/quota`). 
By default, the subvolume group @@ -173,11 +182,11 @@ Remove a subvolume group by running a command of the following form: ceph fs subvolumegroup rm [--force] The removal of a subvolume group fails if the subvolume group is not empty or -is non-existent. The ``--force`` flag allows the non-existent "subvolume group remove -command" to succeed. - +is non-existent. The ``--force`` flag allows the command to succeed when its +argument is a non-existent subvolume group. -Fetch the absolute path of a subvolume group by running a command of the following form: +Fetch the absolute path of a subvolume group by running a command of the +following form: .. prompt:: bash # @@ -192,7 +201,8 @@ List subvolume groups by running a command of the following form: .. note:: Subvolume group snapshot feature is no longer supported in mainline CephFS (existing group snapshots can still be listed and deleted) -Fetch the metadata of a subvolume group by running a command of the following form: +Fetch the metadata of a subvolume group by running a command of the following +form: .. prompt:: bash # @@ -200,9 +210,13 @@ Fetch the metadata of a subvolume group by running a command of the following fo The output format is JSON and contains fields as follows: -* ``atime``: access time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS" -* ``mtime``: modification time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS" -* ``ctime``: change time of the subvolume group path in the format "YYYY-MM-DD HH:MM:SS" +* ``atime``: access time of the subvolume group path in the format ``YYYY-MM-DD + HH:MM:SS`` +* ``mtime``: time of the most recent modification of the subvolume group path + in the format + ``YYYY-MM-DD HH:MM:SS`` +* ``ctime``: time of the most recent change of the subvolume group path in the + format ``YYYY-MM-DD HH:MM:SS`` * ``uid``: uid of the subvolume group path * ``gid``: gid of the subvolume group path * ``mode``: mode of the subvolume group path @@ -213,7 +227,8 @@ The output format is JSON and contains fields as follows: * ``created_at``: creation time of the subvolume group in the format "YYYY-MM-DD HH:MM:SS" * ``data_pool``: data pool to which the subvolume group belongs -Check the presence of any subvolume group by running a command of the following form: +Check for the presence of a given subvolume group by running a command of the +following form: .. prompt:: bash # @@ -221,13 +236,13 @@ Check the presence of any subvolume group by running a command of the following The ``exist`` command outputs: -* "subvolumegroup exists": if any subvolumegroup is present -* "no subvolumegroup exists": if no subvolumegroup is present +* ``subvolumegroup exists``: if any subvolumegroup is present +* ``no subvolumegroup exists``: if no subvolumegroup is present .. note:: This command checks for the presence of custom groups and not - presence of the default one. To validate the emptiness of the volume, a - subvolumegroup existence check alone is not sufficient. Subvolume existence - also needs to be checked as there might be subvolumes in the default group. + presence of the default one. A subvolumegroup-existence check alone is not + sufficient to validate the emptiness of the volume. Subvolume existence must + also be checked, as there might be subvolumes in the default group. 
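+For example, on a volume that has no custom subvolume groups (the volume name
+``cephfs`` below is illustrative), the check reports that none exist:
+
+.. prompt:: bash #
+
+   # illustrative volume name
+   ceph fs subvolumegroup exist cephfs
+
+::
+
+   no subvolumegroup exists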
Resize a subvolume group by running a command of the following form: @@ -235,21 +250,22 @@ Resize a subvolume group by running a command of the following form: ceph fs subvolumegroup resize [--no_shrink] -The command resizes the subvolume group quota, using the size specified by +This command resizes the subvolume group quota, using the size specified by ``new_size``. The ``--no_shrink`` flag prevents the subvolume group from shrinking below the current used size. The subvolume group may be resized to an infinite size by passing ``inf`` or ``infinite`` as the ``new_size``. -Remove a snapshot of a subvolume group by running a command of the following form: +Remove a snapshot of a subvolume group by running a command of the following +form: .. prompt:: bash # ceph fs subvolumegroup snapshot rm [--force] -Supplying the ``--force`` flag allows the command to succeed when it would otherwise -fail due to the nonexistence of the snapshot. +Supplying the ``--force`` flag allows the command to succeed when it would +otherwise fail due to the nonexistence of the snapshot. List snapshots of a subvolume group by running a command of the following form: @@ -261,140 +277,224 @@ List snapshots of a subvolume group by running a command of the following form: FS Subvolumes ------------- -Create a subvolume using: +Creating a subvolume +~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to create a subvolume: .. prompt:: bash # - ceph fs subvolume create [--size ] [--group_name ] [--pool_layout ] [--uid ] [--gid ] [--mode ] [--namespace-isolated] + ceph fs subvolume create [--size ] [--group_name ] [--pool_layout ] [--uid ] [--gid ] [--mode ] [--namespace-isolated] [--earmark ] The command succeeds even if the subvolume already exists. -When creating a subvolume you can specify its subvolume group, data pool layout, -uid, gid, file mode in octal numerals, and size in bytes. The size of the subvolume is -specified by setting a quota on it (see :doc:`/cephfs/quota`). The subvolume can be -created in a separate RADOS namespace by specifying --namespace-isolated option. By -default a subvolume is created within the default subvolume group, and with an octal file -mode '755', uid of its subvolume group, gid of its subvolume group, data pool layout of -its parent directory and no size limit. +When creating a subvolume, you can specify its subvolume group, data pool +layout, uid, gid, file mode in octal numerals, and size in bytes. The size of +the subvolume is specified by setting a quota on it (see :doc:`/cephfs/quota`). +The subvolume can be created in a separate RADOS namespace by specifying the +``--namespace-isolated`` option. By default, a subvolume is created within the +default subvolume group with an octal file mode of ``755``, a uid of its +subvolume group, a gid of its subvolume group, a data pool layout of its parent +directory, and no size limit. +You can also assign an earmark to a subvolume using the ``--earmark`` option. +The earmark is a unique identifier that tags the subvolume for specific purposes, +such as NFS or SMB services. By default, no earmark is set, allowing for flexible +assignment based on administrative needs. An empty string ("") can be used to remove +any existing earmark from a subvolume. + +The earmarking mechanism ensures that subvolumes are correctly tagged and managed, +helping to avoid conflicts and ensuring that each subvolume is associated +with the intended service or use case. 
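+For example, a subvolume intended for SMB might be created with an earmark as
+follows. This is only a sketch: the volume, subvolume, and cluster names are
+illustrative, and the valid earmark formats are described in the next section:
+
+.. prompt:: bash #
+
+   # illustrative names
+   ceph fs subvolume create cephfs subvol_smb --earmark smb.cluster.cluster_1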
+ +Valid Earmarks +~~~~~~~~~~~~~~~~~~~~ + +- **For NFS:** + - The valid earmark format is the top-level scope: ``'nfs'``. + +- **For SMB:** + - The valid earmark formats are: + - The top-level scope: ``'smb'``. + - The top-level scope with an intra-module level scope: ``'smb.cluster.{cluster_id}'``, where ``cluster_id`` is a short string uniquely identifying the cluster. + - Example without intra-module scope: ``smb`` + - Example with intra-module scope: ``smb.cluster.cluster_1`` + +.. note:: If you are changing an earmark from one scope to another (e.g., from nfs to smb or vice versa), + be aware that user permissions and ACLs associated with the previous scope might still apply. Ensure that + any necessary permissions are updated as needed to maintain proper access control. -Remove a subvolume using: + +Removing a subvolume +~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to remove a subvolume: .. prompt:: bash # ceph fs subvolume rm [--group_name ] [--force] [--retain-snapshots] -The command removes the subvolume and its contents. It does this in two steps. -First, it moves the subvolume to a trash folder, and then asynchronously purges -its contents. +This command removes the subvolume and its contents. This is done in two steps. +First, the subvolume is moved to a trash folder. Second, the contents of that +trash folder are purged asynchronously. + +Subvolume removal fails if the subvolume has snapshots or is non-existent. The +``--force`` flag allows the "non-existent subvolume remove" command to succeed. -The removal of a subvolume fails if it has snapshots, or is non-existent. -'--force' flag allows the non-existent subvolume remove command to succeed. +To remove a subvolume while retaining snapshots of the subvolume, use the +``--retain-snapshots`` flag. If snapshots associated with a given subvolume are +retained, then the subvolume is considered empty for all operations that do not +involve the retained snapshots. -A subvolume can be removed retaining existing snapshots of the subvolume using the -'--retain-snapshots' option. If snapshots are retained, the subvolume is considered -empty for all operations not involving the retained snapshots. +.. note:: Snapshot-retained subvolumes can be recreated using ``ceph fs + subvolume create``. -.. note:: Snapshot retained subvolumes can be recreated using 'ceph fs subvolume create' +.. note:: Retained snapshots can be used as clone sources for recreating the + subvolume or for cloning to a newer subvolume. -.. note:: Retained snapshots can be used as a clone source to recreate the subvolume, or clone to a newer subvolume. +Resizing a subvolume +~~~~~~~~~~~~~~~~~~~~ -Resize a subvolume using: +Use a command of the following form to resize a subvolume: .. prompt:: bash # ceph fs subvolume resize [--group_name ] [--no_shrink] -The command resizes the subvolume quota using the size specified by ``new_size``. -The `--no_shrink`` flag prevents the subvolume from shrinking below the current used size of the subvolume. +This command resizes the subvolume quota, using the size specified by +``new_size``. The ``--no_shrink`` flag prevents the subvolume from shrinking +below the current "used size" of the subvolume. -The subvolume can be resized to an unlimited (but sparse) logical size by passing ``inf`` or ``infinite`` as `` new_size``. +The subvolume can be resized to an unlimited (but sparse) logical size by +passing ``inf`` or ``infinite`` as ````. 
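+As a sketch (the volume and subvolume names are illustrative), the first
+command below sets a 10 GiB quota and the second removes the size limit:
+
+.. prompt:: bash #
+
+   # illustrative names; sizes are in bytes
+   ceph fs subvolume resize cephfs subvol0 10737418240 --no_shrink
+   ceph fs subvolume resize cephfs subvol0 inf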
-Authorize cephx auth IDs, the read/read-write access to fs subvolumes: +Authorizing CephX auth IDs +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to authorize CephX auth IDs. This provides +the read/read-write access to file system subvolumes: .. prompt:: bash # ceph fs subvolume authorize [--group_name=] [--access_level=] -The ``access_level`` takes ``r`` or ``rw`` as value. +The ```` option takes either ``r`` or ``rw`` as a value. + +De-authorizing CephX auth IDs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Deauthorize cephx auth IDs, the read/read-write access to fs subvolumes: +Use a command of the following form to deauthorize CephX auth IDs. This removes +the read/read-write access to file system subvolumes: .. prompt:: bash # ceph fs subvolume deauthorize [--group_name=] -List cephx auth IDs authorized to access fs subvolume: +Listing CephX auth IDs +~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to list CephX auth IDs authorized to access +the file system subvolume: .. prompt:: bash # ceph fs subvolume authorized_list [--group_name=] -Evict fs clients based on auth ID and subvolume mounted: +Evicting File System Clients (Auth ID) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to evict file system clients based on the +auth ID and the subvolume mounted: .. prompt:: bash # ceph fs subvolume evict [--group_name=] -Fetch the absolute path of a subvolume using: +Fetching the Absolute Path of a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to fetch the absolute path of a subvolume: .. prompt:: bash # ceph fs subvolume getpath [--group_name ] -Fetch the information of a subvolume using: +Fetching a Subvolume's Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to fetch a subvolume's information: .. prompt:: bash # ceph fs subvolume info [--group_name ] -The output format is JSON and contains fields as follows. +The output format is JSON and contains the following fields. 
-* ``atime``: access time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS" -* ``mtime``: modification time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS" -* ``ctime``: change time of the subvolume path in the format "YYYY-MM-DD HH:MM:SS" +* ``atime``: access time of the subvolume path in the format ``YYYY-MM-DD + HH:MM:SS`` +* ``mtime``: modification time of the subvolume path in the format ``YYYY-MM-DD + HH:MM:SS`` +* ``ctime``: change time of the subvolume path in the format ``YYYY-MM-DD + HH:MM:SS`` * ``uid``: uid of the subvolume path * ``gid``: gid of the subvolume path * ``mode``: mode of the subvolume path * ``mon_addrs``: list of monitor addresses -* ``bytes_pcent``: quota used in percentage if quota is set, else displays ``undefined`` -* ``bytes_quota``: quota size in bytes if quota is set, else displays ``infinite`` +* ``bytes_pcent``: quota used in percentage if quota is set; else displays + ``undefined`` +* ``bytes_quota``: quota size in bytes if quota is set; else displays + ``infinite`` * ``bytes_used``: current used size of the subvolume in bytes -* ``created_at``: creation time of the subvolume in the format "YYYY-MM-DD HH:MM:SS" +* ``created_at``: creation time of the subvolume in the format ``YYYY-MM-DD + HH:MM:SS`` * ``data_pool``: data pool to which the subvolume belongs * ``path``: absolute path of a subvolume -* ``type``: subvolume type indicating whether it's clone or subvolume +* ``type``: subvolume type, indicating whether it is ``clone`` or ``subvolume`` * ``pool_namespace``: RADOS namespace of the subvolume * ``features``: features supported by the subvolume * ``state``: current state of the subvolume +* ``earmark``: earmark of the subvolume -If a subvolume has been removed retaining its snapshots, the output contains only fields as follows. +If a subvolume has been removed but its snapshots have been retained, the +output contains only the following fields. -* ``type``: subvolume type indicating whether it's clone or subvolume +* ``type``: subvolume type indicating whether it is ``clone`` or ``subvolume`` * ``features``: features supported by the subvolume * ``state``: current state of the subvolume -A subvolume's ``features`` are based on the internal version of the subvolume and are -a subset of the following: +A subvolume's ``features`` are based on the internal version of the subvolume +and are a subset of the following: -* ``snapshot-clone``: supports cloning using a subvolumes snapshot as the source -* ``snapshot-autoprotect``: supports automatically protecting snapshots, that are active clone sources, from deletion -* ``snapshot-retention``: supports removing subvolume contents, retaining any existing snapshots +* ``snapshot-clone``: supports cloning using a subvolume's snapshot as the + source +* ``snapshot-autoprotect``: supports automatically protecting snapshots from + deletion if they are active clone sources +* ``snapshot-retention``: supports removing subvolume contents, retaining any + existing snapshots -A subvolume's ``state`` is based on the current state of the subvolume and contains one of the following values. +A subvolume's ``state`` is based on the current state of the subvolume and +contains one of the following values. * ``complete``: subvolume is ready for all operations * ``snapshot-retained``: subvolume is removed but its snapshots are retained -List subvolumes using: +Listing Subvolumes +~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to list subvolumes: .. 
prompt:: bash # ceph fs subvolume ls [--group_name ] -.. note:: subvolumes that are removed but have snapshots retained, are also listed. +.. note:: Subvolumes that have been removed but have snapshots retained, are + also listed. + +Checking for the Presence of a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Check the presence of any subvolume using: +Use a command of the following form to check for the presence of a given +subvolume: .. prompt:: bash # @@ -402,10 +502,14 @@ Check the presence of any subvolume using: These are the possible results of the ``exist`` command: -* ``subvolume exists``: if any subvolume of given group_name is present -* ``no subvolume exists``: if no subvolume of given group_name is present +* ``subvolume exists``: if any subvolume of given ``group_name`` is present +* ``no subvolume exists``: if no subvolume of given ``group_name`` is present + +Setting Custom Metadata On a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Set custom metadata on the subvolume as a key-value pair using: +Use a command of the following form to set custom metadata on the subvolume as +a key-value pair: .. prompt:: bash # @@ -413,67 +517,127 @@ Set custom metadata on the subvolume as a key-value pair using: .. note:: If the key_name already exists then the old value will get replaced by the new value. -.. note:: key_name and value should be a string of ASCII characters (as specified in python's string.printable). key_name is case-insensitive and always stored in lower case. +.. note:: ``key_name`` and ``value`` should be a string of ASCII characters (as + specified in Python's ``string.printable``). ``key_name`` is + case-insensitive and always stored in lower case. -.. note:: Custom metadata on a subvolume is not preserved when snapshotting the subvolume, and hence, is also not preserved when cloning the subvolume snapshot. +.. note:: Custom metadata on a subvolume is not preserved when snapshotting the + subvolume, and is therefore also not preserved when cloning the subvolume + snapshot. -Get custom metadata set on the subvolume using the metadata key: +Getting The Custom Metadata Set of a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to get the custom metadata set on the +subvolume using the metadata key: .. prompt:: bash # ceph fs subvolume metadata get [--group_name ] -List custom metadata (key-value pairs) set on the subvolume using: +Listing The Custom Metadata Set of a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to list custom metadata (key-value pairs) +set on the subvolume: .. prompt:: bash # ceph fs subvolume metadata ls [--group_name ] -Remove custom metadata set on the subvolume using the metadata key: +Removing a Custom Metadata Set from a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to remove custom metadata set on the +subvolume using the metadata key: .. prompt:: bash # ceph fs subvolume metadata rm [--group_name ] [--force] -Using the ``--force`` flag allows the command to succeed that would otherwise -fail if the metadata key did not exist. +Using the ``--force`` flag allows the command to succeed when it would +otherwise fail (if the metadata key did not exist). + +Getting earmark of a subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to get the earmark of a subvolume: + +.. 
prompt:: bash # + + ceph fs subvolume earmark get [--group_name ] + +Setting earmark of a subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to set the earmark of a subvolume: + +.. prompt:: bash # + + ceph fs subvolume earmark set [--group_name ] + +Removing earmark of a subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to remove the earmark of a subvolume: + +.. prompt:: bash # + + ceph fs subvolume earmark rm [--group_name ] -Create a snapshot of a subvolume using: +Creating a Snapshot of a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to create a snapshot of a subvolume: .. prompt:: bash # ceph fs subvolume snapshot create [--group_name ] -Remove a snapshot of a subvolume using: + +Removing a Snapshot of a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to remove a snapshot of a subvolume: .. prompt:: bash # ceph fs subvolume snapshot rm [--group_name ] [--force] -Using the ``--force`` flag allows the command to succeed that would otherwise -fail if the snapshot did not exist. +Using the ``--force`` flag allows the command to succeed when it would +otherwise fail (if the snapshot did not exist). .. note:: if the last snapshot within a snapshot retained subvolume is removed, the subvolume is also removed -List snapshots of a subvolume using: +Listing the Snapshots of a Subvolume +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following from to list the snapshots of a subvolume: .. prompt:: bash # ceph fs subvolume snapshot ls [--group_name ] -Fetch the information of a snapshot using: +Fetching a Snapshot's Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to fetch a snapshot's information: .. prompt:: bash # ceph fs subvolume snapshot info [--group_name ] -The output format is JSON and contains fields as follows. +The output format is JSON and contains the following fields. -* ``created_at``: creation time of the snapshot in the format "YYYY-MM-DD HH:MM:SS:ffffff" +* ``created_at``: creation time of the snapshot in the format ``YYYY-MM-DD + HH:MM:SS:ffffff`` * ``data_pool``: data pool to which the snapshot belongs -* ``has_pending_clones``: ``yes`` if snapshot clone is in progress, otherwise ``no`` -* ``pending_clones``: list of in-progress or pending clones and their target group if any exist, otherwise this field is not shown -* ``orphan_clones_count``: count of orphan clones if the snapshot has orphan clones, otherwise this field is not shown +* ``has_pending_clones``: ``yes`` if snapshot clone is in progress, otherwise + ``no`` +* ``pending_clones``: list of in-progress or pending clones and their target + groups if any exist; otherwise this field is not shown +* ``orphan_clones_count``: count of orphan clones if the snapshot has orphan + clones, otherwise this field is not shown Sample output when snapshot clones are in progress or pending: @@ -516,50 +680,74 @@ Sample output when no snapshot clone is in progress or pending: "has_pending_clones": "no" } -Set custom key-value metadata on the snapshot by running: +Setting Custom Key-Value Pair Metadata on a Snapshot +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to set custom key-value metadata on the +snapshot: .. prompt:: bash # ceph fs subvolume snapshot metadata set [--group_name ] -.. note:: If the key_name already exists then the old value will get replaced by the new value. +.. 
note:: If the ``key_name`` already exists then the old value will get replaced + by the new value. + +.. note:: The ``key_name`` and value should be a strings of ASCII characters + (as specified in Python's ``string.printable``). The ``key_name`` is + case-insensitive and always stored in lowercase. -.. note:: The key_name and value should be a strings of ASCII characters (as specified in Python's ``string.printable``). The key_name is case-insensitive and always stored in lowercase. +.. note:: Custom metadata on a snapshot is not preserved when snapshotting the + subvolume, and is therefore not preserved when cloning the subvolume + snapshot. -.. note:: Custom metadata on a snapshot is not preserved when snapshotting the subvolume, and hence is also not preserved when cloning the subvolume snapshot. +Getting Custom Metadata That Has Been Set on a Snapshot +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Get custom metadata set on the snapshot using the metadata key: +Use a command of the following form to get custom metadata that has been set on +the snapshot using the metadata key: .. prompt:: bash # ceph fs subvolume snapshot metadata get [--group_name ] -List custom metadata (key-value pairs) set on the snapshot using: +Listing Custom Metadata that has been Set on a Snapshot +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following from to list custom metadata (key-value pairs) +set on the snapshot: .. prompt:: bash # ceph fs subvolume snapshot metadata ls [--group_name ] -Remove custom metadata set on the snapshot using the metadata key: +Removing Custom Metadata from a Snapshot +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use a command of the following form to remove custom metadata set on the +snapshot using the metadata key: .. prompt:: bash # ceph fs subvolume snapshot metadata rm [--group_name ] [--force] -Using the ``--force`` flag allows the command to succeed that would otherwise -fail if the metadata key did not exist. +Using the ``--force`` flag allows the command to succeed when it would otherwise +fail (if the metadata key did not exist). Cloning Snapshots ----------------- -Subvolumes can be created by cloning subvolume snapshots. Cloning is an asynchronous operation that copies -data from a snapshot to a subvolume. Due to this bulk copying, cloning is inefficient for very large -data sets. +Subvolumes can be created by cloning subvolume snapshots. Cloning is an +asynchronous operation that copies data from a snapshot to a subvolume. Because +cloning is an operation that involves bulk copying, it is slow for +very large data sets. -.. note:: Removing a snapshot (source subvolume) would fail if there are pending or in progress clone operations. +.. note:: Removing a snapshot (source subvolume) fails when there are + pending or in-progress clone operations. -Protecting snapshots prior to cloning was a prerequisite in the Nautilus release, and the commands to protect/unprotect -snapshots were introduced for this purpose. This prerequisite, and hence the commands to protect/unprotect, is being +Protecting snapshots prior to cloning was a prerequisite in the Nautilus +release. Commands that made possible the protection and unprotection of +snapshots were introduced for this purpose. This prerequisite is being deprecated and may be removed from a future release. The commands being deprecated are: @@ -573,37 +761,38 @@ The commands being deprecated are: .. 
note:: Use the ``subvolume info`` command to fetch subvolume metadata regarding supported ``features`` to help decide if protect/unprotect of snapshots is required, based on the availability of the ``snapshot-autoprotect`` feature.

-To initiate a clone operation use:
+Run a command of the following form to initiate a clone operation:

 .. prompt:: bash #

    ceph fs subvolume snapshot clone

-If a snapshot (source subvolume) is a part of non-default group, the group name needs to be specified:
+.. note:: The ``subvolume snapshot clone`` command depends upon the
+   ``snapshot_clone_no_wait`` config option, which is described in the
+   Configurables section below.
+
+Run a command of the following form when a snapshot (source subvolume) is a
+part of a non-default group. Note that the group name must be specified:

 .. prompt:: bash #

    ceph fs subvolume snapshot clone --group_name

-Cloned subvolumes can be a part of a different group than the source snapshot (by default, cloned subvolumes are created in default group). To clone to a particular group use:
+Cloned subvolumes can be a part of a different group than the source snapshot
+(by default, cloned subvolumes are created in the default group). Run a
+command of the following form to clone to a particular group:

 .. prompt:: bash #

    ceph fs subvolume snapshot clone --target_group_name

-Similar to specifying a pool layout when creating a subvolume, pool layout can be specified when creating a cloned subvolume. To create a cloned subvolume with a specific pool layout use:
+A pool layout can be specified when creating a cloned subvolume, in the same
+way that a pool layout is specified when creating a subvolume. Run a command
+of the following form to create a cloned subvolume with a specific pool
+layout:

 .. prompt:: bash #

    ceph fs subvolume snapshot clone --pool_layout

-Configure the maximum number of concurrent clones. The default is 4:
-
-.. prompt:: bash #
-
-   ceph config set mgr mgr/volumes/max_concurrent_clones
-
-To check the status of a clone operation use:
+Run a command of the following form to check the status of a clone operation:

 .. prompt:: bash #

@@ -632,16 +821,40 @@ Here is an example of an ``in-progress`` clone:

 ::

     {
-      "status": {
-        "state": "in-progress",
-        "source": {
-          "volume": "cephfs",
-          "subvolume": "subvol1",
-          "snapshot": "snap1"
-        }
+      "status": {
+        "state": "in-progress",
+        "source": {
+          "volume": "cephfs",
+          "subvolume": "subvol1",
+          "snapshot": "snap1"
+        },
+        "progress_report": {
+          "percentage cloned": "12.24%",
+          "amount cloned": "376M/3.0G",
+          "files cloned": "4/6"
        }
+      }
     }

+A progress report is also printed in the output when the clone is
+``in-progress``. Here the progress is reported only for the specific clone.
+For the collective progress made by all ongoing clones, a progress bar is
+printed at the bottom of the output of the ``ceph status`` command::
+
+  progress:
+    3 ongoing clones - average progress is 47.569% (10s)
+      [=============...............] (remaining: 11s)
+
+If the number of clone jobs is greater than the number of cloner threads, two
+progress bars are printed: one for ongoing clones (same as above) and the
+other for all (ongoing+pending) clones::
+
+  progress:
+    4 ongoing clones - average progress is 27.669% (15s)
+      [=======.....................] (remaining: 41s)
+    Total 5 clones - average progress is 41.667% (3s)
+      [===========.................] (remaining: 4s)
+
..
note:: The ``failure`` section will be shown only if the clone's state is ``failed`` or ``cancelled`` Here is an example of a ``failed`` clone: @@ -669,11 +882,14 @@ Here is an example of a ``failed`` clone: } } -(NOTE: since ``subvol1`` is in the default group, the ``source`` object's ``clone status`` does not include the group name) +.. note:: Because ``subvol1`` is in the default group, the ``source`` object's + ``clone status`` does not include the group name) -.. note:: Cloned subvolumes are accessible only after the clone operation has successfully completed. +.. note:: Cloned subvolumes are accessible only after the clone operation has + successfully completed. -After a successful clone operation, ``clone status`` will look like the below: +After a successful clone operation, ``clone status`` will look like the +following: .. prompt:: bash # @@ -689,23 +905,28 @@ After a successful clone operation, ``clone status`` will look like the below: If a clone operation is unsuccessful, the ``state`` value will be ``failed``. -To retry a failed clone operation, the incomplete clone must be deleted and the clone operation must be issued again. -To delete a partial clone use: +To retry a failed clone operation, the incomplete clone must be deleted and the +clone operation must be issued again. + +Run a command of the following form to delete a partial clone: .. prompt:: bash # ceph fs subvolume rm [--group_name ] --force -.. note:: Cloning synchronizes only directories, regular files and symbolic links. Inode timestamps (access and - modification times) are synchronized up to seconds granularity. +.. note:: Cloning synchronizes only directories, regular files and symbolic + links. inode timestamps (access and modification times) are synchronized up + to a second's granularity. -An ``in-progress`` or a ``pending`` clone operation may be canceled. To cancel a clone operation use the ``clone cancel`` command: +An ``in-progress`` or a ``pending`` clone operation may be canceled. To cancel +a clone operation use the ``clone cancel`` command: .. prompt:: bash # ceph fs clone cancel [--group_name ] -On successful cancellation, the cloned subvolume is moved to the ``canceled`` state: +On successful cancellation, the cloned subvolume is moved to the ``canceled`` +state: .. prompt:: bash # @@ -726,7 +947,36 @@ On successful cancellation, the cloned subvolume is moved to the ``canceled`` st } } -.. note:: The canceled cloned may be deleted by supplying the ``--force`` option to the `fs subvolume rm` command. +.. note:: Delete the canceled cloned by supplying the ``--force`` option to the + ``fs subvolume rm`` command. + +Configurables +~~~~~~~~~~~~~ + +Configure the maximum number of concurrent clone operations. The default is 4: + +.. prompt:: bash # + + ceph config set mgr mgr/volumes/max_concurrent_clones + +Configure the ``snapshot_clone_no_wait`` option: + +The ``snapshot_clone_no_wait`` config option is used to reject clone-creation +requests when cloner threads (which can be configured using the above options, +for example, ``max_concurrent_clones``) are not available. It is enabled by +default. This means that the value is set to ``True``, but it can be configured +by using the following command: + +.. prompt:: bash # + + ceph config set mgr mgr/volumes/snapshot_clone_no_wait + +The current value of ``snapshot_clone_no_wait`` can be fetched by running the +following command. + +.. prompt:: bash # + + ceph config get mgr mgr/volumes/snapshot_clone_no_wait .. 
_subvol-pinning: @@ -739,33 +989,466 @@ to policies. This can distribute load across MDS ranks in predictable and stable ways. Review :ref:`cephfs-pinning` and :ref:`cephfs-ephemeral-pinning` for details on how pinning works. -Pinning is configured by: +Run a command of the following form to configure pinning for subvolume groups: .. prompt:: bash # ceph fs subvolumegroup pin -or for subvolumes: +Run a command of the following form to configure pinning for subvolumes: .. prompt:: bash # ceph fs subvolume pin -Typically you will want to set subvolume group pins. The ``pin_type`` may be -one of ``export``, ``distributed``, or ``random``. The ``pin_setting`` -corresponds to the extended attributed "value" as in the pinning documentation -referenced above. +Under most circumstances, you will want to set subvolume group pins. The +``pin_type`` may be ``export``, ``distributed``, or ``random``. The +``pin_setting`` corresponds to the extended attributed "value" as in the +pinning documentation referenced above. -So, for example, setting a distributed pinning strategy on a subvolume group: +Here is an example of setting a distributed pinning strategy on a subvolume +group: .. prompt:: bash # ceph fs subvolumegroup pin cephfilesystem-a csi distributed 1 -Will enable distributed subtree partitioning policy for the "csi" subvolume -group. This will cause every subvolume within the group to be automatically +This enables distributed subtree partitioning policy for the "csi" subvolume +group. This will cause every subvolume within the group to be automatically pinned to one of the available ranks on the file system. +Subvolume quiesce +----------------- + +.. note:: The information in this section applies only to Squid and later + releases of Ceph. + +CephFS snapshots do not provide strong-consistency guarantees in cases involving writes +performed by multiple clients, which makes consistent backups and disaster recovery a serious +challenge for distributed applications. Even in a case where an application uses +file system flushes to synchronize checkpoints across its distributed components, there is +no guarantee that all acknowledged writes will be part of a given snapshot. + +The subvolume quiesce feature has been developed to provide enterprise-level consistency guarantees +for multi-client applications that work with one or more subvolumes. The feature makes it possible to pause IO +to a set of subvolumes of a given volume (file system). Enforcing such a pause across all clients makes +it possible to guarantee that any persistent checkpoints reached by the application before the pause +will be recoverable from the snapshots made during the pause. + +The `volumes` plugin provides a CLI to initiate and await the pause for a set of subvolumes. +This pause is called a `quiesce`, which is also used as the command name: + +.. prompt:: bash $ auto + + $ ceph fs quiesce --set-id myset1 <[group_name/]sub_name...> --await + # perform actions while the IO pause is active, like taking snapshots + $ ceph fs quiesce --set-id myset1 --release --await + # if successful, all members of the set were confirmed as still paused and released + +The ``fs quiesce`` functionality is based on a lower level ``quiesce db`` service provided by the MDS +daemons, which operates at a file system path granularity. +The `volumes` plugin merely maps the subvolume names to their corresponding paths on the given file system +and then issues the corresponding ``quiesce db`` command to the MDS. 
You can learn more about the low-level service +in the developer guides. + +Operations +~~~~~~~~~~ + +The quiesce can be requested for a set of one or more subvolumes (i.e. paths in a filesystem). +This set is referred to as `quiesce set`. Every quiesce set is identified by a unique `set id`. +A quiesce set can be manipulated in the following ways: + +* **include** one or more subvolumes - quiesce set members +* **exclude** one or more members +* **cancel** the set, asynchronously aborting the pause on all its current members +* **release** the set, requesting the end of the pause from all members and expecting an ack from all clients +* **query** the current state of a set by id or all active sets or all known sets +* **cancel all** active sets in case an immediate resume of IO is required. + +The operations listed above are non-blocking: they attempt the intended modification +and return with an up to date version of the target set, whether the operation was successful or not. +The set may change states as a result of the modification, and the version that's returned in the response +is guaranteed to be in a state consistent with this and potentialy other successful operations from +the same control loop batch. + +Some set states are `awaitable`. We will discuss those below, but for now it's important to mention that +any of the commands above can be amended with an **await** modifier, which will cause them to block +on the set after applying their intended modification, as long as the resulting set state is `awaitable`. +Such a command will block until the set reaches the awaited state, gets modified by another command, +or transitions into another state. The return code will unambiguously identify the exit condition, and +the contents of the response will always carry the latest known set state. + +.. image:: quiesce-set-states.svg + +`Awaitable` states on the diagram are marked with ``(a)`` or ``(A)``. Blocking versions of the operations +will pend while the set is in an ``(a)`` state and will complete with success if it reaches an ``(A)`` state. +If the set is already at an ``(A)`` state, the operation completes immediately with a success. + +Most of the operations require a set-id. The exceptions are: + +* creation of a new set without specifying a set id, +* query of active or all known sets, and +* the cancel all + +Creating a new set is achieved by including member(s) via the `include` or `reset` commands. +It's possible to specify a set id, and if it's a new id then the set will be created +with the specified member(s) in the `QUIESCING` state. When no set id is specified while including +or resetting members, then a new set with a unique set id is created. The set id will be known +to the caller by inspecting the output + +.. prompt:: bash $ auto + + $ ceph fs quiesce fs1 sub1 --set-id=unique-id + { + "epoch": 3, + "set_version": 1, + "sets": { + "unique-id": { + "version": 1, + "age_ref": 0.0, + "state": { + "name": "TIMEDOUT", + "age": 0.0 + }, + "timeout": 0.0, + "expiration": 0.0, + "members": { + "file:/volumes/_nogroup/sub1/b1fcce76-3418-42dd-aa76-f9076d047dd3": { + "excluded": false, + "state": { + "name": "QUIESCING", + "age": 0.0 + } + } + } + } + } + } + +The output contains the set we just created successfully, however it's already `TIMEDOUT`. +This is expected, since we have not specified the timeout for this quiesce, +and we can see in the output that it was initialized to 0 by default, along with the expiration. 
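+To avoid this, the timeout parameters can be supplied when the set is created.
+As a sketch of a typical invocation (the file system, subvolume, and set names
+here are illustrative), one would pass both timeouts up front and await the
+pause; these parameters are discussed in the next subsection:
+
+.. prompt:: bash $ auto
+
+   $ ceph fs quiesce fs1 sub1 sub2 --set-id=myset2 --timeout=30 --expiration=10 --await
+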
+ +Timeouts +~~~~~~~~ + +The two timeout parameters, `timeout` and `expiration`, are the main guards against +accidentally causing a DOS condition for our application. Any command to an active set +may carry the ``--timeout`` or ``--expiration`` arguments to update these values for the set. +If present, the values will be applied before the action this command requests. + +.. prompt:: bash $ auto + + $ ceph fs quiesce fs1 --set-id=unique-id --timeout=10 > /dev/null + Error EPERM: + +It's too late for our ``unique-id`` set, as it's in a terminal state. No changes are allowed +to sets that are in their terminal states, i.e. inactive. Let's create a new set: + +.. prompt:: bash $ auto + + $ ceph fs quiesce fs1 sub1 --timeout 60 + { + "epoch": 3, + "set_version": 2, + "sets": { + "8988b419": { + "version": 2, + "age_ref": 0.0, + "state": { + "name": "QUIESCING", + "age": 0.0 + }, + "timeout": 60.0, + "expiration": 0.0, + "members": { + "file:/volumes/_nogroup/sub1/b1fcce76-3418-42dd-aa76-f9076d047dd3": { + "excluded": false, + "state": { + "name": "QUIESCING", + "age": 0.0 + } + } + } + } + } + } + +This time, we haven't specified a set id, so the system created a new one. We see its id +in the output, it's ``8988b419``. The command was a success and we see that +this time the set is `QUIESCING`. At this point, we can add more members to the set + +.. prompt:: bash $ auto + + $ ceph fs quiesce fs1 --set-id 8988b419 --include sub2 sub3 + { + "epoch": 3, + "set_version": 3, + "sets": { + "8988b419": { + "version": 3, + "age_ref": 0.0, + "state": { + "name": "QUIESCING", + "age": 30.7 + }, + "timeout": 60.0, + "expiration": 0.0, + "members": { + "file:/volumes/_nogroup/sub1/b1fcce76-3418-42dd-aa76-f9076d047dd3": { + "excluded": false, + "state": { + "name": "QUIESCING", + "age": 30.7 + } + }, + "file:/volumes/_nogroup/sub2/bc8f770e-7a43-48f3-aa26-d6d76ef98d3e": { + "excluded": false, + "state": { + "name": "QUIESCING", + "age": 0.0 + } + }, + "file:/volumes/_nogroup/sub3/24c4b57b-e249-4b89-b4fa-7a810edcd35b": { + "excluded": false, + "state": { + "name": "QUIESCING", + "age": 0.0 + } + } + } + } + } + } + +The ``--include`` bit is optional, as if no operation is given while members are provided, +then "include" is assumed. + +As we have seen, the timeout argument specifies how much time we are ready to give the system +to reach the `QUIESCED` state on the set. However, since new members can be added to an +active set at any time, it wouldn't be fair to measure the timeout from the set creation time. +Hence, the timeout is tracked per member: every member has `timeout` seconds to quiesce, +and if any one takes longer than that, the whole set is marked as `TIMEDOUT` and the pause is released. + +Once the set is in the `QUIESCED` state, it will begin its expiration timer. This timer is tracked +per set as a whole, not per members. Once the `expiration` seconds elapse, the set will transition +into an `EXPIRED` state, unless it was actively released or canceled by a dedicated operation. + +It's possible to add new members to a `QUIESCED` set. In this case, it will transition back to `QUIESCING`, +and the new member(s) will have their own timeout to quiesce. If they succeed, then the set will +again be `QUIESCED` and the expiration timer will restart. + +.. 
warning::
+    * The `expiration timer` doesn't apply when a set is `QUIESCING`; it is reset to the
+      value of the `expiration` property when the **set** becomes `QUIESCED`
+    * The `timeout` doesn't apply to **members** that are `QUIESCED`
+
+Awaiting
+~~~~~~~~
+
+Note that the commands above are all non-blocking. If we want to wait for the quiesce set
+to reach the `QUIESCED` state, we should await it at some point. ``--await`` can be given
+along with other arguments to let the system know our intention.
+
+There are two types of await: `quiesce await` and `release await`. The former is the default,
+and the latter can only be achieved with ``--release`` present in the argument list.
+To avoid confusion, it is not permitted to issue a `quiesce await` when the set is not `QUIESCING`.
+Trying to ``--release`` a set that is not `QUIESCED` is an ``EPERM`` error as well, regardless
+of whether await is requested alongside. However, it's not an error to `release await`
+an already released set, or to `quiesce await` a `QUIESCED` one - those are successful no-ops.
+
+Since a set is awaited after the application of the ``--await``-augmented command, the await operation
+may mask a successful result with its own error. A good example is trying to cancel-await a set:
+
+.. prompt:: bash $ auto
+
+   $ ceph fs quiesce fs1 --set-id set1 --cancel --await
+   {
+       // ...
+       "sets": {
+           "set1": {
+               // ...
+               "state": {
+                   "name": "CANCELED",
+                   "age": 0
+               },
+               // ...
+           }
+       }
+   }
+   Error EPERM:
+
+Although ``--cancel`` will succeed synchronously for a set in an active state, awaiting a canceled
+set is not permitted, hence this call will result in an ``EPERM``. This is deliberately different from
+returning an ``EINVAL`` error, denoting an error on the user's side, to simplify the system's behavior
+when ``--await`` is requested. As a result, it's also a simpler model for the user to work with.
+
+When awaiting, one may specify a maximum duration that they would like this await request to block for,
+orthogonally to the two intrinsic set timeouts discussed above. If the target awaited state isn't reached
+within the specified duration, then ``EINPROGRESS`` is returned. For that, one should use the argument
+``--await-for=``. One could think of ``--await`` as equivalent to ``--await-for=Infinity``.
+While it doesn't make sense to specify both arguments, it is not considered an error. If
+both ``--await`` and ``--await-for`` are present, then the former is ignored, and the time limit
+from ``--await-for`` is honored.
+
+.. prompt:: bash $ auto
+
+   $ time ceph fs quiesce fs1 sub1 --timeout=10 --await-for=2
+   {
+       "epoch": 6,
+       "set_version": 3,
+       "sets": {
+           "c3c1d8de": {
+               "version": 3,
+               "age_ref": 0.0,
+               "state": {
+                   "name": "QUIESCING",
+                   "age": 2.0
+               },
+               "timeout": 10.0,
+               "expiration": 0.0,
+               "members": {
+                   "file:/volumes/_nogroup/sub1/b1fcce76-3418-42dd-aa76-f9076d047dd3": {
+                       "excluded": false,
+                       "state": {
+                           "name": "QUIESCING",
+                           "age": 2.0
+                       }
+                   }
+               }
+           }
+       }
+   }
+   Error EINPROGRESS:
+   ceph fs quiesce fs1 sub1 --timeout=10 --await-for=2  0.41s user 0.04s system 17% cpu 2.563 total
+
+(there is a ~0.5 sec overhead that the ceph client adds, at least in a local debug setup)
+
+Quiesce-Await and Expiration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Quiesce await has a side effect: it resets the internal expiration timer. This allows for a watchdog
+approach to a long-running multistep process under the IO pause by repeatedly ``--await``\ ing an already
+`QUIESCED` set. Consider the following example script:
+
+..
prompt:: bash $ auto + + $ set -e # (1) + $ ceph fs quiesce fs1 sub1 sub2 sub3 --timeout=30 --expiration=10 --set-id="snapshots" --await # (2) + $ ceph fs subvolume snapshot create a sub1 snap1-sub1 # (3) + $ ceph fs quiesce fs1 --set-id="snapshots" --await # (4) + $ ceph fs subvolume snapshot create a sub2 snap1-sub2 # (3) + $ ceph fs quiesce fs1 --set-id="snapshots" --await # (4) + $ ceph fs subvolume snapshot create a sub3 snap1-sub3 # (3) + $ ceph fs quiesce fs1 --set-id="snapshots" --release --await # (5) + +.. warning:: This example uses arbitrary timeouts to convey the concept. In real life, the values must be carefully + chosen in accordance with the actual system requirements and specifications. + +The goal of the script is to take consistent snapshots of 3 subvolumes. +We begin by setting the bash ``-e`` option `(1)` to exit this script if any or the following commands +returns with a non-zero status. + +We go on requesting an IO pause for the three subvolumes `(2)`. We set our timeouts allowing +the system to spend up to 30 seconds reaching the quiesced state across all members +and stay quiesced for up to 10 seconds before the quiesce expires and the IO +is resumed. We also specify ``--await`` to only proceed once the quiesce is reached. + +We then proceed with a set of command pairs that take the next snapshot and call ``--await`` on our set +to extend the expiration timeout for 10 more seconds `(3,4)`. This approach gives us up to 10 seconds +for every snapshot, but also allows taking as many snapshots as we need without losing the IO pause, +and with it - consistency. If we wanted, we could update the `expiration` every time we called for await. + +If any of the snapshots gets stuck and takes longer than 10 seconds to complete, then the next call +to ``--await`` will return an error since the set will be `EXPIRED` which is not an awaitable state. +This limits the impact on the applications in the bad case scenarios. + +We could have set the `expiration` timeout to 30 at the beginning `(2)`, but that would mean that +a single stuck snapshot would keep the applications pending for all this time. + +If Version +~~~~~~~~~~ + +Sometimes, it's not enough to just observe the successful quiesce or release. The reason could be +a concurrent change of the set by another client. Consider this example: + +.. prompt:: bash $ auto + + $ ceph fs quiesce fs1 sub1 sub2 sub3 --timeout=30 --expiration=60 --set-id="snapshots" --await # (1) + $ ceph fs subvolume snapshot create a sub1 snap1-sub1 # (2) + $ ceph fs subvolume snapshot create a sub2 snap1-sub2 # (3) + $ ceph fs subvolume snapshot create a sub3 snap1-sub3 # (4) + $ ceph fs quiesce fs1 --set-id="snapshots" --release --await # (5) + +The sequence looks good, and the release `(5)` completes successfully. However, it could be that +before snap for sub3 `(4)` is taken, another session excludes sub3 from the set, resuming its IOs + +.. prompt:: bash $ auto + + $ ceph fs quiesce fs1 --set-id="snapshots" --exclude sub3 + +Since removing a member from a set doesn't affect its `QUIESCED` state, the release command `(5)` +has no reason to fail. It will ack the two unexcluded members sub1 and sub2 and report success. + +In order to address this or similar problems, the quiesce command supports an optimistic concurrency +mode. To activate it, one needs to pass an ``--if-version=`` that will be compared +to the set's db version and the operation will only proceed if the values match. 
Otherwise, the command
+will not be executed and the return status will be ``ESTALE``.
+
+It's easy to know which version to expect of a set, since every command that modifies a set will return
+this set on the stdout, regardless of the exit status. In the examples above one can notice that every
+set carries a ``"version"`` property which gets updated whenever this set is modified, explicitly
+by the user or implicitly by the system.
+
+In the example at the beginning of this subsection, the initial quiesce command `(1)` would have returned
+the newly created set with id ``"snapshots"`` and some version, let's say ``13``. Since we don't expect any other
+changes to the set while we are making snapshots with the commands `(2,3,4)`, the release command `(5)`
+could have looked like this:
+
+.. prompt:: bash $ auto
+
+   $ ceph fs quiesce fs1 --set-id="snapshots" --release --await --if-version=13   # (5)
+
+This way, the result of the release command would have been ``ESTALE`` instead of 0, and we would
+know that something wasn't right with the quiesce set and our snapshots might not be consistent.
+
+.. tip:: When ``--if-version`` is used and the command returns ``ESTALE``, the requested action is **not** executed.
+   It means that the script may want to execute some unconditional command on the set to adjust its state
+   according to the requirements.
+
+There is another use of the ``--if-version`` argument which could come in handy for automation software.
+As we have discussed earlier, it is possible to create a new quiesce set with a given set id. Drivers like
+the CSI for Kubernetes could use their internal request id to eliminate the need to keep an additional mapping
+to the quiesce set id. However, to guarantee uniqueness, the driver may want to verify that the set is
+indeed new. For that, ``--if-version=0`` may be used, and it will only create the new set if no other
+set with this id was present in the database:
+
+.. prompt:: bash $ auto
+
+   $ ceph fs quiesce fs1 sub1 sub2 sub3 --set-id="external-id" --if-version=0
+
+
+.. _disabling-volumes-plugin:
+
+Disabling Volumes Plugin
+------------------------
+By default, the volumes plugin is enabled and set to ``always on``. However, in
+certain cases it might be appropriate to disable it. For example, when a CephFS
+file system is in a degraded state, the volumes plugin commands may accumulate
+in the MGR instead of being served, eventually causing policy throttles to kick
+in and making the MGR unresponsive.
+
+In this event, the volumes plugin can be disabled even though it is an
+``always on`` MGR module. To do so, run ``ceph mgr module disable volumes
+--yes-i-really-mean-it``. Note that this command disables the volumes plugin's
+operations and removes its commands, because it disables all CephFS services
+on the Ceph cluster that are accessed through this plugin.
+
+Before resorting to a measure this drastic, it is a good idea to try less
+drastic measures and then assess whether the file system experience has
+improved as a result. One example of a less drastic measure is to disable the
+asynchronous threads that the volumes plugin launches for cloning and for
+purging trash.
+
 .. _manila: https://github.com/openstack/manila
 .. _CSI: https://github.com/ceph/ceph-csi
diff --git a/doc/cephfs/health-messages.rst b/doc/cephfs/health-messages.rst
index 8fb23715d2d3..0f171c6ccc98 100644
--- a/doc/cephfs/health-messages.rst
+++ b/doc/cephfs/health-messages.rst
@@ -252,3 +252,20 @@ other daemons, please see :ref:`health-checks`.
     dirty data for cap revokes).
If ``defer_client_eviction_on_laggy_osds`` is set to true (default true), client eviction will not take place and thus this health warning will be generated. + +``MDS_CLIENTS_BROKEN_ROOTSQUASH`` +--------------------------------- + Message + "X client(s) with broken root_squash implementation (MDS_CLIENTS_BROKEN_ROOTSQUASH)" + + Description + A bug was discovered in root_squash which would potentially lose changes made by a + client restricted with root_squash caps. The fix required a change to the protocol + and a client upgrade is required. + + This is a HEALTH_ERR warning because of the danger of inconsistency and lost + data. It is recommended to either upgrade your clients, discontinue using + root_squash in the interim, or silence the warning if desired. + + To evict and permanently block broken clients from connecting to the + cluster, set the ``required_client_feature`` bit ``client_mds_auth_caps``. diff --git a/doc/cephfs/index.rst b/doc/cephfs/index.rst index 3d52aef38449..57ea336c00be 100644 --- a/doc/cephfs/index.rst +++ b/doc/cephfs/index.rst @@ -10,14 +10,13 @@ a state-of-the-art, multi-use, highly available, and performant file store for a variety of applications, including traditional use-cases like shared home directories, HPC scratch space, and distributed workflow shared storage. -CephFS achieves these goals through the use of some novel architectural -choices. Notably, file metadata is stored in a separate RADOS pool from file -data and served via a resizable cluster of *Metadata Servers*, or **MDS**, -which may scale to support higher throughput metadata workloads. Clients of -the file system have direct access to RADOS for reading and writing file data -blocks. For this reason, workloads may linearly scale with the size of the -underlying RADOS object store; that is, there is no gateway or broker mediating -data I/O for clients. +CephFS achieves these goals through novel architectural choices. Notably, file +metadata is stored in a RADOS pool separate from file data and is served via a +resizable cluster of *Metadata Servers*, or **MDS**\es, which scale to support +higher-throughput workloads. Clients of the file system have direct access to +RADOS for reading and writing file data blocks. This makes it possible for +workloads to scale linearly with the size of the underlying RADOS object store. +There is no gateway or broker that mediates data I/O for clients. Access to data is coordinated through the cluster of MDS which serve as authorities for the state of the distributed metadata cache cooperatively @@ -193,6 +192,7 @@ Developer Guides Client's Capabilities Java and Python bindings Mantle + Metrics .. raw:: html diff --git a/doc/cephfs/mantle.rst b/doc/cephfs/mantle.rst index dc9e624617d8..39e0af3ba8eb 100644 --- a/doc/cephfs/mantle.rst +++ b/doc/cephfs/mantle.rst @@ -6,20 +6,25 @@ Mantle Mantle is for research and development of metadata balancer algorithms, not for use on production CephFS clusters. -Multiple, active MDSs can migrate directories to balance metadata load. The -policies for when, where, and how much to migrate are hard-coded into the -metadata balancing module. Mantle is a programmable metadata balancer built -into the MDS. The idea is to protect the mechanisms for balancing load -(migration, replication, fragmentation) but stub out the balancing policies -using Lua. Mantle is based on [1] but the current implementation does *NOT* -have the following features from that paper: +Mantle is a programmable metadata balancer that is built into the MDS. 
+ +By default (without Mantle), multiple, active MDSs can migrate directories to +balance metadata load. The policies for when, where, and how much to migrate +are hard-coded into the metadata balancing module. + +Mantle works by protecting the mechanisms for balancing load (migration, +replication, fragmentation) while suppressing the balancing policies using Lua. +Mantle is based on [1] but the current implementation does *NOT* have the +following features from that paper: 1. Balancing API: in the paper, the user fills in when, where, how much, and - load calculation policies; currently, Mantle only requires that Lua policies - return a table of target loads (e.g., how much load to send to each MDS) -2. "How much" hook: in the paper, there was a hook that let the user control - the fragment selector policy; currently, Mantle does not have this hook -3. Instantaneous CPU utilization as a metric + load calculation policies. Currently, Mantle requires only that Lua policies + return a table of target loads (for example, how much load to send to each + MDS) +2. The "how much" hook: in the paper, there was a hook that allowed the user to + control the "fragment selector policy". Currently, Mantle does not have this + hook. +3. "Instantaneous CPU utilization" as a metric. [1] Supercomputing '15 Paper: http://sc15.supercomputing.org/schedule/event_detail-evid=pap168.html @@ -30,10 +35,11 @@ Quickstart with vstart .. warning:: Developing balancers with vstart is difficult because running all daemons - and clients on one node can overload the system. Let it run for a while, even - though you will likely see a bunch of lost heartbeat and laggy MDS warnings. - Most of the time this guide will work but sometimes all MDSs lock up and you - cannot actually see them spill. It is much better to run this on a cluster. + and clients on one node can overload the system. Let the system run for a + while, even though there will likely be many lost heartbeat warnings and + many laggy MDS warnings. In most cases this guide will work, but sometimes + when developing with vstart all MDSs will lock up and you cannot actually + see them spill. It is better to run this on a multi-node cluster. As a prerequisite, we assume you have installed `mdtest `_ or pulled the `Docker image diff --git a/doc/cephfs/mds-config-ref.rst b/doc/cephfs/mds-config-ref.rst index e578b7f25148..9176a739801e 100644 --- a/doc/cephfs/mds-config-ref.rst +++ b/doc/cephfs/mds-config-ref.rst @@ -57,7 +57,7 @@ .. confval:: mds_kill_link_at .. confval:: mds_kill_rename_at .. confval:: mds_inject_skip_replaying_inotable -.. confval:: mds_kill_skip_replaying_inotable +.. confval:: mds_kill_after_journal_logs_flushed .. confval:: mds_wipe_sessions .. confval:: mds_wipe_ino_prealloc .. confval:: mds_skip_ino diff --git a/doc/cephfs/mds-journaling.rst b/doc/cephfs/mds-journaling.rst index b6ccf27c8c0a..9325eab7a2d4 100644 --- a/doc/cephfs/mds-journaling.rst +++ b/doc/cephfs/mds-journaling.rst @@ -141,14 +141,12 @@ The targetted size of a log segment in terms of number of events is controlled b .. confval:: mds_log_events_per_segment -The frequency of major segments (noted by the journaling of the latest ``ESubtreeMap``) is controlled by: +The number of minor mds log segments since last major segment is controlled by: -.. confval:: mds_log_major_segment_event_ratio +.. 
confval:: mds_log_minor_segments_per_major_segment -When ``mds_log_events_per_segment * mds_log_major_segment_event_ratio`` -non-``ESubtreeMap`` events are logged, the MDS will journal a new -``ESubtreeMap``. This is necessary to allow the journal to shrink in size -during the trimming of expired segments. +This controls how often the MDS trims expired log segments (higher the value, less +often the MDS updates the journal expiry position for trimming). The target maximum number of segments is controlled by: diff --git a/doc/cephfs/metrics.rst b/doc/cephfs/metrics.rst new file mode 100644 index 000000000000..1befec0c4ae1 --- /dev/null +++ b/doc/cephfs/metrics.rst @@ -0,0 +1,132 @@ +.. _cephfs_metrics: + +Metrics +======= + +CephFS uses :ref:`Perf Counters` to track metrics. The counters can be labeled (:ref:`Labeled Perf Counters`). + +Client Metrics +-------------- + +CephFS exports client metrics as :ref:`Labeled Perf Counters`, which could be used to monitor the client performance. CephFS exports the below client metrics. + +.. list-table:: Client Metrics + :widths: 25 25 75 + :header-rows: 1 + + * - Name + - Type + - Description + * - num_clients + - Gauge + - Number of client sessions + * - cap_hits + - Gauge + - Percentage of file capability hits over total number of caps + * - cap_miss + - Gauge + - Percentage of file capability misses over total number of caps + * - avg_read_latency + - Gauge + - Mean value of the read latencies + * - avg_write_latency + - Gauge + - Mean value of the write latencies + * - avg_metadata_latency + - Gauge + - Mean value of the metadata latencies + * - dentry_lease_hits + - Gauge + - Percentage of dentry lease hits handed out over the total dentry lease requests + * - dentry_lease_miss + - Gauge + - Percentage of dentry lease misses handed out over the total dentry lease requests + * - opened_files + - Gauge + - Number of opened files + * - opened_inodes + - Gauge + - Number of opened inodes + * - pinned_icaps + - Gauge + - Number of pinned Inode Caps + * - total_inodes + - Gauge + - Total number of Inodes + * - total_read_ops + - Gauge + - Total number of read operations generated by all process + * - total_read_size + - Gauge + - Number of bytes read in input/output operations generated by all process + * - total_write_ops + - Gauge + - Total number of write operations generated by all process + * - total_write_size + - Gauge + - Number of bytes written in input/output operations generated by all processes + +Getting Metrics +=============== + +The metrics could be scraped from the MDS admin socket as well as using the tell interface. 
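+For example, assuming an active MDS daemon named ``mds.a`` serving rank 0 of a
+file system named ``cephfs`` (both names are illustrative), the labeled
+counters can be dumped either through the daemon's admin socket or through the
+tell interface:
+
+.. prompt:: bash #
+
+   ceph daemon mds.a counter dump
+   ceph tell mds.cephfs:0 counter dump
+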
The ``mds_client_metrics-`` section in the output of ``counter dump`` command displays the metrics for each client as shown below:: + + "mds_client_metrics": [ + { + "labels": { + "fs_name": "", + "id": "14213" + }, + "counters": { + "num_clients": 2 + } + } + ], + "mds_client_metrics-": [ + { + "labels": { + "client": "client.0", + "rank": "0" + }, + "counters": { + "cap_hits": 5149, + "cap_miss": 1, + "avg_read_latency": 0.000000000, + "avg_write_latency": 0.000000000, + "avg_metadata_latency": 0.000000000, + "dentry_lease_hits": 0, + "dentry_lease_miss": 0, + "opened_files": 1, + "opened_inodes": 2, + "pinned_icaps": 2, + "total_inodes": 2, + "total_read_ops": 0, + "total_read_size": 0, + "total_write_ops": 4836, + "total_write_size": 633864192 + } + }, + { + "labels": { + "client": "client.1", + "rank": "0" + }, + "counters": { + "cap_hits": 3375, + "cap_miss": 8, + "avg_read_latency": 0.000000000, + "avg_write_latency": 0.000000000, + "avg_metadata_latency": 0.000000000, + "dentry_lease_hits": 0, + "dentry_lease_miss": 0, + "opened_files": 1, + "opened_inodes": 2, + "pinned_icaps": 2, + "total_inodes": 2, + "total_read_ops": 0, + "total_read_size": 0, + "total_write_ops": 3169, + "total_write_size": 415367168 + } + } + ] diff --git a/doc/cephfs/mount-prerequisites.rst b/doc/cephfs/mount-prerequisites.rst index 6ed8a19b6205..02b22fb9ac6a 100644 --- a/doc/cephfs/mount-prerequisites.rst +++ b/doc/cephfs/mount-prerequisites.rst @@ -1,11 +1,10 @@ Mount CephFS: Prerequisites =========================== -You can use CephFS by mounting it to your local filesystem or by using -`cephfs-shell`_. Mounting CephFS requires superuser privileges to trim -dentries by issuing a remount of itself. CephFS can be mounted -`using kernel`_ as well as `using FUSE`_. Both have their own -advantages. Read the following section to understand more about both of +You can use CephFS by mounting the file system on a machine or by using +:ref:`cephfs-shell `. A system mount can be performed using `the +kernel driver`_ as well as `the FUSE driver`_. Both have their own advantages +and disadvantages. Read the following section to understand more about both of these ways to mount CephFS. For Windows CephFS mounts, please check the `ceph-dokan`_ page. @@ -69,7 +68,7 @@ Ceph MON resides. individually, please check respective mount documents. .. _Client Authentication: ../client-auth -.. _cephfs-shell: ../cephfs-shell -.. _using kernel: ../mount-using-kernel-driver -.. _using FUSE: ../mount-using-fuse +.. _cephfs-shell: ..cephfs-shell +.. _the kernel driver: ../mount-using-kernel-driver +.. _the FUSE driver: ../mount-using-fuse .. _ceph-dokan: ../ceph-dokan diff --git a/doc/cephfs/mount-using-fuse.rst b/doc/cephfs/mount-using-fuse.rst index bd098dc91de3..67e5a424d8af 100644 --- a/doc/cephfs/mount-using-fuse.rst +++ b/doc/cephfs/mount-using-fuse.rst @@ -2,24 +2,32 @@ Mount CephFS using FUSE ======================== -`ceph-fuse`_ is an alternate way of mounting CephFS, although it mounts it -in userspace. Therefore, performance of FUSE can be relatively lower but FUSE -clients can be more manageable, especially while upgrading CephFS. +`ceph-fuse`_ can be used as an alternative to the :ref:`CephFS kernel +driver` to mount CephFS file systems. +`ceph-fuse`_ mounts are made in userspace. This means that `ceph-fuse`_ mounts +are less performant than kernel driver mounts, but they are easier to manage +and easier to upgrade. 
Prerequisites ============= -Go through the prerequisites required by both, kernel as well as FUSE mounts, -in `Mount CephFS: Prerequisites`_ page. +Ensure that you have all the prerequisites required by both kernel and FUSE +mounts, as listed on the `Mount CephFS: Prerequisites`_ page. -.. note:: Mounting CephFS using FUSE requires superuser privileges to trim dentries - by issuing a remount of itself. +.. note:: Mounting CephFS using FUSE requires superuser privileges (sudo/root). + The libfuse interface does not provide a mechanism to trim cache entries in + the kernel so a remount (``mount(2)``) system call is required to force the + kernel to drop the cached metadata. ``ceph-fuse`` issues these remount + system calls periodically in response to cache pressure in the MDS or due to + metadata cache revocations. Synopsis ======== -In general, the command to mount CephFS via FUSE looks like this:: +This is the general form of the command for mounting CephFS via FUSE: - ceph-fuse {mountpoint} {options} +.. prompt:: bash # + + ceph-fuse {mount point} {options} Mounting CephFS =============== @@ -28,7 +36,7 @@ To FUSE-mount the Ceph file system, use the ``ceph-fuse`` command:: mkdir /mnt/mycephfs ceph-fuse --id foo /mnt/mycephfs -Option ``-id`` passes the name of the CephX user whose keyring we intend to +Option ``--id`` passes the name of the CephX user whose keyring we intend to use for mounting CephFS. In the above command, it's ``foo``. You can also use ``-n`` instead, although ``--id`` is evidently easier:: diff --git a/doc/cephfs/mount-using-kernel-driver.rst b/doc/cephfs/mount-using-kernel-driver.rst index 9d9a4a683bae..22ede055d0b5 100644 --- a/doc/cephfs/mount-using-kernel-driver.rst +++ b/doc/cephfs/mount-using-kernel-driver.rst @@ -1,3 +1,5 @@ +.. _cephfs-mount-using-kernel-driver: + ================================= Mount CephFS using Kernel Driver ================================= @@ -20,16 +22,18 @@ Complete General Prerequisites Go through the prerequisites required by both, kernel as well as FUSE mounts, in `Mount CephFS: Prerequisites`_ page. -Is mount helper is present? ---------------------------- +Is mount helper present? +------------------------ ``mount.ceph`` helper is installed by Ceph packages. The helper passes the -monitor address(es) and CephX user keyrings automatically saving the Ceph -admin the effort to pass these details explicitly while mounting CephFS. In -case the helper is not present on the client machine, CephFS can still be -mounted using kernel but by passing these details explicitly to the ``mount`` -command. To check whether it is present on your system, do:: +monitor address(es) and CephX user keyrings, saving the Ceph admin the effort +of passing these details explicitly while mounting CephFS. If the helper is not +present on the client machine, CephFS can still be mounted using the kernel +driver, but only by passing these details explicitly to the ``mount`` command. +To check whether ``mount.ceph`` is present on your system, run the following command: + +.. prompt:: bash # - stat /sbin/mount.ceph + stat /sbin/mount.ceph Which Kernel Version? --------------------- diff --git a/doc/cephfs/multimds.rst b/doc/cephfs/multimds.rst index e50a5148ec2d..3d7a4bc8a061 100644 --- a/doc/cephfs/multimds.rst +++ b/doc/cephfs/multimds.rst @@ -116,7 +116,7 @@ The mechanism provided for this purpose is called an ``export pin``, an extended attribute of directories. The name of this extended attribute is ``ceph.dir.pin``. 
Users can set this attribute using standard commands: -:: +.. prompt:: bash # setfattr -n ceph.dir.pin -v 2 path/to/dir @@ -128,7 +128,7 @@ pin. In this way, setting the export pin on a directory affects all of its children. However, the parents pin can be overridden by setting the child directory's export pin. For example: -:: +.. prompt:: bash # mkdir -p a/b # "a" and "a/b" both start without an export pin set @@ -173,7 +173,7 @@ immediate children across a range of MDS ranks. The canonical example use-case would be the ``/home`` directory: we want every user's home directory to be spread across the entire MDS cluster. This can be set via: -:: +.. prompt:: bash # setfattr -n ceph.dir.pin.distributed -v 1 /cephfs/home @@ -183,7 +183,7 @@ may be ephemerally pinned. This is set through the extended attribute ``ceph.dir.pin.random`` with the value set to the percentage of directories that should be pinned. For example: -:: +.. prompt:: bash # setfattr -n ceph.dir.pin.random -v 0.5 /cephfs/tmp @@ -205,7 +205,7 @@ Ephemeral pins may override parent export pins and vice versa. What determines which policy is followed is the rule of the closest parent: if a closer parent directory has a conflicting policy, use that one instead. For example: -:: +.. prompt:: bash # mkdir -p foo/bar1/baz foo/bar2 setfattr -n ceph.dir.pin -v 0 foo @@ -217,7 +217,7 @@ directory will obey the pin on ``foo`` normally. For the reverse situation: -:: +.. prompt:: bash # mkdir -p home/{patrick,john} setfattr -n ceph.dir.pin.distributed -v 1 home @@ -229,7 +229,8 @@ because its export pin overrides the policy on ``home``. To remove a partitioning policy, remove the respective extended attribute or set the value to 0. -.. code::bash +.. prompt:: bash # + $ setfattr -n ceph.dir.pin.distributed -v 0 home # or $ setfattr -x ceph.dir.pin.distributed home @@ -237,50 +238,79 @@ or set the value to 0. For export pins, remove the extended attribute or set the extended attribute value to `-1`. -.. code::bash +.. prompt:: bash # + $ setfattr -n ceph.dir.pin -v -1 home +Dynamic Subtree Partitioning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +CephFS has long had a dynamic metadata balancer (sometimes called the "default +balancer") which can split or merge subtrees while placing them on "colder" MDS +ranks. Moving the metadata in this way improves overall file system throughput +and cache size. + +However, the balancer is sometimes inefficient or slow, so by default it is +turned off. This is to avoid an administrator "turning on multimds" by +increasing the ``max_mds`` setting only to find that the balancer has made a +mess of the cluster performance (reverting from this messy state of affairs is +straightforward but can take time). + +To turn on the balancer, run a command of the following form: + +.. prompt:: bash # + + ceph fs set balance_automate true + +Turn on the balancer only with an appropriate configuration, such as a +configuration that includes the ``bal_rank_mask`` setting (described +:ref:`below `). + +Careful monitoring of the file system performance and MDS is advised. + + Dynamic subtree partitioning with Balancer on specific ranks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The CephFS file system provides the ``bal_rank_mask`` option to enable the balancer -to dynamically rebalance subtrees within particular active MDS ranks. 
This -allows administrators to employ both the dynamic subtree partitioning and -static pining schemes in different active MDS ranks so that metadata loads -are optimized based on user demand. For instance, in realistic cloud -storage environments, where a lot of subvolumes are allotted to multiple -computing nodes (e.g., VMs and containers), some subvolumes that require -high performance are managed by static partitioning, whereas most subvolumes -that experience a moderate workload are managed by the balancer. As the balancer -evenly spreads the metadata workload to all active MDS ranks, performance of -static pinned subvolumes inevitably may be affected or degraded. If this option -is enabled, subtrees managed by the balancer are not affected by -static pinned subtrees. +.. _bal-rank-mask: + +The CephFS file system provides the ``bal_rank_mask`` option to enable the +balancer to dynamically rebalance subtrees within particular active MDS ranks. +This allows administrators to employ both the dynamic subtree partitioning and +static pining schemes in different active MDS ranks so that metadata loads are +optimized based on user demand. For instance, in realistic cloud storage +environments, where a lot of subvolumes are allotted to multiple computing +nodes (e.g., VMs and containers), some subvolumes that require high performance +are managed by static partitioning, whereas most subvolumes that experience a +moderate workload are managed by the balancer. As the balancer evenly spreads +the metadata workload to all active MDS ranks, performance of static pinned +subvolumes inevitably may be affected or degraded. If this option is enabled, +subtrees managed by the balancer are not affected by static pinned subtrees. This option can be configured with the ``ceph fs set`` command. For example: -:: +.. prompt:: bash # ceph fs set bal_rank_mask Each bitfield of the ```` number represents a dedicated rank. If the ```` is set to ``0x3``, the balancer runs on active ``0`` and ``1`` ranks. For example: -:: +.. prompt:: bash # ceph fs set bal_rank_mask 0x3 If the ``bal_rank_mask`` is set to ``-1`` or ``all``, all active ranks are masked and utilized by the balancer. As an example: -:: +.. prompt:: bash # ceph fs set bal_rank_mask -1 On the other hand, if the balancer needs to be disabled, the ``bal_rank_mask`` should be set to ``0x0``. For example: -:: +.. prompt:: bash # ceph fs set bal_rank_mask 0x0 diff --git a/doc/cephfs/quiesce-set-states.svg b/doc/cephfs/quiesce-set-states.svg new file mode 100644 index 000000000000..82f53886c7f3 --- /dev/null +++ b/doc/cephfs/quiesce-set-states.svg @@ -0,0 +1,142 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/cephfs/quota.rst b/doc/cephfs/quota.rst index e78173bcc3e6..b3d0d63b3b19 100644 --- a/doc/cephfs/quota.rst +++ b/doc/cephfs/quota.rst @@ -45,15 +45,28 @@ To view quota limit:: system call. Instead, a specific CephFS extended attribute can be viewed by running ``getfattr /some/dir -n ceph.``. -To remove a quota, set the value of extended attribute to ``0``:: +To remove or disable a quota, remove the respective extended attribute or set +the value to ``0``. 
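For context, these quotas are set by writing a non-zero value to the same
attributes; a brief illustration (the byte and file counts are arbitrary)::

    $ setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir
    $ setfattr -n ceph.quota.max_files -v 10000 /some/dir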
+ +Utilizing remove:: + + $ setfattr -x ceph.quota.max_bytes /some/dir + $ getfattr /some/dir -n ceph.quota.max_bytes + /some/dir/: ceph.quota.max_bytes: No such attribute + $ + $ setfattr -x ceph.quota.max_files /some/dir + $ getfattr /some/dir/ -n ceph.quota.max_files + /some/dir/: ceph.quota.max_files: No such attribute + +Remove by setting value to zero:: $ setfattr -n ceph.quota.max_bytes -v 0 /some/dir $ getfattr /some/dir -n ceph.quota.max_bytes - dir1/: ceph.quota.max_bytes: No such attribute + /some/dir/: ceph.quota.max_bytes: No such attribute $ $ setfattr -n ceph.quota.max_files -v 0 /some/dir - $ getfattr dir1/ -n ceph.quota.max_files - dir1/: ceph.quota.max_files: No such attribute + $ getfattr /some/dir/ -n ceph.quota.max_files + /some/dir/: ceph.quota.max_files: No such attribute Space Usage Reporting and CephFS Quotas --------------------------------------- diff --git a/doc/cephfs/scrub.rst b/doc/cephfs/scrub.rst index 5b813f1c41ad..9d6745ef7bf0 100644 --- a/doc/cephfs/scrub.rst +++ b/doc/cephfs/scrub.rst @@ -143,6 +143,14 @@ The types of damage that can be reported and repaired by File System Scrub are: * BACKTRACE : Inode's backtrace in the data pool is corrupted. +These above named MDS damages can be repaired by using the following command:: + + ceph tell mds.:0 scrub start /path recursive, repair, force + +If scrub is able to repair the damage, the corresponding entry is automatically +removed from the damage table. + + Evaluate strays using recursive scrub ===================================== diff --git a/doc/cephfs/snap-schedule.rst b/doc/cephfs/snap-schedule.rst index ef746be23590..a94d938040ff 100644 --- a/doc/cephfs/snap-schedule.rst +++ b/doc/cephfs/snap-schedule.rst @@ -31,7 +31,7 @@ Snapshot schedules are identified by path, their repeat interval and their start time. The repeat interval defines the time between two subsequent snapshots. It is specified by a number and a period multiplier, one of `h(our)`, `d(ay)`, -`w(eek)`, `M(onth)` and `Y(ear)`. E.g. a repeat interval of `12h` specifies one +`w(eek)`, `M(onth)` and `y(ear)`. E.g. a repeat interval of `12h` specifies one snapshot every 12 hours. The start time is specified as a time string (more details about passing times below). By default @@ -53,7 +53,7 @@ The semantics are that a spec will ensure `` snapshots are kept that are at least `
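As a brief illustration of the interval and retention specs described above
(a sketch only: it assumes the ``snap_schedule`` manager module is enabled and
that the path already exists; the values are examples)::

    ceph fs snap-schedule add /some/dir 12h
    ceph fs snap-schedule retention add /some/dir 24h4d
    ceph fs snap-schedule status /some/dir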
    diff --git a/qa/workunits/erasure-code/bench.sh b/qa/workunits/erasure-code/bench.sh index 8e288f053eca..87e997c3500f 100755 --- a/qa/workunits/erasure-code/bench.sh +++ b/qa/workunits/erasure-code/bench.sh @@ -17,7 +17,8 @@ # # Test that it works from sources with: # -# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \ +# TOTAL_SIZE=$((4 * 1024 * 1024)) SIZE=4096 \ +# CEPH_ERASURE_CODE_BENCHMARK=build/bin/ceph_erasure_code_benchmark \ # PLUGIN_DIRECTORY=build/lib \ # qa/workunits/erasure-code/bench.sh fplot jerasure | # tee qa/workunits/erasure-code/bench.js @@ -34,10 +35,14 @@ # firefox qa/workunits/erasure-code/bench.html # # Once it is confirmed to work, it can be run with a more significant -# volume of data so that the measures are more reliable: +# volume of data so that the measures are more reliable. Ideally the size +# of the buffers (SIZE) should be larger than the L3 cache to avoid cache hits. +# The following example uses an 80MB (80 * 1024 * 1024) buffer. +# A larger buffer with fewer iterations (iterations = TOTAL SIZE / SIZE) should result in +# more time spent encoding/decoding and less time allocating/aligning buffers: # -# TOTAL_SIZE=$((4 * 1024 * 1024 * 1024)) \ -# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \ +# TOTAL_SIZE=$((100 * 80 * 1024 * 1024)) SIZE=$((80 * 1024 * 1024)) \ +# CEPH_ERASURE_CODE_BENCHMARK=build/bin/ceph_erasure_code_benchmark \ # PLUGIN_DIRECTORY=build/lib \ # qa/workunits/erasure-code/bench.sh fplot jerasure | # tee qa/workunits/erasure-code/bench.js @@ -50,11 +55,25 @@ export PATH=/sbin:$PATH : ${CEPH_ERASURE_CODE_BENCHMARK:=ceph_erasure_code_benchmark} : ${PLUGIN_DIRECTORY:=/usr/lib/ceph/erasure-code} : ${PLUGINS:=isa jerasure} -: ${TECHNIQUES:=vandermonde cauchy} -: ${TOTAL_SIZE:=$((1024 * 1024))} -: ${SIZE:=4096} +: ${TECHNIQUES:=vandermonde cauchy liberation reed_sol_r6_op blaum_roth liber8tion} +: ${TOTAL_SIZE:=$((100 * 80 * 1024 * 1024))} #TOTAL_SIZE / SIZE = number of encode or decode iterations to run +: ${SIZE:=$((80 * 1024 * 1024))} #size of buffer to encode/decode : ${PARAMETERS:=--parameter jerasure-per-chunk-alignment=true} +declare -rA isa_techniques=( + [vandermonde]="reed_sol_van" + [cauchy]="cauchy" +) + +declare -rA jerasure_techniques=( + [vandermonde]="reed_sol_van" + [cauchy]="cauchy_good" + [reed_sol_r6_op]="reed_sol_r6_op" + [blaum_roth]="blaum_roth" + [liberation]="liberation" + [liber8tion]="liber8tion" +) + function bench_header() { echo -e "seconds\tKB\tplugin\tk\tm\twork.\titer.\tsize\teras.\tcommand." 
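    # column legend: elapsed seconds, KB processed, plugin name, k, m,
    # workload (encode or decode), iteration count, buffer size, number of
    # erasures, and the full benchmark command line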
} @@ -100,6 +119,25 @@ function packetsize() { echo $p } +function get_technique_name() +{ + local plugin=$1 + local technique=$2 + + declare -n techniques="${plugin}_techniques" + echo ${techniques["$technique"]} +} + +function technique_is_raid6() { + local technique=$1 + local r6_techniques="liberation reed_sol_r6_op blaum_roth liber8tion" + + if [[ $r6_techniques =~ $technique ]]; then + return 0 + fi + return 1 +} + function bench_run() { local plugin=jerasure local w=8 @@ -111,31 +149,31 @@ function bench_run() { k2ms[4]="2 3" k2ms[6]="2 3 4" k2ms[10]="3 4" - local isa2technique_vandermonde='reed_sol_van' - local isa2technique_cauchy='cauchy' - local jerasure2technique_vandermonde='reed_sol_van' - local jerasure2technique_cauchy='cauchy_good' + for technique in ${TECHNIQUES} ; do for plugin in ${PLUGINS} ; do - eval technique_parameter=\$${plugin}2technique_${technique} + technique_parameter=$(get_technique_name $plugin $technique) + if [[ -z $technique_parameter ]]; then continue; fi echo "serie encode_${technique}_${plugin}" for k in $ks ; do for m in ${k2ms[$k]} ; do + if [ $m -ne 2 ] && technique_is_raid6 $technique; then continue; fi bench $plugin $k $m encode $(($TOTAL_SIZE / $SIZE)) $SIZE 0 \ --parameter packetsize=$(packetsize $k $w $VECTOR_WORDSIZE $SIZE) \ ${PARAMETERS} \ --parameter technique=$technique_parameter - done done done done for technique in ${TECHNIQUES} ; do for plugin in ${PLUGINS} ; do - eval technique_parameter=\$${plugin}2technique_${technique} + technique_parameter=$(get_technique_name $plugin $technique) + if [[ -z $technique_parameter ]]; then continue; fi echo "serie decode_${technique}_${plugin}" for k in $ks ; do for m in ${k2ms[$k]} ; do + if [ $m -ne 2 ] && technique_is_raid6 $technique; then continue; fi echo for erasures in $(seq 1 $m) ; do bench $plugin $k $m decode $(($TOTAL_SIZE / $SIZE)) $SIZE $erasures \ @@ -150,27 +188,42 @@ function bench_run() { } function fplot() { - local serie - bench_run | while read seconds total plugin k m workload iteration size erasures rest ; do + local serie="" + local plot="" + local encode_table="var encode_table = [\n" + local decode_table="var decode_table = [\n" + while read seconds total plugin k m workload iteration size erasures rest ; do if [ -z $seconds ] ; then - echo null, + plot="$plot null,\n" elif [ $seconds = serie ] ; then if [ "$serie" ] ; then - echo '];' + echo -e "$plot];\n" fi local serie=`echo $total | sed 's/cauchy_\([0-9]\)/cauchy_good_\1/g'` - echo "var $serie = [" + plot="var $serie = [\n" else local x + local row + local technique=`echo $rest | grep -Po "(?<=technique=)\w*"` + local packetsize=`echo $rest | grep -Po "(?<=packetsize=)\w*"` if [ $workload = encode ] ; then x=$k/$m + row="[ '$plugin', '$technique', $seconds, $total, $k, $m, $iteration, $packetsize ]," + encode_table="$encode_table $row\n" + else x=$k/$m/$erasures + row="[ '$plugin', '$technique', $seconds, $total, $k, $m, $iteration, $packetsize, $erasures ]," + decode_table="$decode_table $row\n" fi - echo "[ '$x', " $(echo "( $total / 1024 / 1024 ) / $seconds" | bc -ql) " ], " + local out_time="$(echo "( $total / 1024 / 1024 ) / $seconds" | bc -ql)" + plot="$plot [ '$x', $out_time ],\n" fi - done - echo '];' + done < <(bench_run) + + echo -e "$plot];\n" + echo -e "$encode_table];\n" + echo -e "$decode_table];\n" } function main() { diff --git a/qa/workunits/erasure-code/examples.css b/qa/workunits/erasure-code/examples.css index ee4724778fcf..7d4c2ae18284 100644 --- a/qa/workunits/erasure-code/examples.css +++ 
b/qa/workunits/erasure-code/examples.css @@ -94,4 +94,22 @@ input[type=checkbox] { .legend table { border-spacing: 5px; -} \ No newline at end of file +} + +#encode-table, #decode-table { + margin: 0px 0px 15px 15px; + font-size: 12px; + border-collapse: collapse; + width: 100%; +} + +#encode-table td, #decode-table td, #encode-table th, #decode-table th { + border: 1px solid #ddd; + padding: 4px; +} + +#encode-table th, #decode-table th { + padding-top: 4px; + padding-bottom: 4px; + text-align: left; +} diff --git a/qa/workunits/erasure-code/plot.js b/qa/workunits/erasure-code/plot.js index bd2bba5bbada..af91a9963891 100644 --- a/qa/workunits/erasure-code/plot.js +++ b/qa/workunits/erasure-code/plot.js @@ -32,6 +32,38 @@ $(function() { lines: { show: true }, }); } + if (typeof encode_reed_sol_r6_op_jerasure != 'undefined') { + encode.push({ + data: encode_reed_sol_r6_op_jerasure, + label: "Jerasure, Reed Solomon RAID6", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_liberation_jerasure != 'undefined') { + encode.push({ + data: encode_liberation_jerasure, + label: "Jerasure, Liberation", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_liber8tion_jerasure != 'undefined') { + encode.push({ + data: encode_liber8tion_jerasure, + label: "Jerasure, Liber8tion", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_blaum_roth_jerasure != 'undefined') { + encode.push({ + data: encode_blaum_roth_jerasure, + label: "Jerasure, Blaum Roth", + points: { show: true }, + lines: { show: true }, + }); + } $.plot("#encode", encode, { xaxis: { mode: "categories", @@ -72,11 +104,42 @@ $(function() { lines: { show: true }, }); } + if (typeof decode_reed_sol_r6_op_jerasure != 'undefined') { + decode.push({ + data: decode_reed_sol_r6_op_jerasure, + label: "Jerasure, Reed Solomon RAID6", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_liberation_jerasure != 'undefined') { + decode.push({ + data: decode_liberation_jerasure, + label: "Jerasure, Liberation", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_liber8tion_jerasure != 'undefined') { + decode.push({ + data: decode_liber8tion_jerasure, + label: "Jerasure, Liber8tion", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_blaum_roth_jerasure != 'undefined') { + decode.push({ + data: decode_blaum_roth_jerasure, + label: "Jerasure, Blaum Roth", + points: { show: true }, + lines: { show: true }, + }); + } $.plot("#decode", decode, { xaxis: { mode: "categories", tickLength: 0 }, }); - }); diff --git a/qa/workunits/erasure-code/tables.js b/qa/workunits/erasure-code/tables.js new file mode 100644 index 000000000000..cf224ff4033d --- /dev/null +++ b/qa/workunits/erasure-code/tables.js @@ -0,0 +1,28 @@ +$(function() { + if (typeof encode_table != 'undefined') { + let table_rows = ''; + for (let row of encode_table) { + table_rows += `` + for (let cell of row) + { + table_rows += `${cell}` + } + table_rows += ``; + console.log(table_rows); + } + $('#encode-table').append(table_rows); + } + + if (typeof decode_table != 'undefined') { + let table_rows = ''; + for (let row of decode_table) { + table_rows += `` + for (let cell of row) + { + table_rows += `${cell}` + } + table_rows += ``; + } + $('#decode-table').append(table_rows); + } +}); \ No newline at end of file diff --git a/qa/workunits/fs/damage/test-first-damage-lost-found.sh 
b/qa/workunits/fs/damage/test-first-damage-lost-found.sh new file mode 100755 index 000000000000..2c532c0b108c --- /dev/null +++ b/qa/workunits/fs/damage/test-first-damage-lost-found.sh @@ -0,0 +1,196 @@ +#!/bin/bash + +set -ex + +FIRST_DAMAGE="first-damage.py" +FS=cephfs +METADATA_POOL=cephfs.a.meta +DATA_POOL=cephfs.a.data +MOUNT=/mnt1 +PYTHON=python3 + +function usage { + printf '%s: [--fs=] [--metadata-pool=] [--first-damage=]\n' + exit 1 +} + + +function create { + ceph config set mds mds_bal_fragment_dirs 0 + mkdir dir1 + DIR1_INODE=$(stat -c '%i' dir1) + touch dir1/file1 + DIR1_FILE1_INODE=$(stat -c '%i' dir1/file1) +} + +function flush { + ceph tell mds."$FS":0 flush journal +} + +function damage_backtrace { + flush + ceph fs fail "$FS" + sleep 5 + + cephfs-journal-tool --rank="$FS":0 event recover_dentries summary + # required here as the flush would re-write the below deleted omap + cephfs-journal-tool --rank="$FS":0 journal reset + + #remove dir1/file1 omap entry from metadata pool + local DIS=$(printf '%llx.%08llx' "$DIR1_INODE" 0) + rados --pool="$METADATA_POOL" rmomapkey "$DIS" "file1_head" + + #remove backtrace + local FIS=$(printf '%llx.%08llx' "$DIR1_FILE1_INODE" 0) + rados --pool="$DATA_POOL" rmxattr "$FIS" "parent" + + ceph fs set "$FS" joinable true + sleep 5 +} + +function damage_lost_found { + flush + ceph fs fail "$FS" + sleep 5 + local IS=$(printf '%llx.%08llx' "1" 0) + + local T=$(mktemp -p /tmp) + # nuke head version of "lost+found" + rados --pool="$METADATA_POOL" getomapval "$IS" lost+found_head "$T" + printf '\xff\xff\xff\xf0' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" setomapval "$IS" lost+found_head --input-file="$T" + ceph fs set "$FS" joinable true + sleep 5 +} + +function recover_damaged_backtrace_file { + flush + ceph fs fail "$FS" + sleep 5 + + cephfs-journal-tool --rank="$FS":0 journal reset + + #creates lost+found directory and recovers the damaged backtrace file + cephfs-data-scan cleanup + cephfs-data-scan init + cephfs-data-scan scan_extents + cephfs-data-scan scan_inodes + cephfs-data-scan scan_links + + ceph fs set "$FS" joinable true + sleep 5 +} + +function recover { + flush + ceph fs fail "$FS" + sleep 5 + cephfs-journal-tool --rank="$FS":0 event recover_dentries summary + cephfs-journal-tool --rank="$FS":0 journal reset + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug1 --memo /tmp/memo1 "$METADATA_POOL" + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug2 --memo /tmp/memo2 --repair-nosnap "$METADATA_POOL" + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug3 --memo /tmp/memo3 --remove "$METADATA_POOL" + ceph fs set "$FS" joinable true + sleep 5 +} + +function check_lost_found { + stat lost+found || exit 2 +} +function check { + if stat lost+found; then + echo should be gone + exit 1 + fi +} + +function mount { + #sudo --preserve-env=CEPH_CONF bin/mount.ceph :/ "$MOUNT" -o name=admin,noshare + sudo bin/ceph-fuse -c ./ceph.conf /mnt1 + df -h "$MOUNT" +} + +function main { + eval set -- $(getopt --name "$0" --options '' --longoptions 'help,fs:,metadata-pool:,first-damage:,mount:,python:' -- "$@") + + while [ "$#" -gt 0 ]; do + echo "$*" + echo "$1" + case "$1" in + -h|--help) + usage + ;; + --fs) + FS="$2" + shift 2 + ;; + --metadata-pool) + METADATA_POOL="$2" + shift 2 + ;; + --mount) + MOUNT="$2" + shift 2 + ;; + --first-damage) + FIRST_DAMAGE="$2" + shift 2 + ;; + --python) + PYTHON="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + usage + ;; + esac + done + + mount + + pushd "$MOUNT" + create + popd + + sudo umount -f 
"$MOUNT" + + # flush dentries/inodes to omap + flush + + damage_backtrace + # creates lost+found directory + recover_damaged_backtrace_file + + sleep 5 # for mds to join + mount + pushd "$MOUNT" + sleep 5 # wait for mount to complete + + # check lost+found is created + check_lost_found + popd + sudo umount -f "$MOUNT" + # flush dentries/inodes to omap + flush + + # damage lost+found directory + damage_lost_found + recover + + mount + + pushd "$MOUNT" + sleep 5 # wait for mount to complete + + #check 'lost+found' dentry should be gone + check + popd + + sudo umount -f "$MOUNT" +} + +main "$@" diff --git a/qa/workunits/fs/damage/test-first-damage.sh b/qa/workunits/fs/damage/test-first-damage.sh index 57447b957d78..5038ef3cd050 100755 --- a/qa/workunits/fs/damage/test-first-damage.sh +++ b/qa/workunits/fs/damage/test-first-damage.sh @@ -84,7 +84,7 @@ function recover { ceph fs fail "$FS" sleep 5 cephfs-journal-tool --rank="$FS":0 event recover_dentries summary - cephfs-journal-tool --rank="$FS":0 journal reset + cephfs-journal-tool --rank="$FS":0 journal reset --yes-i-really-really-mean-it "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug1 --memo /tmp/memo1 "$METADATA_POOL" "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug2 --memo /tmp/memo2 --repair-nosnap "$METADATA_POOL" "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug3 --memo /tmp/memo3 --remove "$METADATA_POOL" diff --git a/qa/workunits/fs/full/subvolume_clone.sh b/qa/workunits/fs/full/subvolume_clone.sh index 75648f306f8b..cd4e043afede 100755 --- a/qa/workunits/fs/full/subvolume_clone.sh +++ b/qa/workunits/fs/full/subvolume_clone.sh @@ -7,8 +7,8 @@ set -ex # Hence the subsequent subvolume commands on the clone fails with # 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback. -# The osd is of the size 1GB. The full-ratios are set so that osd is treated full -# at around 600MB. The subvolume is created and 100MB is written. +# The osd is of the size 2GiB. The full-ratios are set so that osd is treated full +# at around 1.2GB. The subvolume is created and 200MB is written. # The subvolume is snapshotted and cloned ten times. Since the clone delay is set to 15 seconds, # all the clones reach pending state for sure. Among ten clones, only few succeed and rest fails # with ENOSPACE. @@ -46,7 +46,7 @@ echo "After ratios are set" df -h ceph osd df -for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/1MB_file-$i status=progress bs=1M count=1 conv=fdatasync;done +for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/2MB_file-$i status=progress bs=1M count=2 conv=fdatasync;done # For debugging echo "After subvolumes are written" @@ -59,6 +59,9 @@ ceph fs subvolume snapshot create cephfs sub_0 snap_0 # Set clone snapshot delay ceph config set mgr mgr/volumes/snapshot_clone_delay 15 +# Disable the snapshot_clone_no_wait config option +ceph config set mgr mgr/volumes/snapshot_clone_no_wait false + # Schedule few clones, some would fail with no space for i in $(eval echo {1..$NUM_CLONES});do ceph fs subvolume snapshot clone cephfs sub_0 snap_0 clone_$i;done diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh index a464e30f56e9..2a3bf956df33 100755 --- a/qa/workunits/fs/full/subvolume_rm.sh +++ b/qa/workunits/fs/full/subvolume_rm.sh @@ -2,8 +2,8 @@ set -ex # This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command -# when the osd is full. The command used to hang. The osd is of the size 1GB. -# The subvolume is created and 500MB file is written. 
The full-ratios are +# when the osd is full. The command used to hang. The osd is of the size 2GiB. +# The subvolume is created and 1GB file is written. The full-ratios are # set below 500MB such that the osd is treated as full. Now the subvolume is # is removed. This should be successful with the introduction of FULL # capabilities which the mgr holds. @@ -21,7 +21,7 @@ echo "Before write" df -h ceph osd df -sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/500MB_file-1 status=progress bs=1M count=500 +sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/1GB_file-1 status=progress bs=1M count=1000 ceph osd set-full-ratio 0.2 ceph osd set-nearfull-ratio 0.16 diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh index f6d0add9fda4..8df89d3c7a3b 100755 --- a/qa/workunits/fs/full/subvolume_snapshot_rm.sh +++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh @@ -7,8 +7,8 @@ set -ex # snapshot rm of the same snapshot fails with 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' # traceback. -# The osd is of the size 1GB. The subvolume is created and 800MB file is written. -# Then full-ratios are set below 500MB such that the osd is treated as full. +# The osd is of the size 2GiB. The subvolume is created and 1.6GB file is written. +# Then full-ratios are set below 1GiB such that the osd is treated as full. # The subvolume snapshot is taken which succeeds as no extra space is required # for snapshot. Now, the removal of the snapshot fails with ENOSPACE as it # fails to remove the snapshot metadata set. The snapshot removal fails @@ -31,8 +31,8 @@ echo "Before write" df $CEPH_MNT ceph osd df -# Write 800MB file and set full ratio to around 200MB -ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/800MB_file-1 status=progress bs=1M count=800 conv=fdatasync +# Write 1.6GB file and set full ratio to around 400MB +ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/1.6GB_file-1 status=progress bs=1M count=1600 conv=fdatasync ceph osd set-full-ratio 0.2 ceph osd set-nearfull-ratio 0.16 diff --git a/qa/workunits/fs/misc/fallocate.sh b/qa/workunits/fs/misc/fallocate.sh new file mode 100755 index 000000000000..253e6cb7a377 --- /dev/null +++ b/qa/workunits/fs/misc/fallocate.sh @@ -0,0 +1,17 @@ +#!/bin/sh -x + +# fallocate with mode 0 should fail with EOPNOTSUPP +set -e +mkdir -p testdir +cd testdir + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +expect_failure fallocate -l 1M preallocated.txt +rm -f preallocated.txt + +cd .. +rmdir testdir +echo OK diff --git a/qa/workunits/fs/misc/general_vxattrs.sh b/qa/workunits/fs/misc/general_vxattrs.sh new file mode 100755 index 000000000000..e7d467db616a --- /dev/null +++ b/qa/workunits/fs/misc/general_vxattrs.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# test setfattr remove, and check values of vxattr +# after remove for vxattr, where possible. 
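# The pattern below: each vxattr is set with "setfattr -n <name> -v <value>",
# read back with "getfattr -n <name>" (where supported), then removed with
# "setfattr -x <name>", after which the default value should be reported again.
# A bare "setfattr -n <name>" with no value is expected to fail with
# "Invalid argument".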
+ +set -ex + +mkdir -p dir + +#ceph.dir.pin test, def val -1, reset val -1 +getfattr -n ceph.dir.pin dir | grep 'ceph.dir.pin="-1"' +setfattr -n ceph.dir.pin dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -n ceph.dir.pin -v 1 dir +getfattr -n ceph.dir.pin dir | grep 'ceph.dir.pin="1"' +setfattr -x ceph.dir.pin dir +getfattr -n ceph.dir.pin dir | grep 'ceph.dir.pin="-1"' + +#TODO: Once test machines support getfattr for vxattr, uncomment getfattr below +#see: https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/EZL3POLMQLMMNBPAJ2QQ2BAKH44VUNJU/#JJNRRYLUKUAUN5HIL5A7Q4N63OCLWQXF +#for further detail + +#ceph.dir.pin.distributed, def val 0, reset val 0 +#getfattr -n ceph.dir.pin.distributed dir | grep 'ceph.dir.pin.distributed="0"' +setfattr -n ceph.dir.pin.distributed dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -n ceph.dir.pin.distributed -v 1 dir +#getfattr -n ceph.dir.pin.distributed dir | grep 'ceph.dir.pin.distributed="1"' +setfattr -x ceph.dir.pin.distributed dir +#getfattr -n ceph.dir.pin.distributed dir | grep 'ceph.dir.pin.distributed="0"' + +#ceph.dir.pin.random def val 0, reset val 0 +#getfattr -n ceph.dir.pin.random dir | grep 'ceph.dir.pin.random="0"' +setfattr -n ceph.dir.pin.random dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -n ceph.dir.pin.random -v 0.01 dir +#getfattr -n ceph.dir.pin.random dir | grep 'ceph.dir.pin.random="0.01"' +setfattr -x ceph.dir.pin.random dir +#getfattr -n ceph.dir.pin.random dir | grep 'ceph.dir.pin.random="0"' + +#ceph.quota, def value 0, reset val 0 +setfattr -n ceph.quota.max_bytes dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -n ceph.quota.max_bytes -v 100000000 dir +#getfattr -n ceph.quota.max_bytes dir | grep 'ceph.quota.max_bytes="100000000"' +setfattr -x ceph.quota.max_bytes dir +setfattr -n ceph.quota.max_files dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -n ceph.quota.max_files -v 10000 dir +#getfattr -n ceph.quota.max_files dir | grep 'ceph.quota.max_files="10000"' +setfattr -x ceph.quota.max_files dir + +rmdir dir + +echo OK + diff --git a/qa/workunits/fs/misc/layout_vxattrs.sh b/qa/workunits/fs/misc/layout_vxattrs.sh index 81133627347b..e87e9aa87d07 100755 --- a/qa/workunits/fs/misc/layout_vxattrs.sh +++ b/qa/workunits/fs/misc/layout_vxattrs.sh @@ -105,6 +105,23 @@ getfattr -n ceph.file.layout.stripe_count dir/file | grep -q 8 getfattr -n ceph.file.layout.object_size dir/file | grep -q 10485760 getfattr -n ceph.file.layout.pool_namespace dir/file | grep -q dirns +#Per https://docs.ceph.com/en/latest/cephfs/file-layouts/#clearing-layouts, pool_namespace +#can be individually removed, while other layout xattrs must be cleared together. 
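#In other words, each "setfattr -x ceph.*.layout.<field>" below is expected to
#fail with "Invalid argument", while removing the pool_namespace attribute
#(ceph.file.layout.pool_namespace here, and ceph.dir.layout.pool_namespace just
#after) should succeed.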
+setfattr -x ceph.dir.layout.pool dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -x ceph.dir.layout.pool_id dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -x ceph.dir.layout.pool_name dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -x ceph.dir.layout.stripe_unit dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -x ceph.dir.layout.stripe_count dir 2>&1 | grep "setfattr: dir: Invalid argument" +setfattr -x ceph.dir.layout.object_size dir 2>&1 | grep "setfattr: dir: Invalid argument" + +setfattr -x ceph.file.layout.pool dir/file 2>&1 | grep "setfattr: dir/file: Invalid argument" +setfattr -x ceph.file.layout.pool_id dir/file 2>&1 | grep "setfattr: dir/file: Invalid argument" +setfattr -x ceph.file.layout.pool_name dir/file 2>&1 | grep "setfattr: dir/file: Invalid argument" +setfattr -x ceph.file.layout.stripe_unit dir/file 2>&1 | grep "setfattr: dir/file: Invalid argument" +setfattr -x ceph.file.layout.stripe_count dir/file 2>&1 | grep "setfattr: dir/file: Invalid argument" +setfattr -x ceph.file.layout.object_size dir/file 2>&1 | grep "setfattr: dir/file: Invalid argument" +setfattr -x ceph.file.layout.pool_namespace dir/file + setfattr -x ceph.dir.layout.pool_namespace dir getfattr -n ceph.dir.layout dir | grep -q -v pool_namespace=dirns diff --git a/qa/workunits/fs/snaps/snaptest-double-null.sh b/qa/workunits/fs/snaps/snaptest-double-null.sh index cdf32e4f0ef6..833c0fd696b9 100755 --- a/qa/workunits/fs/snaps/snaptest-double-null.sh +++ b/qa/workunits/fs/snaps/snaptest-double-null.sh @@ -11,6 +11,7 @@ mkdir a cat > a/foo & mkdir a/.snap/one mkdir a/.snap/two +wait chmod 777 a/foo sync # this might crash the mds ps diff --git a/qa/workunits/fs/snaps/snaptest-git-ceph.sh b/qa/workunits/fs/snaps/snaptest-git-ceph.sh index 2b38720c9a57..6079ba8945b1 100755 --- a/qa/workunits/fs/snaps/snaptest-git-ceph.sh +++ b/qa/workunits/fs/snaps/snaptest-git-ceph.sh @@ -4,7 +4,14 @@ set -e # increase the cache size sudo git config --global http.sslVerify false -sudo git config --global http.postBuffer 1048576000 +sudo git config --global http.postBuffer 1024MB # default is 1MB +sudo git config --global http.maxRequestBuffer 100M # default is 10MB +sudo git config --global core.compression 0 + +# enable the debug logs for git clone +export GIT_TRACE_PACKET=1 +export GIT_TRACE=1 +export GIT_CURL_VERBOSE=1 # try it again if the clone is slow and the second time retried=false @@ -19,6 +26,11 @@ timeout 1800 git clone https://git.ceph.com/ceph.git trap - EXIT cd ceph +# disable the debug logs for git clone +export GIT_TRACE_PACKET=0 +export GIT_TRACE=0 +export GIT_CURL_VERBOSE=0 + versions=`seq 1 90` for v in $versions diff --git a/qa/workunits/kernel_untar_build.sh b/qa/workunits/kernel_untar_build.sh index 9ee55eac9924..9855f3d31b6f 100755 --- a/qa/workunits/kernel_untar_build.sh +++ b/qa/workunits/kernel_untar_build.sh @@ -2,11 +2,11 @@ set -ex -wget -O linux.tar.gz http://download.ceph.com/qa/linux-5.4.tar.gz +wget -O linux.tar.xz http://download.ceph.com/qa/linux-6.5.11.tar.xz mkdir t cd t -tar xzf ../linux.tar.gz +tar xJf ../linux.tar.xz cd linux* make defconfig make -j`grep -c processor /proc/cpuinfo` diff --git a/qa/workunits/libcephfs/test.sh b/qa/workunits/libcephfs/test.sh index c53fe893c13b..dc8ef1fc72f4 100755 --- a/qa/workunits/libcephfs/test.sh +++ b/qa/workunits/libcephfs/test.sh @@ -6,5 +6,7 @@ ceph_test_libcephfs_reclaim ceph_test_libcephfs_lazyio ceph_test_libcephfs_newops ceph_test_libcephfs_suidsgid +ceph_test_libcephfs_snapdiff 
+ceph_test_libcephfs_vxattr exit 0 diff --git a/qa/workunits/mon/config.sh b/qa/workunits/mon/config.sh index 1b00201ae481..10cbe5630e91 100755 --- a/qa/workunits/mon/config.sh +++ b/qa/workunits/mon/config.sh @@ -98,11 +98,11 @@ ceph tell osd.0 config unset debug_asok ceph tell osd.0 config unset debug_asok ceph config rm osd.0 debug_asok -while ceph config show osd.0 | grep debug_asok | grep mon +while ceph config show osd.0 | grep '^debug_asok[:[space]:]' | grep mon do sleep 1 done -ceph config show osd.0 | grep -c debug_asok | grep 0 +ceph config show osd.0 | grep -c '^debug_asok[:[space]:]' | grep 0 ceph config set osd.0 osd_scrub_cost 123 while ! ceph config show osd.0 | grep osd_scrub_cost | grep mon @@ -111,6 +111,13 @@ do done ceph config rm osd.0 osd_scrub_cost +#RGW daemons test config set +ceph config set client.rgw debug_rgw 22 +while ! ceph config show client.rgw | grep debug_rgw | grep 22 | grep mon +do + sleep 1 +done + # show-with-defaults ceph config show-with-defaults osd.0 | grep debug_asok @@ -130,6 +137,21 @@ rm -f $t1 $t2 expect_false ceph config reset expect_false ceph config reset -1 + + +# test parallel config set +# reproducer for https://tracker.ceph.com/issues/62832 +ceph config reset 0 +for ((try = 0; try < 10; try++)); do + set +x + for ((i = 0; i < 100; i++)); do + # Use a config that will get "handled" by the Objecter instantiated by the ceph binary + ceph config set client rados_mon_op_timeout $((i+300)) & + done 2> /dev/null + set -x + wait +done + # we are at end of testing, so it's okay to revert everything ceph config reset 0 diff --git a/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh b/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh new file mode 100755 index 000000000000..ded138541608 --- /dev/null +++ b/qa/workunits/mon/mon-stretch-mode-5-mons-8-osds.sh @@ -0,0 +1,68 @@ +#!/bin/bash -ex + +# A bash script for setting up stretch mode with 5 monitors and 8 OSDs. 
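# Topology built below: two datacenters (dc1 and dc2), each with two hosts and
# four OSDs; monitors a and b in dc1, c and d in dc2, plus a fifth tiebreaker
# monitor e placed in dc3.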
+ +NUM_OSDS_UP=$(ceph osd df | grep "up" | wc -l) + +if [ $NUM_OSDS_UP -lt 8 ]; then + echo "test requires at least 8 OSDs up and running" + exit 1 +fi + +for dc in dc1 dc2 + do + ceph osd crush add-bucket $dc datacenter + ceph osd crush move $dc root=default + done + +ceph osd crush add-bucket host01 host +ceph osd crush add-bucket host02 host +ceph osd crush add-bucket host03 host +ceph osd crush add-bucket host04 host + +ceph osd crush move host01 datacenter=dc1 +ceph osd crush move host02 datacenter=dc1 +ceph osd crush move host03 datacenter=dc2 +ceph osd crush move host04 datacenter=dc2 + +ceph osd crush move osd.0 host=host01 +ceph osd crush move osd.1 host=host01 +ceph osd crush move osd.2 host=host02 +ceph osd crush move osd.3 host=host02 +ceph osd crush move osd.4 host=host03 +ceph osd crush move osd.5 host=host03 +ceph osd crush move osd.6 host=host04 +ceph osd crush move osd.7 host=host04 + +# set location for monitors +ceph mon set_location a datacenter=dc1 host=host01 +ceph mon set_location b datacenter=dc1 host=host02 +ceph mon set_location c datacenter=dc2 host=host03 +ceph mon set_location d datacenter=dc2 host=host04 + +# set location for tiebreaker monitor +ceph mon set_location e datacenter=dc3 host=host05 + +# remove the current host from crush map +hostname=$(hostname -s) +ceph osd crush remove $hostname +# create a new crush rule with stretch rule +ceph osd getcrushmap > crushmap +crushtool --decompile crushmap > crushmap.txt +sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt +cat >> crushmap_modified.txt << EOF +rule stretch_rule { + id 2 + type replicated + step take default + step choose firstn 2 type datacenter + step chooseleaf firstn 2 type host + step emit +} +# end crush map +EOF + +crushtool --compile crushmap_modified.txt -o crushmap.bin +ceph osd setcrushmap -i crushmap.bin + +ceph mon enable_stretch_mode e stretch_rule datacenter \ No newline at end of file diff --git a/qa/workunits/mon/mon-stretch-pool.sh b/qa/workunits/mon/mon-stretch-pool.sh new file mode 100755 index 000000000000..2c62082db509 --- /dev/null +++ b/qa/workunits/mon/mon-stretch-pool.sh @@ -0,0 +1,148 @@ +#!/bin/bash -ex + +# A CLI test for ceph osd pool stretch set and ceph osd pool stretch show. 
+# Sets up the cluster with 3 datacenters and 3 hosts in each datacenter + +NUM_OSDS_UP=$(ceph osd df | grep "up" | wc -l) + +if [ $NUM_OSDS_UP -lt 6 ]; then + echo "test requires at least 6 OSDs up and running" + exit 1 +fi + +function expect_false() +{ + # expect the command to return false + if "$@"; then return 1; else return 0; fi +} + +function expect_true() +{ + # expect the command to return true + if "$@"; then return 0; else return 1; fi +} + +function teardown() +{ + # cleanup + for pool in `ceph osd pool ls` + do + ceph osd pool rm $pool $pool --yes-i-really-really-mean-it + done +} + +for dc in dc1 dc2 dc3 + do + ceph osd crush add-bucket $dc datacenter + ceph osd crush move $dc root=default + done + +ceph osd crush add-bucket node-1 host +ceph osd crush add-bucket node-2 host +ceph osd crush add-bucket node-3 host +ceph osd crush add-bucket node-4 host +ceph osd crush add-bucket node-5 host +ceph osd crush add-bucket node-6 host +ceph osd crush add-bucket node-7 host +ceph osd crush add-bucket node-8 host +ceph osd crush add-bucket node-9 host + +ceph osd crush move node-1 datacenter=dc1 +ceph osd crush move node-2 datacenter=dc1 +ceph osd crush move node-3 datacenter=dc1 +ceph osd crush move node-4 datacenter=dc2 +ceph osd crush move node-5 datacenter=dc2 +ceph osd crush move node-6 datacenter=dc2 +ceph osd crush move node-7 datacenter=dc3 +ceph osd crush move node-8 datacenter=dc3 +ceph osd crush move node-9 datacenter=dc3 + +ceph osd crush move osd.0 host=node-1 +ceph osd crush move osd.1 host=node-2 +ceph osd crush move osd.2 host=node-3 +ceph osd crush move osd.3 host=node-4 +ceph osd crush move osd.4 host=node-5 +ceph osd crush move osd.5 host=node-6 +ceph osd crush move osd.6 host=node-7 +ceph osd crush move osd.7 host=node-8 +ceph osd crush move osd.8 host=node-9 + +ceph mon set_location a datacenter=dc1 host=node-1 +ceph mon set_location b datacenter=dc1 host=node-2 +ceph mon set_location c datacenter=dc1 host=node-3 +ceph mon set_location d datacenter=dc2 host=node-4 +ceph mon set_location e datacenter=dc2 host=node-5 +ceph mon set_location f datacenter=dc2 host=node-6 +ceph mon set_location g datacenter=dc3 host=node-7 +ceph mon set_location h datacenter=dc3 host=node-8 +ceph mon set_location i datacenter=dc3 host=node-9 + + +TEST_POOL_STRETCH=pool_stretch +TEST_CRUSH_RULE=replicated_rule_custom + +# Non existence pool should return error +expect_false ceph osd pool stretch show $TEST_POOL_STRETCH + +ceph osd pool create $TEST_POOL_STRETCH 1 + +# pool must be a stretch pool for this command to show anything. 
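# For reference, the positional arguments used with "ceph osd pool stretch set"
# in this test are: <pool> <peer_crush_bucket_count> <peer_crush_bucket_target>
# <barrier> <crush_rule> <size> <min_size>.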
+expect_false ceph osd pool stretch show $TEST_POOL_STRETCH + +# All Argument must present +expect_false ceph osd pool stretch set $TEST_POOL_STRETCH 2 3 datacenter $TEST_CRUSH_RULE +# Non existence pool should return error +expect_false ceph osd pool stretch set non_exist_pool 2 3 datacenter $TEST_CRUSH_RULE 6 3 +# Non existence barrier should return appropriate error +expect_false ceph osd pool stretch set $TEST_POOL_STRETCH 2 3 non_exist_barrier $TEST_CRUSH_RULE 6 3 +# Non existence crush_rule should return appropriate error +expect_false ceph osd pool stretch set $TEST_POOL_STRETCH 2 3 datacenter $TEST_CRUSH_RULE 6 3 +# Unsetting a non existence pool should return error +expect_false ceph osd pool stretch unset non_exist_pool +# Unsetting a non-stretch pool should return error +expect_false ceph osd pool stretch unset $TEST_POOL_STRETCH + +# Create a custom crush rule +ceph osd getcrushmap > crushmap +crushtool --decompile crushmap > crushmap.txt +sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt +cat >> crushmap_modified.txt << EOF +rule replicated_rule_custom { + id 1 + type replicated + step take default + step choose firstn 3 type datacenter + step chooseleaf firstn 2 type host + step emit +} +# end crush map +EOF + +# compile the modified crushmap and set it +crushtool --compile crushmap_modified.txt -o crushmap.bin +ceph osd setcrushmap -i crushmap.bin + +# Set the election strategy to connectivity +ceph mon set election_strategy connectivity + +# peer_crush_bucket_count > 3 datacenters throws Error EPERM +expect_false ceph osd pool stretch set $TEST_POOL_STRETCH 4 3 datacenter $TEST_CRUSH_RULE 6 3 + +# peer_crush_bucket_target > 3 datacenters throws Error EPERM +expect_false ceph osd pool stretch set $TEST_POOL_STRETCH 2 4 datacenter $TEST_CRUSH_RULE 6 3 + +# peer_crush_bucket_target > 3 datacenters success when add --yes-i-really-mean-it flag +expect_true ceph osd pool stretch set $TEST_POOL_STRETCH 2 4 datacenter $TEST_CRUSH_RULE 6 3 --yes-i-really-mean-it + +# pool must be a stretch pool for this command to show anything. +expect_true ceph osd pool stretch set $TEST_POOL_STRETCH 2 3 datacenter $TEST_CRUSH_RULE 6 3 +expect_true ceph osd pool stretch show $TEST_POOL_STRETCH + +# Unset the stretch pool and expects it to work +expect_true ceph osd pool stretch unset $TEST_POOL_STRETCH +# try to show the stretch pool values again, should return error since +# the pool is not a stretch pool anymore. 
+expect_false ceph osd pool stretch show $TEST_POOL_STRETCH + +# cleanup +teardown \ No newline at end of file diff --git a/qa/workunits/mon/rbd_snaps_ops.sh b/qa/workunits/mon/rbd_snaps_ops.sh index eb88565eab9c..0e5b16b7b80b 100755 --- a/qa/workunits/mon/rbd_snaps_ops.sh +++ b/qa/workunits/mon/rbd_snaps_ops.sh @@ -36,6 +36,7 @@ expect 'rbd --pool=test snap ls image' 0 expect 'rbd --pool=test snap rm image@snapshot' 0 expect 'ceph osd pool mksnap test snapshot' 22 +expect 'rados -p test mksnap snapshot' 1 expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0 @@ -52,6 +53,8 @@ expect 'rbd --pool test-foo snap create image@snapshot' 0 ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it || true expect 'ceph osd pool create test-bar 8' 0 expect 'ceph osd pool application enable test-bar rbd' +# "rados cppool" without --yes-i-really-mean-it should fail +expect 'rados cppool test-foo test-bar' 1 expect 'rados cppool test-foo test-bar --yes-i-really-mean-it' 0 expect 'rbd --pool test-bar snap rm image@snapshot' 95 expect 'ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it' 0 diff --git a/qa/workunits/mon/setup_stretch_cluster.sh b/qa/workunits/mon/setup_stretch_cluster.sh new file mode 100755 index 000000000000..618ba7e94e5b --- /dev/null +++ b/qa/workunits/mon/setup_stretch_cluster.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash + +set -ex + +NUM_OSDS_UP=$(ceph osd df | grep "up" | wc -l) + +if [ $NUM_OSDS_UP -lt 8 ]; then + echo "test requires at least 8 OSDs up and running" + exit 1 +fi + +ceph mon set election_strategy connectivity +ceph mon add disallowed_leader e + +for dc in dc1 dc2 + do + ceph osd crush add-bucket $dc datacenter + ceph osd crush move $dc root=default + done + +ceph osd crush add-bucket node-2 host +ceph osd crush add-bucket node-3 host +ceph osd crush add-bucket node-4 host +ceph osd crush add-bucket node-5 host +ceph osd crush add-bucket node-6 host +ceph osd crush add-bucket node-7 host +ceph osd crush add-bucket node-8 host +ceph osd crush add-bucket node-9 host + +ceph osd crush move node-2 datacenter=dc1 +ceph osd crush move node-3 datacenter=dc1 +ceph osd crush move node-4 datacenter=dc1 +ceph osd crush move node-5 datacenter=dc1 + +ceph osd crush move node-6 datacenter=dc2 +ceph osd crush move node-7 datacenter=dc2 +ceph osd crush move node-8 datacenter=dc2 +ceph osd crush move node-9 datacenter=dc2 + +ceph osd crush move osd.0 host=node-2 +ceph osd crush move osd.1 host=node-3 +ceph osd crush move osd.2 host=node-4 +ceph osd crush move osd.3 host=node-5 + +ceph osd crush move osd.4 host=node-6 +ceph osd crush move osd.5 host=node-7 +ceph osd crush move osd.6 host=node-8 +ceph osd crush move osd.7 host=node-9 + + +ceph mon set_location a datacenter=dc1 host=node-2 +ceph mon set_location b datacenter=dc1 host=node-3 +ceph mon set_location c datacenter=dc2 host=node-6 +ceph mon set_location d datacenter=dc2 host=node-7 + +hostname=$(hostname -s) +ceph osd crush remove $hostname || { echo 'command failed' ; exit 1; } +ceph osd getcrushmap > crushmap || { echo 'command failed' ; exit 1; } +crushtool --decompile crushmap > crushmap.txt || { echo 'command failed' ; exit 1; } +sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || { echo 'command failed' ; exit 1; } +cat >> crushmap_modified.txt << EOF +rule stretch_rule { + id 1 + type replicated + step take dc1 + step chooseleaf firstn 2 type host + step emit + step take dc2 + step chooseleaf firstn 2 type host + step emit +} +# rule stretch_rule { +# id 1 +# 
type replicated +# step take default +# step chooseleaf firstn 2 type datacenter +# step chooseleaf firstn 2 type host +# step emit +# } +# end crush map +EOF + +crushtool --compile crushmap_modified.txt -o crushmap.bin || { echo 'command failed' ; exit 1; } +ceph osd setcrushmap -i crushmap.bin || { echo 'command failed' ; exit 1; } +stretched_poolname=stretch_pool +ceph osd pool create $stretched_poolname 32 32 stretch_rule || { echo 'command failed' ; exit 1; } +ceph osd pool set $stretched_poolname size 4 || { echo 'command failed' ; exit 1; } +ceph osd pool application enable $stretched_poolname rados || { echo 'command failed' ; exit 1; } +ceph mon set_location e datacenter=arbiter host=node-1 || { echo 'command failed' ; exit 1; } +ceph mon enable_stretch_mode e stretch_rule datacenter || { echo 'command failed' ; exit 1; } # Enter strech mode diff --git a/qa/workunits/nvmeof/basic_tests.sh b/qa/workunits/nvmeof/basic_tests.sh new file mode 100755 index 000000000000..dc6fd1669da9 --- /dev/null +++ b/qa/workunits/nvmeof/basic_tests.sh @@ -0,0 +1,82 @@ +#!/bin/bash -x + +sudo modprobe nvme-fabrics +sudo modprobe nvme-tcp +sudo dnf reinstall nvme-cli -y +sudo lsmod | grep nvme +nvme version + +source /etc/ceph/nvmeof.env +SPDK_CONTROLLER="Ceph bdev Controller" +DISCOVERY_PORT="8009" + +discovery() { + output=$(sudo nvme discover -t tcp -a $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS -s $DISCOVERY_PORT) + expected_discovery_stdout="subtype: nvme subsystem" + if ! echo "$output" | grep -q "$expected_discovery_stdout"; then + return 1 + fi +} + +connect() { + sudo nvme connect -t tcp --traddr $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS -s $NVMEOF_PORT -n "${NVMEOF_SUBSYSTEMS_PREFIX}1" + sleep 5 + output=$(sudo nvme list --output-format=json) + if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then + return 1 + fi +} + +disconnect_all() { + sudo nvme disconnect-all + output=$(sudo nvme list) + if echo "$output" | grep -q "$SPDK_CONTROLLER"; then + return 1 + fi +} + +connect_all() { + sudo nvme connect-all --traddr=$NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --transport=tcp -l 3600 + sleep 5 + output=$(sudo nvme list --output-format=json) + if ! echo "$output" | grep -q "$SPDK_CONTROLLER"; then + return 1 + fi +} + +list_subsys() { + expected_count=$1 + output=$(sudo nvme list-subsys --output-format=json) + multipath=$(echo $output | grep -o '"tcp"' | wc -l) + if [ "$multipath" -ne "$expected_count" ]; then + return 1 + fi +} + + +test_run() { + echo "[nvmeof] Running test: $1" + $1 "${@:2}" # execute func + if [ $? -eq 0 ]; then + echo "[nvmeof] $1 test passed!" + else + echo "[nvmeof] $1 test failed!" + exit 1 + fi +} + + +test_run disconnect_all +test_run discovery +test_run connect +test_run list_subsys 1 +test_run disconnect_all +test_run list_subsys 0 +test_run connect_all +gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 )) +multipath_count=$(( $gateways_count * $NVMEOF_SUBSYSTEMS_COUNT)) +test_run list_subsys $multipath_count + + +echo "-------------Test Summary-------------" +echo "[nvmeof] All nvmeof basic tests passed!" 
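The final list_subsys check above expects one TCP path per gateway for each
subsystem after connect-all. A minimal sketch of that arithmetic, with
illustrative values only (two gateway addresses, three subsystems; these are
hypothetical, not taken from the test environment):

    # hypothetical example values
    NVMEOF_GATEWAY_IP_ADDRESSES="10.0.0.1,10.0.0.2"
    NVMEOF_SUBSYSTEMS_COUNT=3
    # commas + 1 = number of gateways (2 here)
    gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 ))
    # expected "tcp" entries in "nvme list-subsys": 2 * 3 = 6
    multipath_count=$(( gateways_count * NVMEOF_SUBSYSTEMS_COUNT ))
    echo "$multipath_count"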
diff --git a/qa/workunits/nvmeof/fio_test.sh b/qa/workunits/nvmeof/fio_test.sh new file mode 100755 index 000000000000..57d355a63183 --- /dev/null +++ b/qa/workunits/nvmeof/fio_test.sh @@ -0,0 +1,77 @@ +#!/bin/bash -ex + +sudo yum -y install fio +sudo yum -y install sysstat + +namespace_range_start= +namespace_range_end= +rbd_iostat=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --start_ns) + namespace_range_start=$2 + shift 2 + ;; + --end_ns) + namespace_range_end=$2 + shift 2 + ;; + --rbd_iostat) + rbd_iostat=true + shift + ;; + *) + exit 100 # Internal error + ;; + esac +done + +fio_file=$(mktemp -t nvmeof-fio-XXXX) +all_drives_list=$(sudo nvme list --output-format=json | + jq -r '.Devices | sort_by(.NameSpace) | .[] | select(.ModelNumber == "Ceph bdev Controller") | .DevicePath') + +# When the script is passed --start_ns and --end_ns (example: `nvmeof_fio_test.sh --start_ns 1 --end_ns 3`), +# then fio runs on namespaces only in the defined range (which is 1 to 3 here). +# So if `nvme list` has 5 namespaces with "SPDK Controller", then fio will +# run on first 3 namespaces here. +if [ "$namespace_range_start" ] || [ "$namespace_range_end" ]; then + selected_drives=$(echo "${all_drives_list[@]}" | sed -n "${namespace_range_start},${namespace_range_end}p") +else + selected_drives="${all_drives_list[@]}" +fi + + +RUNTIME=${RUNTIME:-600} + + +cat >> $fio_file < /tmp/gw-conf-original.yaml +sudo /tmp/yq ".spec.enable_auth=true | \ + .spec.root_ca_cert=\"mountcert\" | \ + .spec.client_cert = load_str(\"/etc/ceph/client.crt\") | \ + .spec.client_key = load_str(\"/etc/ceph/client.key\") | \ + .spec.server_cert = load_str(\"/etc/ceph/server.crt\") | \ + .spec.server_key = load_str(\"/etc/ceph/server.key\")" /tmp/gw-conf-original.yaml > /tmp/gw-conf-with-mtls.yaml +cp /tmp/gw-conf-original.yaml /tmp/gw-conf-without-mtls.yaml +sudo /tmp/yq '.spec.enable_auth=false' -i /tmp/gw-conf-without-mtls.yaml + +wait_for_service() { + MAX_RETRIES=30 + for ((RETRY_COUNT=1; RETRY_COUNT<=MAX_RETRIES; RETRY_COUNT++)); do + + if ceph orch ls --refresh | grep -q "nvmeof"; then + echo "Found nvmeof in the output!" + break + fi + if [ $RETRY_COUNT -eq $MAX_RETRIES ]; then + echo "Reached maximum retries ($MAX_RETRIES). Exiting." 
+ break + fi + sleep 5 + done + ceph orch ps + ceph orch ls --refresh +} + +# deploy mtls +cat /tmp/gw-conf-with-mtls.yaml +ceph orch apply -i /tmp/gw-conf-with-mtls.yaml +ceph orch redeploy nvmeof.mypool.mygroup0 +sleep 100 +wait_for_service + + +# test +IFS=',' read -ra gateway_ips <<< "$NVMEOF_GATEWAY_IP_ADDRESSES" +for i in "${!gateway_ips[@]}" +do + ip="${gateway_ips[i]}" + sudo podman run -v /etc/ceph/server.crt:/server.crt:z -v /etc/ceph/client.crt:/client.crt:z \ + -v /etc/ceph/client.key:/client.key:z \ + -it $NVMEOF_CLI_IMAGE --server-address $ip --server-port $NVMEOF_SRPORT \ + --client-key /client.key --client-cert /client.crt --server-cert /server.crt --format json subsystem list +done + + +# remove mtls +cat /tmp/gw-conf-without-mtls.yaml +ceph orch apply -i /tmp/gw-conf-without-mtls.yaml +ceph orch redeploy nvmeof.mypool.mygroup0 +sleep 100 +wait_for_service + + +# test +IFS=',' read -ra gateway_ips <<< "$NVMEOF_GATEWAY_IP_ADDRESSES" +for i in "${!gateway_ips[@]}" +do + ip="${gateway_ips[i]}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $ip --server-port $NVMEOF_SRPORT \ + --format json subsystem list +done + diff --git a/qa/workunits/nvmeof/namespace_test.sh b/qa/workunits/nvmeof/namespace_test.sh new file mode 100755 index 000000000000..ef331fd085b6 --- /dev/null +++ b/qa/workunits/nvmeof/namespace_test.sh @@ -0,0 +1,71 @@ +#!/bin/bash -xe + +# It's assumed in this test that each subsystem has equal number +# of namespaces (i.e. NVMEOF_NAMESPACES_COUNT ns per subsystem). +# This script then adds NEW_NAMESPACES_COUNT amount of namespaces +# to each subsystem and then deletes those new namespaces. + +source /etc/ceph/nvmeof.env + +RBD_POOL="${RBD_POOL:-mypool}" +NEW_IMAGE_SIZE="${RBD_IMAGE_SIZE:-8192}" # 1024*8 +NEW_NAMESPACES_COUNT="${NEW_NAMESPACES_COUNT:-3}" + +gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 )) +new_images_count=$(( $NVMEOF_SUBSYSTEMS_COUNT * $NEW_NAMESPACES_COUNT)) + + +assert_namespaces_count() { + expected_count_per_subsys=$1 + actual_count=$(sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list | + grep namespace_count | grep $expected_count_per_subsys | wc -l) + if [ "$actual_count" -ne "$NVMEOF_SUBSYSTEMS_COUNT" ]; then + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list + echo "Expected count of namepaces not found, expected (per subsystem): $expected_count_per_subsys" + return 1 + fi +} + + +# add rbd images +for i in $(seq 1 $new_images_count); do + image_name="test${i}" + rbd create $RBD_POOL/$image_name --size $NEW_IMAGE_SIZE +done + +# add new namespaces +image_index=1 +for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" + for ns in $(seq 1 $NEW_NAMESPACES_COUNT); do + image="test${image_index}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT namespace add --subsystem $subsystem_nqn --rbd-pool $RBD_POOL --rbd-image $image --load-balancing-group $(($image_index % $gateways_count + 1)) + ((image_index++)) + done +done + +# list namespaces +for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem 
$subsystem_nqn +done + +# verify namespaces added +expected_count_per_subsys=$(( $NEW_NAMESPACES_COUNT + $NVMEOF_NAMESPACES_COUNT )) +assert_namespaces_count $expected_count_per_subsys + +# delete namespaces +for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" + NSIDs=$(sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json namespace list --subsystem $subsystem_nqn | + jq -r '.namespaces[] | select(.rbd_image_name | startswith("test")) | .nsid') + + for nsid in $NSIDs; do + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT namespace del --subsystem $subsystem_nqn --nsid $nsid + done +done + +# verify namespaces deleted +expected_count_per_subsys=$NVMEOF_NAMESPACES_COUNT +assert_namespaces_count $expected_count_per_subsys + diff --git a/qa/workunits/nvmeof/scalability_test.sh b/qa/workunits/nvmeof/scalability_test.sh new file mode 100755 index 000000000000..5a26b6284f74 --- /dev/null +++ b/qa/workunits/nvmeof/scalability_test.sh @@ -0,0 +1,39 @@ +#!/bin/bash -xe + + +GATEWAYS=$1 # exmaple "nvmeof.a,nvmeof.b" +DELAY="${SCALING_DELAYS:-50}" + +if [ -z "$GATEWAYS" ]; then + echo "At least one gateway needs to be defined for scalability test" + exit 1 +fi + +pip3 install yq + +status_checks() { + ceph nvme-gw show mypool '' + ceph orch ls + ceph orch ps + ceph -s +} + + +echo "[nvmeof.scale] Setting up config to remove gateways ${GATEWAYS}" +ceph orch ls nvmeof --export > /tmp/nvmeof-gw.yaml +cat /tmp/nvmeof-gw.yaml +yq "del(.placement.hosts[] | select(. | test(\".*($(echo $GATEWAYS | sed 's/,/|/g'))\")))" /tmp/nvmeof-gw.yaml > /tmp/nvmeof-gw-new.yaml +cat /tmp/nvmeof-gw-new.yaml + +echo "[nvmeof.scale] Starting scale testing by removing ${GATEWAYS}" +status_checks +ceph orch rm nvmeof.mypool && sleep 20 # temp workaround +ceph orch apply -i /tmp/nvmeof-gw-new.yaml # downscale +sleep $DELAY +status_checks +ceph orch rm nvmeof.mypool && sleep 20 # temp workaround +ceph orch apply -i /tmp/nvmeof-gw.yaml #upscale +sleep $DELAY +status_checks + +echo "[nvmeof.scale] Scale testing passed for ${GATEWAYS}" diff --git a/qa/workunits/nvmeof/setup_subsystem.sh b/qa/workunits/nvmeof/setup_subsystem.sh new file mode 100755 index 000000000000..cc4024323eb8 --- /dev/null +++ b/qa/workunits/nvmeof/setup_subsystem.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +set -ex + + +source /etc/ceph/nvmeof.env + +# Set these in job yaml +RBD_POOL="${RBD_POOL:-mypool}" +RBD_IMAGE_PREFIX="${RBD_IMAGE_PREFIX:-myimage}" + +HOSTNAME=$(hostname) +sudo podman images +sudo podman ps +sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format json subsystem list + +IFS=',' read -ra gateway_ips <<< "$NVMEOF_GATEWAY_IP_ADDRESSES" +IFS=',' read -ra gateway_names <<< "$NVMEOF_GATEWAY_NAMES" +gateways_count=$(( $(echo "$NVMEOF_GATEWAY_IP_ADDRESSES" | tr -cd ',' | wc -c) + 1 )) + +list_subsystems () { + for i in "${!gateway_ips[@]}" + do + ip="${gateway_ips[i]}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $ip --server-port $NVMEOF_SRPORT --format json subsystem list + done +} + +# add all subsystems +for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem add --subsystem $subsystem_nqn --no-group-append 
+done + +list_subsystems + +# add all gateway listeners +for i in "${!gateway_ips[@]}" +do + ip="${gateway_ips[i]}" + name="${gateway_names[i]}" + for j in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${j}" + echo "Adding gateway listener $index with IP ${ip} and name ${name}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $ip --server-port $NVMEOF_SRPORT listener add --subsystem $subsystem_nqn --host-name $name --traddr $ip --trsvcid $NVMEOF_PORT + done +done + +# add all hosts +for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT host add --subsystem $subsystem_nqn --host "*" +done + +# add all namespaces +image_index=1 +for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" + for ns in $(seq 1 $NVMEOF_NAMESPACES_COUNT); do + image="${RBD_IMAGE_PREFIX}${image_index}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT namespace add --subsystem $subsystem_nqn --rbd-pool $RBD_POOL --rbd-image $image --load-balancing-group $(($image_index % $gateways_count + 1)) + ((image_index++)) + done +done + +list_subsystems + +# list namespaces +for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do + subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT --format plain namespace list --subsystem $subsystem_nqn +done + + +echo "[nvmeof] Subsystem setup done" diff --git a/qa/workunits/rados/test.sh b/qa/workunits/rados/test.sh index daa25fe4dfd8..5256bd82d06e 100755 --- a/qa/workunits/rados/test.sh +++ b/qa/workunits/rados/test.sh @@ -4,6 +4,10 @@ set -ex parallel=1 [ "$1" = "--serial" ] && parallel=0 +# let crimson run in serial mode +crimson=0 +[ "$1" = "--crimson" ] && parallel=0 && crimson=1 + color="" [ -t 1 ] && color="--gtest_color=yes" @@ -12,6 +16,9 @@ function cleanup() { } trap cleanup EXIT ERR HUP INT QUIT +GTEST_OUTPUT_DIR=${TESTDIR:-$(mktemp -d)}/archive/unit_test_xml_report +mkdir -p $GTEST_OUTPUT_DIR + declare -A pids for f in \ @@ -29,7 +36,6 @@ for f in \ api_service api_service_pp \ api_c_write_operations \ api_c_read_operations \ - api_cls_remote_reads \ list_parallel \ open_pools_parallel \ delete_pools_parallel @@ -37,7 +43,7 @@ do if [ $parallel -eq 1 ]; then r=`printf '%25s' $f` ff=`echo $f | awk '{print $1}'` - bash -o pipefail -exc "ceph_test_rados_$f $color 2>&1 | tee ceph_test_rados_$ff.log | sed \"s/^/$r: /\"" & + bash -o pipefail -exc "ceph_test_rados_$f --gtest_output=xml:$GTEST_OUTPUT_DIR/$f.xml $color 2>&1 | tee ceph_test_rados_$ff.log | sed \"s/^/$r: /\"" & pid=$! echo "test $f on pid $pid" pids[$f]=$pid @@ -46,6 +52,28 @@ do fi done +for f in \ + cls cmd handler_error io ec_io list ec_list misc pool read_operations snapshots \ + watch_notify write_operations +do + if [ $parallel -eq 1 ]; then + r=`printf '%25s' $f` + ff=`echo $f | awk '{print $1}'` + bash -o pipefail -exc "ceph_test_neorados_$f $color 2>&1 | tee ceph_test_neorados_$ff.log | sed \"s/^/$r: /\"" & + pid=$! 
+ echo "test $f on pid $pid" + pids[$f]=$pid + else + if [ $crimson -eq 1 ]; then + if [ $f = "ec_io" ] || [ $f = "ec_list" ]; then + echo "Skipping EC with Crimson" + continue + fi + fi + ceph_test_neorados_$f + fi +done + ret=0 if [ $parallel -eq 1 ]; then for t in "${!pids[@]}" diff --git a/qa/workunits/rados/test_dedup_tool.sh b/qa/workunits/rados/test_dedup_tool.sh index 18deb331b60a..8b04dc6f142a 100755 --- a/qa/workunits/rados/test_dedup_tool.sh +++ b/qa/workunits/rados/test_dedup_tool.sh @@ -34,11 +34,13 @@ if [ -n "$CEPH_BIN" ] ; then RADOS_TOOL="$CEPH_BIN/rados" CEPH_TOOL="$CEPH_BIN/ceph" DEDUP_TOOL="$CEPH_BIN/ceph-dedup-tool" + DEDUP_DAEMON="$CEPH_BIN/ceph-dedup-daemon" else # executables should be installed by the QA env RADOS_TOOL=$(which rados) CEPH_TOOL=$(which ceph) DEDUP_TOOL=$(which ceph-dedup-tool) + DEDUP_DAEMON=$(which ceph-dedup-daemon) fi POOL=dedup_pool @@ -374,7 +376,15 @@ function test_sample_dedup() sleep 2 # Execute dedup crawler - RESULT=$($DEDUP_TOOL --pool $POOL --chunk-pool $CHUNK_POOL --op sample-dedup --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --chunk-dedup-threshold 3 --sampling-ratio 50) + $DEDUP_DAEMON --pool $POOL --chunk-pool $CHUNK_POOL --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --chunk-dedup-threshold 3 --sampling-ratio 50 --run-once + sleep 2 + PID=$(pidof ceph-dedup-daemon) + COUNT=1 + while [ -n "$PID" ] && [ $COUNT -le 30 ]; do + sleep 15 + PID=$(pidof ceph-dedup-daemon) + ((COUNT++)) + done CHUNK_OID_1=$(echo $CONTENT_1 | sha1sum | awk '{print $1}') CHUNK_OID_3=$(echo $CONTENT_3 | sha1sum | awk '{print $1}') @@ -395,6 +405,8 @@ function test_sample_dedup() die "Chunk object has no reference of first meta object" fi + sleep 2 + # 7 Duplicated objects but less than chunk dedup threshold CONTENT_2="There hiHI2" echo $CONTENT_2 > foo2 @@ -404,7 +416,15 @@ function test_sample_dedup() done CHUNK_OID_2=$(echo $CONTENT_2 | sha1sum | awk '{print $1}') - RESULT=$($DEDUP_TOOL --pool $POOL --chunk-pool $CHUNK_POOL --op sample-dedup --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --sampling-ratio 100 --chunk-dedup-threshold 2) + RESULT=$($DEDUP_DAEMON --pool $POOL --chunk-pool $CHUNK_POOL --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --sampling-ratio 100 --chunk-dedup-threshold 2 --max-thread 1 --run-once) + sleep 2 + PID=$(pidof ceph-dedup-daemon) + COUNT=1 + while [ -n "$PID" ] && [ $COUNT -le 30 ]; do + sleep 15 + PID=$(pidof ceph-dedup-daemon) + ((COUNT++)) + done # Objects duplicates less than chunk dedup threshold should be deduplicated because of they satisfies object-dedup-threshold # The only object, which is crawled at the very first, should not be deduplicated because it was not duplicated at initial time @@ -446,11 +466,169 @@ function test_sample_dedup() $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it } +function test_sample_dedup_snap() +{ + CHUNK_POOL=dedup_chunk_pool + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + sleep 2 + + run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_tier "$CHUNK_POOL" + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_chunk_algorithm fastcdc + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_cdc_chunk_size 8192 + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" fingerprint_algorithm sha1 + + # 8 
Dedupable objects + CONTENT_1="There hiHI" + echo $CONTENT_1 > foo + for num in `seq 1 8` + do + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + # 1 Unique object + CONTENT_2="There hiHI3" + echo $CONTENT_2 > foo3 + $RADOS_TOOL -p $POOL put foo3_1 ./foo3 + + $RADOS_TOOL -p $POOL mksnap mysnap + + SNAP_CONTENT="There HIHIHI" + echo $SNAP_CONTENT > foo3_new + $RADOS_TOOL -p $POOL put foo3_1 ./foo3_new + + $RADOS_TOOL -p $POOL mksnap mysnap2 + $RADOS_TOOL -p $POOL put foo3_1 ./foo3_new + + sleep 2 + + # Execute dedup crawler + RESULT=$($DEDUP_DAEMON --pool $POOL --chunk-pool $CHUNK_POOL --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --sampling-ratio 100 --chunk-dedup-threshold 1 --snap --run-once) + sleep 2 + PID=$(pidof ceph-dedup-daemon) + COUNT=1 + while [ -n "$PID" ] && [ $COUNT -le 20 ]; do + sleep 5 + PID=$(pidof ceph-dedup-daemon) + ((COUNT++)) + done + + CHUNK_OID_2=$(echo $CONTENT_2 | sha1sum | awk '{print $1}') + SNAP_CONTENT_OID=$(echo $SNAP_CONTENT | sha1sum | awk '{print $1}') + + # Find chunk object has references of 8 dedupable meta objects + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $SNAP_CONTENT_OID | grep foo3_1) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "There is no expected chunk object" + fi + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_2 | grep foo3_1) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "There is no expected chunk object" + fi + + rm -rf ./foo ./foo3 ./foo3_new + for num in `seq 1 8` + do + $RADOS_TOOL -p $POOL rm foo_$num + done + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it +} + +function test_dedup_memory_limit() +{ + CHUNK_POOL=dedup_chunk_pool + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + sleep 2 + + run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + + # 6 dedupable objects + CONTENT_1="There hiHI" + echo $CONTENT_1 > foo + for num in `seq 1 6` + do + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + # 3 Unique objects + for num in `seq 7 9` + do + CONTENT_="There hiHI"$num + echo $CONTENT_ > foo + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + # 6 dedupable objects + CONTENT_2="There hiHIhi" + echo $CONTENT_2 > foo + for num in `seq 10 15` + do + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + #Since the memory limit is 100 bytes, adding 3 unique objects causes a memory drop, leaving + #the chunk of the 6 dupable objects. If we then add 6 dedupable objects to the pool, + #the crawler should find dedupable chunks because it free memory space through the memory drop before. 
+ # 1 entry == 46 bytes + + sleep 2 + + # Execute dedup crawler + RESULT=$($DEDUP_DAEMON --pool $POOL --chunk-pool $CHUNK_POOL --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --sampling-ratio 100 --chunk-dedup-threshold 2 --run-once) + sleep 2 + PID=$(pidof ceph-dedup-daemon) + COUNT=1 + while [ -n "$PID" ] && [ $COUNT -le 30 ]; do + sleep 15 + PID=$(pidof ceph-dedup-daemon) + ((COUNT++)) + done + + CHUNK_OID_1=$(echo $CONTENT_1 | sha1sum | awk '{print $1}') + CHUNK_OID_2=$(echo $CONTENT_2 | sha1sum | awk '{print $1}') + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_1 | grep foo) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "There is no expected chunk object" + fi + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_2 | grep foo) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "There is no expected chunk object" + fi + + rm -rf ./foo + for num in `seq 1 15` + do + $RADOS_TOOL -p $POOL rm foo_$num + done + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it +} + test_dedup_ratio_fixed test_dedup_chunk_scrub test_dedup_chunk_repair test_dedup_object test_sample_dedup +test_sample_dedup_snap +test_dedup_memory_limit $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it diff --git a/qa/workunits/rados/test_envlibrados_for_rocksdb.sh b/qa/workunits/rados/test_envlibrados_for_rocksdb.sh index 371452f40429..1b7f67aa5347 100755 --- a/qa/workunits/rados/test_envlibrados_for_rocksdb.sh +++ b/qa/workunits/rados/test_envlibrados_for_rocksdb.sh @@ -26,7 +26,7 @@ case $(distro_id) in case $(distro_id) in rhel) # RHEL needs CRB repo for snappy-devel - sudo subscription-manager repos --enable "codeready-builder-for-rhel-8-x86_64-rpms" + sudo dnf config-manager --set-enabled "codeready-builder-for-rhel-8-x86_64-rpms" ;; esac install git gcc-c++.x86_64 snappy-devel zlib zlib-devel bzip2 bzip2-devel libradospp-devel.x86_64 cmake libarchive-3.3.3 @@ -58,7 +58,7 @@ if [ -e rocksdb ]; then fi pushd $(dirname /home/ubuntu/cephtest/clone.client.0/qa/workunits/rados/bash.sh)/../../../ -git submodule update --init src/rocksdb +git submodule update --init --progress src/rocksdb popd git clone $(dirname /home/ubuntu/cephtest/clone.client.0/qa/workunits/rados/bash.sh)/../../../src/rocksdb rocksdb diff --git a/qa/workunits/rados/test_rados_tool.sh b/qa/workunits/rados/test_rados_tool.sh index 9d025eee8aee..9febc4a45248 100755 --- a/qa/workunits/rados/test_rados_tool.sh +++ b/qa/workunits/rados/test_rados_tool.sh @@ -89,7 +89,7 @@ run_expect_nosignal "$RADOS_TOOL" --object-locator "asdf" ls run_expect_nosignal "$RADOS_TOOL" --namespace "asdf" ls run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 -run_expect_succ "$CEPH_TOOL" osd erasure-code-profile set myprofile k=2 m=1 stripe_unit=2K crush-failure-domain=osd --force +run_expect_succ "$CEPH_TOOL" osd erasure-code-profile set myprofile k=2 m=1 stripe_unit=2K crush-failure-domain=osd --force --yes-i-really-mean-it run_expect_succ "$CEPH_TOOL" osd pool create "$POOL_EC" 100 100 erasure myprofile @@ -329,10 +329,10 @@ test_xattr() { expect_false $RADOS_TOOL -p $POOL setxattr $OBJ 2>/dev/null expect_false $RADOS_TOOL -p $POOL setxattr $OBJ foo fooval extraarg 
2>/dev/null $RADOS_TOOL -p $POOL setxattr $OBJ foo fooval - $RADOS_TOOL -p $POOL getxattr $OBJ foo > $V2 + $RADOS_TOOL -p $POOL getxattr $OBJ foo > $V2 | tr -d '\n' > $V2 cmp $V1 $V2 cat $V1 | $RADOS_TOOL -p $POOL setxattr $OBJ bar - $RADOS_TOOL -p $POOL getxattr $OBJ bar > $V2 + $RADOS_TOOL -p $POOL getxattr $OBJ bar > $V2 | tr -d '\n' > $V2 cmp $V1 $V2 $RADOS_TOOL -p $POOL listxattr $OBJ > $V1 grep -q foo $V1 @@ -779,7 +779,7 @@ function test_stat() ############ rados df test (EC pool): ############## $RADOS_TOOL purge $POOL_EC --yes-i-really-really-mean-it $CEPH_TOOL osd pool rm $POOL_EC $POOL_EC --yes-i-really-really-mean-it - $CEPH_TOOL osd erasure-code-profile set myprofile k=2 m=1 stripe_unit=2K crush-failure-domain=osd --force + $CEPH_TOOL osd erasure-code-profile set myprofile k=2 m=1 stripe_unit=2K crush-failure-domain=osd --force --yes-i-really-mean-it $CEPH_TOOL osd pool create $POOL_EC 8 8 erasure # put object diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh index 57279d26dcee..2aa27d3d655c 100755 --- a/qa/workunits/rbd/cli_generic.sh +++ b/qa/workunits/rbd/cli_generic.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash set -ex -. $(dirname $0)/../../standalone/ceph-helpers.sh - export RBD_FORCE_ALLOW_V1=1 # make sure rbd pool is EMPTY.. this is a test script!! @@ -385,19 +383,35 @@ test_clone() { rbd clone test1@s1 rbd2/clone rbd -p rbd2 ls | grep clone rbd -p rbd2 ls -l | grep clone | grep test1@s1 - rbd ls | grep -v clone + test "$(rbd ls)" = 'test1' rbd flatten rbd2/clone rbd snap create rbd2/clone@s1 rbd snap protect rbd2/clone@s1 rbd clone rbd2/clone@s1 clone2 rbd ls | grep clone2 rbd ls -l | grep clone2 | grep rbd2/clone@s1 - rbd -p rbd2 ls | grep -v clone2 + test "$(rbd -p rbd2 ls)" = 'clone' + + rbd clone rbd2/clone clone3 |& grep 'snapshot name was not specified' + rbd clone rbd2/clone@invalid clone3 |& grep 'failed to open parent image' + rbd clone rbd2/clone --snap-id 0 clone3 |& grep 'failed to open parent image' + rbd clone rbd2/clone@invalid --snap-id 0 clone3 |& + grep 'trying to access snapshot using both name and id' + SNAP_ID=$(rbd snap ls rbd2/clone --format json | + jq '.[] | select(.name == "s1") | .id') + rbd clone --snap-id $SNAP_ID rbd2/clone clone3 + rbd ls | grep clone3 + rbd ls -l | grep clone3 | grep rbd2/clone@s1 + test "$(rbd -p rbd2 ls)" = 'clone' + test "$(rbd ls -l | grep -c rbd2/clone@s1)" = '2' + rbd flatten clone3 + test "$(rbd ls -l | grep -c rbd2/clone@s1)" = '1' rbd rm clone2 rbd snap unprotect rbd2/clone@s1 rbd snap rm rbd2/clone@s1 rbd rm rbd2/clone + rbd rm clone3 rbd snap unprotect test1@s1 rbd snap rm test1@s1 rbd rm test1 @@ -432,6 +446,7 @@ test_trash() { rbd trash mv test2 ID=`rbd trash ls | cut -d ' ' -f 1` rbd info --image-id $ID | grep "rbd image 'test2'" + rbd children --image-id $ID | wc -l | grep 0 rbd trash restore $ID rbd ls | grep test2 @@ -449,6 +464,7 @@ test_trash() { rbd create $RBD_CREATE_ARGS -s 1 test1 rbd snap create test1@snap1 rbd snap protect test1@snap1 + rbd clone test1@snap1 clone rbd trash mv test1 rbd trash ls | grep test1 @@ -459,7 +475,10 @@ test_trash() { ID=`rbd trash ls | cut -d ' ' -f 1` rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 1 rbd snap ls --image-id $ID | grep '.*snap1.*' + rbd children --image-id $ID | wc -l | grep 1 + rbd children --image-id $ID | grep 'clone' + rbd rm clone rbd snap unprotect --image-id $ID --snap snap1 rbd snap rm --image-id $ID --snap snap1 rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0 @@ -747,7 +766,9 @@ test_clone_v2() 
{ rbd snap create test1@1 rbd clone --rbd-default-clone-format=1 test1@1 test2 && exit 1 || true rbd clone --rbd-default-clone-format=2 test1@1 test2 - rbd clone --rbd-default-clone-format=2 test1@1 test3 + SNAP_ID=$(rbd snap ls test1 --format json | + jq '.[] | select(.name == "1") | .id') + rbd clone --rbd-default-clone-format=2 --snap-id $SNAP_ID test1 test3 rbd snap protect test1@1 rbd clone --rbd-default-clone-format=1 test1@1 test4 @@ -759,7 +780,7 @@ test_clone_v2() { rbd snap unprotect test1@1 rbd snap remove test1@1 - rbd snap list --all test1 | grep -E "trash \(1\) *$" + rbd snap list --all test1 | grep -E "trash \(user 1\) *$" rbd snap create test1@2 rbd rm test1 2>&1 | grep 'image has snapshots' @@ -912,7 +933,7 @@ get_migration_state() { local image=$1 rbd --format xml status $image | - $XMLSTARLET sel -t -v '//status/migration/state' + xmlstarlet sel -t -v '//status/migration/state' } test_migration() { @@ -1152,14 +1173,14 @@ test_trash_purge_schedule() { for i in `seq 12`; do test "$(rbd trash purge schedule status --format xml | - $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' && break + xmlstarlet sel -t -v '//scheduled/item/pool')" = 'rbd' && break sleep 10 done rbd trash purge schedule status test "$(rbd trash purge schedule status --format xml | - $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + xmlstarlet sel -t -v '//scheduled/item/pool')" = 'rbd' test "$(rbd trash purge schedule status -p rbd --format xml | - $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + xmlstarlet sel -t -v '//scheduled/item/pool')" = 'rbd' rbd trash purge schedule add 2d 00:17 rbd trash purge schedule ls | grep 'every 2d starting at 00:17' @@ -1168,36 +1189,36 @@ test_trash_purge_schedule() { rbd trash purge schedule ls -p rbd2 -R | grep 'every 2d starting at 00:17' rbd trash purge schedule ls -p rbd2/ns1 -R | grep 'every 2d starting at 00:17' test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | - $XMLSTARLET sel -t -v '//schedules/schedule/pool')" = "-" + xmlstarlet sel -t -v '//schedules/schedule/pool')" = "-" test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | - $XMLSTARLET sel -t -v '//schedules/schedule/namespace')" = "-" + xmlstarlet sel -t -v '//schedules/schedule/namespace')" = "-" test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | - $XMLSTARLET sel -t -v '//schedules/schedule/items/item/start_time')" = "00:17:00" + xmlstarlet sel -t -v '//schedules/schedule/items/item/start_time')" = "00:17:00" for i in `seq 12`; do rbd trash purge schedule status --format xml | - $XMLSTARLET sel -t -v '//scheduled/item/pool' | grep 'rbd2' && break + xmlstarlet sel -t -v '//scheduled/item/pool' | grep 'rbd2' && break sleep 10 done rbd trash purge schedule status rbd trash purge schedule status --format xml | - $XMLSTARLET sel -t -v '//scheduled/item/pool' | grep 'rbd2' + xmlstarlet sel -t -v '//scheduled/item/pool' | grep 'rbd2' echo $(rbd trash purge schedule status --format xml | - $XMLSTARLET sel -t -v '//scheduled/item/pool') | grep 'rbd rbd2 rbd2' + xmlstarlet sel -t -v '//scheduled/item/pool') | grep 'rbd rbd2 rbd2' test "$(rbd trash purge schedule status -p rbd --format xml | - $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + xmlstarlet sel -t -v '//scheduled/item/pool')" = 'rbd' test "$(echo $(rbd trash purge schedule status -p rbd2 --format xml | - $XMLSTARLET sel -t -v '//scheduled/item/pool'))" = 'rbd2 rbd2' + xmlstarlet sel -t -v '//scheduled/item/pool'))" = 'rbd2 rbd2' test "$(echo $(rbd trash purge schedule ls 
-R --format xml | - $XMLSTARLET sel -t -v '//schedules/schedule/items'))" = "2d00:17:00 1d01:30:00" + xmlstarlet sel -t -v '//schedules/schedule/items'))" = "2d00:17:00 1d01:30:00" rbd trash purge schedule add 1d rbd trash purge schedule ls | grep 'every 2d starting at 00:17' rbd trash purge schedule ls | grep 'every 1d' rbd trash purge schedule ls -R --format xml | - $XMLSTARLET sel -t -v '//schedules/schedule/items' | grep '2d00:17' + xmlstarlet sel -t -v '//schedules/schedule/items' | grep '2d00:17' rbd trash purge schedule rm 1d rbd trash purge schedule ls | grep 'every 2d starting at 00:17' @@ -1261,7 +1282,6 @@ test_trash_purge_schedule_recovery() { jq 'select(.name == "rbd_support")' | jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') ceph osd blocklist add $CLIENT_ADDR - ceph osd blocklist ls | grep $CLIENT_ADDR # Check that you can add a trash purge schedule after a few retries expect_fail rbd trash purge schedule add -p rbd3 10m @@ -1340,13 +1360,13 @@ test_mirror_snapshot_schedule() { rbd mirror snapshot schedule status test "$(rbd mirror snapshot schedule status --format xml | - $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + xmlstarlet sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' test "$(rbd mirror snapshot schedule status -p rbd2 --format xml | - $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + xmlstarlet sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' test "$(rbd mirror snapshot schedule status -p rbd2/ns1 --format xml | - $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + xmlstarlet sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' test "$(rbd mirror snapshot schedule status -p rbd2/ns1 --image test1 --format xml | - $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + xmlstarlet sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' rbd mirror image demote rbd2/ns1/test1 for i in `seq 12`; do @@ -1420,7 +1440,6 @@ test_mirror_snapshot_schedule_recovery() { jq 'select(.name == "rbd_support")' | jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') ceph osd blocklist add $CLIENT_ADDR - ceph osd blocklist ls | grep $CLIENT_ADDR # Check that you can add a mirror snapshot schedule after a few retries expect_fail rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m @@ -1529,7 +1548,6 @@ test_perf_image_iostat_recovery() { jq 'select(.name == "rbd_support")' | jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') ceph osd blocklist add $CLIENT_ADDR - ceph osd blocklist ls | grep $CLIENT_ADDR expect_fail rbd perf image iostat --format json rbd3/ns sleep 10 @@ -1661,7 +1679,6 @@ test_tasks_recovery() { jq 'select(.name == "rbd_support")' | jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') ceph osd blocklist add $CLIENT_ADDR - ceph osd blocklist ls | grep $CLIENT_ADDR expect_fail ceph rbd task add flatten rbd2/clone1 sleep 10 diff --git a/qa/workunits/rbd/cli_migration.sh b/qa/workunits/rbd/cli_migration.sh index be8e031fd1bc..3af194209577 100755 --- a/qa/workunits/rbd/cli_migration.sh +++ b/qa/workunits/rbd/cli_migration.sh @@ -1,17 +1,20 @@ #!/usr/bin/env bash set -ex -. 
$(dirname $0)/../../standalone/ceph-helpers.sh - TEMPDIR= IMAGE1=image1 IMAGE2=image2 IMAGE3=image3 -IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3}" +NAMESPACE1=namespace1 +NAMESPACE2=namespace2 +NAMESPACES="${NAMESPACE1} ${NAMESPACE2}" +IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3} rbd/${NAMESPACE1}/${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2}" cleanup() { + kill_nbd_server cleanup_tempdir remove_images + remove_namespaces } setup_tempdir() { @@ -22,10 +25,17 @@ cleanup_tempdir() { rm -rf ${TEMPDIR} } +expect_false() { + if "$@"; then return 1; else return 0; fi +} + create_base_image() { local image=$1 - rbd create --size 1G ${image} + # size is not a multiple of object size to trigger an edge case in + # list-snaps + rbd create --size 1025M ${image} + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 256M ${image} rbd snap create ${image}@1 rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 64M ${image} @@ -36,8 +46,11 @@ create_base_image() { export_raw_image() { local image=$1 - rm -rf "${TEMPDIR}/${image}" - rbd export ${image} "${TEMPDIR}/${image}" + # Replace slashes (/) with underscores (_) for namespace images + local export_image="${image//\//_}" + + rm -rf "${TEMPDIR}/${export_image}" + rbd export "${image}" "${TEMPDIR}/${export_image}" } export_base_image() { @@ -63,6 +76,17 @@ remove_images() { done } +remove_namespaces() { + for namespace in ${NAMESPACES} + do + rbd namespace remove rbd/${namespace} || true + done +} + +kill_nbd_server() { + pkill -9 qemu-nbd || true +} + show_diff() { local file1=$1 @@ -80,6 +104,11 @@ compare_images() { local ret=0 export_raw_image ${dst_image} + + # Replace slashes (/) with underscores (_) for namespace images + src_image="${src_image//\//_}" + dst_image="${dst_image//\//_}" + if ! cmp "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}" then show_diff "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}" @@ -89,18 +118,26 @@ compare_images() { } test_import_native_format() { - local base_image=$1 - local dest_image=$2 + local base_image_spec=$1 + local dest_image_spec=$2 + + # if base image is from namespace + local base_namespace="" + local base_image=${base_image_spec} + if [[ "${base_image_spec}" == rbd/*/* ]]; then + base_namespace=$(basename "$(dirname "${base_image_spec}")") + base_image=$(basename "${base_image_spec}") + fi - rbd migration prepare --import-only "rbd/${base_image}@2" ${dest_image} - rbd migration abort ${dest_image} + rbd migration prepare --import-only "${base_image_spec}@2" ${dest_image_spec} + rbd migration abort ${dest_image_spec} local pool_id=$(ceph osd pool ls detail --format xml | xmlstarlet sel -t -v "//pools/pool[pool_name='rbd']/pool_id") cat > ${TEMPDIR}/spec.json < ${TEMPDIR}/spec.json < ${TEMPDIR}/spec.json <&1 | wc -l | grep -v '^0$' && echo "nonempty rbd pool, aborting! run this script on an empty test cluster only." 
&& exit 1 @@ -351,7 +607,25 @@ export_base_image ${IMAGE1} test_import_native_format ${IMAGE1} ${IMAGE2} test_import_qcow_format ${IMAGE1} ${IMAGE2} + test_import_qcow2_format ${IMAGE2} ${IMAGE3} +test_import_nbd_stream_qcow2 ${IMAGE2} ${IMAGE3} + test_import_raw_format ${IMAGE1} ${IMAGE2} +test_import_nbd_stream_raw ${IMAGE1} ${IMAGE2} + +rbd namespace create rbd/${NAMESPACE1} +rbd namespace create rbd/${NAMESPACE2} +create_base_image rbd/${NAMESPACE1}/${IMAGE1} +export_base_image rbd/${NAMESPACE1}/${IMAGE1} + +# Migration from namespace to namespace +test_import_native_format rbd/${NAMESPACE1}/${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2} + +# Migration from namespace to non-namespace +test_import_native_format rbd/${NAMESPACE1}/${IMAGE1} ${IMAGE2} + +# Migration from non-namespace to namespace +test_import_native_format ${IMAGE1} rbd/${NAMESPACE2}/${IMAGE2} echo OK diff --git a/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh new file mode 100755 index 000000000000..78a390230388 --- /dev/null +++ b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +set -ex + +IMAGE=image-alternate-primary +MIRROR_IMAGE_MODE=snapshot +MIRROR_POOL_MODE=image +MOUNT=test-alternate-primary +RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff' +RBD_MIRROR_INSTANCES=1 +RBD_MIRROR_MODE=snapshot +RBD_MIRROR_USE_EXISTING_CLUSTER=1 + +. $(dirname $0)/rbd_mirror_helpers.sh + +take_mirror_snapshots() { + local cluster=$1 + local pool=$2 + local image=$3 + + for i in {1..30}; do + mirror_image_snapshot $cluster $pool $image + sleep 3 + done +} + +slow_untar_workload() { + local mountpt=$1 + + cp linux-5.4.tar.gz $mountpt + # run workload that updates the data and metadata of multiple files on disk. + # rate limit the workload such that the mirror snapshots can be taken as the + # contents of the image are progressively changed by the workload. + local ret=0 + timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \ + | pv -L 256K | tar xf - -C $mountpt" || ret=$? + if ((ret != 124)); then + echo "Workload completed prematurely" + return 1 + fi +} + +setup + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +# initial setup +create_image_and_enable_mirror ${CLUSTER1} ${POOL} ${IMAGE} \ + ${RBD_MIRROR_MODE} 10G + +if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t nbd \ + -o try-netlink ${POOL}/${IMAGE}) +elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t krbd \ + ${POOL}/${IMAGE}) +else + echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}" + exit 1 +fi +sudo mkfs.ext4 ${DEV} +mkdir ${MOUNT} + +wget https://download.ceph.com/qa/linux-5.4.tar.gz + +for i in {1..25}; do + # create mirror snapshots every few seconds under I/O + sudo mount ${DEV} ${MOUNT} + sudo chown $(whoami) ${MOUNT} + rm -rf ${MOUNT}/* + take_mirror_snapshots ${CLUSTER1} ${POOL} ${IMAGE} & + SNAP_PID=$! 
+ slow_untar_workload ${MOUNT} + wait $SNAP_PID + sudo umount ${MOUNT} + + # calculate hash before demotion of primary image + DEMOTE_MD5=$(sudo md5sum ${DEV} | awk '{print $1}') + sudo rbd --cluster ${CLUSTER1} device unmap -t ${RBD_DEVICE_TYPE} ${DEV} + + demote_image ${CLUSTER1} ${POOL} ${IMAGE} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${IMAGE} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${IMAGE} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${IMAGE} + + # calculate hash after promotion of secondary image + if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER2} device map -t nbd \ + -o try-netlink ${POOL}/${IMAGE}) + elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER2} device map -t krbd ${POOL}/${IMAGE}) + fi + PROMOTE_MD5=$(sudo md5sum ${DEV} | awk '{print $1}') + + if [[ "${DEMOTE_MD5}" != "${PROMOTE_MD5}" ]]; then + echo "Mismatch at iteration ${i}: ${DEMOTE_MD5} != ${PROMOTE_MD5}" + exit 1 + fi + + TEMP=${CLUSTER1} + CLUSTER1=${CLUSTER2} + CLUSTER2=${TEMP} +done + +echo OK diff --git a/qa/workunits/rbd/compare_mirror_images.sh b/qa/workunits/rbd/compare_mirror_images.sh new file mode 100755 index 000000000000..342a1ebc4e7b --- /dev/null +++ b/qa/workunits/rbd/compare_mirror_images.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash + +set -ex + +IMG_PREFIX=image-primary +MIRROR_IMAGE_MODE=snapshot +MIRROR_POOL_MODE=image +MNTPT_PREFIX=test-primary +RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff' +RBD_MIRROR_INSTANCES=1 +RBD_MIRROR_MODE=snapshot +RBD_MIRROR_USE_EXISTING_CLUSTER=1 + +. $(dirname $0)/rbd_mirror_helpers.sh + +take_mirror_snapshots() { + local cluster=$1 + local pool=$2 + local image=$3 + + for i in {1..30}; do + mirror_image_snapshot $cluster $pool $image + sleep 3 + done +} + +slow_untar_workload() { + local mountpt=$1 + + cp linux-5.4.tar.gz $mountpt + # run workload that updates the data and metadata of multiple files on disk. + # rate limit the workload such that the mirror snapshots can be taken as the + # contents of the image are progressively changed by the workload. + local ret=0 + timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \ + | pv -L 256K | tar xf - -C $mountpt" || ret=$? + if ((ret != 124)); then + echo "Workload completed prematurely" + return 1 + fi +} + +wait_for_image_removal() { + local cluster=$1 + local pool=$2 + local image=$3 + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + if ! 
rbd --cluster $cluster ls $pool | grep -wq $image; then + return 0 + fi + sleep $s + done + + echo "image ${pool}/${image} not removed from cluster ${cluster}" + return 1 +} + +compare_demoted_promoted_image() { + local dev=${DEVS[$1-1]} + local img=${IMG_PREFIX}$1 + local mntpt=${MNTPT_PREFIX}$1 + local demote_md5 promote_md5 + + sudo umount ${mntpt} + + # calculate hash before demotion of primary image + demote_md5=$(sudo md5sum ${dev} | awk '{print $1}') + sudo rbd --cluster ${CLUSTER1} device unmap -t ${RBD_DEVICE_TYPE} \ + ${POOL}/${img} + + demote_image ${CLUSTER1} ${POOL} ${img} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${img} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${img} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${img} + + # calculate hash after promotion of secondary image + if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then + dev=$(sudo rbd --cluster ${CLUSTER2} device map -t nbd \ + -o try-netlink ${POOL}/${img}) + elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then + dev=$(sudo rbd --cluster ${CLUSTER2} device map -t krbd ${POOL}/${img}) + fi + promote_md5=$(sudo md5sum ${dev} | awk '{print $1}') + sudo rbd --cluster ${CLUSTER2} device unmap -t ${RBD_DEVICE_TYPE} ${dev} + + if [[ "${demote_md5}" != "${promote_md5}" ]]; then + echo "Mismatch for image ${POOL}/${img}: ${demote_md5} != ${promote_md5}" + return 1 + fi +} + +setup + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +wget https://download.ceph.com/qa/linux-5.4.tar.gz + +for i in {1..10}; do + DEVS=() + SNAP_PIDS=() + COMPARE_PIDS=() + WORKLOAD_PIDS=() + RET=0 + for j in {1..10}; do + IMG=${IMG_PREFIX}${j} + MNTPT=${MNTPT_PREFIX}${j} + create_image_and_enable_mirror ${CLUSTER1} ${POOL} ${IMG} \ + ${RBD_MIRROR_MODE} 10G + if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t nbd \ + -o try-netlink ${POOL}/${IMG}) + elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t krbd \ + ${POOL}/${IMG}) + else + echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}" + exit 1 + fi + DEVS+=($DEV) + sudo mkfs.ext4 ${DEV} + mkdir ${MNTPT} + sudo mount ${DEV} ${MNTPT} + sudo chown $(whoami) ${MNTPT} + # create mirror snapshots under I/O every few seconds + take_mirror_snapshots ${CLUSTER1} ${POOL} ${IMG} & + SNAP_PIDS+=($!) + slow_untar_workload ${MNTPT} & + WORKLOAD_PIDS+=($!) + done + for pid in ${SNAP_PIDS[@]}; do + wait $pid || RET=$? + done + if ((RET != 0)); then + echo "take_mirror_snapshots failed" + exit 1 + fi + for pid in ${WORKLOAD_PIDS[@]}; do + wait $pid || RET=$? + done + if ((RET != 0)); then + echo "slow_untar_workload failed" + exit 1 + fi + + for j in {1..10}; do + compare_demoted_promoted_image $j & + COMPARE_PIDS+=($!) + done + for pid in ${COMPARE_PIDS[@]}; do + wait $pid || RET=$? + done + if ((RET != 0)); then + echo "compare_demoted_promoted_image failed" + exit 1 + fi + + for j in {1..10}; do + IMG=${IMG_PREFIX}${j} + # Allow for removal of non-primary image by checking that mirroring + # image status is "up+replaying" + wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${IMG} + remove_image ${CLUSTER2} ${POOL} ${IMG} + wait_for_image_removal ${CLUSTER1} ${POOL} ${IMG} + rm -rf ${MNTPT_PREFIX}${j} + done +done + +echo OK diff --git a/qa/workunits/rbd/journal.sh b/qa/workunits/rbd/journal.sh index ba89e75c9264..7652a2742430 100755 --- a/qa/workunits/rbd/journal.sh +++ b/qa/workunits/rbd/journal.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash set -e -. 
$(dirname $0)/../../standalone/ceph-helpers.sh - function list_tests() { echo "AVAILABLE TESTS" @@ -45,7 +43,7 @@ test_rbd_journal() rbd create --image-feature exclusive-lock --image-feature journaling \ --size 128 ${image} local journal=$(rbd info ${image} --format=xml 2>/dev/null | - $XMLSTARLET sel -t -v "//image/journal") + xmlstarlet sel -t -v "//image/journal") test -n "${journal}" rbd journal info ${journal} rbd journal info --journal ${journal} @@ -54,14 +52,14 @@ test_rbd_journal() rbd feature disable ${image} journaling rbd info ${image} --format=xml 2>/dev/null | - expect_false $XMLSTARLET sel -t -v "//image/journal" + expect_false xmlstarlet sel -t -v "//image/journal" expect_false rbd journal info ${journal} expect_false rbd journal info --image ${image} rbd feature enable ${image} journaling local journal1=$(rbd info ${image} --format=xml 2>/dev/null | - $XMLSTARLET sel -t -v "//image/journal") + xmlstarlet sel -t -v "//image/journal") test "${journal}" = "${journal1}" rbd journal info ${journal} @@ -89,7 +87,7 @@ test_rbd_journal() rbd create --image-feature exclusive-lock --image-feature journaling \ --size 128 ${image1} journal1=$(rbd info ${image1} --format=xml 2>/dev/null | - $XMLSTARLET sel -t -v "//image/journal") + xmlstarlet sel -t -v "//image/journal") save_commit_position ${journal1} rbd journal import --dest ${image1} $TMPDIR/journal.export @@ -130,7 +128,7 @@ rbd_assert_eq() { local expected_val=$4 local val=$(rbd --format xml ${cmd} --image ${image} | - $XMLSTARLET sel -t -v "${param}") + xmlstarlet sel -t -v "${param}") test "${val}" = "${expected_val}" } diff --git a/qa/workunits/rbd/krbd_data_pool.sh b/qa/workunits/rbd/krbd_data_pool.sh index 8eada88bb704..94520f17308f 100755 --- a/qa/workunits/rbd/krbd_data_pool.sh +++ b/qa/workunits/rbd/krbd_data_pool.sh @@ -146,14 +146,14 @@ for pool in rbd rbdnonzero; do done done -# rbd_directory, rbd_children, rbd_info + img0 header + ... -NUM_META_RBDS=$((3 + 1 + 3 * (1*2 + 3*2))) -# rbd_directory, rbd_children, rbd_info + ... -NUM_META_CLONESONLY=$((3 + 2 * 3 * (3*2))) +# rbd_directory, rbd_children, rbd_info + rbd_trash + img0 header + ... +NUM_META_RBDS=$((4 + 1 + 3 * (1*2 + 3*2))) +# rbd_directory, rbd_children, rbd_info + rbd_trash + ... 
+NUM_META_CLONESONLY=$((4 + 2 * 3 * (3*2))) [[ $(rados -p rbd ls | wc -l) -eq $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] -[[ $(rados -p repdata ls | wc -l) -eq $((1 + 14 * NUM_OBJECTS)) ]] -[[ $(rados -p ecdata ls | wc -l) -eq $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(rados -p repdata ls | wc -l) -eq $((2 + 14 * NUM_OBJECTS)) ]] +[[ $(rados -p ecdata ls | wc -l) -eq $((2 + 14 * NUM_OBJECTS)) ]] [[ $(rados -p rbdnonzero ls | wc -l) -eq $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] [[ $(rados -p clonesonly ls | wc -l) -eq $((NUM_META_CLONESONLY + 6 * NUM_OBJECTS)) ]] @@ -192,8 +192,8 @@ done # mkfs_and_mount should discard some objects everywhere but in clonesonly [[ $(list_HEADs rbd | wc -l) -lt $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] -[[ $(list_HEADs repdata | wc -l) -lt $((1 + 14 * NUM_OBJECTS)) ]] -[[ $(list_HEADs ecdata | wc -l) -lt $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(list_HEADs repdata | wc -l) -lt $((2 + 14 * NUM_OBJECTS)) ]] +[[ $(list_HEADs ecdata | wc -l) -lt $((2 + 14 * NUM_OBJECTS)) ]] [[ $(list_HEADs rbdnonzero | wc -l) -lt $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] [[ $(list_HEADs clonesonly | wc -l) -eq $((NUM_META_CLONESONLY + 6 * NUM_OBJECTS)) ]] diff --git a/qa/workunits/rbd/krbd_watch_errors_exclusive.sh b/qa/workunits/rbd/krbd_watch_errors_exclusive.sh new file mode 100755 index 000000000000..e0b9586ec66f --- /dev/null +++ b/qa/workunits/rbd/krbd_watch_errors_exclusive.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -ex +set -o pipefail + +readonly IMAGE_NAME="watch-errors-exclusive-test" + +rbd create -s 1G --image-feature exclusive-lock,object-map "${IMAGE_NAME}" + +# induce a watch error every 30 seconds +dev="$(sudo rbd device map -o exclusive,osdkeepalive=60 "${IMAGE_NAME}")" +dev_id="${dev#/dev/rbd}" + +sudo dmesg -C + +# test that a workload doesn't encounter EIO errors +fio --name test --filename="${dev}" --ioengine=libaio --direct=1 \ + --rw=randwrite --norandommap --randrepeat=0 --bs=512 --iodepth=128 \ + --time_based --runtime=1h --eta=never + +num_errors="$(dmesg | grep -c "rbd${dev_id}: encountered watch error")" +echo "Recorded ${num_errors} watch errors" + +sudo rbd device unmap "${dev}" + +if ((num_errors < 60)); then + echo "Too few watch errors" + exit 1 +fi + +echo OK diff --git a/qa/workunits/rbd/luks-encryption.sh b/qa/workunits/rbd/luks-encryption.sh index 5d3cc68cdf34..b6305cb46c6c 100755 --- a/qa/workunits/rbd/luks-encryption.sh +++ b/qa/workunits/rbd/luks-encryption.sh @@ -2,7 +2,7 @@ set -ex CEPH_ID=${CEPH_ID:-admin} -TMP_FILES="/tmp/passphrase /tmp/passphrase2 /tmp/testdata1 /tmp/testdata2 /tmp/cmpdata" +TMP_FILES="/tmp/passphrase /tmp/passphrase1 /tmp/passphrase2 /tmp/testdata1 /tmp/testdata2 /tmp/cmpdata /tmp/rawexport /tmp/export.qcow2" _sudo() { @@ -32,7 +32,6 @@ function expect_false() { function test_encryption_format() { local format=$1 - clean_up_cryptsetup # format rbd encryption format testimg $format /tmp/passphrase @@ -40,19 +39,17 @@ function test_encryption_format() { # open encryption with cryptsetup sudo cryptsetup open $RAW_DEV --type luks cryptsetupdev -d /tmp/passphrase - sudo chmod 666 /dev/mapper/cryptsetupdev # open encryption with librbd LIBRBD_DEV=$(_sudo rbd -p rbd map testimg -t nbd -o encryption-passphrase-file=/tmp/passphrase) - sudo chmod 666 $LIBRBD_DEV # write via librbd && compare - dd if=/tmp/testdata1 of=$LIBRBD_DEV oflag=direct bs=1M + dd if=/tmp/testdata1 of=$LIBRBD_DEV conv=fsync bs=1M dd if=/dev/mapper/cryptsetupdev of=/tmp/cmpdata iflag=direct bs=4M count=4 cmp -n 16MB /tmp/cmpdata /tmp/testdata1 # write via cryptsetup && 
compare - dd if=/tmp/testdata2 of=/dev/mapper/cryptsetupdev oflag=direct bs=1M + dd if=/tmp/testdata2 of=/dev/mapper/cryptsetupdev conv=fsync bs=1M dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=4M count=4 cmp -n 16MB /tmp/cmpdata /tmp/testdata2 @@ -68,13 +65,12 @@ function test_encryption_format() { (( $(sudo blockdev --getsize64 $LIBRBD_DEV) == (32 << 20) )) _sudo rbd device unmap -t nbd $LIBRBD_DEV + sudo cryptsetup close cryptsetupdev } function test_clone_encryption() { - clean_up_cryptsetup - # write 1MB plaintext - dd if=/tmp/testdata1 of=$RAW_DEV oflag=direct bs=1M count=1 + dd if=/tmp/testdata1 of=$RAW_DEV conv=fsync bs=1M count=1 # clone (luks1) rbd snap create testimg@snap @@ -84,10 +80,9 @@ function test_clone_encryption() { # open encryption with librbd, write one more MB, close LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1 -t nbd -o encryption-format=luks1,encryption-passphrase-file=/tmp/passphrase) - sudo chmod 666 $LIBRBD_DEV - dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=1M count=1 + dd if=$LIBRBD_DEV of=/tmp/cmpdata bs=1M count=1 cmp -n 1MB /tmp/cmpdata /tmp/testdata1 - dd if=/tmp/testdata1 of=$LIBRBD_DEV seek=1 skip=1 oflag=direct bs=1M count=1 + dd if=/tmp/testdata1 of=$LIBRBD_DEV seek=1 skip=1 conv=fsync bs=1M count=1 _sudo rbd device unmap -t nbd $LIBRBD_DEV # second clone (luks2) @@ -98,10 +93,9 @@ function test_clone_encryption() { # open encryption with librbd, write one more MB, close LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-format=luks2,encryption-passphrase-file=/tmp/passphrase2,encryption-format=luks1,encryption-passphrase-file=/tmp/passphrase) - sudo chmod 666 $LIBRBD_DEV - dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=1M count=2 + dd if=$LIBRBD_DEV of=/tmp/cmpdata bs=1M count=2 cmp -n 2MB /tmp/cmpdata /tmp/testdata1 - dd if=/tmp/testdata1 of=$LIBRBD_DEV seek=2 skip=2 oflag=direct bs=1M count=1 + dd if=/tmp/testdata1 of=$LIBRBD_DEV seek=2 skip=2 conv=fsync bs=1M count=1 _sudo rbd device unmap -t nbd $LIBRBD_DEV # flatten @@ -111,10 +105,17 @@ function test_clone_encryption() { # verify with cryptsetup RAW_FLAT_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd) sudo cryptsetup open $RAW_FLAT_DEV --type luks cryptsetupdev -d /tmp/passphrase2 - sudo chmod 666 /dev/mapper/cryptsetupdev - dd if=/dev/mapper/cryptsetupdev of=/tmp/cmpdata iflag=direct bs=1M count=3 + dd if=/dev/mapper/cryptsetupdev of=/tmp/cmpdata bs=1M count=3 cmp -n 3MB /tmp/cmpdata /tmp/testdata1 + sudo cryptsetup close cryptsetupdev _sudo rbd device unmap -t nbd $RAW_FLAT_DEV + + rbd rm testimg2 + rbd snap unprotect testimg1@snap + rbd snap rm testimg1@snap + rbd rm testimg1 + rbd snap unprotect testimg@snap + rbd snap rm testimg@snap } function test_clone_and_load_with_a_single_passphrase { @@ -149,10 +150,253 @@ function test_plaintext_detection { test_clone_and_load_with_a_single_passphrase true # no luks header - dd if=/dev/zero of=$RAW_DEV oflag=direct bs=4M count=8 + dd if=/dev/zero of=$RAW_DEV conv=fsync bs=4M count=8 test_clone_and_load_with_a_single_passphrase false } +function test_migration_read_and_copyup() { + cp /tmp/testdata2 /tmp/cmpdata + + # test reading + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1 -t nbd -o encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/cmpdata + + # trigger copyup at the beginning and at the end + xfs_io -c 'pwrite -S 0xab -W 0 4k' $LIBRBD_DEV /tmp/cmpdata + xfs_io -c 'pwrite -S 0xba -W 4095k 4k' $LIBRBD_DEV /tmp/cmpdata + + cmp $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # test reading 
on a fresh mapping + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1 -t nbd -o encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # test reading on a fresh mapping after migration is executed + rbd migration execute testimg1 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1 -t nbd -o encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # test reading on a fresh mapping after migration is committed + rbd migration commit testimg1 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1 -t nbd -o encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV +} + +function test_migration_native_with_snaps() { + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1@snap1 -t nbd -o encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/testdata1 + _sudo rbd device unmap -t nbd $LIBRBD_DEV + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1@snap2 -t nbd -o encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/testdata2 + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + test_migration_read_and_copyup + + # check that snapshots aren't affected by copyups + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1@snap1 -t nbd -o encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/testdata1 + _sudo rbd device unmap -t nbd $LIBRBD_DEV + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1@snap2 -t nbd -o encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/testdata2 + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + rbd snap rm testimg1@snap2 + rbd snap rm testimg1@snap1 + rbd rm testimg1 +} + +function test_migration() { + local format=$1 + + rbd encryption format testimg $format /tmp/passphrase + + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg -t nbd -o encryption-passphrase-file=/tmp/passphrase) + dd if=/tmp/testdata1 of=$LIBRBD_DEV conv=fsync bs=1M + rbd snap create testimg@snap1 + dd if=/tmp/testdata2 of=$LIBRBD_DEV conv=fsync bs=1M + rbd snap create testimg@snap2 + # FIXME: https://tracker.ceph.com/issues/67401 + # leave HEAD with the same data as snap2 as a workaround + # dd if=/tmp/testdata3 of=$LIBRBD_DEV conv=fsync bs=1M + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # live import a raw image + rbd export testimg /tmp/rawexport + rbd migration prepare --import-only --source-spec '{"type": "raw", "stream": {"type": "file", "file_path": "/tmp/rawexport"}}' testimg1 + test_migration_read_and_copyup + rbd rm testimg1 + + # live import a qcow image + qemu-img convert -f raw -O qcow2 /tmp/rawexport /tmp/export.qcow2 + rbd migration prepare --import-only --source-spec '{"type": "qcow", "stream": {"type": "file", "file_path": "/tmp/export.qcow2"}}' testimg1 + test_migration_read_and_copyup + rbd rm testimg1 + + # live import a native image + rbd migration prepare --import-only testimg@snap2 testimg1 + test_migration_native_with_snaps + + # live migrate a native image (removes testimg) + rbd migration prepare testimg testimg1 + test_migration_native_with_snaps + + rm /tmp/rawexport /tmp/export.qcow2 +} + +function test_migration_clone() { + local format=$1 + + truncate -s 0 /tmp/cmpdata + truncate -s 32M /tmp/cmpdata + + rbd encryption format testimg $format /tmp/passphrase + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg -t nbd -o encryption-passphrase-file=/tmp/passphrase) + xfs_io -c 'pwrite -S 0xaa -W 4M 1M' $LIBRBD_DEV /tmp/cmpdata + xfs_io -c 'pwrite -S 0xaa -W 14M 1M' $LIBRBD_DEV /tmp/cmpdata + xfs_io -c 'pwrite -S 0xaa 
-W 25M 1M' $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + rbd snap create testimg@snap + rbd snap protect testimg@snap + rbd clone testimg@snap testimg1 + + rbd encryption format testimg1 $format /tmp/passphrase2 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase) + xfs_io -c 'pwrite -S 0xbb -W 2M 1M' $LIBRBD_DEV /tmp/cmpdata + xfs_io -c 'pwrite -S 0xbb -W 19M 1M' $LIBRBD_DEV /tmp/cmpdata + xfs_io -c 'pwrite -S 0xbb -W 28M 1M' $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # FIXME: https://tracker.ceph.com/issues/67402 + rbd config image set testimg1 rbd_sparse_read_threshold_bytes 1 + + # live migrate a native clone image (removes testimg1) + rbd migration prepare testimg1 testimg2 + + # test reading + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/cmpdata + + # trigger copyup for an unwritten area + xfs_io -c 'pwrite -S 0xcc -W 24167k 4k' $LIBRBD_DEV /tmp/cmpdata + + # trigger copyup for areas written in testimg (parent) + xfs_io -c 'pwrite -S 0xcc -W 4245k 4k' $LIBRBD_DEV /tmp/cmpdata + xfs_io -c 'pwrite -S 0xcc -W 13320k 4k' $LIBRBD_DEV /tmp/cmpdata + + # trigger copyup for areas written in testimg1 (clone) + xfs_io -c 'pwrite -S 0xcc -W 2084k 4k' $LIBRBD_DEV /tmp/cmpdata + xfs_io -c 'pwrite -S 0xcc -W 32612k 4k' $LIBRBD_DEV /tmp/cmpdata + + cmp $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # test reading on a fresh mapping + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # test reading on a fresh mapping after migration is executed + rbd migration execute testimg2 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # test reading on a fresh mapping after migration is committed + rbd migration commit testimg2 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase) + cmp $LIBRBD_DEV /tmp/cmpdata + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + rbd rm testimg2 + rbd snap unprotect testimg@snap + rbd snap rm testimg@snap + rbd rm testimg +} + +function test_migration_open_clone_chain() { + rbd create --size 32M testimg + rbd encryption format testimg luks1 /tmp/passphrase + rbd snap create testimg@snap + rbd snap protect testimg@snap + + rbd clone testimg@snap testimg1 + rbd encryption format testimg1 luks2 /tmp/passphrase1 + rbd snap create testimg1@snap + rbd snap protect testimg1@snap + + rbd clone testimg1@snap testimg2 + rbd encryption format testimg2 luks1 /tmp/passphrase2 + + # 1. X <-- X <-- X + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase1,encryption-passphrase-file=/tmp/passphrase) + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # 2. 
X <-- X <-- migrating + rbd migration prepare testimg2 testimg2 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase1,encryption-passphrase-file=/tmp/passphrase) + _sudo rbd device unmap -t nbd $LIBRBD_DEV + rbd migration abort testimg2 + + # 3. X <-- migrating <-- X + rbd migration prepare testimg1 testimg1 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase1,encryption-passphrase-file=/tmp/passphrase) + _sudo rbd device unmap -t nbd $LIBRBD_DEV + rbd migration abort testimg1 + + # 4. migrating <-- X <-- X + rbd migration prepare testimg testimg + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase1,encryption-passphrase-file=/tmp/passphrase) + _sudo rbd device unmap -t nbd $LIBRBD_DEV + rbd migration abort testimg + + # 5. migrating <-- migrating <-- X + rbd migration prepare testimg testimg + rbd migration prepare testimg1 testimg1 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase1,encryption-passphrase-file=/tmp/passphrase) + _sudo rbd device unmap -t nbd $LIBRBD_DEV + rbd migration abort testimg1 + rbd migration abort testimg + + # 6. migrating <-- X <-- migrating + rbd migration prepare testimg testimg + rbd migration prepare testimg2 testimg2 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase1,encryption-passphrase-file=/tmp/passphrase) + _sudo rbd device unmap -t nbd $LIBRBD_DEV + rbd migration abort testimg2 + rbd migration abort testimg + + # 7. X <-- migrating <-- migrating + rbd migration prepare testimg1 testimg1 + rbd migration prepare testimg2 testimg2 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase1,encryption-passphrase-file=/tmp/passphrase) + _sudo rbd device unmap -t nbd $LIBRBD_DEV + rbd migration abort testimg2 + rbd migration abort testimg1 + + # 8. 
migrating <-- migrating <-- migrating + rbd migration prepare testimg testimg + rbd migration prepare testimg1 testimg1 + rbd migration prepare testimg2 testimg2 + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-passphrase-file=/tmp/passphrase2,encryption-passphrase-file=/tmp/passphrase1,encryption-passphrase-file=/tmp/passphrase) + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + rbd migration abort testimg2 + rbd rm testimg2 + rbd migration abort testimg1 + rbd snap unprotect testimg1@snap + rbd snap rm testimg1@snap + rbd rm testimg1 + rbd migration abort testimg + rbd snap unprotect testimg@snap + rbd snap rm testimg@snap + rbd rm testimg +} + function get_nbd_device_paths { rbd device list -t nbd | tail -n +2 | egrep "\s+rbd\s+testimg" | awk '{print $5;}' } @@ -168,10 +412,17 @@ function clean_up { _sudo rbd device unmap -t nbd $device done + rbd migration abort testimg2 || true rbd remove testimg2 || true + rbd migration abort testimg1 || true + rbd snap remove testimg1@snap2 || true + rbd snap remove testimg1@snap1 || true rbd snap unprotect testimg1@snap || true rbd snap remove testimg1@snap || true rbd remove testimg1 || true + rbd migration abort testimg || true + rbd snap remove testimg@snap2 || true + rbd snap remove testimg@snap1 || true rbd snap unprotect testimg@snap || true rbd snap remove testimg@snap || true rbd remove testimg || true @@ -198,6 +449,7 @@ dd if=/dev/urandom of=/tmp/testdata2 bs=4M count=4 # create passphrase files printf "pass\0word\n" > /tmp/passphrase +printf " passwo\nrd 1,1" > /tmp/passphrase1 printf "\t password2 " > /tmp/passphrase2 # create an image @@ -205,7 +457,6 @@ rbd create testimg --size=32M # map raw data to nbd device RAW_DEV=$(_sudo rbd -p rbd map testimg -t nbd) -sudo chmod 666 $RAW_DEV test_plaintext_detection @@ -214,4 +465,21 @@ test_encryption_format luks2 test_clone_encryption +_sudo rbd device unmap -t nbd $RAW_DEV +rbd rm testimg + +rbd create --size 20M testimg +test_migration luks1 + +rbd create --size 32M testimg +test_migration luks2 + +rbd create --size 36M testimg +test_migration_clone luks1 + +rbd create --size 48M testimg +test_migration_clone luks2 + +test_migration_open_clone_chain + echo OK diff --git a/qa/workunits/rbd/rbd-ggate.sh b/qa/workunits/rbd/rbd-ggate.sh index 1bf89da382c2..d1dd00e4e2d3 100755 --- a/qa/workunits/rbd/rbd-ggate.sh +++ b/qa/workunits/rbd/rbd-ggate.sh @@ -7,15 +7,6 @@ SIZE=64 DATA= DEV= -if which xmlstarlet > /dev/null 2>&1; then - XMLSTARLET=xmlstarlet -elif which xml > /dev/null 2>&1; then - XMLSTARLET=xml -else - echo "Missing xmlstarlet binary!" 
- exit 1 -fi - if [ `uname -K` -ge 1200078 ] ; then RBD_GGATE_RESIZE_SUPPORTED=1 fi @@ -148,16 +139,16 @@ _sudo sync echo trim test provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/provisioned_size" -v .` used=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/used_size" -v .` [ "${used}" -eq "${provisioned}" ] _sudo newfs -E ${DEV} _sudo sync provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/provisioned_size" -v .` used=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/used_size" -v .` [ "${used}" -lt "${provisioned}" ] echo resize test diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh index 122df3d6f35a..1f9acd144926 100755 --- a/qa/workunits/rbd/rbd-nbd.sh +++ b/qa/workunits/rbd/rbd-nbd.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash set -ex -. $(dirname $0)/../../standalone/ceph-helpers.sh - POOL=rbd ANOTHER_POOL=new_default_pool$$ NS=ns @@ -105,7 +103,7 @@ function get_pid() local pool=$1 local ns=$2 - PID=$(rbd device --device-type nbd --format xml list | $XMLSTARLET sel -t -v \ + PID=$(rbd device --device-type nbd --format xml list | xmlstarlet sel -t -v \ "//devices/device[pool='${pool}'][namespace='${ns}'][image='${IMAGE}'][device='${DEV}']/id") test -n "${PID}" || return 1 ps -p ${PID} -C rbd-nbd @@ -172,17 +170,17 @@ unmap_device ${DEV} ${PID} DEV=`_sudo rbd device --device-type nbd --options notrim map ${POOL}/${IMAGE}` get_pid ${POOL} provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/provisioned_size" -v .` used=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/used_size" -v .` [ "${used}" -eq "${provisioned}" ] # should fail discard as at time of mapping notrim was used expect_false _sudo blkdiscard ${DEV} sync provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/provisioned_size" -v .` used=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/used_size" -v .` [ "${used}" -eq "${provisioned}" ] unmap_device ${DEV} ${PID} @@ -190,20 +188,24 @@ unmap_device ${DEV} ${PID} DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` get_pid ${POOL} provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/provisioned_size" -v .` used=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/used_size" -v .` [ "${used}" -eq "${provisioned}" ] # should honor discard as at time of mapping trim was considered by default _sudo blkdiscard ${DEV} sync provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` + xmlstarlet sel -t -m 
"//stats/images/image/provisioned_size" -v .` used=`rbd -p ${POOL} --format xml du ${IMAGE} | - $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` + xmlstarlet sel -t -m "//stats/images/image/used_size" -v .` [ "${used}" -lt "${provisioned}" ] +unmap_device ${DEV} ${PID} # resize test +# also test that try-netlink option is accepted for compatibility +DEV=`_sudo rbd device -t nbd -o try-netlink map ${POOL}/${IMAGE}` +get_pid ${POOL} devname=$(basename ${DEV}) blocks=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) test -n "${blocks}" @@ -216,9 +218,9 @@ rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) test -n "${blocks2}" test ${blocks2} -eq ${blocks} +unmap_device ${DEV} ${PID} # read-only option test -unmap_device ${DEV} ${PID} DEV=`_sudo rbd --device-type nbd map --read-only ${POOL}/${IMAGE}` PID=$(rbd device --device-type nbd list | awk -v pool=${POOL} -v img=${IMAGE} -v dev=${DEV} \ '$2 == pool && $3 == img && $5 == dev {print $1}') @@ -388,7 +390,7 @@ cat ${LOG_FILE} expect_false grep 'quiesce failed' ${LOG_FILE} # test detach/attach -OUT=`_sudo rbd device --device-type nbd --options try-netlink,show-cookie map ${POOL}/${IMAGE}` +OUT=`_sudo rbd device --device-type nbd --show-cookie map ${POOL}/${IMAGE}` read DEV COOKIE <<< "${OUT}" get_pid ${POOL} _sudo mount ${DEV} ${TEMPDIR}/mnt @@ -416,7 +418,7 @@ _sudo umount ${TEMPDIR}/mnt unmap_device ${DEV} ${PID} # if kernel supports cookies if [ -n "${COOKIE}" ]; then - OUT=`_sudo rbd device --device-type nbd --show-cookie --cookie "abc de" --options try-netlink map ${POOL}/${IMAGE}` + OUT=`_sudo rbd device --device-type nbd --show-cookie --cookie "abc de" map ${POOL}/${IMAGE}` read DEV ANOTHER_COOKIE <<< "${OUT}" get_pid ${POOL} test "${ANOTHER_COOKIE}" = "abc de" @@ -426,7 +428,7 @@ DEV= # test detach/attach with --snap-id SNAPID=`rbd snap ls ${POOL}/${IMAGE} | awk '$2 == "snap" {print $1}'` -OUT=`_sudo rbd device --device-type nbd --options try-netlink,show-cookie map --snap-id ${SNAPID} ${POOL}/${IMAGE}` +OUT=`_sudo rbd device --device-type nbd --show-cookie map --snap-id ${SNAPID} ${POOL}/${IMAGE}` read DEV COOKIE <<< "${OUT}" get_pid ${POOL} _sudo rbd device detach ${POOL}/${IMAGE} --snap-id ${SNAPID} --device-type nbd @@ -472,6 +474,16 @@ DEV= rbd feature disable ${POOL}/${IMAGE} journaling rbd config image rm ${POOL}/${IMAGE} rbd_discard_granularity_bytes +# test that disabling a feature so that the op is proxied to rbd-nbd +# (arranged here by blkdiscard before "rbd feature disable") doesn't hang +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +rbd feature enable ${POOL}/${IMAGE} journaling +_sudo blkdiscard --offset 0 --length 4096 ${DEV} +rbd feature disable ${POOL}/${IMAGE} journaling +unmap_device ${DEV} ${PID} +DEV= + # test that rbd_op_threads setting takes effect EXPECTED=`ceph-conf --show-config-value librados_thread_count` DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` diff --git a/qa/workunits/rbd/rbd_groups.sh b/qa/workunits/rbd/rbd_groups.sh index a3261848441f..ee3cb5067406 100755 --- a/qa/workunits/rbd/rbd_groups.sh +++ b/qa/workunits/rbd/rbd_groups.sh @@ -25,7 +25,7 @@ list_groups() check_group_exists() { local group_name=$1 - list_groups | grep $group_name + list_groups | grep -w $group_name } remove_group() @@ -165,7 +165,7 @@ check_snapshot_in_group() { local group_name=$1 local snap_name=$2 - list_snapshots $group_name | grep $snap_name + list_snapshots 
$group_name | grep -w $snap_name } check_snapshots_count_in_group() @@ -182,12 +182,60 @@ check_snapshot_not_in_group() { local group_name=$1 local snap_name=$2 - for v in $(list_snapshots $group_name | awk '{print $1}'); do - if [ "$v" = "$snap_name" ]; then - return 1 - fi - done - return 0 + + check_group_exists $group_name || return 1 + ! check_snapshot_in_group $group_name $snap_name +} + +check_snap_id_in_list_snapshots() +{ + local group_name=$1 + local snap_name=$2 + + local snap_id_in_info=$( + rbd group snap info $group_name@$snap_name --format=json | + jq -r '.id') + [[ -n "$snap_id_in_info" ]] || return 1 + + local snap_id_in_list=$( + rbd group snap ls $group_name --format=json | + jq --arg snap_name $snap_name -r ' + .[] | select(.snapshot == $snap_name) | .id') + test "$snap_id_in_list" = "$snap_id_in_info" +} + +check_snapshot_info() +{ + local group_name=$1 + local snap_name=$2 + local image_count=$3 + + local snap_info_json=$( + rbd group snap info $group_name@$snap_name --format=json) + local actual_snap_name=$(jq -r ".name" <<< "$snap_info_json") + test "$actual_snap_name" = "$snap_name" || return 1 + + local snap_state=$(jq -r ".state" <<< "$snap_info_json") + test "$snap_state" = "complete" || return 1 + + local actual_image_count=$(jq '.images | length' <<< "$snap_info_json") + test "$actual_image_count" = "$image_count" || return 1 + + local image_snap_name=$(jq -r '.image_snap_name' <<< "$snap_info_json") + local snap_info=$(rbd group snap info $group_name@$snap_name) + local snap_state=$(grep -w 'state:' <<< "$snap_info" | tr -d '\t') + test "$snap_state" = "state: complete" || return 1 + local image_snap_field=$(grep -w 'image snap:' <<< "$snap_info") + local images_field=$(grep -w 'images:' <<< "$snap_info") + if ((image_count != 0)); then + test -n "$image_snap_name" || return 1 + test -n "$image_snap_field" || return 1 + test -n "$images_field" || return 1 + else + test -z "$image_snap_name" || return 1 + test -z "$image_snap_field" || return 1 + test -z "$images_field" || return 1 + fi } echo "TEST: create remove consistency group" @@ -217,23 +265,24 @@ echo "PASSED" echo "TEST: create remove snapshots of consistency group" image="test_image" group="test_consistency_group" -snap="group_snap" -new_snap="new_group_snap" -sec_snap="group_snap2" +snaps=("group_snap1" "group_snap2" "group_snap3" "group_snap4") create_image $image create_group $group +create_snapshot $group ${snaps[0]} +check_snapshot_info $group ${snaps[0]} 0 add_image_to_group $image $group -create_snapshot $group $snap -check_snapshot_in_group $group $snap -rename_snapshot $group $snap $new_snap -check_snapshot_not_in_group $group $snap -create_snapshot $group $sec_snap -check_snapshot_in_group $group $sec_snap -rollback_snapshot $group $new_snap -remove_snapshot $group $new_snap -check_snapshot_not_in_group $group $new_snap -remove_snapshot $group $sec_snap -check_snapshot_not_in_group $group $sec_snap +create_snapshot $group ${snaps[1]} +check_snapshot_info $group ${snaps[1]} 1 +rename_snapshot $group ${snaps[1]} ${snaps[2]} +check_snapshot_info $group ${snaps[2]} 1 +check_snapshot_not_in_group $group ${snaps[1]} +create_snapshot $group ${snaps[3]} +check_snapshot_in_group $group ${snaps[3]} +rollback_snapshot $group ${snaps[2]} +remove_snapshot $group ${snaps[2]} +check_snapshot_not_in_group $group ${snaps[2]} +remove_snapshot $group ${snaps[3]} +check_snapshot_not_in_group $group ${snaps[3]} remove_group $group remove_image $image echo "PASSED" @@ -247,6 +296,7 @@ create_group $group 
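For reference, the check_snapshot_info helper introduced above boils down to a couple of jq assertions against the JSON form of "rbd group snap info". A minimal standalone sketch of the same check (an illustration only, assuming an mstart/vstart-style environment with jq on PATH, and using the group and first snapshot name from this test, taken before any image is added):

    info=$(rbd group snap info test_consistency_group@group_snap1 --format=json)
    # the group snapshot must be fully taken
    test "$(jq -r '.state' <<< "$info")" = "complete"
    # and, with no image in the group yet, it must carry no image snapshots
    test "$(jq '.images | length' <<< "$info")" = "0"
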
add_image_to_group $image $group create_snapshots $group $snap 10 check_snapshots_count_in_group $group $snap 10 +check_snap_id_in_list_snapshots $group ${snap}1 remove_snapshots $group $snap 10 create_snapshots $group $snap 100 check_snapshots_count_in_group $group $snap 100 diff --git a/qa/workunits/rbd/rbd_mirror.sh b/qa/workunits/rbd/rbd_mirror.sh new file mode 100755 index 000000000000..90d5204b92fe --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror.sh @@ -0,0 +1,715 @@ +#!/usr/bin/env bash +# +# rbd_mirror.sh - test rbd-mirror daemon in snapshot or journal mirroring mode +# +# Usage: +# RBD_MIRROR_MODE=journal rbd_mirror.sh +# +# Use environment variable RBD_MIRROR_MODE to set the mode +# Available modes: snapshot | journal +# +# The scripts starts two ("local" and "remote") clusters using mstart.sh script, +# creates a temporary directory, used for cluster configs, daemon logs, admin +# socket, temporary files, and launches rbd-mirror daemon. +# + +set -ex + +if [ "${#}" -gt 0 ]; then + echo "unnecessary arguments: ${@}" + exit 100 +fi + +if [ "${RBD_MIRROR_MODE}" != "snapshot" ] && [ "${RBD_MIRROR_MODE}" != "journal" ]; then + echo "unknown mode: ${RBD_MIRROR_MODE}" + echo "set RBD_MIRROR_MODE env variable, available modes: snapshot | journal" + exit 100 +fi + +. $(dirname $0)/rbd_mirror_helpers.sh +setup + +testlog "TEST: add image and test replay" +start_mirrors ${CLUSTER1} +image=test +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${RBD_MIRROR_MODE} +set_image_meta ${CLUSTER2} ${POOL} ${image} "key1" "value1" +set_image_meta ${CLUSTER2} ${POOL} ${image} "key2" "value2" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'down+unknown' +fi +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key1" "value1" +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key2" "value2" + +testlog "TEST: stop mirror, add image, start mirror and test replay" +stop_mirrors ${CLUSTER1} +image1=test1 +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image1} ${RBD_MIRROR_MODE} +write_image ${CLUSTER2} ${POOL} ${image1} 100 +start_mirrors ${CLUSTER1} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image1} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image1} 'down+unknown' +fi +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image1} + +testlog "TEST: test the first image is replaying after restart" +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + testlog "TEST: stop/start/restart mirror via admin socket" + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir 
${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror start + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror start ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + + all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + flush ${CLUSTER1} + all_admin_daemons ${CLUSTER1} rbd mirror status +fi + +remove_image_retry ${CLUSTER2} ${POOL} ${image1} + +testlog "TEST: test image rename" +new_name="${image}_RENAMED" +rename_image 
${CLUSTER2} ${POOL} ${image} ${new_name} +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + mirror_image_snapshot ${CLUSTER2} ${POOL} ${new_name} +fi +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +admin_daemons ${CLUSTER1} rbd mirror status ${POOL}/${new_name} +admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +rename_image ${CLUSTER2} ${POOL} ${new_name} ${image} +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + mirror_image_snapshot ${CLUSTER2} ${POOL} ${image} +fi +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: test trash move restore" +image_id=$(get_image_id ${CLUSTER2} ${POOL} ${image}) +trash_move ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +trash_restore ${CLUSTER2} ${POOL} ${image} ${image_id} ${RBD_MIRROR_MODE} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${RBD_MIRROR_MODE} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + +testlog "TEST: failover and failback" +start_mirrors ${CLUSTER2} + +# demote and promote same cluster +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + +# failover (unmodified) +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + +# failback (unmodified) +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + +# failover +demote_image ${CLUSTER2} ${POOL} ${image} 
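The demote_image/promote_image helpers driving these failover sequences wrap the standard rbd mirror CLI; stripped of the retries and status polling, a manual failover and failback between the two test clusters is roughly the following sketch (assuming the cluster1/cluster2 setup from rbd_mirror_helpers.sh and an image that is currently primary on cluster2):

    # fail over: demote the current primary, then promote the peer's copy
    rbd --cluster cluster2 mirror image demote ${POOL}/${image}
    rbd --cluster cluster1 mirror image promote ${POOL}/${image}
    # after writing on the new primary, fail back by reversing the roles
    rbd --cluster cluster1 mirror image demote ${POOL}/${image}
    rbd --cluster cluster2 mirror image promote ${POOL}/${image}
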
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} +write_image ${CLUSTER1} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + +# failback +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + +testlog "TEST: failover / failback loop" +for i in `seq 1 20`; do + demote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' + demote_image ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +done +# check that demote (or other mirror snapshots) don't pile up +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + test "$(count_mirror_snaps ${CLUSTER1} ${POOL} ${image})" -le 3 + test "$(count_mirror_snaps ${CLUSTER2} ${POOL} ${image})" -le 3 +fi + +testlog "TEST: force promote" +force_promote_image=test_force_promote +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} ${RBD_MIRROR_MODE} +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${force_promote_image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force' +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} 
${force_promote_image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} +remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} + +testlog "TEST: cloned images" +testlog " - default" +parent_image=test_parent +parent_snap=snap +create_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${RBD_MIRROR_MODE} +write_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} 100 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +protect_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +clone_image=test_clone +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image} +write_image ${CLUSTER2} ${POOL} ${clone_image} 100 +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + enable_mirror ${CLUSTER2} ${POOL} ${clone_image} ${RBD_MIRROR_MODE} +else + enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${RBD_MIRROR_MODE} +fi +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} ${parent_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${PARENT_POOL} ${parent_image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} ${parent_image} +compare_images ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${PARENT_POOL} ${parent_image} + +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${clone_image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${clone_image} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} + +testlog " - clone v1" +clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} \ + ${parent_image} ${parent_snap} ${POOL} ${clone_image}1 \ + ${RBD_MIRROR_MODE} +clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} \ + ${parent_image} ${parent_snap} ${POOL} ${clone_image}_v1 \ + ${RBD_MIRROR_MODE} --rbd-default-clone-format 1 +test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 +unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2" +parent_snap=snap_v2 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +fi +clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} \ + ${parent_image} ${parent_snap} ${POOL} ${clone_image}_v2 \ + ${RBD_MIRROR_MODE} --rbd-default-clone-format 2 +test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v2) = 2 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v2 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v2) = 2 + +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + 
mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +fi +test_snap_moved_to_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v2 +wait_for_image_present ${CLUSTER1} ${POOL} ${clone_image}_v2 'deleted' +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2 non-primary" +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +fi +wait_for_snap_present ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} \ + ${parent_image} ${parent_snap} ${POOL} ${clone_image}_v2 \ + ${RBD_MIRROR_MODE} --rbd-default-clone-format 2 +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +fi +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} + +testlog "TEST: data pool" +dp_image=test_data_pool +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${dp_image} \ + ${RBD_MIRROR_MODE} 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${dp_image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${dp_image}@snap1 +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${dp_image}@snap2 +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} + +testlog "TEST: disable mirroring / delete non-primary image" +image2=test2 +image3=test3 +image4=test4 +image5=test5 +for i in ${image2} ${image3} ${image4} ${image5}; do + create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${i} ${RBD_MIRROR_MODE} + write_image ${CLUSTER2} ${POOL} ${i} 100 + create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + if [ "${i}" = "${image4}" ] || [ "${i}" = "${image5}" ]; then + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + fi + write_image ${CLUSTER2} ${POOL} ${i} 100 + if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + mirror_image_snapshot ${CLUSTER2} ${POOL} ${i} + fi + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' + wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' +done + +set_pool_mirror_mode 
${CLUSTER2} ${POOL} 'image' +for i in ${image2} ${image4}; do + disable_mirror ${CLUSTER2} ${POOL} ${i} +done + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap2' +for i in ${image3} ${image5}; do + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +for i in ${image2} ${image3} ${image4} ${image5}; do + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' +done + +if [ "${RBD_MIRROR_MODE}" = "journal" ]; then + set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' + for i in ${image2} ${image4}; do + enable_journaling ${CLUSTER2} ${POOL} ${i} + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' + wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${i} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${i} + compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${i} + done + + testlog "TEST: remove mirroring pool" + pool=pool_to_remove + for cluster in ${CLUSTER1} ${CLUSTER2}; do + CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${pool} 16 16 + CEPH_ARGS='' rbd --cluster ${cluster} pool init ${pool} + rbd --cluster ${cluster} mirror pool enable ${pool} pool + done + peer_add ${CLUSTER1} ${pool} ${CLUSTER2} + peer_add ${CLUSTER2} ${pool} ${CLUSTER1} + rdp_image=test_remove_data_pool + create_image ${CLUSTER2} ${pool} ${image} 128 + create_image ${CLUSTER2} ${POOL} ${rdp_image} 128 --data-pool ${pool} + write_image ${CLUSTER2} ${pool} ${image} 100 + write_image ${CLUSTER2} ${POOL} ${rdp_image} 100 + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${pool} ${pool} ${image} + wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${pool} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${rdp_image} + wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${rdp_image} + for cluster in ${CLUSTER1} ${CLUSTER2}; do + CEPH_ARGS='' ceph --cluster ${cluster} osd pool rm ${pool} ${pool} --yes-i-really-really-mean-it + done + remove_image_retry ${CLUSTER2} ${POOL} ${rdp_image} + wait_for_image_present ${CLUSTER1} ${POOL} ${rdp_image} 'deleted' + for i in 0 1 2 4 8 8 8 8 16 16; do + sleep $i + admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} || break + done + admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} && false +fi + +testlog "TEST: snapshot rename" +snap_name='snap_rename' +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + enable_mirror ${CLUSTER2} ${POOL} ${image2} +fi +create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0" +for i in `seq 1 20`; do + rename_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_$(expr ${i} - 1)" "${snap_name}_${i}" +done +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + mirror_image_snapshot ${CLUSTER2} ${POOL} ${image2} +fi +wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' +for i in ${image2} ${image4}; do + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +testlog "TEST: disable mirror while daemon is stopped" +stop_mirrors ${CLUSTER1} +stop_mirrors ${CLUSTER2} +if [ "${RBD_MIRROR_MODE}" = "journal" ]; then + set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' +fi +disable_mirror ${CLUSTER2} ${POOL} ${image} +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + test_image_present ${CLUSTER1} ${POOL} ${image} 'present' 
+fi +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then + enable_mirror ${CLUSTER2} ${POOL} ${image} +else + set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' + enable_journaling ${CLUSTER2} ${POOL} ${image} +fi +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: non-default namespace image mirroring" +testlog " - replay" +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS1} ${image} ${RBD_MIRROR_MODE} +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} ${RBD_MIRROR_MODE} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS2} ${image} +write_image ${CLUSTER2} ${POOL}/${NS1} ${image} 100 +write_image ${CLUSTER2} ${POOL}/${NS2} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${POOL}/${NS1} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS2} ${POOL}/${NS2} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS2} ${image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${POOL}/${NS1} ${image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS2} ${POOL}/${NS2} ${image} + +testlog " - disable mirroring / delete image" +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} +disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' +wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' +remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} + +testlog "TEST: mirror to a different remote namespace" +testlog " - replay" +NS3=ns3 +NS4=ns4 +rbd --cluster ${CLUSTER1} namespace create ${POOL}/${NS3} +rbd --cluster ${CLUSTER2} namespace create ${POOL}/${NS4} +rbd --cluster ${CLUSTER1} mirror pool enable ${POOL}/${NS3} ${MIRROR_POOL_MODE} --remote-namespace ${NS4} +rbd --cluster ${CLUSTER2} mirror pool enable ${POOL}/${NS4} ${MIRROR_POOL_MODE} --remote-namespace ${NS3} +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS4} ${image} ${RBD_MIRROR_MODE} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS3} ${image} +write_image ${CLUSTER2} ${POOL}/${NS4} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS3} ${POOL}/${NS4} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS3} ${image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS3} ${POOL}/${NS4} ${image} + +testlog " - disable mirroring and re-enable without remote-namespace" +remove_image_retry ${CLUSTER2} ${POOL}/${NS4} ${image} +wait_for_image_present ${CLUSTER1} ${POOL}/${NS3} ${image} 'deleted' +rbd --cluster ${CLUSTER1} mirror pool disable ${POOL}/${NS3} +rbd --cluster ${CLUSTER2} mirror pool disable ${POOL}/${NS4} +rbd --cluster ${CLUSTER2} namespace create ${POOL}/${NS3} +rbd --cluster ${CLUSTER2} mirror pool enable ${POOL}/${NS3} ${MIRROR_POOL_MODE} +rbd --cluster ${CLUSTER1} mirror pool enable ${POOL}/${NS3} ${MIRROR_POOL_MODE} +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS3} ${image} ${RBD_MIRROR_MODE} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS3} ${image} +write_image ${CLUSTER2} ${POOL}/${NS3} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS3} ${POOL}/${NS3} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS3} ${image} 
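The namespace remapping exercised earlier in this test block hinges on the --remote-namespace option when enabling pool-level mirroring: images created in cluster2's ns4 are expected to appear in cluster1's ns3. A condensed recap of that pairing, as a sketch using the names this test creates:

    # pair local namespace ns3 on cluster1 with remote namespace ns4 on cluster2,
    # and vice versa, so replay crosses namespaces in both directions
    rbd --cluster cluster1 mirror pool enable ${POOL}/ns3 ${MIRROR_POOL_MODE} --remote-namespace ns4
    rbd --cluster cluster2 mirror pool enable ${POOL}/ns4 ${MIRROR_POOL_MODE} --remote-namespace ns3
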
+compare_images ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS3} ${POOL}/${NS3} ${image} +remove_image_retry ${CLUSTER2} ${POOL}/${NS3} ${image} +wait_for_image_present ${CLUSTER1} ${POOL}/${NS3} ${image} 'deleted' +rbd --cluster ${CLUSTER1} mirror pool disable ${POOL}/${NS3} +rbd --cluster ${CLUSTER2} mirror pool disable ${POOL}/${NS3} + +testlog " - data pool" +dp_image=test_data_pool +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS1} ${dp_image} ${RBD_MIRROR_MODE} 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${POOL}/${NS1} ${dp_image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${POOL}/${NS1} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} + +testlog "TEST: simple image resync" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + testlog "TEST: image resync while replayer is stopped" + admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + request_resync_image ${CLUSTER1} ${POOL} ${image} image_id + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} + compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +fi + +testlog "TEST: request image resync while daemon is offline" +stop_mirrors ${CLUSTER1} +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} +compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +if [ "${RBD_MIRROR_MODE}" = "journal" ]; then + testlog "TEST: client disconnect" + image=laggy + create_image ${CLUSTER2} ${POOL} ${image} 128 --journal-object-size 64K + write_image ${CLUSTER2} ${POOL} ${image} 10 + + testlog " - replay stopped after disconnect" + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + disconnect_image ${CLUSTER2} ${POOL} ${image} + test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} 
${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' + + testlog " - replay started after resync requested" + request_resync_image ${CLUSTER1} ${POOL} ${image} image_id + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + + testlog " - disconnected after max_concurrent_object_sets reached" + if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_journal_max_concurrent_object_sets 1 + write_image ${CLUSTER2} ${POOL} ${image} 20 16384 + write_image ${CLUSTER2} ${POOL} ${image} 20 16384 + test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_journal_max_concurrent_object_sets 0 + + testlog " - replay is still stopped (disconnected) after restart" + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' + fi + + testlog " - replay started after resync requested" + request_resync_image ${CLUSTER1} ${POOL} ${image} image_id + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + + testlog " - rbd_mirroring_resync_after_disconnect config option" + set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_mirroring_resync_after_disconnect true + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + image_id=$(get_image_id ${CLUSTER1} ${POOL} ${image}) + disconnect_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_mirroring_resync_after_disconnect false + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} + disconnect_image ${CLUSTER2} ${POOL} ${image} + test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' + remove_image_retry ${CLUSTER2} ${POOL} ${image} +fi + +testlog "TEST: split-brain" +image=split-brain +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${RBD_MIRROR_MODE} +wait_for_replaying_status_in_pool_dir 
${CLUSTER1} ${POOL} ${image} +promote_image ${CLUSTER1} ${POOL} ${image} --force +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${image} 10 +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed" +start_mirrors ${CLUSTER2} +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + # teuthology will trash the daemon + testlog "TEST: no blocklists" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries" +fi diff --git a/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/qa/workunits/rbd/rbd_mirror_bootstrap.sh index 6ef06f2b82cb..3ddb0aa219b7 100755 --- a/qa/workunits/rbd/rbd_mirror_bootstrap.sh +++ b/qa/workunits/rbd/rbd_mirror_bootstrap.sh @@ -1,8 +1,10 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_bootstrap.sh - test peer bootstrap create/import # +set -ex + RBD_MIRROR_MANUAL_PEERS=1 RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1} . $(dirname $0)/rbd_mirror_helpers.sh @@ -36,7 +38,8 @@ create_image_and_enable_mirror ${CLUSTER1} ${POOL} image1 wait_for_image_replay_started ${CLUSTER2} ${POOL} image1 write_image ${CLUSTER1} ${POOL} image1 100 -wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} image1 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${POOL} image1 +wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${POOL} image1 testlog "TEST: verify rx-tx direction" # both rx-tx peers are added immediately by "rbd mirror pool peer bootstrap import" @@ -51,8 +54,45 @@ enable_mirror ${CLUSTER2} ${PARENT_POOL} image2 wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL} image1 write_image ${CLUSTER1} ${PARENT_POOL} image1 100 -wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL} image1 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL} ${PARENT_POOL} image1 +wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${PARENT_POOL} image1 wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} image2 write_image ${CLUSTER2} ${PARENT_POOL} image2 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} image2 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${PARENT_POOL} image2 +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} image2 + +testlog "TEST: pool replayer and callout cleanup when peer is updated" +test_health_state ${CLUSTER1} ${PARENT_POOL} 'OK' +test_health_state ${CLUSTER2} ${PARENT_POOL} 'OK' +POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${PARENT_POOL}) +jq -e '.summary.health == "OK"' <<< ${POOL_STATUS} +jq -e '.summary.daemon_health == "OK"' <<< ${POOL_STATUS} +jq -e '.daemons[0].health == "OK"' <<< ${POOL_STATUS} +jq -e '.daemons[0] | has("callouts") | not' <<< ${POOL_STATUS} +OLD_SERVICE_ID=$(jq -r '.daemons[0].service_id' <<< ${POOL_STATUS}) +OLD_INSTANCE_ID=$(jq -r '.daemons[0].instance_id' <<< ${POOL_STATUS}) +# mess up the peer on one of the clusters by setting a bogus user name +PEER_UUID=$(rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool 
info --format json | jq -r '.peers[0].uuid') +rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool peer set ${PEER_UUID} client client.invalid +wait_for_health_state ${CLUSTER2} ${PARENT_POOL} 'ERROR' +test_health_state ${CLUSTER1} ${PARENT_POOL} 'WARNING' +POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${PARENT_POOL}) +jq -e '.summary.health == "ERROR"' <<< ${POOL_STATUS} +jq -e '.summary.daemon_health == "ERROR"' <<< ${POOL_STATUS} +jq -e '.daemons[0].health == "ERROR"' <<< ${POOL_STATUS} +jq -e '.daemons[0].callouts == ["unable to connect to remote cluster"]' <<< ${POOL_STATUS} +# restore the correct user name +rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool peer set ${PEER_UUID} client client.rbd-mirror-peer +wait_for_health_state ${CLUSTER2} ${PARENT_POOL} 'OK' +test_health_state ${CLUSTER1} ${PARENT_POOL} 'OK' +POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${PARENT_POOL}) +jq -e '.summary.health == "OK"' <<< ${POOL_STATUS} +jq -e '.summary.daemon_health == "OK"' <<< ${POOL_STATUS} +jq -e '.daemons[0].health == "OK"' <<< ${POOL_STATUS} +jq -e '.daemons[0] | has("callouts") | not' <<< ${POOL_STATUS} +NEW_SERVICE_ID=$(jq -r '.daemons[0].service_id' <<< ${POOL_STATUS}) +NEW_INSTANCE_ID=$(jq -r '.daemons[0].instance_id' <<< ${POOL_STATUS}) +# check that we are running the same service (daemon) but a newer pool replayer +((OLD_SERVICE_ID == NEW_SERVICE_ID)) +((OLD_INSTANCE_ID < NEW_INSTANCE_ID)) diff --git a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh index 0ba3c97d7519..79c36546d4fb 100755 --- a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh +++ b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh @@ -1,10 +1,12 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload # # The script is used to compare FSX-generated images between two clusters. # +set -ex + . $(dirname $0)/rbd_mirror_helpers.sh trap 'cleanup $?' INT TERM EXIT diff --git a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh index d988987ba42a..6daadbbb4501 100755 --- a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh +++ b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh @@ -1,10 +1,12 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload # # The script is used to compare FSX-generated images between two clusters. # +set -ex + . $(dirname $0)/rbd_mirror_helpers.sh setup diff --git a/qa/workunits/rbd/rbd_mirror_ha.sh b/qa/workunits/rbd/rbd_mirror_ha.sh index 37739a83da02..e5a086b82ab8 100755 --- a/qa/workunits/rbd/rbd_mirror_ha.sh +++ b/qa/workunits/rbd/rbd_mirror_ha.sh @@ -1,8 +1,10 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode # +set -ex + RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-7} . 
$(dirname $0)/rbd_mirror_helpers.sh @@ -69,7 +71,7 @@ test_replay() wait_for_image_replay_started ${CLUSTER1}:${LEADER} ${POOL} ${image} write_image ${CLUSTER2} ${POOL} ${image} 100 wait_for_replay_complete ${CLUSTER1}:${LEADER} ${CLUSTER2} ${POOL} \ - ${image} + ${POOL} ${image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' \ 'primary_position' \ "${MIRROR_USER_ID_PREFIX}${LEADER} on $(hostname -s)" @@ -77,7 +79,7 @@ test_replay() wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} \ 'down+unknown' fi - compare_images ${POOL} ${image} + compare_images ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} done } diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh index f4961b925e6f..1b1436db74d7 100755 --- a/qa/workunits/rbd/rbd_mirror_helpers.sh +++ b/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash # # rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions # @@ -24,7 +24,7 @@ # The cleanup can be done as a separate step, running the script with # `cleanup ${RBD_MIRROR_TEMDIR}' arguments. # -# Note, as other workunits tests, rbd_mirror_journal.sh expects to find ceph binaries +# Note, as other workunits tests, rbd_mirror_helpers.sh expects to find ceph binaries # in PATH. # # Thus a typical troubleshooting session: @@ -35,7 +35,7 @@ # cd $CEPH_SRC_PATH # PATH=$CEPH_SRC_PATH:$PATH # RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ -# ../qa/workunits/rbd/rbd_mirror_journal.sh +# RBD_MIRROR_MODE=journal ../qa/workunits/rbd/rbd_mirror.sh # # After the test failure cd to TEMPDIR and check the current state: # @@ -49,32 +49,29 @@ # ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help # ... # +# To setup the environment without actually running the tests: +# +# cd $CEPH_SRC_PATH +# RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ +# ../qa/workunits/rbd/rbd_mirror_helpers.sh setup +# # Also you can execute commands (functions) from the script: # # cd $CEPH_SRC_PATH # export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror -# ../qa/workunits/rbd/rbd_mirror_journal.sh status -# ../qa/workunits/rbd/rbd_mirror_journal.sh stop_mirror cluster1 -# ../qa/workunits/rbd/rbd_mirror_journal.sh start_mirror cluster2 -# ../qa/workunits/rbd/rbd_mirror_journal.sh flush cluster2 +# ../qa/workunits/rbd/rbd_mirror_helpers.sh status +# ../qa/workunits/rbd/rbd_mirror_helpers.sh stop_mirror cluster1 +# ../qa/workunits/rbd/rbd_mirror_helpers.sh start_mirror cluster2 +# ../qa/workunits/rbd/rbd_mirror_helpers.sh flush cluster2 # ... # # Eventually, run the cleanup: # # cd $CEPH_SRC_PATH # RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ -# ../qa/workunits/rbd/rbd_mirror_journal.sh cleanup +# ../qa/workunits/rbd/rbd_mirror_helpers.sh cleanup # -if type xmlstarlet > /dev/null 2>&1; then - XMLSTARLET=xmlstarlet -elif type xml > /dev/null 2>&1; then - XMLSTARLET=xml -else - echo "Missing xmlstarlet binary!" 
-    exit 1
-fi
-
 RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-2}
 CLUSTER1=cluster1
@@ -88,8 +85,11 @@ TEMPDIR=
 CEPH_ID=${CEPH_ID:-mirror}
 RBD_IMAGE_FEATURES=${RBD_IMAGE_FEATURES:-layering,exclusive-lock,journaling}
 MIRROR_USER_ID_PREFIX=${MIRROR_USER_ID_PREFIX:-${CEPH_ID}.}
+RBD_MIRROR_MODE=${RBD_MIRROR_MODE:-journal}
 MIRROR_POOL_MODE=${MIRROR_POOL_MODE:-pool}
-MIRROR_IMAGE_MODE=${MIRROR_IMAGE_MODE:-journal}
+if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then
+    MIRROR_POOL_MODE=image
+fi
 export CEPH_ARGS="--id ${CEPH_ID}"
@@ -526,11 +526,11 @@ status()
     rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" ls -l
     echo
-    echo "${cluster} ${image_pool}${image_ns} mirror pool info"
+    echo "${cluster} ${image_pool} ${image_ns} mirror pool info"
     rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" mirror pool info
     echo
-    echo "${cluster} ${image_pool}${image_ns} mirror pool status"
+    echo "${cluster} ${image_pool} ${image_ns} mirror pool status"
     CEPH_ARGS='' rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" mirror pool status --verbose
     echo
@@ -623,6 +623,39 @@ flush()
     admin_daemons "${cluster}" ${cmd}
 }
+get_pool_status_json()
+{
+    local cluster="$1"
+    local pool="$2"
+
+    CEPH_ARGS='' rbd --cluster "${cluster}" mirror pool status "${pool}" --verbose --format json
+}
+
+test_health_state()
+{
+    local cluster="$1"
+    local pool="$2"
+    local state="$3"
+
+    local status
+    status="$(get_pool_status_json "${cluster}" "${pool}")"
+    jq -e '.summary.health == "'"${state}"'"' <<< "${status}"
+}
+
+wait_for_health_state()
+{
+    local cluster="$1"
+    local pool="$2"
+    local state="$3"
+    local s
+
+    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+        sleep "${s}"
+        test_health_state "${cluster}" "${pool}" "${state}" && return 0
+    done
+    return 1
+}
+
 test_image_replay_state()
 {
     local cluster=$1
@@ -710,17 +743,18 @@ wait_for_journal_replay_complete()
 {
     local local_cluster=$1
     local cluster=$2
-    local pool=$3
-    local image=$4
+    local local_pool=$3
+    local remote_pool=$4
+    local image=$5
     local s master_pos mirror_pos last_mirror_pos
     local master_tag master_entry mirror_tag mirror_entry
     while true; do
         for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do
             sleep ${s}
-            flush "${local_cluster}" "${pool}" "${image}"
-            master_pos=$(get_master_journal_position "${cluster}" "${pool}" "${image}")
-            mirror_pos=$(get_mirror_journal_position "${cluster}" "${pool}" "${image}")
+            flush "${local_cluster}" "${local_pool}" "${image}"
+            master_pos=$(get_master_journal_position "${cluster}" "${remote_pool}" "${image}")
+            mirror_pos=$(get_mirror_journal_position "${cluster}" "${remote_pool}" "${image}")
             test -n "${master_pos}" -a "${master_pos}" = "${mirror_pos}" && return 0
             test "${mirror_pos}" != "${last_mirror_pos}" && break
         done
@@ -763,21 +797,22 @@ wait_for_snapshot_sync_complete()
 {
     local local_cluster=$1
     local cluster=$2
-    local pool=$3
-    local image=$4
+    local local_pool=$3
+    local remote_pool=$4
+    local image=$5
-    local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.status)
-    local local_status_log=${TEMPDIR}/$(mkfname ${local_cluster}-${pool}-${image}.status)
+    local status_log=${TEMPDIR}/$(mkfname ${cluster}-${remote_pool}-${image}.status)
+    local local_status_log=${TEMPDIR}/$(mkfname ${local_cluster}-${local_pool}-${image}.status)
-    mirror_image_snapshot "${cluster}" "${pool}" "${image}"
-    get_newest_mirror_snapshot "${cluster}" "${pool}" "${image}" "${status_log}"
+    mirror_image_snapshot "${cluster}" "${remote_pool}" "${image}"
+    get_newest_mirror_snapshot "${cluster}" "${remote_pool}" "${image}" "${status_log}"
     local snapshot_id=$(xmlstarlet sel -t -v "//snapshot/id" < ${status_log})
     while true; do
         for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do
             sleep ${s}
-            get_newest_mirror_snapshot "${local_cluster}" "${pool}" "${image}" "${local_status_log}"
+            get_newest_mirror_snapshot "${local_cluster}" "${local_pool}" "${image}" "${local_status_log}"
             local primary_snapshot_id=$(xmlstarlet sel -t -v "//snapshot/namespace/primary_snap_id" < ${local_status_log})
             test "${snapshot_id}" = "${primary_snapshot_id}" && return 0
@@ -792,13 +827,14 @@ wait_for_replay_complete()
 {
     local local_cluster=$1
     local cluster=$2
-    local pool=$3
-    local image=$4
-
-    if [ "${MIRROR_IMAGE_MODE}" = "journal" ]; then
-        wait_for_journal_replay_complete ${local_cluster} ${cluster} ${pool} ${image}
-    elif [ "${MIRROR_IMAGE_MODE}" = "snapshot" ]; then
-        wait_for_snapshot_sync_complete ${local_cluster} ${cluster} ${pool} ${image}
+    local local_pool=$3
+    local remote_pool=$4
+    local image=$5
+
+    if [ "${RBD_MIRROR_MODE}" = "journal" ]; then
+        wait_for_journal_replay_complete ${local_cluster} ${cluster} ${local_pool} ${remote_pool} ${image}
+    elif [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then
+        wait_for_snapshot_sync_complete ${local_cluster} ${cluster} ${local_pool} ${remote_pool} ${image}
     else
         return 1
     fi
@@ -814,23 +850,23 @@ test_status_in_pool_dir()
     local description_pattern="$5"
     local service_pattern="$6"
-    local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.mirror_status)
-    CEPH_ARGS='' rbd --cluster ${cluster} mirror image status ${pool}/${image} |
-        tee ${status_log} >&2
-    grep "^ state: .*${state_pattern}" ${status_log} || return 1
-    grep "^ description: .*${description_pattern}" ${status_log} || return 1
+    local status
+    status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror image status \
+                 ${pool}/${image})
+    grep "^ state: .*${state_pattern}" <<< "$status" || return 1
+    grep "^ description: .*${description_pattern}" <<< "$status" || return 1
     if [ -n "${service_pattern}" ]; then
-        grep "service: *${service_pattern}" ${status_log} || return 1
+        grep "service: *${service_pattern}" <<< "$status" || return 1
     elif echo ${state_pattern} | grep '^up+'; then
-        grep "service: *${MIRROR_USER_ID_PREFIX}.* on " ${status_log} || return 1
+        grep "service: *${MIRROR_USER_ID_PREFIX}.* on " <<< "$status" || return 1
     else
-        grep "service: " ${status_log} && return 1
+        grep "service: " <<< "$status" && return 1
     fi
     # recheck using `mirror pool status` command to stress test it.
-
-    local last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' ${status_log})"
+    local last_update
+    last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' <<< "$status")"
     test_mirror_pool_status_verbose \
         ${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" && return 0
@@ -847,16 +883,15 @@ test_mirror_pool_status_verbose()
     local state_pattern="$4"
     local prev_last_update="$5"
-    local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}.mirror_status)
-
-    rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \
-        > ${status_log}
+    local status
+    status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror pool status ${pool} \
+                 --verbose --format xml)
     local last_update state
-    last_update=$($XMLSTARLET sel -t -v \
-        "//images/image[name='${image}']/last_update" < ${status_log})
-    state=$($XMLSTARLET sel -t -v \
-        "//images/image[name='${image}']/state" < ${status_log})
+    last_update=$(xmlstarlet sel -t -v \
+        "//images/image[name='${image}']/last_update" <<< "$status")
+    state=$(xmlstarlet sel -t -v \
+        "//images/image[name='${image}']/state" <<< "$status")
     echo "${state}" | grep "${state_pattern}" ||
         test "${last_update}" '>' "${prev_last_update}"
@@ -880,6 +915,20 @@ wait_for_status_in_pool_dir()
     return 1
 }
+wait_for_replaying_status_in_pool_dir()
+{
+    local cluster=$1
+    local pool=$2
+    local image=$3
+
+    if [ "${RBD_MIRROR_MODE}" = "journal" ]; then
+        wait_for_status_in_pool_dir ${cluster} ${pool} ${image} 'up+replaying' \
+            'primary_position'
+    else
+        wait_for_status_in_pool_dir ${cluster} ${pool} ${image} 'up+replaying'
+    fi
+}
+
 create_image()
 {
     local cluster=$1 ; shift
@@ -896,18 +945,35 @@ create_image()
         --image-feature "${RBD_IMAGE_FEATURES}" $@ ${pool}/${image}
 }
+is_pool_mirror_mode_image()
+{
+    local pool=$1
+
+    if [ "${MIRROR_POOL_MODE}" = "image" ]; then
+        return 0
+    fi
+
+    case "${pool}" in
+        */${NS2} | ${PARENT_POOL})
+            return 0
+            ;;
+    esac
+
+    return 1
+}
+
 create_image_and_enable_mirror()
 {
     local cluster=$1 ; shift
     local pool=$1 ; shift
     local image=$1 ; shift
-    local mode=${1:-${MIRROR_IMAGE_MODE}}
+    local mode=${1:-${RBD_MIRROR_MODE}}
     if [ -n "$1" ]; then
         shift
     fi
     create_image ${cluster} ${pool} ${image} $@
-    if [ "${MIRROR_POOL_MODE}" = "image" ] || [ "$pool" = "${PARENT_POOL}" ]; then
+    if is_pool_mirror_mode_image ${pool}; then
         enable_mirror ${cluster} ${pool} ${image} ${mode}
     fi
 }
@@ -987,9 +1053,14 @@ trash_move() {
 trash_restore() {
     local cluster=$1
     local pool=$2
-    local image_id=$3
+    local image=$3
+    local image_id=$4
+    local mode=${5:-${RBD_MIRROR_MODE}}
     rbd --cluster=${cluster} trash restore ${pool}/${image_id}
+    if is_pool_mirror_mode_image ${pool}; then
+        enable_mirror ${cluster} ${pool} ${image} ${mode}
+    fi
 }
 clone_image()
 {
@@ -1018,13 +1089,15 @@ clone_image_and_enable_mirror()
     local clone_image=$6
     shift 6
-    local mode=${1:-${MIRROR_IMAGE_MODE}}
+    local mode=${1:-${RBD_MIRROR_MODE}}
     if [ -n "$1" ]; then
         shift
     fi
     clone_image ${cluster} ${parent_pool} ${parent_image} ${parent_snap} ${clone_pool} ${clone_image} $@
-    enable_mirror ${cluster} ${clone_pool} ${clone_image} ${mode}
+    if is_pool_mirror_mode_image ${clone_pool}; then
+        enable_mirror ${cluster} ${clone_pool} ${clone_image} ${mode}
+    fi
 }
 disconnect_image()
 {
@@ -1136,7 +1209,7 @@ test_snap_moved_to_trash()
     local snap_name=$4
     rbd --cluster ${cluster} snap ls ${pool}/${image} --all |
-        grep -F " trash (${snap_name})"
+        grep -F " trash (user ${snap_name})"
 }
 wait_for_snap_moved_to_trash()
 {
@@ -1228,16 +1301,19 @@ show_diff()
 compare_images()
 {
-    local pool=$1
-    local image=$2
     local ret=0
+    local local_cluster=$1
+    local cluster=$2
+    local local_pool=$3
+    local remote_pool=$4
+    local image=$5
-    local rmt_export=${TEMPDIR}/$(mkfname ${CLUSTER2}-${pool}-${image}.export)
-    local loc_export=${TEMPDIR}/$(mkfname ${CLUSTER1}-${pool}-${image}.export)
+    local rmt_export=${TEMPDIR}/$(mkfname ${cluster}-${remote_pool}-${image}.export)
+    local loc_export=${TEMPDIR}/$(mkfname ${local_cluster}-${local_pool}-${image}.export)
     rm -f ${rmt_export} ${loc_export}
-    rbd --cluster ${CLUSTER2} export ${pool}/${image} ${rmt_export}
-    rbd --cluster ${CLUSTER1} export ${pool}/${image} ${loc_export}
+    rbd --cluster ${cluster} export ${remote_pool}/${image} ${rmt_export}
+    rbd --cluster ${local_cluster} export ${local_pool}/${image} ${loc_export}
     if ! cmp ${rmt_export} ${loc_export}
     then
         show_diff ${rmt_export} ${loc_export}
@@ -1258,7 +1334,7 @@ compare_image_snapshots()
     for snap_name in $(rbd --cluster ${CLUSTER1} --format xml \
                            snap list ${pool}/${image} | \
-                           $XMLSTARLET sel -t -v "//snapshot/name" | \
+                           xmlstarlet sel -t -v "//snapshot/name" | \
                            grep -E -v "^\.rbd-mirror\."); do
         rm -f ${rmt_export} ${loc_export}
         rbd --cluster ${CLUSTER2} export ${pool}/${image}@${snap_name} ${rmt_export}
@@ -1315,7 +1391,7 @@ enable_mirror()
     local cluster=$1
     local pool=$2
     local image=$3
-    local mode=${4:-${MIRROR_IMAGE_MODE}}
+    local mode=${4:-${RBD_MIRROR_MODE}}
     rbd --cluster=${cluster} mirror image enable ${pool}/${image} ${mode}
     # Display image info including the global image id for debugging purpose
diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh
deleted file mode 100755
index 54f6aeec8e00..000000000000
--- a/qa/workunits/rbd/rbd_mirror_journal.sh
+++ /dev/null
@@ -1,614 +0,0 @@
-#!/bin/sh -ex
-#
-# rbd_mirror_journal.sh - test rbd-mirror daemon in journal-based mirroring mode
-#
-# The scripts starts two ("local" and "remote") clusters using mstart.sh script,
-# creates a temporary directory, used for cluster configs, daemon logs, admin
-# socket, temporary files, and launches rbd-mirror daemon.
-#
-
-.
$(dirname $0)/rbd_mirror_helpers.sh - -setup - -testlog "TEST: add image and test replay" -start_mirrors ${CLUSTER1} -image=test -create_image ${CLUSTER2} ${POOL} ${image} -set_image_meta ${CLUSTER2} ${POOL} ${image} "key1" "value1" -set_image_meta ${CLUSTER2} ${POOL} ${image} "key2" "value2" -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'down+unknown' -fi -compare_images ${POOL} ${image} -compare_image_meta ${CLUSTER1} ${POOL} ${image} "key1" "value1" -compare_image_meta ${CLUSTER1} ${POOL} ${image} "key2" "value2" - -testlog "TEST: stop mirror, add image, start mirror and test replay" -stop_mirrors ${CLUSTER1} -image1=test1 -create_image ${CLUSTER2} ${POOL} ${image1} -write_image ${CLUSTER2} ${POOL} ${image1} 100 -start_mirrors ${CLUSTER1} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image1} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' 'primary_position' -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image1} 'down+unknown' -fi -compare_images ${POOL} ${image1} - -testlog "TEST: test the first image is replaying after restart" -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -compare_images ${POOL} ${image} - -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - testlog "TEST: stop/start/restart mirror via admin socket" - all_admin_daemons ${CLUSTER1} rbd mirror stop - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' - - all_admin_daemons ${CLUSTER1} rbd mirror start - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - all_admin_daemons ${CLUSTER1} rbd mirror restart - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - all_admin_daemons ${CLUSTER1} rbd mirror stop - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' - - all_admin_daemons ${CLUSTER1} rbd mirror restart - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - 
all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' - - admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - - all_admin_daemons ${CLUSTER1} rbd mirror start ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - - all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - - all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' - - all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - flush ${CLUSTER1} - all_admin_daemons ${CLUSTER1} rbd mirror status -fi - -remove_image_retry ${CLUSTER2} ${POOL} ${image1} - -testlog "TEST: test image rename" -new_name="${image}_RENAMED" -rename_image ${CLUSTER2} ${POOL} ${image} ${new_name} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' -admin_daemons ${CLUSTER1} rbd mirror status ${POOL}/${new_name} -admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${new_name} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' -rename_image ${CLUSTER2} ${POOL} ${new_name} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - -testlog "TEST: test trash move restore" -image_id=$(get_image_id ${CLUSTER2} ${POOL} ${image}) -trash_move ${CLUSTER2} ${POOL} ${image} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' -trash_restore ${CLUSTER2} ${POOL} ${image_id} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - -testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" -remove_image_retry ${CLUSTER2} ${POOL} ${image} - -wait_for_image_in_omap ${CLUSTER1} ${POOL} -wait_for_image_in_omap ${CLUSTER2} ${POOL} - -create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 
- -testlog "TEST: failover and failback" -start_mirrors ${CLUSTER2} - -# demote and promote same cluster -demote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -compare_images ${POOL} ${image} - -# failover (unmodified) -demote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER1} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} - -# failback (unmodified) -demote_image ${CLUSTER1} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' -compare_images ${POOL} ${image} - -# failover -demote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER1} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} -write_image ${CLUSTER1} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' 'primary_position' -compare_images ${POOL} ${image} - -# failback -demote_image ${CLUSTER1} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' -compare_images ${POOL} ${image} - -testlog "TEST: failover / failback loop" -for i in `seq 1 20`; do - demote_image ${CLUSTER2} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' - promote_image ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} - 
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' - demote_image ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' - promote_image ${CLUSTER2} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -done - -testlog "TEST: force promote" -force_promote_image=test_force_promote -create_image ${CLUSTER2} ${POOL} ${force_promote_image} -write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 -wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' 'primary_position' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' -promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force' -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image} -wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' -write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 -write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 -remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} -remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} - -testlog "TEST: cloned images" -testlog " - default" -parent_image=test_parent -parent_snap=snap -create_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} -write_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} 100 -create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -protect_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} - -clone_image=test_clone -clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image} -write_image ${CLUSTER2} ${POOL} ${clone_image} 100 - -enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} journal -wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} ${parent_image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${parent_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} ${parent_image} 'up+replaying' 'primary_position' -compare_images ${PARENT_POOL} ${parent_image} - -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' 'primary_position' -compare_images ${POOL} ${clone_image} -remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} - -testlog " - clone v1" -clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image}1 - -clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ - ${clone_image}_v1 --rbd-default-clone-format 1 -test $(get_clone_format 
${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 -test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 -remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 -remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 -unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} - -testlog " - clone v2" -parent_snap=snap_v2 -create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ - ${clone_image}_v2 --rbd-default-clone-format 2 -test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v2) = 2 -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v2 -test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v2) = 2 - -remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -test_snap_moved_to_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} -remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v2 -wait_for_image_present ${CLUSTER1} ${POOL} ${clone_image}_v2 'deleted' -test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} - -testlog " - clone v2 non-primary" -create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -wait_for_snap_present ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} -clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ - ${clone_image}_v2 --rbd-default-clone-format 2 -remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} -remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 -wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} -remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} - -testlog "TEST: data pool" -dp_image=test_data_pool -create_image ${CLUSTER2} ${POOL} ${dp_image} 128 --data-pool ${PARENT_POOL} -data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image}) -test "${data_pool}" = "${PARENT_POOL}" -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image} -data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image}) -test "${data_pool}" = "${PARENT_POOL}" -create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1' -write_image ${CLUSTER2} ${POOL} ${dp_image} 100 -create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2' -write_image ${CLUSTER2} ${POOL} ${dp_image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'primary_position' -compare_images ${POOL} ${dp_image}@snap1 -compare_images ${POOL} ${dp_image}@snap2 -compare_images ${POOL} ${dp_image} -remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} - -testlog "TEST: disable mirroring / delete non-primary image" -image2=test2 -image3=test3 -image4=test4 -image5=test5 -for i in ${image2} ${image3} ${image4} ${image5}; do - create_image ${CLUSTER2} ${POOL} ${i} - write_image ${CLUSTER2} ${POOL} ${i} 100 - create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' - create_snapshot ${CLUSTER2} 
${POOL} ${i} 'snap2' - if [ "${i}" = "${image4}" ] || [ "${i}" = "${image5}" ]; then - protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' - protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' - fi - write_image ${CLUSTER2} ${POOL} ${i} 100 - wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' - wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' -done - -set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' -for i in ${image2} ${image4}; do - disable_mirror ${CLUSTER2} ${POOL} ${i} -done - -unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap1' -unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap2' -for i in ${image3} ${image5}; do - remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' - remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' - remove_image_retry ${CLUSTER2} ${POOL} ${i} -done - -for i in ${image2} ${image3} ${image4} ${image5}; do - wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' -done - -set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' -for i in ${image2} ${image4}; do - enable_journaling ${CLUSTER2} ${POOL} ${i} - wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' - wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${i} - wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${i} - compare_images ${POOL} ${i} -done - -testlog "TEST: remove mirroring pool" -pool=pool_to_remove -for cluster in ${CLUSTER1} ${CLUSTER2}; do - CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${pool} 16 16 - CEPH_ARGS='' rbd --cluster ${cluster} pool init ${pool} - rbd --cluster ${cluster} mirror pool enable ${pool} pool -done -peer_add ${CLUSTER1} ${pool} ${CLUSTER2} -peer_add ${CLUSTER2} ${pool} ${CLUSTER1} -rdp_image=test_remove_data_pool -create_image ${CLUSTER2} ${pool} ${image} 128 -create_image ${CLUSTER2} ${POOL} ${rdp_image} 128 --data-pool ${pool} -write_image ${CLUSTER2} ${pool} ${image} 100 -write_image ${CLUSTER2} ${POOL} ${rdp_image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${pool} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${pool} ${image} 'up+replaying' 'primary_position' -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${rdp_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${rdp_image} 'up+replaying' 'primary_position' -for cluster in ${CLUSTER1} ${CLUSTER2}; do - CEPH_ARGS='' ceph --cluster ${cluster} osd pool rm ${pool} ${pool} --yes-i-really-really-mean-it -done -remove_image_retry ${CLUSTER2} ${POOL} ${rdp_image} -wait_for_image_present ${CLUSTER1} ${POOL} ${rdp_image} 'deleted' -for i in 0 1 2 4 8 8 8 8 16 16; do - sleep $i - admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} || break -done -admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} && false - -testlog "TEST: snapshot rename" -snap_name='snap_rename' -create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0" -for i in `seq 1 20`; do - rename_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_$(expr ${i} - 1)" "${snap_name}_${i}" -done -wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" - -unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' -unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' -for i in ${image2} ${image4}; do - remove_image_retry ${CLUSTER2} ${POOL} ${i} -done - -testlog "TEST: disable mirror while daemon is stopped" -stop_mirrors ${CLUSTER1} -stop_mirrors ${CLUSTER2} -set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' -disable_mirror ${CLUSTER2} ${POOL} ${image} -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - 
test_image_present ${CLUSTER1} ${POOL} ${image} 'present' -fi -start_mirrors ${CLUSTER1} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' -set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' -enable_journaling ${CLUSTER2} ${POOL} ${image} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - -testlog "TEST: non-default namespace image mirroring" -testlog " - replay" -create_image ${CLUSTER2} ${POOL}/${NS1} ${image} -create_image ${CLUSTER2} ${POOL}/${NS2} ${image} -enable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} journal -wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS2} ${image} -write_image ${CLUSTER2} ${POOL}/${NS1} ${image} 100 -write_image ${CLUSTER2} ${POOL}/${NS2} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS2} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${image} 'up+replaying' 'primary_position' -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS2} ${image} 'up+replaying' 'primary_position' -compare_images ${POOL}/${NS1} ${image} -compare_images ${POOL}/${NS2} ${image} - -testlog " - disable mirroring / delete image" -remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} -disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} -wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' -wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' -remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} - -testlog " - data pool" -dp_image=test_data_pool -create_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 128 --data-pool ${PARENT_POOL} -data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL}/${NS1} ${dp_image}) -test "${data_pool}" = "${PARENT_POOL}" -wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${dp_image} -data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL}/${NS1} ${dp_image}) -test "${data_pool}" = "${PARENT_POOL}" -write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' 'primary_position' -compare_images ${POOL}/${NS1} ${dp_image} -remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} - -testlog "TEST: simple image resync" -request_resync_image ${CLUSTER1} ${POOL} ${image} image_id -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -compare_images ${POOL} ${image} - -testlog "TEST: image resync while replayer is stopped" -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - request_resync_image ${CLUSTER1} ${POOL} ${image} image_id - admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} - wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} - admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} - wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' - compare_images ${POOL} 
${image} -fi - -testlog "TEST: request image resync while daemon is offline" -stop_mirrors ${CLUSTER1} -request_resync_image ${CLUSTER1} ${POOL} ${image} image_id -start_mirrors ${CLUSTER1} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -compare_images ${POOL} ${image} -remove_image_retry ${CLUSTER2} ${POOL} ${image} - -testlog "TEST: client disconnect" -image=laggy -create_image ${CLUSTER2} ${POOL} ${image} 128 --journal-object-size 64K -write_image ${CLUSTER2} ${POOL} ${image} 10 - -testlog " - replay stopped after disconnect" -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" -disconnect_image ${CLUSTER2} ${POOL} ${image} -test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' - -testlog " - replay started after resync requested" -request_resync_image ${CLUSTER1} ${POOL} ${image} image_id -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" -compare_images ${POOL} ${image} - -testlog " - disconnected after max_concurrent_object_sets reached" -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" - set_image_meta ${CLUSTER2} ${POOL} ${image} \ - conf_rbd_journal_max_concurrent_object_sets 1 - write_image ${CLUSTER2} ${POOL} ${image} 20 16384 - write_image ${CLUSTER2} ${POOL} ${image} 20 16384 - test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" - set_image_meta ${CLUSTER2} ${POOL} ${image} \ - conf_rbd_journal_max_concurrent_object_sets 0 - - testlog " - replay is still stopped (disconnected) after restart" - admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' -fi - -testlog " - replay started after resync requested" -request_resync_image ${CLUSTER1} ${POOL} ${image} image_id -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" -compare_images ${POOL} ${image} - -testlog " - rbd_mirroring_resync_after_disconnect config option" -set_image_meta ${CLUSTER2} ${POOL} ${image} \ - conf_rbd_mirroring_resync_after_disconnect true -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -image_id=$(get_image_id ${CLUSTER1} ${POOL} ${image}) -disconnect_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 
'deleted' ${image_id} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" -compare_images ${POOL} ${image} -set_image_meta ${CLUSTER2} ${POOL} ${image} \ - conf_rbd_mirroring_resync_after_disconnect false -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -disconnect_image ${CLUSTER2} ${POOL} ${image} -test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' -remove_image_retry ${CLUSTER2} ${POOL} ${image} - -testlog "TEST: split-brain" -image=split-brain -create_image ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -promote_image ${CLUSTER1} ${POOL} ${image} --force -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' -write_image ${CLUSTER1} ${POOL} ${image} 10 -demote_image ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' -request_resync_image ${CLUSTER1} ${POOL} ${image} image_id -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' -remove_image_retry ${CLUSTER2} ${POOL} ${image} - -testlog "TEST: check if removed images' OMAP are removed" -start_mirrors ${CLUSTER2} -wait_for_image_in_omap ${CLUSTER1} ${POOL} -wait_for_image_in_omap ${CLUSTER2} ${POOL} - -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - # teuthology will trash the daemon - testlog "TEST: no blocklists" - CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries" - CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries" -fi diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh deleted file mode 100755 index c70d48b09db4..000000000000 --- a/qa/workunits/rbd/rbd_mirror_snapshot.sh +++ /dev/null @@ -1,517 +0,0 @@ -#!/bin/sh -ex -# -# rbd_mirror_snapshot.sh - test rbd-mirror daemon in snapshot-based mirroring mode -# -# The scripts starts two ("local" and "remote") clusters using mstart.sh script, -# creates a temporary directory, used for cluster configs, daemon logs, admin -# socket, temporary files, and launches rbd-mirror daemon. -# - -MIRROR_POOL_MODE=image -MIRROR_IMAGE_MODE=snapshot - -. 
$(dirname $0)/rbd_mirror_helpers.sh - -setup - -testlog "TEST: add image and test replay" -start_mirrors ${CLUSTER1} -image=test -create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} -set_image_meta ${CLUSTER2} ${POOL} ${image} "key1" "value1" -set_image_meta ${CLUSTER2} ${POOL} ${image} "key2" "value2" -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'down+unknown' -fi -compare_images ${POOL} ${image} -compare_image_meta ${CLUSTER1} ${POOL} ${image} "key1" "value1" -compare_image_meta ${CLUSTER1} ${POOL} ${image} "key2" "value2" - -testlog "TEST: stop mirror, add image, start mirror and test replay" -stop_mirrors ${CLUSTER1} -image1=test1 -create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image1} -write_image ${CLUSTER2} ${POOL} ${image1} 100 -start_mirrors ${CLUSTER1} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image1} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image1} 'down+unknown' -fi -compare_images ${POOL} ${image1} - -testlog "TEST: test the first image is replaying after restart" -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -compare_images ${POOL} ${image} - -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - testlog "TEST: stop/start/restart mirror via admin socket" - all_admin_daemons ${CLUSTER1} rbd mirror stop - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' - - all_admin_daemons ${CLUSTER1} rbd mirror start - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - all_admin_daemons ${CLUSTER1} rbd mirror restart - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - all_admin_daemons ${CLUSTER1} rbd mirror stop - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' - - all_admin_daemons ${CLUSTER1} rbd mirror restart - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - all_admin_daemons 
${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' - - admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - - all_admin_daemons ${CLUSTER1} rbd mirror start ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - - all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - - all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' - - all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' - - flush ${CLUSTER1} - all_admin_daemons ${CLUSTER1} rbd mirror status -fi - -remove_image_retry ${CLUSTER2} ${POOL} ${image1} - -testlog "TEST: test image rename" -new_name="${image}_RENAMED" -rename_image ${CLUSTER2} ${POOL} ${image} ${new_name} -mirror_image_snapshot ${CLUSTER2} ${POOL} ${new_name} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' -admin_daemons ${CLUSTER1} rbd mirror status ${POOL}/${new_name} -admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${new_name} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' -rename_image ${CLUSTER2} ${POOL} ${new_name} ${image} -mirror_image_snapshot ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - -testlog "TEST: test trash move restore" -image_id=$(get_image_id ${CLUSTER2} ${POOL} ${image}) -trash_move ${CLUSTER2} ${POOL} ${image} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' -trash_restore ${CLUSTER2} ${POOL} ${image_id} -enable_mirror ${CLUSTER2} ${POOL} ${image} snapshot -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - -testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" -remove_image_retry ${CLUSTER2} ${POOL} ${image} - -wait_for_image_in_omap ${CLUSTER1} ${POOL} -wait_for_image_in_omap ${CLUSTER2} ${POOL} - -create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -write_image ${CLUSTER2} ${POOL} ${image} 
100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - -testlog "TEST: failover and failback" -start_mirrors ${CLUSTER2} - -# demote and promote same cluster -demote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -compare_images ${POOL} ${image} - -# failover (unmodified) -demote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER1} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} - -# failback (unmodified) -demote_image ${CLUSTER1} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' -compare_images ${POOL} ${image} - -# failover -demote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER1} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} -write_image ${CLUSTER1} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' -compare_images ${POOL} ${image} - -# failback -demote_image ${CLUSTER1} ${POOL} ${image} -wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' -promote_image ${CLUSTER2} ${POOL} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -write_image ${CLUSTER2} ${POOL} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' -compare_images ${POOL} ${image} - -testlog "TEST: failover / failback loop" -for i in `seq 1 20`; do - demote_image ${CLUSTER2} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' - promote_image ${CLUSTER1} ${POOL} ${image} - 
wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} - wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' - demote_image ${CLUSTER1} ${POOL} ${image} - wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' - promote_image ${CLUSTER2} ${POOL} ${image} - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -done -# check that demote (or other mirror snapshots) don't pile up -test "$(count_mirror_snaps ${CLUSTER1} ${POOL} ${image})" -le 3 -test "$(count_mirror_snaps ${CLUSTER2} ${POOL} ${image})" -le 3 - -testlog "TEST: force promote" -force_promote_image=test_force_promote -create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} -write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 -wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' -promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force' -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image} -wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped' -wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' -write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 -write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 -remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} -remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} - -testlog "TEST: cloned images" -testlog " - default" -parent_image=test_parent -parent_snap=snap -create_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} -write_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} 100 -create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -protect_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} - -clone_image=test_clone -clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image} -write_image ${CLUSTER2} ${POOL} ${clone_image} 100 -enable_mirror ${CLUSTER2} ${POOL} ${clone_image} snapshot - -wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} ${parent_image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${parent_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} ${parent_image} 'up+replaying' -compare_images ${PARENT_POOL} ${parent_image} - -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' -compare_images ${POOL} ${clone_image} -remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} - -testlog " - clone v1" 
-clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} ${parent_image} \ - ${parent_snap} ${POOL} ${clone_image}1 - -clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} \ - ${parent_snap} ${POOL} ${clone_image}_v1 snapshot --rbd-default-clone-format 1 -test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 -test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 -remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 -remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 -unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} - -testlog " - clone v2" -parent_snap=snap_v2 -create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} -clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} \ - ${parent_snap} ${POOL} ${clone_image}_v2 snapshot --rbd-default-clone-format 2 -test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v2) = 2 -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v2 -test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v2) = 2 - -remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} -test_snap_moved_to_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} -remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v2 -wait_for_image_present ${CLUSTER1} ${POOL} ${clone_image}_v2 'deleted' -test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} - -testlog " - clone v2 non-primary" -create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} -wait_for_snap_present ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} -clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} ${parent_image} \ - ${parent_snap} ${POOL} ${clone_image}_v2 snapshot --rbd-default-clone-format 2 -remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} -mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} -wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} -remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 -wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} -remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} - -testlog "TEST: data pool" -dp_image=test_data_pool -create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${dp_image} snapshot 128 --data-pool ${PARENT_POOL} -data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image}) -test "${data_pool}" = "${PARENT_POOL}" -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image} -data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image}) -test "${data_pool}" = "${PARENT_POOL}" -create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1' -write_image ${CLUSTER2} ${POOL} ${dp_image} 100 -create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2' -write_image ${CLUSTER2} ${POOL} ${dp_image} 100 -wait_for_replay_complete 
${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' -compare_images ${POOL} ${dp_image}@snap1 -compare_images ${POOL} ${dp_image}@snap2 -compare_images ${POOL} ${dp_image} -remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} - -testlog "TEST: disable mirroring / delete non-primary image" -image2=test2 -image3=test3 -image4=test4 -image5=test5 -for i in ${image2} ${image3} ${image4} ${image5}; do - create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${i} - write_image ${CLUSTER2} ${POOL} ${i} 100 - create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' - create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' - if [ "${i}" = "${image4}" ] || [ "${i}" = "${image5}" ]; then - protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' - protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' - fi - write_image ${CLUSTER2} ${POOL} ${i} 100 - mirror_image_snapshot ${CLUSTER2} ${POOL} ${i} - wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' - wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' -done - -set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' -for i in ${image2} ${image4}; do - disable_mirror ${CLUSTER2} ${POOL} ${i} -done - -unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap1' -unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap2' -for i in ${image3} ${image5}; do - remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' - remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' - remove_image_retry ${CLUSTER2} ${POOL} ${i} -done - -for i in ${image2} ${image3} ${image4} ${image5}; do - wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' -done - -testlog "TEST: snapshot rename" -snap_name='snap_rename' -enable_mirror ${CLUSTER2} ${POOL} ${image2} -create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0" -for i in `seq 1 20`; do - rename_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_$(expr ${i} - 1)" "${snap_name}_${i}" -done -mirror_image_snapshot ${CLUSTER2} ${POOL} ${image2} -wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" - -unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' -unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' -for i in ${image2} ${image4}; do - remove_image_retry ${CLUSTER2} ${POOL} ${i} -done - -testlog "TEST: disable mirror while daemon is stopped" -stop_mirrors ${CLUSTER1} -stop_mirrors ${CLUSTER2} -disable_mirror ${CLUSTER2} ${POOL} ${image} -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - test_image_present ${CLUSTER1} ${POOL} ${image} 'present' -fi -start_mirrors ${CLUSTER1} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' -enable_mirror ${CLUSTER2} ${POOL} ${image} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - -testlog "TEST: non-default namespace image mirroring" -testlog " - replay" -create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS1} ${image} -create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${image} -wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS2} ${image} -write_image ${CLUSTER2} ${POOL}/${NS1} ${image} 100 -write_image ${CLUSTER2} ${POOL}/${NS2} ${image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS2} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${image} 'up+replaying' -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS2} ${image} 'up+replaying' -compare_images 
${POOL}/${NS1} ${image} -compare_images ${POOL}/${NS2} ${image} - -testlog " - disable mirroring / delete image" -remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} -disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} -wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' -wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' -remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} - -testlog " - data pool" -dp_image=test_data_pool -create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS1} ${dp_image} snapshot 128 --data-pool ${PARENT_POOL} -data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL}/${NS1} ${dp_image}) -test "${data_pool}" = "${PARENT_POOL}" -wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${dp_image} -data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL}/${NS1} ${dp_image}) -test "${data_pool}" = "${PARENT_POOL}" -write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' -compare_images ${POOL}/${NS1} ${dp_image} -remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} - -testlog "TEST: simple image resync" -request_resync_image ${CLUSTER1} ${POOL} ${image} image_id -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -compare_images ${POOL} ${image} - -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - testlog "TEST: image resync while replayer is stopped" - admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} - wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} - request_resync_image ${CLUSTER1} ${POOL} ${image} image_id - admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} - wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} - admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} - wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' - wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' - compare_images ${POOL} ${image} -fi - -testlog "TEST: request image resync while daemon is offline" -stop_mirrors ${CLUSTER1} -request_resync_image ${CLUSTER1} ${POOL} ${image} image_id -start_mirrors ${CLUSTER1} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} -wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' -wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -compare_images ${POOL} ${image} -remove_image_retry ${CLUSTER2} ${POOL} ${image} - -testlog "TEST: split-brain" -image=split-brain -create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -promote_image ${CLUSTER1} ${POOL} ${image} --force -wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' -write_image ${CLUSTER1} ${POOL} ${image} 10 -demote_image ${CLUSTER1} ${POOL} ${image} -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' -request_resync_image ${CLUSTER1} ${POOL} ${image} image_id -wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' -remove_image_retry 
${CLUSTER2} ${POOL} ${image} - -testlog "TEST: check if removed images' OMAP are removed" -start_mirrors ${CLUSTER2} -wait_for_image_in_omap ${CLUSTER1} ${POOL} -wait_for_image_in_omap ${CLUSTER2} ${POOL} - -if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then - # teuthology will trash the daemon - testlog "TEST: no blocklists" - CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries" - CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries" -fi diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh index cb79aba7ebc9..b0a85e8a48a5 100755 --- a/qa/workunits/rbd/rbd_mirror_stress.sh +++ b/qa/workunits/rbd/rbd_mirror_stress.sh @@ -1,4 +1,4 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_stress.sh - stress test rbd-mirror daemon # @@ -8,6 +8,8 @@ # tool during the many image test # +set -ex + IMAGE_COUNT=50 export LOCKDEP=0 @@ -96,7 +98,7 @@ start_mirrors ${CLUSTER2} testlog "TEST: add image and test replay after client crashes" image=test -create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${MIRROR_IMAGE_MODE} '512M' +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${RBD_MIRROR_MODE} '512M' wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} clean_snap_name= @@ -109,7 +111,7 @@ do snap_name="snap${i}" create_snap ${CLUSTER2} ${POOL} ${image} ${snap_name} wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name} if [ -n "${clean_snap_name}" ]; then @@ -122,7 +124,7 @@ do done wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} -wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${clean_snap_name} for i in `seq 1 10` @@ -151,7 +153,7 @@ snap_name="snap" for i in `seq 1 ${IMAGE_COUNT}` do image="image_${i}" - create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${MIRROR_IMAGE_MODE} '128M' + create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${RBD_MIRROR_MODE} '128M' if [ -n "${RBD_MIRROR_REDUCE_WRITES}" ]; then write_image ${CLUSTER2} ${POOL} ${image} 100 else @@ -171,7 +173,7 @@ do image="image_${i}" create_snap ${CLUSTER2} ${POOL} ${image} ${snap_name} wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} - wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${image} wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name} compare_image_snaps ${POOL} ${image} ${snap_name} done diff --git a/qa/workunits/rbd/rbd_support_module_recovery.sh b/qa/workunits/rbd/rbd_support_module_recovery.sh new file mode 100755 index 000000000000..e9defced24a8 --- /dev/null +++ b/qa/workunits/rbd/rbd_support_module_recovery.sh @@ -0,0 +1,77 @@ +#!/bin/bash +set -ex + +POOL=rbd +IMAGE_PREFIX=image +NUM_IMAGES=20 +RUN_TIME=3600 + +rbd mirror pool enable ${POOL} image +rbd mirror pool peer add ${POOL} dummy + +# Create images and schedule their mirror snapshots +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + rbd create -s 1G --image-feature exclusive-lock ${POOL}/${IMAGE_PREFIX}$i + rbd mirror image enable ${POOL}/${IMAGE_PREFIX}$i snapshot + rbd mirror snapshot schedule add -p ${POOL} --image ${IMAGE_PREFIX}$i 
1m +done + +# Run fio workloads on images via kclient +# Test the recovery of the rbd_support module and its scheduler from their +# librbd client being blocklisted while an exclusive lock gets passed around +# between their librbd client and a kclient trying to take mirror snapshots +# and perform I/O on the same image. +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + DEVS[$i]=$(sudo rbd device map ${POOL}/${IMAGE_PREFIX}$i) + fio --name=fiotest --filename=${DEVS[$i]} --rw=randrw --bs=4K --direct=1 \ + --ioengine=libaio --iodepth=2 --runtime=43200 --time_based \ + &> /dev/null & +done + +# Repeatedly blocklist rbd_support module's client ~10s after the module +# recovers from previous blocklisting +CURRENT_TIME=$(date +%s) +END_TIME=$((CURRENT_TIME + RUN_TIME)) +PREV_CLIENT_ADDR="" +CLIENT_ADDR="" +while ((CURRENT_TIME <= END_TIME)); do + if [[ -n "${CLIENT_ADDR}" ]] && + [[ "${CLIENT_ADDR}" != "${PREV_CLIENT_ADDR}" ]]; then + ceph osd blocklist add ${CLIENT_ADDR} + # Confirm rbd_support module's client is blocklisted + ceph osd blocklist ls | grep -q ${CLIENT_ADDR} + PREV_CLIENT_ADDR=${CLIENT_ADDR} + fi + sleep 10 + CLIENT_ADDR=$(ceph mgr dump | + jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + CURRENT_TIME=$(date +%s) +done + +# Confirm that rbd_support module recovered from repeated blocklisting +# Check that you can add a mirror snapshot schedule after a few retries +for ((i = 1; i <= 24; i++)); do + rbd mirror snapshot schedule add -p ${POOL} \ + --image ${IMAGE_PREFIX}1 2m && break + sleep 10 +done +rbd mirror snapshot schedule ls -p ${POOL} --image ${IMAGE_PREFIX}1 | + grep 'every 2m' +# Verify that the schedule present before client blocklisting is preserved +rbd mirror snapshot schedule ls -p ${POOL} --image ${IMAGE_PREFIX}1 | + grep 'every 1m' +rbd mirror snapshot schedule rm -p ${POOL} --image ${IMAGE_PREFIX}1 2m +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + rbd mirror snapshot schedule rm -p ${POOL} --image ${IMAGE_PREFIX}$i 1m +done + +# cleanup +killall fio || true +wait +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + sudo rbd device unmap ${DEVS[$i]} +done + +echo OK diff --git a/qa/workunits/rbd/test_admin_socket.sh b/qa/workunits/rbd/test_admin_socket.sh index 6b960787b5ed..110fdd48ea74 100755 --- a/qa/workunits/rbd/test_admin_socket.sh +++ b/qa/workunits/rbd/test_admin_socket.sh @@ -5,8 +5,6 @@ TMPDIR=/tmp/rbd_test_admin_socket$$ mkdir $TMPDIR trap "rm -fr $TMPDIR" 0 -. $(dirname $0)/../../standalone/ceph-helpers.sh - function expect_false() { set -x @@ -40,12 +38,12 @@ function rbd_get_perfcounter() local name name=$(ceph --format xml --admin-daemon $(rbd_watch_asok ${image}) \ - perf schema | $XMLSTARLET el -d3 | + perf schema | xmlstarlet el -d3 | grep "/librbd-.*-${image}/${counter}\$") test -n "${name}" || return 1 ceph --format xml --admin-daemon $(rbd_watch_asok ${image}) perf dump | - $XMLSTARLET sel -t -m "${name}" -v . + xmlstarlet sel -t -m "${name}" -v .
} function rbd_check_perfcounter() diff --git a/qa/workunits/rest/test-restful.sh b/qa/workunits/rest/test-restful.sh deleted file mode 100755 index fde0d107a0bd..000000000000 --- a/qa/workunits/rest/test-restful.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -ex - -mydir=`dirname $0` - -secret=`ceph config-key get mgr/restful/keys/admin` -url=$(ceph mgr dump|jq -r .services.restful|sed -e 's/\/$//') -echo "url $url secret $secret" -$mydir/test_mgr_rest_api.py $url $secret - -echo $0 OK diff --git a/qa/workunits/rgw/jcksum/.gitignore b/qa/workunits/rgw/jcksum/.gitignore new file mode 100644 index 000000000000..19b9f97248cf --- /dev/null +++ b/qa/workunits/rgw/jcksum/.gitignore @@ -0,0 +1,20 @@ +# Maven +target/ + +# Ignore Gradle GUI config +gradle-app.setting + +# Eclipse +/.classpath +/.settings/ +/.project +/bin/ + +# IntelliJ +.idea +*.iml +*.ipr +*.iws + +# Misc +*.log diff --git a/qa/workunits/rgw/jcksum/.mvn/wrapper/maven-wrapper.jar b/qa/workunits/rgw/jcksum/.mvn/wrapper/maven-wrapper.jar new file mode 100644 index 000000000000..cb28b0e37c7d Binary files /dev/null and b/qa/workunits/rgw/jcksum/.mvn/wrapper/maven-wrapper.jar differ diff --git a/qa/workunits/rgw/jcksum/.mvn/wrapper/maven-wrapper.properties b/qa/workunits/rgw/jcksum/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 000000000000..346d645fd06f --- /dev/null +++ b/qa/workunits/rgw/jcksum/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.6/apache-maven-3.9.6-bin.zip +wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar diff --git a/qa/workunits/rgw/jcksum/README.md b/qa/workunits/rgw/jcksum/README.md new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/qa/workunits/rgw/jcksum/file-0b b/qa/workunits/rgw/jcksum/file-0b new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/qa/workunits/rgw/jcksum/file-1m b/qa/workunits/rgw/jcksum/file-1m new file mode 100644 index 000000000000..084ad696ac55 --- /dev/null +++ b/qa/workunits/rgw/jcksum/file-1m @@ -0,0 +1,30819 @@ +// Copyright 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Josh Kelley (joshkel@gmail.com) +// +// Google C++ Testing Framework (Google Test) +// +// C++Builder's IDE cannot build a static library from files with hyphens +// in their name. See http://qc.codegear.com/wc/qcmain.aspx?d=70977 . +// This file serves as a workaround. + +#include "src/gtest-all.cc" +// Copyright 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Josh Kelley (joshkel@gmail.com) +// +// Google C++ Testing Framework (Google Test) +// +// Links gtest.lib and gtest_main.lib into the current project in C++Builder. +// This means that these libraries can't be renamed, but it's the only way to +// ensure that Debug versus Release test builds are linked against the +// appropriate Debug or Release build of the libraries. + +#pragma link "gtest.lib" +#pragma link "gtest_main.lib" +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "sample2.h" + +#include + +// Clones a 0-terminated C string, allocating memory using new. +const char* MyString::CloneCString(const char* a_c_string) { + if (a_c_string == NULL) return NULL; + + const size_t len = strlen(a_c_string); + char* const clone = new char[ len + 1 ]; + memcpy(clone, a_c_string, len + 1); + + return clone; +} + +// Sets the 0-terminated C string this MyString object +// represents. +void MyString::Set(const char* a_c_string) { + // Makes sure this works when c_string == c_string_ + const char* const temp = MyString::CloneCString(a_c_string); + delete[] c_string_; + c_string_ = temp; +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + +#include + +#include "sample4.h" + +// Returns the current counter value, and increments it. +int Counter::Increment() { + return counter_++; +} + +// Prints the current counter value to STDOUT. +void Counter::Print() const { + printf("%d", counter_); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "sample1.h" + +// Returns n! (the factorial of n). For negative n, n! is defined to be 1. +int Factorial(int n) { + int result = 1; + for (int i = 1; i <= n; i++) { + result *= i; + } + + return result; +} + +// Returns true iff n is a prime number. +bool IsPrime(int n) { + // Trivial case 1: small numbers + if (n <= 1) return false; + + // Trivial case 2: even numbers + if (n % 2 == 0) return n == 2; + + // Now, we have that n is odd and n >= 3. + + // Try to divide n by every odd number i, starting from 3 + for (int i = 3; ; i += 2) { + // We only have to try i up to the squre root of n + if (i > n/i) break; + + // Now, we have i <= n/i < n. + // If n is divisible by i, n is not prime. + if (n % i == 0) return false; + } + + // n has no integer factor in the range (1, n), and thus is prime. + return true; +} +// Copyright 2009 Google Inc. All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to use Google Test listener API to implement +// a primitive leak checker. + +#include +#include + +#include "gtest/gtest.h" + +using ::testing::EmptyTestEventListener; +using ::testing::InitGoogleTest; +using ::testing::Test; +using ::testing::TestCase; +using ::testing::TestEventListeners; +using ::testing::TestInfo; +using ::testing::TestPartResult; +using ::testing::UnitTest; + +namespace { + +// We will track memory used by this class. +class Water { + public: + // Normal Water declarations go here. + + // operator new and operator delete help us control water allocation. + void* operator new(size_t allocation_size) { + allocated_++; + return malloc(allocation_size); + } + + void operator delete(void* block, size_t /* allocation_size */) { + allocated_--; + free(block); + } + + static int allocated() { return allocated_; } + + private: + static int allocated_; +}; + +int Water::allocated_ = 0; + +// This event listener monitors how many Water objects are created and +// destroyed by each test, and reports a failure if a test leaks some Water +// objects. It does this by comparing the number of live Water objects at +// the beginning of a test and at the end of a test. +class LeakChecker : public EmptyTestEventListener { + private: + // Called before a test starts. + virtual void OnTestStart(const TestInfo& /* test_info */) { + initially_allocated_ = Water::allocated(); + } + + // Called after a test ends. + virtual void OnTestEnd(const TestInfo& /* test_info */) { + int difference = Water::allocated() - initially_allocated_; + + // You can generate a failure in any event handler except + // OnTestPartResult. Just use an appropriate Google Test assertion to do + // it. 
+ EXPECT_LE(difference, 0) << "Leaked " << difference << " unit(s) of Water!"; + } + + int initially_allocated_; +}; + +TEST(ListenersTest, DoesNotLeak) { + Water* water = new Water; + delete water; +} + +// This should fail when the --check_for_leaks command line flag is +// specified. +TEST(ListenersTest, LeaksWater) { + Water* water = new Water; + EXPECT_TRUE(water != NULL); +} + +} // namespace + +int main(int argc, char **argv) { + InitGoogleTest(&argc, argv); + + bool check_for_leaks = false; + if (argc > 1 && strcmp(argv[1], "--check_for_leaks") == 0 ) + check_for_leaks = true; + else + printf("%s\n", "Run this program with --check_for_leaks to enable " + "custom leak checking in the tests."); + + // If we are given the --check_for_leaks command line flag, installs the + // leak checker. + if (check_for_leaks) { + TestEventListeners& listeners = UnitTest::GetInstance()->listeners(); + + // Adds the leak checker to the end of the test event listener list, + // after the default text output printer and the default XML report + // generator. + // + // The order is important - it ensures that failures generated in the + // leak checker's OnTestEnd() method are processed by the text and XML + // printers *before* their OnTestEnd() methods are called, such that + // they are attributed to the right test. Remember that a listener + // receives an OnXyzStart event *after* listeners preceding it in the + // list received that event, and receives an OnXyzEnd event *before* + // listeners preceding it. + // + // We don't need to worry about deleting the new listener later, as + // Google Test will do it. + listeners.Append(new LeakChecker); + } + return RUN_ALL_TESTS(); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + + +// This sample shows how to write a simple unit test for a function, +// using Google C++ testing framework. 
+// +// Writing a unit test using Google C++ testing framework is easy as 1-2-3: + + +// Step 1. Include necessary header files such that the stuff your +// test logic needs is declared. +// +// Don't forget gtest.h, which declares the testing framework. + +#include +#include "sample1.h" +#include "gtest/gtest.h" + + +// Step 2. Use the TEST macro to define your tests. +// +// TEST has two parameters: the test case name and the test name. +// After using the macro, you should define your test logic between a +// pair of braces. You can use a bunch of macros to indicate the +// success or failure of a test. EXPECT_TRUE and EXPECT_EQ are +// examples of such macros. For a complete list, see gtest.h. +// +// +// +// In Google Test, tests are grouped into test cases. This is how we +// keep test code organized. You should put logically related tests +// into the same test case. +// +// The test case name and the test name should both be valid C++ +// identifiers. And you should not use underscore (_) in the names. +// +// Google Test guarantees that each test you define is run exactly +// once, but it makes no guarantee on the order the tests are +// executed. Therefore, you should write your tests in such a way +// that their results don't depend on their order. +// +// + + +// Tests Factorial(). + +// Tests factorial of negative numbers. +TEST(FactorialTest, Negative) { + // This test is named "Negative", and belongs to the "FactorialTest" + // test case. + EXPECT_EQ(1, Factorial(-5)); + EXPECT_EQ(1, Factorial(-1)); + EXPECT_GT(Factorial(-10), 0); + + // + // + // EXPECT_EQ(expected, actual) is the same as + // + // EXPECT_TRUE((expected) == (actual)) + // + // except that it will print both the expected value and the actual + // value when the assertion fails. This is very helpful for + // debugging. Therefore in this case EXPECT_EQ is preferred. + // + // On the other hand, EXPECT_TRUE accepts any Boolean expression, + // and is thus more general. + // + // +} + +// Tests factorial of 0. +TEST(FactorialTest, Zero) { + EXPECT_EQ(1, Factorial(0)); +} + +// Tests factorial of positive numbers. +TEST(FactorialTest, Positive) { + EXPECT_EQ(1, Factorial(1)); + EXPECT_EQ(2, Factorial(2)); + EXPECT_EQ(6, Factorial(3)); + EXPECT_EQ(40320, Factorial(8)); +} + + +// Tests IsPrime() + +// Tests negative input. +TEST(IsPrimeTest, Negative) { + // This test belongs to the IsPrimeTest test case. + + EXPECT_FALSE(IsPrime(-1)); + EXPECT_FALSE(IsPrime(-2)); + EXPECT_FALSE(IsPrime(INT_MIN)); +} + +// Tests some trivial cases. +TEST(IsPrimeTest, Trivial) { + EXPECT_FALSE(IsPrime(0)); + EXPECT_FALSE(IsPrime(1)); + EXPECT_TRUE(IsPrime(2)); + EXPECT_TRUE(IsPrime(3)); +} + +// Tests positive input. +TEST(IsPrimeTest, Positive) { + EXPECT_FALSE(IsPrime(4)); + EXPECT_TRUE(IsPrime(5)); + EXPECT_FALSE(IsPrime(6)); + EXPECT_TRUE(IsPrime(23)); +} + +// Step 3. Call RUN_ALL_TESTS() in main(). +// +// We do this by linking in src/gtest_main.cc file, which consists of +// a main() function which calls RUN_ALL_TESTS() for us. +// +// This runs all the tests you've defined, prints the result, and +// returns 0 if successful, or 1 otherwise. +// +// Did you notice that we didn't register the tests? The +// RUN_ALL_TESTS() macro magically knows about all the tests we +// defined. Isn't this convenient? +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + + +// This sample shows how to write a more complex unit test for a class +// that has multiple member functions. +// +// Usually, it's a good idea to have one test for each method in your +// class. You don't have to do that exactly, but it helps to keep +// your tests organized. You may also throw in additional tests as +// needed. + +#include "sample2.h" +#include "gtest/gtest.h" + +// In this example, we test the MyString class (a simple string). + +// Tests the default c'tor. +TEST(MyString, DefaultConstructor) { + const MyString s; + + // Asserts that s.c_string() returns NULL. + // + // + // + // If we write NULL instead of + // + // static_cast(NULL) + // + // in this assertion, it will generate a warning on gcc 3.4. The + // reason is that EXPECT_EQ needs to know the types of its + // arguments in order to print them when it fails. Since NULL is + // #defined as 0, the compiler will use the formatter function for + // int to print it. However, gcc thinks that NULL should be used as + // a pointer, not an int, and therefore complains. + // + // The root of the problem is C++'s lack of distinction between the + // integer number 0 and the null pointer constant. Unfortunately, + // we have to live with this fact. + // + // + EXPECT_STREQ(NULL, s.c_string()); + + EXPECT_EQ(0u, s.Length()); +} + +const char kHelloString[] = "Hello, world!"; + +// Tests the c'tor that accepts a C string. +TEST(MyString, ConstructorFromCString) { + const MyString s(kHelloString); + EXPECT_EQ(0, strcmp(s.c_string(), kHelloString)); + EXPECT_EQ(sizeof(kHelloString)/sizeof(kHelloString[0]) - 1, + s.Length()); +} + +// Tests the copy c'tor. +TEST(MyString, CopyConstructor) { + const MyString s1(kHelloString); + const MyString s2 = s1; + EXPECT_EQ(0, strcmp(s2.c_string(), kHelloString)); +} + +// Tests the Set method. 
+TEST(MyString, Set) { + MyString s; + + s.Set(kHelloString); + EXPECT_EQ(0, strcmp(s.c_string(), kHelloString)); + + // Set should work when the input pointer is the same as the one + // already in the MyString object. + s.Set(s.c_string()); + EXPECT_EQ(0, strcmp(s.c_string(), kHelloString)); + + // Can we set the MyString to NULL? + s.Set(NULL); + EXPECT_STREQ(NULL, s.c_string()); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + + +// In this example, we use a more advanced feature of Google Test called +// test fixture. +// +// A test fixture is a place to hold objects and functions shared by +// all tests in a test case. Using a test fixture avoids duplicating +// the test code necessary to initialize and cleanup those common +// objects for each test. It is also useful for defining sub-routines +// that your tests need to invoke a lot. +// +// +// +// The tests share the test fixture in the sense of code sharing, not +// data sharing. Each test is given its own fresh copy of the +// fixture. You cannot expect the data modified by one test to be +// passed on to another test, which is a bad idea. +// +// The reason for this design is that tests should be independent and +// repeatable. In particular, a test should not fail as the result of +// another test's failure. If one test depends on info produced by +// another test, then the two tests should really be one big test. +// +// The macros for indicating the success/failure of a test +// (EXPECT_TRUE, FAIL, etc) need to know what the current test is +// (when Google Test prints the test result, it tells you which test +// each failure belongs to). Technically, these macros invoke a +// member function of the Test class. Therefore, you cannot use them +// in a global function. That's why you should put test sub-routines +// in a test fixture. 
+// +// + +#include "sample3-inl.h" +#include "gtest/gtest.h" + +// To use a test fixture, derive a class from testing::Test. +class QueueTest : public testing::Test { + protected: // You should make the members protected s.t. they can be + // accessed from sub-classes. + + // virtual void SetUp() will be called before each test is run. You + // should define it if you need to initialize the varaibles. + // Otherwise, this can be skipped. + virtual void SetUp() { + q1_.Enqueue(1); + q2_.Enqueue(2); + q2_.Enqueue(3); + } + + // virtual void TearDown() will be called after each test is run. + // You should define it if there is cleanup work to do. Otherwise, + // you don't have to provide it. + // + // virtual void TearDown() { + // } + + // A helper function that some test uses. + static int Double(int n) { + return 2*n; + } + + // A helper function for testing Queue::Map(). + void MapTester(const Queue * q) { + // Creates a new queue, where each element is twice as big as the + // corresponding one in q. + const Queue * const new_q = q->Map(Double); + + // Verifies that the new queue has the same size as q. + ASSERT_EQ(q->Size(), new_q->Size()); + + // Verifies the relationship between the elements of the two queues. + for ( const QueueNode * n1 = q->Head(), * n2 = new_q->Head(); + n1 != NULL; n1 = n1->next(), n2 = n2->next() ) { + EXPECT_EQ(2 * n1->element(), n2->element()); + } + + delete new_q; + } + + // Declares the variables your tests want to use. + Queue q0_; + Queue q1_; + Queue q2_; +}; + +// When you have a test fixture, you define a test using TEST_F +// instead of TEST. + +// Tests the default c'tor. +TEST_F(QueueTest, DefaultConstructor) { + // You can access data in the test fixture here. + EXPECT_EQ(0u, q0_.Size()); +} + +// Tests Dequeue(). +TEST_F(QueueTest, Dequeue) { + int * n = q0_.Dequeue(); + EXPECT_TRUE(n == NULL); + + n = q1_.Dequeue(); + ASSERT_TRUE(n != NULL); + EXPECT_EQ(1, *n); + EXPECT_EQ(0u, q1_.Size()); + delete n; + + n = q2_.Dequeue(); + ASSERT_TRUE(n != NULL); + EXPECT_EQ(2, *n); + EXPECT_EQ(1u, q2_.Size()); + delete n; +} + +// Tests the Queue::Map() function. +TEST_F(QueueTest, Map) { + MapTester(&q0_); + MapTester(&q1_); + MapTester(&q2_); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/gtest.h" +#include "sample4.h" + +// Tests the Increment() method. +TEST(Counter, Increment) { + Counter c; + + // EXPECT_EQ() evaluates its arguments exactly once, so they + // can have side effects. + + EXPECT_EQ(0, c.Increment()); + EXPECT_EQ(1, c.Increment()); + EXPECT_EQ(2, c.Increment()); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// This sample teaches how to reuse a test fixture in multiple test +// cases by deriving sub-fixtures from it. +// +// When you define a test fixture, you specify the name of the test +// case that will use this fixture. Therefore, a test fixture can +// be used by only one test case. +// +// Sometimes, more than one test cases may want to use the same or +// slightly different test fixtures. For example, you may want to +// make sure that all tests for a GUI library don't leak important +// system resources like fonts and brushes. In Google Test, you do +// this by putting the shared logic in a super (as in "super class") +// test fixture, and then have each test case use a fixture derived +// from this super fixture. + +#include +#include +#include "sample3-inl.h" +#include "gtest/gtest.h" +#include "sample1.h" + +// In this sample, we want to ensure that every test finishes within +// ~5 seconds. If a test takes longer to run, we consider it a +// failure. 
+// +// We put the code for timing a test in a test fixture called +// "QuickTest". QuickTest is intended to be the super fixture that +// other fixtures derive from, therefore there is no test case with +// the name "QuickTest". This is OK. +// +// Later, we will derive multiple test fixtures from QuickTest. +class QuickTest : public testing::Test { + protected: + // Remember that SetUp() is run immediately before a test starts. + // This is a good place to record the start time. + virtual void SetUp() { + start_time_ = time(NULL); + } + + // TearDown() is invoked immediately after a test finishes. Here we + // check if the test was too slow. + virtual void TearDown() { + // Gets the time when the test finishes + const time_t end_time = time(NULL); + + // Asserts that the test took no more than ~5 seconds. Did you + // know that you can use assertions in SetUp() and TearDown() as + // well? + EXPECT_TRUE(end_time - start_time_ <= 5) << "The test took too long."; + } + + // The UTC time (in seconds) when the test starts + time_t start_time_; +}; + + +// We derive a fixture named IntegerFunctionTest from the QuickTest +// fixture. All tests using this fixture will be automatically +// required to be quick. +class IntegerFunctionTest : public QuickTest { + // We don't need any more logic than already in the QuickTest fixture. + // Therefore the body is empty. +}; + + +// Now we can write tests in the IntegerFunctionTest test case. + +// Tests Factorial() +TEST_F(IntegerFunctionTest, Factorial) { + // Tests factorial of negative numbers. + EXPECT_EQ(1, Factorial(-5)); + EXPECT_EQ(1, Factorial(-1)); + EXPECT_GT(Factorial(-10), 0); + + // Tests factorial of 0. + EXPECT_EQ(1, Factorial(0)); + + // Tests factorial of positive numbers. + EXPECT_EQ(1, Factorial(1)); + EXPECT_EQ(2, Factorial(2)); + EXPECT_EQ(6, Factorial(3)); + EXPECT_EQ(40320, Factorial(8)); +} + + +// Tests IsPrime() +TEST_F(IntegerFunctionTest, IsPrime) { + // Tests negative input. + EXPECT_FALSE(IsPrime(-1)); + EXPECT_FALSE(IsPrime(-2)); + EXPECT_FALSE(IsPrime(INT_MIN)); + + // Tests some trivial cases. + EXPECT_FALSE(IsPrime(0)); + EXPECT_FALSE(IsPrime(1)); + EXPECT_TRUE(IsPrime(2)); + EXPECT_TRUE(IsPrime(3)); + + // Tests positive input. + EXPECT_FALSE(IsPrime(4)); + EXPECT_TRUE(IsPrime(5)); + EXPECT_FALSE(IsPrime(6)); + EXPECT_TRUE(IsPrime(23)); +} + + +// The next test case (named "QueueTest") also needs to be quick, so +// we derive another fixture from QuickTest. +// +// The QueueTest test fixture has some logic and shared objects in +// addition to what's in QuickTest already. We define the additional +// stuff inside the body of the test fixture, as usual. +class QueueTest : public QuickTest { + protected: + virtual void SetUp() { + // First, we need to set up the super fixture (QuickTest). + QuickTest::SetUp(); + + // Second, some additional setup for this fixture. + q1_.Enqueue(1); + q2_.Enqueue(2); + q2_.Enqueue(3); + } + + // By default, TearDown() inherits the behavior of + // QuickTest::TearDown(). As we have no additional cleaning work + // for QueueTest, we omit it here. + // + // virtual void TearDown() { + // QuickTest::TearDown(); + // } + + Queue q0_; + Queue q1_; + Queue q2_; +}; + + +// Now, let's write tests using the QueueTest fixture. + +// Tests the default constructor. +TEST_F(QueueTest, DefaultConstructor) { + EXPECT_EQ(0u, q0_.Size()); +} + +// Tests Dequeue(). 
+TEST_F(QueueTest, Dequeue) { + int* n = q0_.Dequeue(); + EXPECT_TRUE(n == NULL); + + n = q1_.Dequeue(); + EXPECT_TRUE(n != NULL); + EXPECT_EQ(1, *n); + EXPECT_EQ(0u, q1_.Size()); + delete n; + + n = q2_.Dequeue(); + EXPECT_TRUE(n != NULL); + EXPECT_EQ(2, *n); + EXPECT_EQ(1u, q2_.Size()); + delete n; +} + +// If necessary, you can derive further test fixtures from a derived +// fixture itself. For example, you can derive another fixture from +// QueueTest. Google Test imposes no limit on how deep the hierarchy +// can be. In practice, however, you probably don't want it to be too +// deep as to be confusing. +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// This sample shows how to test common properties of multiple +// implementations of the same interface (aka interface tests). + +// The interface and its implementations are in this header. +#include "prime_tables.h" + +#include "gtest/gtest.h" + +// First, we define some factory functions for creating instances of +// the implementations. You may be able to skip this step if all your +// implementations can be constructed the same way. + +template +PrimeTable* CreatePrimeTable(); + +template <> +PrimeTable* CreatePrimeTable() { + return new OnTheFlyPrimeTable; +} + +template <> +PrimeTable* CreatePrimeTable() { + return new PreCalculatedPrimeTable(10000); +} + +// Then we define a test fixture class template. +template +class PrimeTableTest : public testing::Test { + protected: + // The ctor calls the factory function to create a prime table + // implemented by T. + PrimeTableTest() : table_(CreatePrimeTable()) {} + + virtual ~PrimeTableTest() { delete table_; } + + // Note that we test an implementation via the base interface + // instead of the actual implementation class. This is important + // for keeping the tests close to the real world scenario, where the + // implementation is invoked via the base interface. 
It avoids + // got-yas where the implementation class has a method that shadows + // a method with the same name (but slightly different argument + // types) in the base interface, for example. + PrimeTable* const table_; +}; + +#if GTEST_HAS_TYPED_TEST + +using testing::Types; + +// Google Test offers two ways for reusing tests for different types. +// The first is called "typed tests". You should use it if you +// already know *all* the types you are gonna exercise when you write +// the tests. + +// To write a typed test case, first use +// +// TYPED_TEST_CASE(TestCaseName, TypeList); +// +// to declare it and specify the type parameters. As with TEST_F, +// TestCaseName must match the test fixture name. + +// The list of types we want to test. +typedef Types Implementations; + +TYPED_TEST_CASE(PrimeTableTest, Implementations); + +// Then use TYPED_TEST(TestCaseName, TestName) to define a typed test, +// similar to TEST_F. +TYPED_TEST(PrimeTableTest, ReturnsFalseForNonPrimes) { + // Inside the test body, you can refer to the type parameter by + // TypeParam, and refer to the fixture class by TestFixture. We + // don't need them in this example. + + // Since we are in the template world, C++ requires explicitly + // writing 'this->' when referring to members of the fixture class. + // This is something you have to learn to live with. + EXPECT_FALSE(this->table_->IsPrime(-5)); + EXPECT_FALSE(this->table_->IsPrime(0)); + EXPECT_FALSE(this->table_->IsPrime(1)); + EXPECT_FALSE(this->table_->IsPrime(4)); + EXPECT_FALSE(this->table_->IsPrime(6)); + EXPECT_FALSE(this->table_->IsPrime(100)); +} + +TYPED_TEST(PrimeTableTest, ReturnsTrueForPrimes) { + EXPECT_TRUE(this->table_->IsPrime(2)); + EXPECT_TRUE(this->table_->IsPrime(3)); + EXPECT_TRUE(this->table_->IsPrime(5)); + EXPECT_TRUE(this->table_->IsPrime(7)); + EXPECT_TRUE(this->table_->IsPrime(11)); + EXPECT_TRUE(this->table_->IsPrime(131)); +} + +TYPED_TEST(PrimeTableTest, CanGetNextPrime) { + EXPECT_EQ(2, this->table_->GetNextPrime(0)); + EXPECT_EQ(3, this->table_->GetNextPrime(2)); + EXPECT_EQ(5, this->table_->GetNextPrime(3)); + EXPECT_EQ(7, this->table_->GetNextPrime(5)); + EXPECT_EQ(11, this->table_->GetNextPrime(7)); + EXPECT_EQ(131, this->table_->GetNextPrime(128)); +} + +// That's it! Google Test will repeat each TYPED_TEST for each type +// in the type list specified in TYPED_TEST_CASE. Sit back and be +// happy that you don't have to define them multiple times. + +#endif // GTEST_HAS_TYPED_TEST + +#if GTEST_HAS_TYPED_TEST_P + +using testing::Types; + +// Sometimes, however, you don't yet know all the types that you want +// to test when you write the tests. For example, if you are the +// author of an interface and expect other people to implement it, you +// might want to write a set of tests to make sure each implementation +// conforms to some basic requirements, but you don't know what +// implementations will be written in the future. +// +// How can you write the tests without committing to the type +// parameters? That's what "type-parameterized tests" can do for you. +// It is a bit more involved than typed tests, but in return you get a +// test pattern that can be reused in many contexts, which is a big +// win. Here's how you do it: + +// First, define a test fixture class template. Here we just reuse +// the PrimeTableTest fixture defined earlier: + +template +class PrimeTableTest2 : public PrimeTableTest { +}; + +// Then, declare the test case. 
The argument is the name of the test +// fixture, and also the name of the test case (as usual). The _P +// suffix is for "parameterized" or "pattern". +TYPED_TEST_CASE_P(PrimeTableTest2); + +// Next, use TYPED_TEST_P(TestCaseName, TestName) to define a test, +// similar to what you do with TEST_F. +TYPED_TEST_P(PrimeTableTest2, ReturnsFalseForNonPrimes) { + EXPECT_FALSE(this->table_->IsPrime(-5)); + EXPECT_FALSE(this->table_->IsPrime(0)); + EXPECT_FALSE(this->table_->IsPrime(1)); + EXPECT_FALSE(this->table_->IsPrime(4)); + EXPECT_FALSE(this->table_->IsPrime(6)); + EXPECT_FALSE(this->table_->IsPrime(100)); +} + +TYPED_TEST_P(PrimeTableTest2, ReturnsTrueForPrimes) { + EXPECT_TRUE(this->table_->IsPrime(2)); + EXPECT_TRUE(this->table_->IsPrime(3)); + EXPECT_TRUE(this->table_->IsPrime(5)); + EXPECT_TRUE(this->table_->IsPrime(7)); + EXPECT_TRUE(this->table_->IsPrime(11)); + EXPECT_TRUE(this->table_->IsPrime(131)); +} + +TYPED_TEST_P(PrimeTableTest2, CanGetNextPrime) { + EXPECT_EQ(2, this->table_->GetNextPrime(0)); + EXPECT_EQ(3, this->table_->GetNextPrime(2)); + EXPECT_EQ(5, this->table_->GetNextPrime(3)); + EXPECT_EQ(7, this->table_->GetNextPrime(5)); + EXPECT_EQ(11, this->table_->GetNextPrime(7)); + EXPECT_EQ(131, this->table_->GetNextPrime(128)); +} + +// Type-parameterized tests involve one extra step: you have to +// enumerate the tests you defined: +REGISTER_TYPED_TEST_CASE_P( + PrimeTableTest2, // The first argument is the test case name. + // The rest of the arguments are the test names. + ReturnsFalseForNonPrimes, ReturnsTrueForPrimes, CanGetNextPrime); + +// At this point the test pattern is done. However, you don't have +// any real test yet as you haven't said which types you want to run +// the tests with. + +// To turn the abstract test pattern into real tests, you instantiate +// it with a list of types. Usually the test pattern will be defined +// in a .h file, and anyone can #include and instantiate it. You can +// even instantiate it more than once in the same program. To tell +// different instances apart, you give each of them a name, which will +// become part of the test case name and can be used in test filters. + +// The list of types we want to test. Note that it doesn't have to be +// defined at the time we write the TYPED_TEST_P()s. +typedef Types + PrimeTableImplementations; +INSTANTIATE_TYPED_TEST_CASE_P(OnTheFlyAndPreCalculated, // Instance name + PrimeTableTest2, // Test case name + PrimeTableImplementations); // Type list + +#endif // GTEST_HAS_TYPED_TEST_P +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to test common properties of multiple +// implementations of an interface (aka interface tests) using +// value-parameterized tests. Each test in the test case has +// a parameter that is an interface pointer to an implementation +// tested. + +// The interface and its implementations are in this header. +#include "prime_tables.h" + +#include "gtest/gtest.h" + +#if GTEST_HAS_PARAM_TEST + +using ::testing::TestWithParam; +using ::testing::Values; + +// As a general rule, to prevent a test from affecting the tests that come +// after it, you should create and destroy the tested objects for each test +// instead of reusing them. In this sample we will define a simple factory +// function for PrimeTable objects. We will instantiate objects in test's +// SetUp() method and delete them in TearDown() method. +typedef PrimeTable* CreatePrimeTableFunc(); + +PrimeTable* CreateOnTheFlyPrimeTable() { + return new OnTheFlyPrimeTable(); +} + +template +PrimeTable* CreatePreCalculatedPrimeTable() { + return new PreCalculatedPrimeTable(max_precalculated); +} + +// Inside the test body, fixture constructor, SetUp(), and TearDown() you +// can refer to the test parameter by GetParam(). In this case, the test +// parameter is a factory function which we call in fixture's SetUp() to +// create and store an instance of PrimeTable. +class PrimeTableTest : public TestWithParam { + public: + virtual ~PrimeTableTest() { delete table_; } + virtual void SetUp() { table_ = (*GetParam())(); } + virtual void TearDown() { + delete table_; + table_ = NULL; + } + + protected: + PrimeTable* table_; +}; + +TEST_P(PrimeTableTest, ReturnsFalseForNonPrimes) { + EXPECT_FALSE(table_->IsPrime(-5)); + EXPECT_FALSE(table_->IsPrime(0)); + EXPECT_FALSE(table_->IsPrime(1)); + EXPECT_FALSE(table_->IsPrime(4)); + EXPECT_FALSE(table_->IsPrime(6)); + EXPECT_FALSE(table_->IsPrime(100)); +} + +TEST_P(PrimeTableTest, ReturnsTrueForPrimes) { + EXPECT_TRUE(table_->IsPrime(2)); + EXPECT_TRUE(table_->IsPrime(3)); + EXPECT_TRUE(table_->IsPrime(5)); + EXPECT_TRUE(table_->IsPrime(7)); + EXPECT_TRUE(table_->IsPrime(11)); + EXPECT_TRUE(table_->IsPrime(131)); +} + +TEST_P(PrimeTableTest, CanGetNextPrime) { + EXPECT_EQ(2, table_->GetNextPrime(0)); + EXPECT_EQ(3, table_->GetNextPrime(2)); + EXPECT_EQ(5, table_->GetNextPrime(3)); + EXPECT_EQ(7, table_->GetNextPrime(5)); + EXPECT_EQ(11, table_->GetNextPrime(7)); + EXPECT_EQ(131, table_->GetNextPrime(128)); +} + +// In order to run value-parameterized tests, you need to instantiate them, +// or bind them to a list of values which will be used as test parameters. 
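+// (Editor's illustrative note, not part of the upstream sample: the
+// instantiation name becomes a prefix of the generated test names, e.g. the
+// "OnTheFlyAndPreCalculated" instance defined just below produces tests like
+//
+//   OnTheFlyAndPreCalculated/PrimeTableTest.CanGetNextPrime/0
+//   OnTheFlyAndPreCalculated/PrimeTableTest.CanGetNextPrime/1
+//
+// so one instance can be selected with a filter such as
+//   ./sample7_unittest --gtest_filter='OnTheFlyAndPreCalculated/*'
+// The binary name above is hypothetical.)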
+// You can instantiate them in a different translation module, or even +// instantiate them several times. +// +// Here, we instantiate our tests with a list of two PrimeTable object +// factory functions: +INSTANTIATE_TEST_CASE_P( + OnTheFlyAndPreCalculated, + PrimeTableTest, + Values(&CreateOnTheFlyPrimeTable, &CreatePreCalculatedPrimeTable<1000>)); + +#else + +// Google Test may not support value-parameterized tests with some +// compilers. If we use conditional compilation to compile out all +// code referring to the gtest_main library, MSVC linker will not link +// that library at all and consequently complain about missing entry +// point defined in that library (fatal error LNK1561: entry point +// must be defined). This dummy test keeps gtest_main linked in. +TEST(DummyTest, ValueParameterizedTestsAreNotSupportedOnThisPlatform) {} + +#endif // GTEST_HAS_PARAM_TEST +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to test code relying on some global flag variables. +// Combine() helps with generating all possible combinations of such flags, +// and each test is given one combination as a parameter. + +// Use class definitions to test from this header. +#include "prime_tables.h" + +#include "gtest/gtest.h" + +#if GTEST_HAS_COMBINE + +// Suppose we want to introduce a new, improved implementation of PrimeTable +// which combines speed of PrecalcPrimeTable and versatility of +// OnTheFlyPrimeTable (see prime_tables.h). Inside it instantiates both +// PrecalcPrimeTable and OnTheFlyPrimeTable and uses the one that is more +// appropriate under the circumstances. But in low memory conditions, it can be +// told to instantiate without PrecalcPrimeTable instance at all and use only +// OnTheFlyPrimeTable. +class HybridPrimeTable : public PrimeTable { + public: + HybridPrimeTable(bool force_on_the_fly, int max_precalculated) + : on_the_fly_impl_(new OnTheFlyPrimeTable), + precalc_impl_(force_on_the_fly ? 
NULL : + new PreCalculatedPrimeTable(max_precalculated)), + max_precalculated_(max_precalculated) {} + virtual ~HybridPrimeTable() { + delete on_the_fly_impl_; + delete precalc_impl_; + } + + virtual bool IsPrime(int n) const { + if (precalc_impl_ != NULL && n < max_precalculated_) + return precalc_impl_->IsPrime(n); + else + return on_the_fly_impl_->IsPrime(n); + } + + virtual int GetNextPrime(int p) const { + int next_prime = -1; + if (precalc_impl_ != NULL && p < max_precalculated_) + next_prime = precalc_impl_->GetNextPrime(p); + + return next_prime != -1 ? next_prime : on_the_fly_impl_->GetNextPrime(p); + } + + private: + OnTheFlyPrimeTable* on_the_fly_impl_; + PreCalculatedPrimeTable* precalc_impl_; + int max_precalculated_; +}; + +using ::testing::TestWithParam; +using ::testing::Bool; +using ::testing::Values; +using ::testing::Combine; + +// To test all code paths for HybridPrimeTable we must test it with numbers +// both within and outside PreCalculatedPrimeTable's capacity and also with +// PreCalculatedPrimeTable disabled. We do this by defining fixture which will +// accept different combinations of parameters for instantiating a +// HybridPrimeTable instance. +class PrimeTableTest : public TestWithParam< ::testing::tuple > { + protected: + virtual void SetUp() { + // This can be written as + // + // bool force_on_the_fly; + // int max_precalculated; + // tie(force_on_the_fly, max_precalculated) = GetParam(); + // + // once the Google C++ Style Guide allows use of ::std::tr1::tie. + // + bool force_on_the_fly = ::testing::get<0>(GetParam()); + int max_precalculated = ::testing::get<1>(GetParam()); + table_ = new HybridPrimeTable(force_on_the_fly, max_precalculated); + } + virtual void TearDown() { + delete table_; + table_ = NULL; + } + HybridPrimeTable* table_; +}; + +TEST_P(PrimeTableTest, ReturnsFalseForNonPrimes) { + // Inside the test body, you can refer to the test parameter by GetParam(). + // In this case, the test parameter is a PrimeTable interface pointer which + // we can use directly. + // Please note that you can also save it in the fixture's SetUp() method + // or constructor and use saved copy in the tests. + + EXPECT_FALSE(table_->IsPrime(-5)); + EXPECT_FALSE(table_->IsPrime(0)); + EXPECT_FALSE(table_->IsPrime(1)); + EXPECT_FALSE(table_->IsPrime(4)); + EXPECT_FALSE(table_->IsPrime(6)); + EXPECT_FALSE(table_->IsPrime(100)); +} + +TEST_P(PrimeTableTest, ReturnsTrueForPrimes) { + EXPECT_TRUE(table_->IsPrime(2)); + EXPECT_TRUE(table_->IsPrime(3)); + EXPECT_TRUE(table_->IsPrime(5)); + EXPECT_TRUE(table_->IsPrime(7)); + EXPECT_TRUE(table_->IsPrime(11)); + EXPECT_TRUE(table_->IsPrime(131)); +} + +TEST_P(PrimeTableTest, CanGetNextPrime) { + EXPECT_EQ(2, table_->GetNextPrime(0)); + EXPECT_EQ(3, table_->GetNextPrime(2)); + EXPECT_EQ(5, table_->GetNextPrime(3)); + EXPECT_EQ(7, table_->GetNextPrime(5)); + EXPECT_EQ(11, table_->GetNextPrime(7)); + EXPECT_EQ(131, table_->GetNextPrime(128)); +} + +// In order to run value-parameterized tests, you need to instantiate them, +// or bind them to a list of values which will be used as test parameters. +// You can instantiate them in a different translation module, or even +// instantiate them several times. +// +// Here, we instantiate our tests with a list of parameters. We must combine +// all variations of the boolean flag suppressing PrecalcPrimeTable and some +// meaningful values for tests. 
We choose a small value (1), and a value that +// will put some of the tested numbers beyond the capability of the +// PrecalcPrimeTable instance and some inside it (10). Combine will produce all +// possible combinations. +INSTANTIATE_TEST_CASE_P(MeaningfulTestParameters, + PrimeTableTest, + Combine(Bool(), Values(1, 10))); + +#else + +// Google Test may not support Combine() with some compilers. If we +// use conditional compilation to compile out all code referring to +// the gtest_main library, MSVC linker will not link that library at +// all and consequently complain about missing entry point defined in +// that library (fatal error LNK1561: entry point must be +// defined). This dummy test keeps gtest_main linked in. +TEST(DummyTest, CombineIsNotSupportedOnThisPlatform) {} + +#endif // GTEST_HAS_COMBINE +// Copyright 2009 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to use Google Test listener API to implement +// an alternative console output and how to use the UnitTest reflection API +// to enumerate test cases and tests and to inspect their results. + +#include + +#include "gtest/gtest.h" + +using ::testing::EmptyTestEventListener; +using ::testing::InitGoogleTest; +using ::testing::Test; +using ::testing::TestCase; +using ::testing::TestEventListeners; +using ::testing::TestInfo; +using ::testing::TestPartResult; +using ::testing::UnitTest; + +namespace { + +// Provides alternative output mode which produces minimal amount of +// information about tests. +class TersePrinter : public EmptyTestEventListener { + private: + // Called before any test activity starts. + virtual void OnTestProgramStart(const UnitTest& /* unit_test */) {} + + // Called after all test activities have ended. + virtual void OnTestProgramEnd(const UnitTest& unit_test) { + fprintf(stdout, "TEST %s\n", unit_test.Passed() ? "PASSED" : "FAILED"); + fflush(stdout); + } + + // Called before a test starts. 
+ virtual void OnTestStart(const TestInfo& test_info) { + fprintf(stdout, + "*** Test %s.%s starting.\n", + test_info.test_case_name(), + test_info.name()); + fflush(stdout); + } + + // Called after a failed assertion or a SUCCEED() invocation. + virtual void OnTestPartResult(const TestPartResult& test_part_result) { + fprintf(stdout, + "%s in %s:%d\n%s\n", + test_part_result.failed() ? "*** Failure" : "Success", + test_part_result.file_name(), + test_part_result.line_number(), + test_part_result.summary()); + fflush(stdout); + } + + // Called after a test ends. + virtual void OnTestEnd(const TestInfo& test_info) { + fprintf(stdout, + "*** Test %s.%s ending.\n", + test_info.test_case_name(), + test_info.name()); + fflush(stdout); + } +}; // class TersePrinter + +TEST(CustomOutputTest, PrintsMessage) { + printf("Printing something from the test body...\n"); +} + +TEST(CustomOutputTest, Succeeds) { + SUCCEED() << "SUCCEED() has been invoked from here"; +} + +TEST(CustomOutputTest, Fails) { + EXPECT_EQ(1, 2) + << "This test fails in order to demonstrate alternative failure messages"; +} + +} // namespace + +int main(int argc, char **argv) { + InitGoogleTest(&argc, argv); + + bool terse_output = false; + if (argc > 1 && strcmp(argv[1], "--terse_output") == 0 ) + terse_output = true; + else + printf("%s\n", "Run this program with --terse_output to change the way " + "it prints its output."); + + UnitTest& unit_test = *UnitTest::GetInstance(); + + // If we are given the --terse_output command line flag, suppresses the + // standard output and attaches own result printer. + if (terse_output) { + TestEventListeners& listeners = unit_test.listeners(); + + // Removes the default console output listener from the list so it will + // not receive events from Google Test and won't print any output. Since + // this operation transfers ownership of the listener to the caller we + // have to delete it as well. + delete listeners.Release(listeners.default_result_printer()); + + // Adds the custom output listener to the list. It will now receive + // events from Google Test and print the alternative output. We don't + // have to worry about deleting it since Google Test assumes ownership + // over it after adding it to the list. + listeners.Append(new TersePrinter); + } + int ret_val = RUN_ALL_TESTS(); + + // This is an example of using the UnitTest reflection API to inspect test + // results. Here we discount failures from the tests we expected to fail. + int unexpectedly_failed_tests = 0; + for (int i = 0; i < unit_test.total_test_case_count(); ++i) { + const TestCase& test_case = *unit_test.GetTestCase(i); + for (int j = 0; j < test_case.total_test_count(); ++j) { + const TestInfo& test_info = *test_case.GetTestInfo(j); + // Counts failed tests that were not meant to fail (those without + // 'Fails' in the name). + if (test_info.result()->Failed() && + strcmp(test_info.name(), "Fails") != 0) { + unexpectedly_failed_tests++; + } + } + } + + // Test that were meant to fail should not affect the test program outcome. + if (unexpectedly_failed_tests == 0) + ret_val = 0; + + return ret_val; +} +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// +// Google C++ Testing Framework (Google Test) +// +// Sometimes it's desirable to build Google Test by compiling a single file. +// This file serves this purpose. + +// This line ensures that gtest.h can be compiled on its own, even +// when it's fused. +#include "gtest/gtest.h" + +// The following lines pull in the real gtest *.cc files. +#include "src/gtest.cc" +#include "src/gtest-death-test.cc" +#include "src/gtest-filepath.cc" +#include "src/gtest-port.cc" +#include "src/gtest-printers.cc" +#include "src/gtest-test-part.cc" +#include "src/gtest-typed-test.cc" +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev) +// +// This file implements death tests. 
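+//
+// (Editor's illustrative sketch, not part of the upstream file: the
+// machinery implemented below backs the user-facing death test assertions,
+// which are typically written as
+//
+//   TEST(MyDeathTest, DiesOnBadInput) {
+//     EXPECT_DEATH(ParseConfig(NULL), "config must not be null");
+//     EXPECT_EXIT(Shutdown(2), ::testing::ExitedWithCode(2), "shutting down");
+//     EXPECT_EXIT(Crash(), ::testing::KilledBySignal(SIGSEGV), "");  // POSIX only
+//   }
+//
+// ParseConfig(), Shutdown() and Crash() are hypothetical functions used only
+// for illustration; the second argument of EXPECT_DEATH and the third of
+// EXPECT_EXIT are regular expressions matched against the child's stderr.)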
+ +#include "gtest/gtest-death-test.h" +#include "gtest/internal/gtest-port.h" +#include "gtest/internal/custom/gtest.h" + +#if GTEST_HAS_DEATH_TEST + +# if GTEST_OS_MAC +# include +# endif // GTEST_OS_MAC + +# include +# include +# include + +# if GTEST_OS_LINUX +# include +# endif // GTEST_OS_LINUX + +# include + +# if GTEST_OS_WINDOWS +# include +# else +# include +# include +# endif // GTEST_OS_WINDOWS + +# if GTEST_OS_QNX +# include +# endif // GTEST_OS_QNX + +#endif // GTEST_HAS_DEATH_TEST + +#include "gtest/gtest-message.h" +#include "gtest/internal/gtest-string.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick exists to +// prevent the accidental inclusion of gtest-internal-inl.h in the +// user's code. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +namespace testing { + +// Constants. + +// The default death test style. +static const char kDefaultDeathTestStyle[] = "fast"; + +GTEST_DEFINE_string_( + death_test_style, + internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle), + "Indicates how to run a death test in a forked child process: " + "\"threadsafe\" (child process re-executes the test binary " + "from the beginning, running only the specific death test) or " + "\"fast\" (child process runs the death test immediately " + "after forking)."); + +GTEST_DEFINE_bool_( + death_test_use_fork, + internal::BoolFromGTestEnv("death_test_use_fork", false), + "Instructs to use fork()/_exit() instead of clone() in death tests. " + "Ignored and always uses fork() on POSIX systems where clone() is not " + "implemented. Useful when running under valgrind or similar tools if " + "those do not support clone(). Valgrind 3.3.1 will just fail if " + "it sees an unsupported combination of clone() flags. " + "It is not recommended to use this flag w/o valgrind though it will " + "work in 99% of the cases. Once valgrind is fixed, this flag will " + "most likely be removed."); + +namespace internal { +GTEST_DEFINE_string_( + internal_run_death_test, "", + "Indicates the file, line number, temporal index of " + "the single death test to run, and a file descriptor to " + "which a success code may be sent, all separated by " + "the '|' characters. This flag is specified if and only if the current " + "process is a sub-process launched for running a thread-safe " + "death test. FOR INTERNAL USE ONLY."); +} // namespace internal + +#if GTEST_HAS_DEATH_TEST + +namespace internal { + +// Valid only for fast death tests. Indicates the code is running in the +// child process of a fast style death test. +# if !GTEST_OS_WINDOWS +static bool g_in_fast_death_test_child = false; +# endif + +// Returns a Boolean value indicating whether the caller is currently +// executing in the context of the death test child process. Tools such as +// Valgrind heap checkers may need this to modify their behavior in death +// tests. IMPORTANT: This is an internal utility. Using it may break the +// implementation of death tests. User code MUST NOT use it. +bool InDeathTestChild() { +# if GTEST_OS_WINDOWS + + // On Windows, death tests are thread-safe regardless of the value of the + // death_test_style flag. 
+ return !GTEST_FLAG(internal_run_death_test).empty(); + +# else + + if (GTEST_FLAG(death_test_style) == "threadsafe") + return !GTEST_FLAG(internal_run_death_test).empty(); + else + return g_in_fast_death_test_child; +#endif +} + +} // namespace internal + +// ExitedWithCode constructor. +ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) { +} + +// ExitedWithCode function-call operator. +bool ExitedWithCode::operator()(int exit_status) const { +# if GTEST_OS_WINDOWS + + return exit_status == exit_code_; + +# else + + return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_; + +# endif // GTEST_OS_WINDOWS +} + +# if !GTEST_OS_WINDOWS +// KilledBySignal constructor. +KilledBySignal::KilledBySignal(int signum) : signum_(signum) { +} + +// KilledBySignal function-call operator. +bool KilledBySignal::operator()(int exit_status) const { +# if defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_) + { + bool result; + if (GTEST_KILLED_BY_SIGNAL_OVERRIDE_(signum_, exit_status, &result)) { + return result; + } + } +# endif // defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_) + return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_; +} +# endif // !GTEST_OS_WINDOWS + +namespace internal { + +// Utilities needed for death tests. + +// Generates a textual description of a given exit code, in the format +// specified by wait(2). +static std::string ExitSummary(int exit_code) { + Message m; + +# if GTEST_OS_WINDOWS + + m << "Exited with exit status " << exit_code; + +# else + + if (WIFEXITED(exit_code)) { + m << "Exited with exit status " << WEXITSTATUS(exit_code); + } else if (WIFSIGNALED(exit_code)) { + m << "Terminated by signal " << WTERMSIG(exit_code); + } +# ifdef WCOREDUMP + if (WCOREDUMP(exit_code)) { + m << " (core dumped)"; + } +# endif +# endif // GTEST_OS_WINDOWS + + return m.GetString(); +} + +// Returns true if exit_status describes a process that was terminated +// by a signal, or exited normally with a nonzero exit code. +bool ExitedUnsuccessfully(int exit_status) { + return !ExitedWithCode(0)(exit_status); +} + +# if !GTEST_OS_WINDOWS +// Generates a textual failure message when a death test finds more than +// one thread running, or cannot determine the number of threads, prior +// to executing the given statement. It is the responsibility of the +// caller not to pass a thread_count of 1. +static std::string DeathTestThreadWarning(size_t thread_count) { + Message msg; + msg << "Death tests use fork(), which is unsafe particularly" + << " in a threaded context. For this test, " << GTEST_NAME_ << " "; + if (thread_count == 0) + msg << "couldn't detect the number of threads."; + else + msg << "detected " << thread_count << " threads."; + return msg.GetString(); +} +# endif // !GTEST_OS_WINDOWS + +// Flag characters for reporting a death test that did not die. +static const char kDeathTestLived = 'L'; +static const char kDeathTestReturned = 'R'; +static const char kDeathTestThrew = 'T'; +static const char kDeathTestInternalError = 'I'; + +// An enumeration describing all of the possible ways that a death test can +// conclude. DIED means that the process died while executing the test +// code; LIVED means that process lived beyond the end of the test code; +// RETURNED means that the test statement attempted to execute a return +// statement, which is not allowed; THREW means that the test statement +// returned control by throwing an exception. IN_PROGRESS means the test +// has not yet concluded. 
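+// (Editor's illustrative note, not part of the upstream file: RETURNED is
+// reported when the tested statement returns instead of dying, e.g.
+//
+//   EXPECT_DEATH({ if (!armed) return; Detonate(); }, "boom");
+//
+// fails with this outcome whenever the early return is taken; "armed" and
+// Detonate() are hypothetical names used only for illustration.)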
+// TODO(vladl@google.com): Unify names and possibly values for +// AbortReason, DeathTestOutcome, and flag characters above. +enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW }; + +// Routine for aborting the program which is safe to call from an +// exec-style death test child process, in which case the error +// message is propagated back to the parent process. Otherwise, the +// message is simply printed to stderr. In either case, the program +// then exits with status 1. +void DeathTestAbort(const std::string& message) { + // On a POSIX system, this function may be called from a threadsafe-style + // death test child process, which operates on a very small stack. Use + // the heap for any additional non-minuscule memory requirements. + const InternalRunDeathTestFlag* const flag = + GetUnitTestImpl()->internal_run_death_test_flag(); + if (flag != NULL) { + FILE* parent = posix::FDOpen(flag->write_fd(), "w"); + fputc(kDeathTestInternalError, parent); + fprintf(parent, "%s", message.c_str()); + fflush(parent); + _exit(1); + } else { + fprintf(stderr, "%s", message.c_str()); + fflush(stderr); + posix::Abort(); + } +} + +// A replacement for CHECK that calls DeathTestAbort if the assertion +// fails. +# define GTEST_DEATH_TEST_CHECK_(expression) \ + do { \ + if (!::testing::internal::IsTrue(expression)) { \ + DeathTestAbort( \ + ::std::string("CHECK failed: File ") + __FILE__ + ", line " \ + + ::testing::internal::StreamableToString(__LINE__) + ": " \ + + #expression); \ + } \ + } while (::testing::internal::AlwaysFalse()) + +// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for +// evaluating any system call that fulfills two conditions: it must return +// -1 on failure, and set errno to EINTR when it is interrupted and +// should be tried again. The macro expands to a loop that repeatedly +// evaluates the expression as long as it evaluates to -1 and sets +// errno to EINTR. If the expression evaluates to -1 but errno is +// something other than EINTR, DeathTestAbort is called. +# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \ + do { \ + int gtest_retval; \ + do { \ + gtest_retval = (expression); \ + } while (gtest_retval == -1 && errno == EINTR); \ + if (gtest_retval == -1) { \ + DeathTestAbort( \ + ::std::string("CHECK failed: File ") + __FILE__ + ", line " \ + + ::testing::internal::StreamableToString(__LINE__) + ": " \ + + #expression + " != -1"); \ + } \ + } while (::testing::internal::AlwaysFalse()) + +// Returns the message describing the last system error in errno. +std::string GetLastErrnoDescription() { + return errno == 0 ? "" : posix::StrError(errno); +} + +// This is called from a death test parent process to read a failure +// message from the death test child process and log it with the FATAL +// severity. On Windows, the message is read from a pipe handle. On other +// platforms, it is read from a file descriptor. +static void FailFromInternalError(int fd) { + Message error; + char buffer[256]; + int num_read; + + do { + while ((num_read = posix::Read(fd, buffer, 255)) > 0) { + buffer[num_read] = '\0'; + error << buffer; + } + } while (num_read == -1 && errno == EINTR); + + if (num_read == 0) { + GTEST_LOG_(FATAL) << error.GetString(); + } else { + const int last_error = errno; + GTEST_LOG_(FATAL) << "Error while reading death test internal: " + << GetLastErrnoDescription() << " [" << last_error << "]"; + } +} + +// Death test constructor. Increments the running death test count +// for the current test. 
+DeathTest::DeathTest() { + TestInfo* const info = GetUnitTestImpl()->current_test_info(); + if (info == NULL) { + DeathTestAbort("Cannot run a death test outside of a TEST or " + "TEST_F construct"); + } +} + +// Creates and returns a death test by dispatching to the current +// death test factory. +bool DeathTest::Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test) { + return GetUnitTestImpl()->death_test_factory()->Create( + statement, regex, file, line, test); +} + +const char* DeathTest::LastMessage() { + return last_death_test_message_.c_str(); +} + +void DeathTest::set_last_death_test_message(const std::string& message) { + last_death_test_message_ = message; +} + +std::string DeathTest::last_death_test_message_; + +// Provides cross platform implementation for some death functionality. +class DeathTestImpl : public DeathTest { + protected: + DeathTestImpl(const char* a_statement, const RE* a_regex) + : statement_(a_statement), + regex_(a_regex), + spawned_(false), + status_(-1), + outcome_(IN_PROGRESS), + read_fd_(-1), + write_fd_(-1) {} + + // read_fd_ is expected to be closed and cleared by a derived class. + ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); } + + void Abort(AbortReason reason); + virtual bool Passed(bool status_ok); + + const char* statement() const { return statement_; } + const RE* regex() const { return regex_; } + bool spawned() const { return spawned_; } + void set_spawned(bool is_spawned) { spawned_ = is_spawned; } + int status() const { return status_; } + void set_status(int a_status) { status_ = a_status; } + DeathTestOutcome outcome() const { return outcome_; } + void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; } + int read_fd() const { return read_fd_; } + void set_read_fd(int fd) { read_fd_ = fd; } + int write_fd() const { return write_fd_; } + void set_write_fd(int fd) { write_fd_ = fd; } + + // Called in the parent process only. Reads the result code of the death + // test child process via a pipe, interprets it to set the outcome_ + // member, and closes read_fd_. Outputs diagnostics and terminates in + // case of unexpected codes. + void ReadAndInterpretStatusByte(); + + private: + // The textual content of the code this object is testing. This class + // doesn't own this string and should not attempt to delete it. + const char* const statement_; + // The regular expression which test output must match. DeathTestImpl + // doesn't own this object and should not attempt to delete it. + const RE* const regex_; + // True if the death test child process has been successfully spawned. + bool spawned_; + // The exit status of the child process. + int status_; + // How the death test concluded. + DeathTestOutcome outcome_; + // Descriptor to the read end of the pipe to the child process. It is + // always -1 in the child process. The child keeps its write end of the + // pipe in write_fd_. + int read_fd_; + // Descriptor to the child's write end of the pipe to the parent process. + // It is always -1 in the parent process. The parent keeps its end of the + // pipe in read_fd_. + int write_fd_; +}; + +// Called in the parent process only. Reads the result code of the death +// test child process via a pipe, interprets it to set the outcome_ +// member, and closes read_fd_. Outputs diagnostics and terminates in +// case of unexpected codes. 
+void DeathTestImpl::ReadAndInterpretStatusByte() { + char flag; + int bytes_read; + + // The read() here blocks until data is available (signifying the + // failure of the death test) or until the pipe is closed (signifying + // its success), so it's okay to call this in the parent before + // the child process has exited. + do { + bytes_read = posix::Read(read_fd(), &flag, 1); + } while (bytes_read == -1 && errno == EINTR); + + if (bytes_read == 0) { + set_outcome(DIED); + } else if (bytes_read == 1) { + switch (flag) { + case kDeathTestReturned: + set_outcome(RETURNED); + break; + case kDeathTestThrew: + set_outcome(THREW); + break; + case kDeathTestLived: + set_outcome(LIVED); + break; + case kDeathTestInternalError: + FailFromInternalError(read_fd()); // Does not return. + break; + default: + GTEST_LOG_(FATAL) << "Death test child process reported " + << "unexpected status byte (" + << static_cast(flag) << ")"; + } + } else { + GTEST_LOG_(FATAL) << "Read from death test child process failed: " + << GetLastErrnoDescription(); + } + GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd())); + set_read_fd(-1); +} + +// Signals that the death test code which should have exited, didn't. +// Should be called only in a death test child process. +// Writes a status byte to the child's status file descriptor, then +// calls _exit(1). +void DeathTestImpl::Abort(AbortReason reason) { + // The parent process considers the death test to be a failure if + // it finds any data in our pipe. So, here we write a single flag byte + // to the pipe, then exit. + const char status_ch = + reason == TEST_DID_NOT_DIE ? kDeathTestLived : + reason == TEST_THREW_EXCEPTION ? kDeathTestThrew : kDeathTestReturned; + + GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1)); + // We are leaking the descriptor here because on some platforms (i.e., + // when built as Windows DLL), destructors of global objects will still + // run after calling _exit(). On such systems, write_fd_ will be + // indirectly closed from the destructor of UnitTestImpl, causing double + // close if it is also closed here. On debug configurations, double close + // may assert. As there are no in-process buffers to flush here, we are + // relying on the OS to close the descriptor after the process terminates + // when the destructors are not run. + _exit(1); // Exits w/o any normal exit hooks (we were supposed to crash) +} + +// Returns an indented copy of stderr output for a death test. +// This makes distinguishing death test output lines from regular log lines +// much easier. +static ::std::string FormatDeathTestOutput(const ::std::string& output) { + ::std::string ret; + for (size_t at = 0; ; ) { + const size_t line_end = output.find('\n', at); + ret += "[ DEATH ] "; + if (line_end == ::std::string::npos) { + ret += output.substr(at); + break; + } + ret += output.substr(at, line_end + 1 - at); + at = line_end + 1; + } + return ret; +} + +// Assesses the success or failure of a death test, using both private +// members which have previously been set, and one argument: +// +// Private data members: +// outcome: An enumeration describing how the death test +// concluded: DIED, LIVED, THREW, or RETURNED. The death test +// fails in the latter three cases. +// status: The exit status of the child process. On *nix, it is in the +// in the format specified by wait(2). On Windows, this is the +// value supplied to the ExitProcess() API or a numeric code +// of the exception that terminated the program. 
+// regex: A regular expression object to be applied to +// the test's captured standard error output; the death test +// fails if it does not match. +// +// Argument: +// status_ok: true if exit_status is acceptable in the context of +// this particular death test, which fails if it is false +// +// Returns true iff all of the above conditions are met. Otherwise, the +// first failing condition, in the order given above, is the one that is +// reported. Also sets the last death test message string. +bool DeathTestImpl::Passed(bool status_ok) { + if (!spawned()) + return false; + + const std::string error_message = GetCapturedStderr(); + + bool success = false; + Message buffer; + + buffer << "Death test: " << statement() << "\n"; + switch (outcome()) { + case LIVED: + buffer << " Result: failed to die.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case THREW: + buffer << " Result: threw an exception.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case RETURNED: + buffer << " Result: illegal return in test statement.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case DIED: + if (status_ok) { + const bool matched = RE::PartialMatch(error_message.c_str(), *regex()); + if (matched) { + success = true; + } else { + buffer << " Result: died but not with expected error.\n" + << " Expected: " << regex()->pattern() << "\n" + << "Actual msg:\n" << FormatDeathTestOutput(error_message); + } + } else { + buffer << " Result: died but not with expected exit code:\n" + << " " << ExitSummary(status()) << "\n" + << "Actual msg:\n" << FormatDeathTestOutput(error_message); + } + break; + case IN_PROGRESS: + default: + GTEST_LOG_(FATAL) + << "DeathTest::Passed somehow called before conclusion of test"; + } + + DeathTest::set_last_death_test_message(buffer.GetString()); + return success; +} + +# if GTEST_OS_WINDOWS +// WindowsDeathTest implements death tests on Windows. Due to the +// specifics of starting new processes on Windows, death tests there are +// always threadsafe, and Google Test considers the +// --gtest_death_test_style=fast setting to be equivalent to +// --gtest_death_test_style=threadsafe there. +// +// A few implementation notes: Like the Linux version, the Windows +// implementation uses pipes for child-to-parent communication. But due to +// the specifics of pipes on Windows, some extra steps are required: +// +// 1. The parent creates a communication pipe and stores handles to both +// ends of it. +// 2. The parent starts the child and provides it with the information +// necessary to acquire the handle to the write end of the pipe. +// 3. The child acquires the write end of the pipe and signals the parent +// using a Windows event. +// 4. Now the parent can release the write end of the pipe on its side. If +// this is done before step 3, the object's reference count goes down to +// 0 and it is destroyed, preventing the child from acquiring it. The +// parent now has to release it, or read operations on the read end of +// the pipe will not return when the child terminates. +// 5. The parent reads child's output through the pipe (outcome code and +// any possible error messages) from the pipe, and its stderr and then +// determines whether to fail the test. +// +// Note: to distinguish Win32 API calls from the local method and function +// calls, the former are explicitly resolved in the global namespace. 
+// +class WindowsDeathTest : public DeathTestImpl { + public: + WindowsDeathTest(const char* a_statement, + const RE* a_regex, + const char* file, + int line) + : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {} + + // All of these virtual functions are inherited from DeathTest. + virtual int Wait(); + virtual TestRole AssumeRole(); + + private: + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. + const int line_; + // Handle to the write end of the pipe to the child process. + AutoHandle write_handle_; + // Child process handle. + AutoHandle child_handle_; + // Event the child process uses to signal the parent that it has + // acquired the handle to the write end of the pipe. After seeing this + // event the parent can release its own handles to make sure its + // ReadFile() calls return when the child terminates. + AutoHandle event_handle_; +}; + +// Waits for the child in a death test to exit, returning its exit +// status, or 0 if no child process exists. As a side effect, sets the +// outcome data member. +int WindowsDeathTest::Wait() { + if (!spawned()) + return 0; + + // Wait until the child either signals that it has acquired the write end + // of the pipe or it dies. + const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() }; + switch (::WaitForMultipleObjects(2, + wait_handles, + FALSE, // Waits for any of the handles. + INFINITE)) { + case WAIT_OBJECT_0: + case WAIT_OBJECT_0 + 1: + break; + default: + GTEST_DEATH_TEST_CHECK_(false); // Should not get here. + } + + // The child has acquired the write end of the pipe or exited. + // We release the handle on our side and continue. + write_handle_.Reset(); + event_handle_.Reset(); + + ReadAndInterpretStatusByte(); + + // Waits for the child process to exit if it haven't already. This + // returns immediately if the child has already exited, regardless of + // whether previous calls to WaitForMultipleObjects synchronized on this + // handle or not. + GTEST_DEATH_TEST_CHECK_( + WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(), + INFINITE)); + DWORD status_code; + GTEST_DEATH_TEST_CHECK_( + ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE); + child_handle_.Reset(); + set_status(static_cast(status_code)); + return status(); +} + +// The AssumeRole process for a Windows death test. It creates a child +// process with the same executable as the current process to run the +// death test. The child process is given the --gtest_filter and +// --gtest_internal_run_death_test flags such that it knows to run the +// current death test only. +DeathTest::TestRole WindowsDeathTest::AssumeRole() { + const UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const TestInfo* const info = impl->current_test_info(); + const int death_test_index = info->result()->death_test_count(); + + if (flag != NULL) { + // ParseInternalRunDeathTestFlag() has performed all the necessary + // processing. + set_write_fd(flag->write_fd()); + return EXECUTE_TEST; + } + + // WindowsDeathTest uses an anonymous pipe to communicate results of + // a death test. + SECURITY_ATTRIBUTES handles_are_inheritable = { + sizeof(SECURITY_ATTRIBUTES), NULL, TRUE }; + HANDLE read_handle, write_handle; + GTEST_DEATH_TEST_CHECK_( + ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable, + 0) // Default buffer size. 
+ != FALSE); + set_read_fd(::_open_osfhandle(reinterpret_cast(read_handle), + O_RDONLY)); + write_handle_.Reset(write_handle); + event_handle_.Reset(::CreateEvent( + &handles_are_inheritable, + TRUE, // The event will automatically reset to non-signaled state. + FALSE, // The initial state is non-signalled. + NULL)); // The even is unnamed. + GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL); + const std::string filter_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" + + info->test_case_name() + "." + info->name(); + const std::string internal_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + + "=" + file_ + "|" + StreamableToString(line_) + "|" + + StreamableToString(death_test_index) + "|" + + StreamableToString(static_cast(::GetCurrentProcessId())) + + // size_t has the same width as pointers on both 32-bit and 64-bit + // Windows platforms. + // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx. + "|" + StreamableToString(reinterpret_cast(write_handle)) + + "|" + StreamableToString(reinterpret_cast(event_handle_.Get())); + + char executable_path[_MAX_PATH + 1]; // NOLINT + GTEST_DEATH_TEST_CHECK_( + _MAX_PATH + 1 != ::GetModuleFileNameA(NULL, + executable_path, + _MAX_PATH)); + + std::string command_line = + std::string(::GetCommandLineA()) + " " + filter_flag + " \"" + + internal_flag + "\""; + + DeathTest::set_last_death_test_message(""); + + CaptureStderr(); + // Flush the log buffers since the log streams are shared with the child. + FlushInfoLog(); + + // The child process will share the standard handles with the parent. + STARTUPINFOA startup_info; + memset(&startup_info, 0, sizeof(STARTUPINFO)); + startup_info.dwFlags = STARTF_USESTDHANDLES; + startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE); + startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE); + startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE); + + PROCESS_INFORMATION process_info; + GTEST_DEATH_TEST_CHECK_(::CreateProcessA( + executable_path, + const_cast(command_line.c_str()), + NULL, // Retuned process handle is not inheritable. + NULL, // Retuned thread handle is not inheritable. + TRUE, // Child inherits all inheritable handles (for write_handle_). + 0x0, // Default creation flags. + NULL, // Inherit the parent's environment. + UnitTest::GetInstance()->original_working_dir(), + &startup_info, + &process_info) != FALSE); + child_handle_.Reset(process_info.hProcess); + ::CloseHandle(process_info.hThread); + set_spawned(true); + return OVERSEE_TEST; +} +# else // We are not on Windows. + +// ForkingDeathTest provides implementations for most of the abstract +// methods of the DeathTest interface. Only the AssumeRole method is +// left undefined. +class ForkingDeathTest : public DeathTestImpl { + public: + ForkingDeathTest(const char* statement, const RE* regex); + + // All of these virtual functions are inherited from DeathTest. + virtual int Wait(); + + protected: + void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; } + + private: + // PID of child process during death test; 0 in the child process itself. + pid_t child_pid_; +}; + +// Constructs a ForkingDeathTest. +ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex) + : DeathTestImpl(a_statement, a_regex), + child_pid_(-1) {} + +// Waits for the child in a death test to exit, returning its exit +// status, or 0 if no child process exists. As a side effect, sets the +// outcome data member. 
+int ForkingDeathTest::Wait() { + if (!spawned()) + return 0; + + ReadAndInterpretStatusByte(); + + int status_value; + GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0)); + set_status(status_value); + return status_value; +} + +// A concrete death test class that forks, then immediately runs the test +// in the child process. +class NoExecDeathTest : public ForkingDeathTest { + public: + NoExecDeathTest(const char* a_statement, const RE* a_regex) : + ForkingDeathTest(a_statement, a_regex) { } + virtual TestRole AssumeRole(); +}; + +// The AssumeRole process for a fork-and-run death test. It implements a +// straightforward fork, with a simple pipe to transmit the status byte. +DeathTest::TestRole NoExecDeathTest::AssumeRole() { + const size_t thread_count = GetThreadCount(); + if (thread_count != 1) { + GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count); + } + + int pipe_fd[2]; + GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); + + DeathTest::set_last_death_test_message(""); + CaptureStderr(); + // When we fork the process below, the log file buffers are copied, but the + // file descriptors are shared. We flush all log files here so that closing + // the file descriptors in the child process doesn't throw off the + // synchronization between descriptors and buffers in the parent process. + // This is as close to the fork as possible to avoid a race condition in case + // there are multiple threads running before the death test, and another + // thread writes to the log file. + FlushInfoLog(); + + const pid_t child_pid = fork(); + GTEST_DEATH_TEST_CHECK_(child_pid != -1); + set_child_pid(child_pid); + if (child_pid == 0) { + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0])); + set_write_fd(pipe_fd[1]); + // Redirects all logging to stderr in the child process to prevent + // concurrent writes to the log files. We capture stderr in the parent + // process and append the child process' output to a log. + LogToStderr(); + // Event forwarding to the listeners of event listener API mush be shut + // down in death test subprocesses. + GetUnitTestImpl()->listeners()->SuppressEventForwarding(); + g_in_fast_death_test_child = true; + return EXECUTE_TEST; + } else { + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); + set_read_fd(pipe_fd[0]); + set_spawned(true); + return OVERSEE_TEST; + } +} + +// A concrete death test class that forks and re-executes the main +// program from the beginning, with command-line flags set that cause +// only this specific death test to be run. +class ExecDeathTest : public ForkingDeathTest { + public: + ExecDeathTest(const char* a_statement, const RE* a_regex, + const char* file, int line) : + ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { } + virtual TestRole AssumeRole(); + private: + static ::std::vector + GetArgvsForDeathTestChildProcess() { + ::std::vector args = GetInjectableArgvs(); +# if defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_) + ::std::vector extra_args = + GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_(); + args.insert(args.end(), extra_args.begin(), extra_args.end()); +# endif // defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_) + return args; + } + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. + const int line_; +}; + +// Utility class for accumulating command-line arguments. 
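+// (Editor's illustrative note, not part of the upstream file: the vector it
+// wraps is kept NULL-terminated because execve()/spawn() expect argv in the
+// form { "/path/to/test_binary", "--gtest_filter=...", NULL }, which is why
+// the Add* methods below insert new arguments just before the trailing NULL.)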
+class Arguments { + public: + Arguments() { + args_.push_back(NULL); + } + + ~Arguments() { + for (std::vector::iterator i = args_.begin(); i != args_.end(); + ++i) { + free(*i); + } + } + void AddArgument(const char* argument) { + args_.insert(args_.end() - 1, posix::StrDup(argument)); + } + + template + void AddArguments(const ::std::vector& arguments) { + for (typename ::std::vector::const_iterator i = arguments.begin(); + i != arguments.end(); + ++i) { + args_.insert(args_.end() - 1, posix::StrDup(i->c_str())); + } + } + char* const* Argv() { + return &args_[0]; + } + + private: + std::vector args_; +}; + +// A struct that encompasses the arguments to the child process of a +// threadsafe-style death test process. +struct ExecDeathTestArgs { + char* const* argv; // Command-line arguments for the child's call to exec + int close_fd; // File descriptor to close; the read end of a pipe +}; + +# if GTEST_OS_MAC +inline char** GetEnviron() { + // When Google Test is built as a framework on MacOS X, the environ variable + // is unavailable. Apple's documentation (man environ) recommends using + // _NSGetEnviron() instead. + return *_NSGetEnviron(); +} +# else +// Some POSIX platforms expect you to declare environ. extern "C" makes +// it reside in the global namespace. +extern "C" char** environ; +inline char** GetEnviron() { return environ; } +# endif // GTEST_OS_MAC + +# if !GTEST_OS_QNX +// The main function for a threadsafe-style death test child process. +// This function is called in a clone()-ed process and thus must avoid +// any potentially unsafe operations like malloc or libc functions. +static int ExecDeathTestChildMain(void* child_arg) { + ExecDeathTestArgs* const args = static_cast(child_arg); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd)); + + // We need to execute the test program in the same environment where + // it was originally invoked. Therefore we change to the original + // working directory first. + const char* const original_dir = + UnitTest::GetInstance()->original_working_dir(); + // We can safely call chdir() as it's a direct system call. + if (chdir(original_dir) != 0) { + DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; + } + + // We can safely call execve() as it's a direct system call. We + // cannot use execvp() as it's a libc function and thus potentially + // unsafe. Since execve() doesn't search the PATH, the user must + // invoke the test program via a valid path that contains at least + // one path separator. + execve(args->argv[0], args->argv, GetEnviron()); + DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " + + original_dir + " failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; +} +# endif // !GTEST_OS_QNX + +// Two utility routines that together determine the direction the stack +// grows. +// This could be accomplished more elegantly by a single recursive +// function, but we want to guard against the unlikely possibility of +// a smart compiler optimizing the recursion away. +// +// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining +// StackLowerThanAddress into StackGrowsDown, which then doesn't give +// correct answer. +void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_; +void StackLowerThanAddress(const void* ptr, bool* result) { + int dummy; + *result = (&dummy < ptr); +} + +// Make sure AddressSanitizer does not tamper with the stack here. 
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +bool StackGrowsDown() { + int dummy; + bool result; + StackLowerThanAddress(&dummy, &result); + return result; +} + +// Spawns a child process with the same executable as the current process in +// a thread-safe manner and instructs it to run the death test. The +// implementation uses fork(2) + exec. On systems where clone(2) is +// available, it is used instead, being slightly more thread-safe. On QNX, +// fork supports only single-threaded environments, so this function uses +// spawn(2) there instead. The function dies with an error message if +// anything goes wrong. +static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) { + ExecDeathTestArgs args = { argv, close_fd }; + pid_t child_pid = -1; + +# if GTEST_OS_QNX + // Obtains the current directory and sets it to be closed in the child + // process. + const int cwd_fd = open(".", O_RDONLY); + GTEST_DEATH_TEST_CHECK_(cwd_fd != -1); + GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC)); + // We need to execute the test program in the same environment where + // it was originally invoked. Therefore we change to the original + // working directory first. + const char* const original_dir = + UnitTest::GetInstance()->original_working_dir(); + // We can safely call chdir() as it's a direct system call. + if (chdir(original_dir) != 0) { + DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; + } + + int fd_flags; + // Set close_fd to be closed after spawn. + GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD)); + GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(close_fd, F_SETFD, + fd_flags | FD_CLOEXEC)); + struct inheritance inherit = {0}; + // spawn is a system call. + child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron()); + // Restores the current working directory. + GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd)); + +# else // GTEST_OS_QNX +# if GTEST_OS_LINUX + // When a SIGPROF signal is received while fork() or clone() are executing, + // the process may hang. To avoid this, we ignore SIGPROF here and re-enable + // it after the call to fork()/clone() is complete. + struct sigaction saved_sigprof_action; + struct sigaction ignore_sigprof_action; + memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action)); + sigemptyset(&ignore_sigprof_action.sa_mask); + ignore_sigprof_action.sa_handler = SIG_IGN; + GTEST_DEATH_TEST_CHECK_SYSCALL_(sigaction( + SIGPROF, &ignore_sigprof_action, &saved_sigprof_action)); +# endif // GTEST_OS_LINUX + +# if GTEST_HAS_CLONE + const bool use_fork = GTEST_FLAG(death_test_use_fork); + + if (!use_fork) { + static const bool stack_grows_down = StackGrowsDown(); + const size_t stack_size = getpagesize(); + // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead. + void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED); + + // Maximum stack alignment in bytes: For a downward-growing stack, this + // amount is subtracted from size of the stack space to get an address + // that is within the stack space and is aligned on all systems we care + // about. As far as I know there is no ABI with stack alignment greater + // than 64. We assume stack and stack_size already have alignment of + // kMaxStackAlignment. 
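+    //
+    // For example, with a 4 KiB page and a downward-growing stack (typical on
+    // Linux), the child's initial stack pointer becomes stack + 4096 - 64,
+    // which stays inside the mapping and is 64-byte aligned, since mmap()
+    // returns page-aligned addresses for anonymous mappings.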
+ const size_t kMaxStackAlignment = 64; + void* const stack_top = + static_cast(stack) + + (stack_grows_down ? stack_size - kMaxStackAlignment : 0); + GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment && + reinterpret_cast(stack_top) % kMaxStackAlignment == 0); + + child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args); + + GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1); + } +# else + const bool use_fork = true; +# endif // GTEST_HAS_CLONE + + if (use_fork && (child_pid = fork()) == 0) { + ExecDeathTestChildMain(&args); + _exit(0); + } +# endif // GTEST_OS_QNX +# if GTEST_OS_LINUX + GTEST_DEATH_TEST_CHECK_SYSCALL_( + sigaction(SIGPROF, &saved_sigprof_action, NULL)); +# endif // GTEST_OS_LINUX + + GTEST_DEATH_TEST_CHECK_(child_pid != -1); + return child_pid; +} + +// The AssumeRole process for a fork-and-exec death test. It re-executes the +// main program from the beginning, setting the --gtest_filter +// and --gtest_internal_run_death_test flags to cause only the current +// death test to be re-run. +DeathTest::TestRole ExecDeathTest::AssumeRole() { + const UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const TestInfo* const info = impl->current_test_info(); + const int death_test_index = info->result()->death_test_count(); + + if (flag != NULL) { + set_write_fd(flag->write_fd()); + return EXECUTE_TEST; + } + + int pipe_fd[2]; + GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); + // Clear the close-on-exec flag on the write end of the pipe, lest + // it be closed when the child process does an exec: + GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1); + + const std::string filter_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" + + info->test_case_name() + "." + info->name(); + const std::string internal_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "=" + + file_ + "|" + StreamableToString(line_) + "|" + + StreamableToString(death_test_index) + "|" + + StreamableToString(pipe_fd[1]); + Arguments args; + args.AddArguments(GetArgvsForDeathTestChildProcess()); + args.AddArgument(filter_flag.c_str()); + args.AddArgument(internal_flag.c_str()); + + DeathTest::set_last_death_test_message(""); + + CaptureStderr(); + // See the comment in NoExecDeathTest::AssumeRole for why the next line + // is necessary. + FlushInfoLog(); + + const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); + set_child_pid(child_pid); + set_read_fd(pipe_fd[0]); + set_spawned(true); + return OVERSEE_TEST; +} + +# endif // !GTEST_OS_WINDOWS + +// Creates a concrete DeathTest-derived class that depends on the +// --gtest_death_test_style flag, and sets the pointer pointed to +// by the "test" argument to its address. If the test should be +// skipped, sets that pointer to NULL. Returns true, unless the +// flag is set to an invalid value. 
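+//
+// For illustration, user code normally just selects the style before the
+// assertion runs (FLAGS_gtest_death_test_style is the public spelling of the
+// flag; DoCrash() is a hypothetical statement expected to die):
+//
+//   testing::FLAGS_gtest_death_test_style = "threadsafe";
+//   EXPECT_DEATH(DoCrash(), "fatal error");
+//
+// With "threadsafe" the factory below builds an ExecDeathTest, with "fast" a
+// NoExecDeathTest, and on Windows a WindowsDeathTest handles both styles.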
+bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, + const char* file, int line, + DeathTest** test) { + UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const int death_test_index = impl->current_test_info() + ->increment_death_test_count(); + + if (flag != NULL) { + if (death_test_index > flag->index()) { + DeathTest::set_last_death_test_message( + "Death test count (" + StreamableToString(death_test_index) + + ") somehow exceeded expected maximum (" + + StreamableToString(flag->index()) + ")"); + return false; + } + + if (!(flag->file() == file && flag->line() == line && + flag->index() == death_test_index)) { + *test = NULL; + return true; + } + } + +# if GTEST_OS_WINDOWS + + if (GTEST_FLAG(death_test_style) == "threadsafe" || + GTEST_FLAG(death_test_style) == "fast") { + *test = new WindowsDeathTest(statement, regex, file, line); + } + +# else + + if (GTEST_FLAG(death_test_style) == "threadsafe") { + *test = new ExecDeathTest(statement, regex, file, line); + } else if (GTEST_FLAG(death_test_style) == "fast") { + *test = new NoExecDeathTest(statement, regex); + } + +# endif // GTEST_OS_WINDOWS + + else { // NOLINT - this is more readable than unbalanced brackets inside #if. + DeathTest::set_last_death_test_message( + "Unknown death test style \"" + GTEST_FLAG(death_test_style) + + "\" encountered"); + return false; + } + + return true; +} + +# if GTEST_OS_WINDOWS +// Recreates the pipe and event handles from the provided parameters, +// signals the event, and returns a file descriptor wrapped around the pipe +// handle. This function is called in the child process only. +int GetStatusFileDescriptor(unsigned int parent_process_id, + size_t write_handle_as_size_t, + size_t event_handle_as_size_t) { + AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE, + FALSE, // Non-inheritable. + parent_process_id)); + if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) { + DeathTestAbort("Unable to open parent process " + + StreamableToString(parent_process_id)); + } + + // TODO(vladl@google.com): Replace the following check with a + // compile-time assertion when available. + GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t)); + + const HANDLE write_handle = + reinterpret_cast(write_handle_as_size_t); + HANDLE dup_write_handle; + + // The newly initialized handle is accessible only in in the parent + // process. To obtain one accessible within the child, we need to use + // DuplicateHandle. + if (!::DuplicateHandle(parent_process_handle.Get(), write_handle, + ::GetCurrentProcess(), &dup_write_handle, + 0x0, // Requested privileges ignored since + // DUPLICATE_SAME_ACCESS is used. + FALSE, // Request non-inheritable handler. 
+ DUPLICATE_SAME_ACCESS)) { + DeathTestAbort("Unable to duplicate the pipe handle " + + StreamableToString(write_handle_as_size_t) + + " from the parent process " + + StreamableToString(parent_process_id)); + } + + const HANDLE event_handle = reinterpret_cast(event_handle_as_size_t); + HANDLE dup_event_handle; + + if (!::DuplicateHandle(parent_process_handle.Get(), event_handle, + ::GetCurrentProcess(), &dup_event_handle, + 0x0, + FALSE, + DUPLICATE_SAME_ACCESS)) { + DeathTestAbort("Unable to duplicate the event handle " + + StreamableToString(event_handle_as_size_t) + + " from the parent process " + + StreamableToString(parent_process_id)); + } + + const int write_fd = + ::_open_osfhandle(reinterpret_cast(dup_write_handle), O_APPEND); + if (write_fd == -1) { + DeathTestAbort("Unable to convert pipe handle " + + StreamableToString(write_handle_as_size_t) + + " to a file descriptor"); + } + + // Signals the parent that the write end of the pipe has been acquired + // so the parent can release its own write end. + ::SetEvent(dup_event_handle); + + return write_fd; +} +# endif // GTEST_OS_WINDOWS + +// Returns a newly created InternalRunDeathTestFlag object with fields +// initialized from the GTEST_FLAG(internal_run_death_test) flag if +// the flag is specified; otherwise returns NULL. +InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() { + if (GTEST_FLAG(internal_run_death_test) == "") return NULL; + + // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we + // can use it here. + int line = -1; + int index = -1; + ::std::vector< ::std::string> fields; + SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields); + int write_fd = -1; + +# if GTEST_OS_WINDOWS + + unsigned int parent_process_id = 0; + size_t write_handle_as_size_t = 0; + size_t event_handle_as_size_t = 0; + + if (fields.size() != 6 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index) + || !ParseNaturalNumber(fields[3], &parent_process_id) + || !ParseNaturalNumber(fields[4], &write_handle_as_size_t) + || !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + write_fd = GetStatusFileDescriptor(parent_process_id, + write_handle_as_size_t, + event_handle_as_size_t); +# else + + if (fields.size() != 4 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index) + || !ParseNaturalNumber(fields[3], &write_fd)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + +# endif // GTEST_OS_WINDOWS + + return new InternalRunDeathTestFlag(fields[0], line, index, write_fd); +} + +} // namespace internal + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: keith.ray@gmail.com (Keith Ray) + +#include "gtest/gtest-message.h" +#include "gtest/internal/gtest-filepath.h" +#include "gtest/internal/gtest-port.h" + +#include + +#if GTEST_OS_WINDOWS_MOBILE +# include +#elif GTEST_OS_WINDOWS +# include +# include +#elif GTEST_OS_SYMBIAN +// Symbian OpenC has PATH_MAX in sys/syslimits.h +# include +#else +# include +# include // Some Linux distributions define PATH_MAX here. +#endif // GTEST_OS_WINDOWS_MOBILE + +#if GTEST_OS_WINDOWS +# define GTEST_PATH_MAX_ _MAX_PATH +#elif defined(PATH_MAX) +# define GTEST_PATH_MAX_ PATH_MAX +#elif defined(_XOPEN_PATH_MAX) +# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX +#else +# define GTEST_PATH_MAX_ _POSIX_PATH_MAX +#endif // GTEST_OS_WINDOWS + +#include "gtest/internal/gtest-string.h" + +namespace testing { +namespace internal { + +#if GTEST_OS_WINDOWS +// On Windows, '\\' is the standard path separator, but many tools and the +// Windows API also accept '/' as an alternate path separator. Unless otherwise +// noted, a file path can contain either kind of path separators, or a mixture +// of them. +const char kPathSeparator = '\\'; +const char kAlternatePathSeparator = '/'; +const char kAlternatePathSeparatorString[] = "/"; +# if GTEST_OS_WINDOWS_MOBILE +// Windows CE doesn't have a current directory. You should not use +// the current directory in tests on Windows CE, but this at least +// provides a reasonable fallback. +const char kCurrentDirectoryString[] = "\\"; +// Windows CE doesn't define INVALID_FILE_ATTRIBUTES +const DWORD kInvalidFileAttributes = 0xffffffff; +# else +const char kCurrentDirectoryString[] = ".\\"; +# endif // GTEST_OS_WINDOWS_MOBILE +#else +const char kPathSeparator = '/'; +const char kCurrentDirectoryString[] = "./"; +#endif // GTEST_OS_WINDOWS + +// Returns whether the given character is a valid path separator. +static bool IsPathSeparator(char c) { +#if GTEST_HAS_ALT_PATH_SEP_ + return (c == kPathSeparator) || (c == kAlternatePathSeparator); +#else + return c == kPathSeparator; +#endif +} + +// Returns the current working directory, or "" if unsuccessful. +FilePath FilePath::GetCurrentDir() { +#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT + // Windows CE doesn't have a current directory, so we just return + // something reasonable. + return FilePath(kCurrentDirectoryString); +#elif GTEST_OS_WINDOWS + char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; + return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ? 
"" : cwd); +#else + char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; + char* result = getcwd(cwd, sizeof(cwd)); +# if GTEST_OS_NACL + // getcwd will likely fail in NaCl due to the sandbox, so return something + // reasonable. The user may have provided a shim implementation for getcwd, + // however, so fallback only when failure is detected. + return FilePath(result == NULL ? kCurrentDirectoryString : cwd); +# endif // GTEST_OS_NACL + return FilePath(result == NULL ? "" : cwd); +#endif // GTEST_OS_WINDOWS_MOBILE +} + +// Returns a copy of the FilePath with the case-insensitive extension removed. +// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns +// FilePath("dir/file"). If a case-insensitive extension is not +// found, returns a copy of the original FilePath. +FilePath FilePath::RemoveExtension(const char* extension) const { + const std::string dot_extension = std::string(".") + extension; + if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) { + return FilePath(pathname_.substr( + 0, pathname_.length() - dot_extension.length())); + } + return *this; +} + +// Returns a pointer to the last occurence of a valid path separator in +// the FilePath. On Windows, for example, both '/' and '\' are valid path +// separators. Returns NULL if no path separator was found. +const char* FilePath::FindLastPathSeparator() const { + const char* const last_sep = strrchr(c_str(), kPathSeparator); +#if GTEST_HAS_ALT_PATH_SEP_ + const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator); + // Comparing two pointers of which only one is NULL is undefined. + if (last_alt_sep != NULL && + (last_sep == NULL || last_alt_sep > last_sep)) { + return last_alt_sep; + } +#endif + return last_sep; +} + +// Returns a copy of the FilePath with the directory part removed. +// Example: FilePath("path/to/file").RemoveDirectoryName() returns +// FilePath("file"). If there is no directory part ("just_a_file"), it returns +// the FilePath unmodified. If there is no file part ("just_a_dir/") it +// returns an empty FilePath (""). +// On Windows platform, '\' is the path separator, otherwise it is '/'. +FilePath FilePath::RemoveDirectoryName() const { + const char* const last_sep = FindLastPathSeparator(); + return last_sep ? FilePath(last_sep + 1) : *this; +} + +// RemoveFileName returns the directory path with the filename removed. +// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". +// If the FilePath is "a_file" or "/a_file", RemoveFileName returns +// FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does +// not have a file, like "just/a/dir/", it returns the FilePath unmodified. +// On Windows platform, '\' is the path separator, otherwise it is '/'. +FilePath FilePath::RemoveFileName() const { + const char* const last_sep = FindLastPathSeparator(); + std::string dir; + if (last_sep) { + dir = std::string(c_str(), last_sep + 1 - c_str()); + } else { + dir = kCurrentDirectoryString; + } + return FilePath(dir); +} + +// Helper functions for naming files in a directory for xml output. + +// Given directory = "dir", base_name = "test", number = 0, +// extension = "xml", returns "dir/test.xml". If number is greater +// than zero (e.g., 12), returns "dir/test_12.xml". +// On Windows platform, uses \ as the separator rather than /. +FilePath FilePath::MakeFileName(const FilePath& directory, + const FilePath& base_name, + int number, + const char* extension) { + std::string file; + if (number == 0) { + file = base_name.string() + "." 
+ extension; + } else { + file = base_name.string() + "_" + StreamableToString(number) + + "." + extension; + } + return ConcatPaths(directory, FilePath(file)); +} + +// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml". +// On Windows, uses \ as the separator rather than /. +FilePath FilePath::ConcatPaths(const FilePath& directory, + const FilePath& relative_path) { + if (directory.IsEmpty()) + return relative_path; + const FilePath dir(directory.RemoveTrailingPathSeparator()); + return FilePath(dir.string() + kPathSeparator + relative_path.string()); +} + +// Returns true if pathname describes something findable in the file-system, +// either a file, directory, or whatever. +bool FilePath::FileOrDirectoryExists() const { +#if GTEST_OS_WINDOWS_MOBILE + LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str()); + const DWORD attributes = GetFileAttributes(unicode); + delete [] unicode; + return attributes != kInvalidFileAttributes; +#else + posix::StatStruct file_stat; + return posix::Stat(pathname_.c_str(), &file_stat) == 0; +#endif // GTEST_OS_WINDOWS_MOBILE +} + +// Returns true if pathname describes a directory in the file-system +// that exists. +bool FilePath::DirectoryExists() const { + bool result = false; +#if GTEST_OS_WINDOWS + // Don't strip off trailing separator if path is a root directory on + // Windows (like "C:\\"). + const FilePath& path(IsRootDirectory() ? *this : + RemoveTrailingPathSeparator()); +#else + const FilePath& path(*this); +#endif + +#if GTEST_OS_WINDOWS_MOBILE + LPCWSTR unicode = String::AnsiToUtf16(path.c_str()); + const DWORD attributes = GetFileAttributes(unicode); + delete [] unicode; + if ((attributes != kInvalidFileAttributes) && + (attributes & FILE_ATTRIBUTE_DIRECTORY)) { + result = true; + } +#else + posix::StatStruct file_stat; + result = posix::Stat(path.c_str(), &file_stat) == 0 && + posix::IsDir(file_stat); +#endif // GTEST_OS_WINDOWS_MOBILE + + return result; +} + +// Returns true if pathname describes a root directory. (Windows has one +// root directory per disk drive.) +bool FilePath::IsRootDirectory() const { +#if GTEST_OS_WINDOWS + // TODO(wan@google.com): on Windows a network share like + // \\server\share can be a root directory, although it cannot be the + // current directory. Handle this properly. + return pathname_.length() == 3 && IsAbsolutePath(); +#else + return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]); +#endif +} + +// Returns true if pathname describes an absolute path. +bool FilePath::IsAbsolutePath() const { + const char* const name = pathname_.c_str(); +#if GTEST_OS_WINDOWS + return pathname_.length() >= 3 && + ((name[0] >= 'a' && name[0] <= 'z') || + (name[0] >= 'A' && name[0] <= 'Z')) && + name[1] == ':' && + IsPathSeparator(name[2]); +#else + return IsPathSeparator(name[0]); +#endif +} + +// Returns a pathname for a file that does not currently exist. The pathname +// will be directory/base_name.extension or +// directory/base_name_.extension if directory/base_name.extension +// already exists. The number will be incremented until a pathname is found +// that does not already exist. +// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. +// There could be a race condition if two or more processes are calling this +// function at the same time -- they could both pick the same filename. 
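+//
+// For example (illustrative names), the first attempt is
+// MakeFileName(FilePath("out"), FilePath("FooTest"), 0, "xml"), i.e.
+// "out/FooTest.xml"; if that already exists the loop retries with
+// "out/FooTest_1.xml", "out/FooTest_2.xml", and so on.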
+FilePath FilePath::GenerateUniqueFileName(const FilePath& directory, + const FilePath& base_name, + const char* extension) { + FilePath full_pathname; + int number = 0; + do { + full_pathname.Set(MakeFileName(directory, base_name, number++, extension)); + } while (full_pathname.FileOrDirectoryExists()); + return full_pathname; +} + +// Returns true if FilePath ends with a path separator, which indicates that +// it is intended to represent a directory. Returns false otherwise. +// This does NOT check that a directory (or file) actually exists. +bool FilePath::IsDirectory() const { + return !pathname_.empty() && + IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]); +} + +// Create directories so that path exists. Returns true if successful or if +// the directories already exist; returns false if unable to create directories +// for any reason. +bool FilePath::CreateDirectoriesRecursively() const { + if (!this->IsDirectory()) { + return false; + } + + if (pathname_.length() == 0 || this->DirectoryExists()) { + return true; + } + + const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName()); + return parent.CreateDirectoriesRecursively() && this->CreateFolder(); +} + +// Create the directory so that path exists. Returns true if successful or +// if the directory already exists; returns false if unable to create the +// directory for any reason, including if the parent directory does not +// exist. Not named "CreateDirectory" because that's a macro on Windows. +bool FilePath::CreateFolder() const { +#if GTEST_OS_WINDOWS_MOBILE + FilePath removed_sep(this->RemoveTrailingPathSeparator()); + LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str()); + int result = CreateDirectory(unicode, NULL) ? 0 : -1; + delete [] unicode; +#elif GTEST_OS_WINDOWS + int result = _mkdir(pathname_.c_str()); +#else + int result = mkdir(pathname_.c_str(), 0777); +#endif // GTEST_OS_WINDOWS_MOBILE + + if (result == -1) { + return this->DirectoryExists(); // An error is OK if the directory exists. + } + return true; // No error. +} + +// If input name has a trailing separator character, remove it and return the +// name, otherwise return the name string unmodified. +// On Windows platform, uses \ as the separator, other platforms use /. +FilePath FilePath::RemoveTrailingPathSeparator() const { + return IsDirectory() + ? FilePath(pathname_.substr(0, pathname_.length() - 1)) + : *this; +} + +// Removes any redundant separators that might be in the pathname. +// For example, "bar///foo" becomes "bar/foo". Does not eliminate other +// redundancies that might be in a pathname involving "." or "..". +// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share). +void FilePath::Normalize() { + if (pathname_.c_str() == NULL) { + pathname_ = ""; + return; + } + const char* src = pathname_.c_str(); + char* const dest = new char[pathname_.length() + 1]; + char* dest_ptr = dest; + memset(dest_ptr, 0, pathname_.length() + 1); + + while (*src != '\0') { + *dest_ptr = *src; + if (!IsPathSeparator(*src)) { + src++; + } else { +#if GTEST_HAS_ALT_PATH_SEP_ + if (*dest_ptr == kAlternatePathSeparator) { + *dest_ptr = kPathSeparator; + } +#endif + while (IsPathSeparator(*src)) + src++; + } + dest_ptr++; + } + *dest_ptr = '\0'; + pathname_ = dest; + delete[] dest; +} + +} // namespace internal +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/internal/gtest-port.h" + +#include +#include +#include +#include +#include + +#if GTEST_OS_WINDOWS +# include +# include +# include +# include // Used in ThreadLocal. +#else +# include +#endif // GTEST_OS_WINDOWS + +#if GTEST_OS_MAC +# include +# include +# include +#endif // GTEST_OS_MAC + +#if GTEST_OS_QNX +# include +# include +# include +#endif // GTEST_OS_QNX + +#if GTEST_OS_AIX +# include +# include +#endif // GTEST_OS_AIX + +#include "gtest/gtest-spi.h" +#include "gtest/gtest-message.h" +#include "gtest/internal/gtest-internal.h" +#include "gtest/internal/gtest-string.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick exists to +// prevent the accidental inclusion of gtest-internal-inl.h in the +// user's code. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +namespace testing { +namespace internal { + +#if defined(_MSC_VER) || defined(__BORLANDC__) +// MSVC and C++Builder do not provide a definition of STDERR_FILENO. +const int kStdOutFileno = 1; +const int kStdErrFileno = 2; +#else +const int kStdOutFileno = STDOUT_FILENO; +const int kStdErrFileno = STDERR_FILENO; +#endif // _MSC_VER + +#if GTEST_OS_LINUX + +namespace { +template +T ReadProcFileField(const string& filename, int field) { + std::string dummy; + std::ifstream file(filename.c_str()); + while (field-- > 0) { + file >> dummy; + } + T output = 0; + file >> output; + return output; +} +} // namespace + +// Returns the number of active threads, or 0 when there is an error. 
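+//
+// For example, field 20 of /proc/<pid>/stat is the kernel's num_threads
+// counter, so ReadProcFileField(filename, 19) skips the preceding 19
+// whitespace-separated fields and reads that value.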
+size_t GetThreadCount() { + const string filename = + (Message() << "/proc/" << getpid() << "/stat").GetString(); + return ReadProcFileField(filename, 19); +} + +#elif GTEST_OS_MAC + +size_t GetThreadCount() { + const task_t task = mach_task_self(); + mach_msg_type_number_t thread_count; + thread_act_array_t thread_list; + const kern_return_t status = task_threads(task, &thread_list, &thread_count); + if (status == KERN_SUCCESS) { + // task_threads allocates resources in thread_list and we need to free them + // to avoid leaks. + vm_deallocate(task, + reinterpret_cast(thread_list), + sizeof(thread_t) * thread_count); + return static_cast(thread_count); + } else { + return 0; + } +} + +#elif GTEST_OS_QNX + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +size_t GetThreadCount() { + const int fd = open("/proc/self/as", O_RDONLY); + if (fd < 0) { + return 0; + } + procfs_info process_info; + const int status = + devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL); + close(fd); + if (status == EOK) { + return static_cast(process_info.num_threads); + } else { + return 0; + } +} + +#elif GTEST_OS_AIX + +size_t GetThreadCount() { + struct procentry64 entry; + pid_t pid = getpid(); + int status = getprocs64(&entry, sizeof(entry), NULL, 0, &pid, 1); + if (status == 1) { + return entry.pi_thcount; + } else { + return 0; + } +} + +#else + +size_t GetThreadCount() { + // There's no portable way to detect the number of threads, so we just + // return 0 to indicate that we cannot detect it. + return 0; +} + +#endif // GTEST_OS_LINUX + +#if GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS + +void SleepMilliseconds(int n) { + ::Sleep(n); +} + +AutoHandle::AutoHandle() + : handle_(INVALID_HANDLE_VALUE) {} + +AutoHandle::AutoHandle(Handle handle) + : handle_(handle) {} + +AutoHandle::~AutoHandle() { + Reset(); +} + +AutoHandle::Handle AutoHandle::Get() const { + return handle_; +} + +void AutoHandle::Reset() { + Reset(INVALID_HANDLE_VALUE); +} + +void AutoHandle::Reset(HANDLE handle) { + // Resetting with the same handle we already own is invalid. + if (handle_ != handle) { + if (IsCloseable()) { + ::CloseHandle(handle_); + } + handle_ = handle; + } else { + GTEST_CHECK_(!IsCloseable()) + << "Resetting a valid handle to itself is likely a programmer error " + "and thus not allowed."; + } +} + +bool AutoHandle::IsCloseable() const { + // Different Windows APIs may use either of these values to represent an + // invalid handle. + return handle_ != NULL && handle_ != INVALID_HANDLE_VALUE; +} + +Notification::Notification() + : event_(::CreateEvent(NULL, // Default security attributes. + TRUE, // Do not reset automatically. + FALSE, // Initially unset. + NULL)) { // Anonymous event. + GTEST_CHECK_(event_.Get() != NULL); +} + +void Notification::Notify() { + GTEST_CHECK_(::SetEvent(event_.Get()) != FALSE); +} + +void Notification::WaitForNotification() { + GTEST_CHECK_( + ::WaitForSingleObject(event_.Get(), INFINITE) == WAIT_OBJECT_0); +} + +Mutex::Mutex() + : owner_thread_id_(0), + type_(kDynamic), + critical_section_init_phase_(0), + critical_section_(new CRITICAL_SECTION) { + ::InitializeCriticalSection(critical_section_); +} + +Mutex::~Mutex() { + // Static mutexes are leaked intentionally. It is not thread-safe to try + // to clean them up. + // TODO(yukawa): Switch to Slim Reader/Writer (SRW) Locks, which requires + // nothing to clean it up but is available only on Vista and later. 
+ // http://msdn.microsoft.com/en-us/library/windows/desktop/aa904937.aspx + if (type_ == kDynamic) { + ::DeleteCriticalSection(critical_section_); + delete critical_section_; + critical_section_ = NULL; + } +} + +void Mutex::Lock() { + ThreadSafeLazyInit(); + ::EnterCriticalSection(critical_section_); + owner_thread_id_ = ::GetCurrentThreadId(); +} + +void Mutex::Unlock() { + ThreadSafeLazyInit(); + // We don't protect writing to owner_thread_id_ here, as it's the + // caller's responsibility to ensure that the current thread holds the + // mutex when this is called. + owner_thread_id_ = 0; + ::LeaveCriticalSection(critical_section_); +} + +// Does nothing if the current thread holds the mutex. Otherwise, crashes +// with high probability. +void Mutex::AssertHeld() { + ThreadSafeLazyInit(); + GTEST_CHECK_(owner_thread_id_ == ::GetCurrentThreadId()) + << "The current thread is not holding the mutex @" << this; +} + +// Initializes owner_thread_id_ and critical_section_ in static mutexes. +void Mutex::ThreadSafeLazyInit() { + // Dynamic mutexes are initialized in the constructor. + if (type_ == kStatic) { + switch ( + ::InterlockedCompareExchange(&critical_section_init_phase_, 1L, 0L)) { + case 0: + // If critical_section_init_phase_ was 0 before the exchange, we + // are the first to test it and need to perform the initialization. + owner_thread_id_ = 0; + critical_section_ = new CRITICAL_SECTION; + ::InitializeCriticalSection(critical_section_); + // Updates the critical_section_init_phase_ to 2 to signal + // initialization complete. + GTEST_CHECK_(::InterlockedCompareExchange( + &critical_section_init_phase_, 2L, 1L) == + 1L); + break; + case 1: + // Somebody else is already initializing the mutex; spin until they + // are done. + while (::InterlockedCompareExchange(&critical_section_init_phase_, + 2L, + 2L) != 2L) { + // Possibly yields the rest of the thread's time slice to other + // threads. + ::Sleep(0); + } + break; + + case 2: + break; // The mutex is already initialized and ready for use. + + default: + GTEST_CHECK_(false) + << "Unexpected value of critical_section_init_phase_ " + << "while initializing a static mutex."; + } + } +} + +namespace { + +class ThreadWithParamSupport : public ThreadWithParamBase { + public: + static HANDLE CreateThread(Runnable* runnable, + Notification* thread_can_start) { + ThreadMainParam* param = new ThreadMainParam(runnable, thread_can_start); + DWORD thread_id; + // TODO(yukawa): Consider to use _beginthreadex instead. + HANDLE thread_handle = ::CreateThread( + NULL, // Default security. + 0, // Default stack size. + &ThreadWithParamSupport::ThreadMain, + param, // Parameter to ThreadMainStatic + 0x0, // Default creation flags. + &thread_id); // Need a valid pointer for the call to work under Win98. + GTEST_CHECK_(thread_handle != NULL) << "CreateThread failed with error " + << ::GetLastError() << "."; + if (thread_handle == NULL) { + delete param; + } + return thread_handle; + } + + private: + struct ThreadMainParam { + ThreadMainParam(Runnable* runnable, Notification* thread_can_start) + : runnable_(runnable), + thread_can_start_(thread_can_start) { + } + scoped_ptr runnable_; + // Does not own. + Notification* thread_can_start_; + }; + + static DWORD WINAPI ThreadMain(void* ptr) { + // Transfers ownership. + scoped_ptr param(static_cast(ptr)); + if (param->thread_can_start_ != NULL) + param->thread_can_start_->WaitForNotification(); + param->runnable_->Run(); + return 0; + } + + // Prohibit instantiation. 
+ ThreadWithParamSupport(); + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParamSupport); +}; + +} // namespace + +ThreadWithParamBase::ThreadWithParamBase(Runnable *runnable, + Notification* thread_can_start) + : thread_(ThreadWithParamSupport::CreateThread(runnable, + thread_can_start)) { +} + +ThreadWithParamBase::~ThreadWithParamBase() { + Join(); +} + +void ThreadWithParamBase::Join() { + GTEST_CHECK_(::WaitForSingleObject(thread_.Get(), INFINITE) == WAIT_OBJECT_0) + << "Failed to join the thread with error " << ::GetLastError() << "."; +} + +// Maps a thread to a set of ThreadIdToThreadLocals that have values +// instantiated on that thread and notifies them when the thread exits. A +// ThreadLocal instance is expected to persist until all threads it has +// values on have terminated. +class ThreadLocalRegistryImpl { + public: + // Registers thread_local_instance as having value on the current thread. + // Returns a value that can be used to identify the thread from other threads. + static ThreadLocalValueHolderBase* GetValueOnCurrentThread( + const ThreadLocalBase* thread_local_instance) { + DWORD current_thread = ::GetCurrentThreadId(); + MutexLock lock(&mutex_); + ThreadIdToThreadLocals* const thread_to_thread_locals = + GetThreadLocalsMapLocked(); + ThreadIdToThreadLocals::iterator thread_local_pos = + thread_to_thread_locals->find(current_thread); + if (thread_local_pos == thread_to_thread_locals->end()) { + thread_local_pos = thread_to_thread_locals->insert( + std::make_pair(current_thread, ThreadLocalValues())).first; + StartWatcherThreadFor(current_thread); + } + ThreadLocalValues& thread_local_values = thread_local_pos->second; + ThreadLocalValues::iterator value_pos = + thread_local_values.find(thread_local_instance); + if (value_pos == thread_local_values.end()) { + value_pos = + thread_local_values + .insert(std::make_pair( + thread_local_instance, + linked_ptr( + thread_local_instance->NewValueForCurrentThread()))) + .first; + } + return value_pos->second.get(); + } + + static void OnThreadLocalDestroyed( + const ThreadLocalBase* thread_local_instance) { + std::vector > value_holders; + // Clean up the ThreadLocalValues data structure while holding the lock, but + // defer the destruction of the ThreadLocalValueHolderBases. + { + MutexLock lock(&mutex_); + ThreadIdToThreadLocals* const thread_to_thread_locals = + GetThreadLocalsMapLocked(); + for (ThreadIdToThreadLocals::iterator it = + thread_to_thread_locals->begin(); + it != thread_to_thread_locals->end(); + ++it) { + ThreadLocalValues& thread_local_values = it->second; + ThreadLocalValues::iterator value_pos = + thread_local_values.find(thread_local_instance); + if (value_pos != thread_local_values.end()) { + value_holders.push_back(value_pos->second); + thread_local_values.erase(value_pos); + // This 'if' can only be successful at most once, so theoretically we + // could break out of the loop here, but we don't bother doing so. + } + } + } + // Outside the lock, let the destructor for 'value_holders' deallocate the + // ThreadLocalValueHolderBases. + } + + static void OnThreadExit(DWORD thread_id) { + GTEST_CHECK_(thread_id != 0) << ::GetLastError(); + std::vector > value_holders; + // Clean up the ThreadIdToThreadLocals data structure while holding the + // lock, but defer the destruction of the ThreadLocalValueHolderBases. 
+ { + MutexLock lock(&mutex_); + ThreadIdToThreadLocals* const thread_to_thread_locals = + GetThreadLocalsMapLocked(); + ThreadIdToThreadLocals::iterator thread_local_pos = + thread_to_thread_locals->find(thread_id); + if (thread_local_pos != thread_to_thread_locals->end()) { + ThreadLocalValues& thread_local_values = thread_local_pos->second; + for (ThreadLocalValues::iterator value_pos = + thread_local_values.begin(); + value_pos != thread_local_values.end(); + ++value_pos) { + value_holders.push_back(value_pos->second); + } + thread_to_thread_locals->erase(thread_local_pos); + } + } + // Outside the lock, let the destructor for 'value_holders' deallocate the + // ThreadLocalValueHolderBases. + } + + private: + // In a particular thread, maps a ThreadLocal object to its value. + typedef std::map > ThreadLocalValues; + // Stores all ThreadIdToThreadLocals having values in a thread, indexed by + // thread's ID. + typedef std::map ThreadIdToThreadLocals; + + // Holds the thread id and thread handle that we pass from + // StartWatcherThreadFor to WatcherThreadFunc. + typedef std::pair ThreadIdAndHandle; + + static void StartWatcherThreadFor(DWORD thread_id) { + // The returned handle will be kept in thread_map and closed by + // watcher_thread in WatcherThreadFunc. + HANDLE thread = ::OpenThread(SYNCHRONIZE | THREAD_QUERY_INFORMATION, + FALSE, + thread_id); + GTEST_CHECK_(thread != NULL); + // We need to to pass a valid thread ID pointer into CreateThread for it + // to work correctly under Win98. + DWORD watcher_thread_id; + HANDLE watcher_thread = ::CreateThread( + NULL, // Default security. + 0, // Default stack size + &ThreadLocalRegistryImpl::WatcherThreadFunc, + reinterpret_cast(new ThreadIdAndHandle(thread_id, thread)), + CREATE_SUSPENDED, + &watcher_thread_id); + GTEST_CHECK_(watcher_thread != NULL); + // Give the watcher thread the same priority as ours to avoid being + // blocked by it. + ::SetThreadPriority(watcher_thread, + ::GetThreadPriority(::GetCurrentThread())); + ::ResumeThread(watcher_thread); + ::CloseHandle(watcher_thread); + } + + // Monitors exit from a given thread and notifies those + // ThreadIdToThreadLocals about thread termination. + static DWORD WINAPI WatcherThreadFunc(LPVOID param) { + const ThreadIdAndHandle* tah = + reinterpret_cast(param); + GTEST_CHECK_( + ::WaitForSingleObject(tah->second, INFINITE) == WAIT_OBJECT_0); + OnThreadExit(tah->first); + ::CloseHandle(tah->second); + delete tah; + return 0; + } + + // Returns map of thread local instances. + static ThreadIdToThreadLocals* GetThreadLocalsMapLocked() { + mutex_.AssertHeld(); + static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals; + return map; + } + + // Protects access to GetThreadLocalsMapLocked() and its return value. + static Mutex mutex_; + // Protects access to GetThreadMapLocked() and its return value. + static Mutex thread_map_mutex_; +}; + +Mutex ThreadLocalRegistryImpl::mutex_(Mutex::kStaticMutex); +Mutex ThreadLocalRegistryImpl::thread_map_mutex_(Mutex::kStaticMutex); + +ThreadLocalValueHolderBase* ThreadLocalRegistry::GetValueOnCurrentThread( + const ThreadLocalBase* thread_local_instance) { + return ThreadLocalRegistryImpl::GetValueOnCurrentThread( + thread_local_instance); +} + +void ThreadLocalRegistry::OnThreadLocalDestroyed( + const ThreadLocalBase* thread_local_instance) { + ThreadLocalRegistryImpl::OnThreadLocalDestroyed(thread_local_instance); +} + +#endif // GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS + +#if GTEST_USES_POSIX_RE + +// Implements RE. 
Currently only needed for death tests. + +RE::~RE() { + if (is_valid_) { + // regfree'ing an invalid regex might crash because the content + // of the regex is undefined. Since the regex's are essentially + // the same, one cannot be valid (or invalid) without the other + // being so too. + regfree(&partial_regex_); + regfree(&full_regex_); + } + free(const_cast(pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.full_regex_, str, 1, &match, 0) == 0; +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.partial_regex_, str, 1, &match, 0) == 0; +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = posix::StrDup(regex); + + // Reserves enough bytes to hold the regular expression used for a + // full match. + const size_t full_regex_len = strlen(regex) + 10; + char* const full_pattern = new char[full_regex_len]; + + snprintf(full_pattern, full_regex_len, "^(%s)$", regex); + is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0; + // We want to call regcomp(&partial_regex_, ...) even if the + // previous expression returns false. Otherwise partial_regex_ may + // not be properly initialized can may cause trouble when it's + // freed. + // + // Some implementation of POSIX regex (e.g. on at least some + // versions of Cygwin) doesn't accept the empty string as a valid + // regex. We change it to an equivalent form "()" to be safe. + if (is_valid_) { + const char* const partial_regex = (*regex == '\0') ? "()" : regex; + is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0; + } + EXPECT_TRUE(is_valid_) + << "Regular expression \"" << regex + << "\" is not a valid POSIX Extended regular expression."; + + delete[] full_pattern; +} + +#elif GTEST_USES_SIMPLE_RE + +// Returns true iff ch appears anywhere in str (excluding the +// terminating '\0' character). +bool IsInSet(char ch, const char* str) { + return ch != '\0' && strchr(str, ch) != NULL; +} + +// Returns true iff ch belongs to the given classification. Unlike +// similar functions in , these aren't affected by the +// current locale. +bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; } +bool IsAsciiPunct(char ch) { + return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"); +} +bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); } +bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); } +bool IsAsciiWordChar(char ch) { + return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || + ('0' <= ch && ch <= '9') || ch == '_'; +} + +// Returns true iff "\\c" is a supported escape sequence. +bool IsValidEscape(char c) { + return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW")); +} + +// Returns true iff the given atom (specified by escaped and pattern) +// matches ch. The result is undefined if the atom is invalid. +bool AtomMatchesChar(bool escaped, char pattern_char, char ch) { + if (escaped) { // "\\p" where p is pattern_char. 
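+    // For example, AtomMatchesChar(true, 'd', '7') is true because the atom
+    // "\\d" matches any ASCII digit, while AtomMatchesChar(true, 'd', 'x')
+    // is false.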
+ switch (pattern_char) { + case 'd': return IsAsciiDigit(ch); + case 'D': return !IsAsciiDigit(ch); + case 'f': return ch == '\f'; + case 'n': return ch == '\n'; + case 'r': return ch == '\r'; + case 's': return IsAsciiWhiteSpace(ch); + case 'S': return !IsAsciiWhiteSpace(ch); + case 't': return ch == '\t'; + case 'v': return ch == '\v'; + case 'w': return IsAsciiWordChar(ch); + case 'W': return !IsAsciiWordChar(ch); + } + return IsAsciiPunct(pattern_char) && pattern_char == ch; + } + + return (pattern_char == '.' && ch != '\n') || pattern_char == ch; +} + +// Helper function used by ValidateRegex() to format error messages. +std::string FormatRegexSyntaxError(const char* regex, int index) { + return (Message() << "Syntax error at index " << index + << " in simple regular expression \"" << regex << "\": ").GetString(); +} + +// Generates non-fatal failures and returns false if regex is invalid; +// otherwise returns true. +bool ValidateRegex(const char* regex) { + if (regex == NULL) { + // TODO(wan@google.com): fix the source file location in the + // assertion failures to match where the regex is used in user + // code. + ADD_FAILURE() << "NULL is not a valid simple regular expression."; + return false; + } + + bool is_valid = true; + + // True iff ?, *, or + can follow the previous atom. + bool prev_repeatable = false; + for (int i = 0; regex[i]; i++) { + if (regex[i] == '\\') { // An escape sequence + i++; + if (regex[i] == '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "'\\' cannot appear at the end."; + return false; + } + + if (!IsValidEscape(regex[i])) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "invalid escape sequence \"\\" << regex[i] << "\"."; + is_valid = false; + } + prev_repeatable = true; + } else { // Not an escape sequence. + const char ch = regex[i]; + + if (ch == '^' && i > 0) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'^' can only appear at the beginning."; + is_valid = false; + } else if (ch == '$' && regex[i + 1] != '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'$' can only appear at the end."; + is_valid = false; + } else if (IsInSet(ch, "()[]{}|")) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' is unsupported."; + is_valid = false; + } else if (IsRepeat(ch) && !prev_repeatable) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' can only follow a repeatable token."; + is_valid = false; + } + + prev_repeatable = !IsInSet(ch, "^$?*+"); + } + } + + return is_valid; +} + +// Matches a repeated regex atom followed by a valid simple regular +// expression. The regex atom is defined as c if escaped is false, +// or \c otherwise. repeat is the repetition meta character (?, *, +// or +). The behavior is undefined if str contains too many +// characters to be indexable by size_t, in which case the test will +// probably time out anyway. We are fine with this limitation as +// std::string has it too. +bool MatchRepetitionAndRegexAtHead( + bool escaped, char c, char repeat, const char* regex, + const char* str) { + const size_t min_count = (repeat == '+') ? 1 : 0; + const size_t max_count = (repeat == '?') ? 1 : + static_cast(-1) - 1; + // We cannot call numeric_limits::max() as it conflicts with the + // max() macro on Windows. + + for (size_t i = 0; i <= max_count; ++i) { + // We know that the atom matches each of the first i characters in str. 
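+    // For example, matching str "ab" against atom 'a', repeat '?' and
+    // remaining regex "b": the i == 0 attempt fails because the tail "ab"
+    // does not start with "b", while the i == 1 attempt succeeds because 'a'
+    // was consumed once and the remaining "b" matches the rest of str.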
+ if (i >= min_count && MatchRegexAtHead(regex, str + i)) { + // We have enough matches at the head, and the tail matches too. + // Since we only care about *whether* the pattern matches str + // (as opposed to *how* it matches), there is no need to find a + // greedy match. + return true; + } + if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i])) + return false; + } + return false; +} + +// Returns true iff regex matches a prefix of str. regex must be a +// valid simple regular expression and not start with "^", or the +// result is undefined. +bool MatchRegexAtHead(const char* regex, const char* str) { + if (*regex == '\0') // An empty regex matches a prefix of anything. + return true; + + // "$" only matches the end of a string. Note that regex being + // valid guarantees that there's nothing after "$" in it. + if (*regex == '$') + return *str == '\0'; + + // Is the first thing in regex an escape sequence? + const bool escaped = *regex == '\\'; + if (escaped) + ++regex; + if (IsRepeat(regex[1])) { + // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so + // here's an indirect recursion. It terminates as the regex gets + // shorter in each recursion. + return MatchRepetitionAndRegexAtHead( + escaped, regex[0], regex[1], regex + 2, str); + } else { + // regex isn't empty, isn't "$", and doesn't start with a + // repetition. We match the first atom of regex with the first + // character of str and recurse. + return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) && + MatchRegexAtHead(regex + 1, str + 1); + } +} + +// Returns true iff regex matches any substring of str. regex must be +// a valid simple regular expression, or the result is undefined. +// +// The algorithm is recursive, but the recursion depth doesn't exceed +// the regex length, so we won't need to worry about running out of +// stack space normally. In rare cases the time complexity can be +// exponential with respect to the regex length + the string length, +// but usually it's must faster (often close to linear). +bool MatchRegexAnywhere(const char* regex, const char* str) { + if (regex == NULL || str == NULL) + return false; + + if (*regex == '^') + return MatchRegexAtHead(regex + 1, str); + + // A successful match can be anywhere in str. + do { + if (MatchRegexAtHead(regex, str)) + return true; + } while (*str++ != '\0'); + return false; +} + +// Implements the RE class. + +RE::~RE() { + free(const_cast(pattern_)); + free(const_cast(full_pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str); +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str); +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = full_pattern_ = NULL; + if (regex != NULL) { + pattern_ = posix::StrDup(regex); + } + + is_valid_ = ValidateRegex(regex); + if (!is_valid_) { + // No need to calculate the full pattern when the regex is invalid. + return; + } + + const size_t len = strlen(regex); + // Reserves enough bytes to hold the regular expression used for a + // full match: we need space to prepend a '^', append a '$', and + // terminate the string with '\0'. 
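+  // For example, regex "abc" has len == 3, so 6 bytes are reserved and the
+  // full pattern becomes "^abc$" plus the terminating '\0'. If the regex
+  // already starts with '^' or ends with '$', that character is not doubled.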
+ char* buffer = static_cast(malloc(len + 3)); + full_pattern_ = buffer; + + if (*regex != '^') + *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'. + + // We don't use snprintf or strncpy, as they trigger a warning when + // compiled with VC++ 8.0. + memcpy(buffer, regex, len); + buffer += len; + + if (len == 0 || regex[len - 1] != '$') + *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'. + + *buffer = '\0'; +} + +#endif // GTEST_USES_POSIX_RE + +const char kUnknownFile[] = "unknown file"; + +// Formats a source file path and a line number as they would appear +// in an error message from the compiler used to compile this code. +GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) { + const std::string file_name(file == NULL ? kUnknownFile : file); + + if (line < 0) { + return file_name + ":"; + } +#ifdef _MSC_VER + return file_name + "(" + StreamableToString(line) + "):"; +#else + return file_name + ":" + StreamableToString(line) + ":"; +#endif // _MSC_VER +} + +// Formats a file location for compiler-independent XML output. +// Although this function is not platform dependent, we put it next to +// FormatFileLocation in order to contrast the two functions. +// Note that FormatCompilerIndependentFileLocation() does NOT append colon +// to the file location it produces, unlike FormatFileLocation(). +GTEST_API_ ::std::string FormatCompilerIndependentFileLocation( + const char* file, int line) { + const std::string file_name(file == NULL ? kUnknownFile : file); + + if (line < 0) + return file_name; + else + return file_name + ":" + StreamableToString(line); +} + +GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line) + : severity_(severity) { + const char* const marker = + severity == GTEST_INFO ? "[ INFO ]" : + severity == GTEST_WARNING ? "[WARNING]" : + severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]"; + GetStream() << ::std::endl << marker << " " + << FormatFileLocation(file, line).c_str() << ": "; +} + +// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. +GTestLog::~GTestLog() { + GetStream() << ::std::endl; + if (severity_ == GTEST_FATAL) { + fflush(stderr); + posix::Abort(); + } +} +// Disable Microsoft deprecation warnings for POSIX functions called from +// this class (creat, dup, dup2, and close) +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996) + +#if GTEST_HAS_STREAM_REDIRECTION + +// Object that captures an output stream (stdout/stderr). +class CapturedStream { + public: + // The ctor redirects the stream to a temporary file. + explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) { +# if GTEST_OS_WINDOWS + char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT + char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT + + ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path); + const UINT success = ::GetTempFileNameA(temp_dir_path, + "gtest_redir", + 0, // Generate unique file name. + temp_file_path); + GTEST_CHECK_(success != 0) + << "Unable to create a temporary file in " << temp_dir_path; + const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE); + GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file " + << temp_file_path; + filename_ = temp_file_path; +# else + // There's no guarantee that a test has write access to the current + // directory, so we create the temporary file in the /tmp directory + // instead. We use /tmp on most systems, and /sdcard on Android. + // That's because Android doesn't have /tmp. 
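+    // mkstemp() below replaces the trailing "XXXXXX" of whichever template is
+    // selected with a unique suffix (e.g. "/tmp/captured_stream.k3XbQZ", an
+    // illustrative name) and returns an already-open descriptor, which is
+    // then spliced onto the captured stream with dup2().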
+# if GTEST_OS_LINUX_ANDROID + // Note: Android applications are expected to call the framework's + // Context.getExternalStorageDirectory() method through JNI to get + // the location of the world-writable SD Card directory. However, + // this requires a Context handle, which cannot be retrieved + // globally from native code. Doing so also precludes running the + // code as part of a regular standalone executable, which doesn't + // run in a Dalvik process (e.g. when running it through 'adb shell'). + // + // The location /sdcard is directly accessible from native code + // and is the only location (unofficially) supported by the Android + // team. It's generally a symlink to the real SD Card mount point + // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or + // other OEM-customized locations. Never rely on these, and always + // use /sdcard. + char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX"; +# else + char name_template[] = "/tmp/captured_stream.XXXXXX"; +# endif // GTEST_OS_LINUX_ANDROID + const int captured_fd = mkstemp(name_template); + filename_ = name_template; +# endif // GTEST_OS_WINDOWS + fflush(NULL); + dup2(captured_fd, fd_); + close(captured_fd); + } + + ~CapturedStream() { + remove(filename_.c_str()); + } + + std::string GetCapturedString() { + if (uncaptured_fd_ != -1) { + // Restores the original stream. + fflush(NULL); + dup2(uncaptured_fd_, fd_); + close(uncaptured_fd_); + uncaptured_fd_ = -1; + } + + FILE* const file = posix::FOpen(filename_.c_str(), "r"); + const std::string content = ReadEntireFile(file); + posix::FClose(file); + return content; + } + + private: + const int fd_; // A stream to capture. + int uncaptured_fd_; + // Name of the temporary file holding the stderr output. + ::std::string filename_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream); +}; + +GTEST_DISABLE_MSC_WARNINGS_POP_() + +static CapturedStream* g_captured_stderr = NULL; +static CapturedStream* g_captured_stdout = NULL; + +// Starts capturing an output stream (stdout/stderr). +void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) { + if (*stream != NULL) { + GTEST_LOG_(FATAL) << "Only one " << stream_name + << " capturer can exist at a time."; + } + *stream = new CapturedStream(fd); +} + +// Stops capturing the output stream and returns the captured string. +std::string GetCapturedStream(CapturedStream** captured_stream) { + const std::string content = (*captured_stream)->GetCapturedString(); + + delete *captured_stream; + *captured_stream = NULL; + + return content; +} + +// Starts capturing stdout. +void CaptureStdout() { + CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout); +} + +// Starts capturing stderr. +void CaptureStderr() { + CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr); +} + +// Stops capturing stdout and returns the captured string. +std::string GetCapturedStdout() { + return GetCapturedStream(&g_captured_stdout); +} + +// Stops capturing stderr and returns the captured string. 
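The helpers above are what the public testing::internal::CaptureStdout() / GetCapturedStdout() pair (and their stderr counterparts) are built on. A minimal usage sketch, assuming a test target that links against gtest; the test and message names are made up for illustration:

#include <cstdio>
#include <string>
#include "gtest/gtest.h"

// Illustrative only: exercises the stdout capture helpers implemented above.
TEST(CaptureSketch, SeesTextPrintedWhileCapturing) {
  testing::internal::CaptureStdout();               // redirects fd 1 to a temp file
  std::printf("hello from the code under test\n");  // lands in the temp file
  const std::string out = testing::internal::GetCapturedStdout();
  EXPECT_NE(out.find("hello"), std::string::npos);
}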
+std::string GetCapturedStderr() {
+  return GetCapturedStream(&g_captured_stderr);
+}
+
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+
+std::string TempDir() {
+#if GTEST_OS_WINDOWS_MOBILE
+  return "\\temp\\";
+#elif GTEST_OS_WINDOWS
+  const char* temp_dir = posix::GetEnv("TEMP");
+  if (temp_dir == NULL || temp_dir[0] == '\0')
+    return "\\temp\\";
+  else if (temp_dir[strlen(temp_dir) - 1] == '\\')
+    return temp_dir;
+  else
+    return std::string(temp_dir) + "\\";
+#elif GTEST_OS_LINUX_ANDROID
+  return "/sdcard/";
+#else
+  return "/tmp/";
+#endif  // GTEST_OS_WINDOWS_MOBILE
+}
+
+size_t GetFileSize(FILE* file) {
+  fseek(file, 0, SEEK_END);
+  return static_cast<size_t>(ftell(file));
+}
+
+std::string ReadEntireFile(FILE* file) {
+  const size_t file_size = GetFileSize(file);
+  char* const buffer = new char[file_size];
+
+  size_t bytes_last_read = 0;  // # of bytes read in the last fread()
+  size_t bytes_read = 0;       // # of bytes read so far
+
+  fseek(file, 0, SEEK_SET);
+
+  // Keeps reading the file until we cannot read further or the
+  // pre-determined file size is reached.
+  do {
+    bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
+    bytes_read += bytes_last_read;
+  } while (bytes_last_read > 0 && bytes_read < file_size);
+
+  const std::string content(buffer, bytes_read);
+  delete[] buffer;
+
+  return content;
+}
+
+#if GTEST_HAS_DEATH_TEST
+
+static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
+    NULL;  // Owned.
+
+void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
+  if (g_injected_test_argvs != argvs)
+    delete g_injected_test_argvs;
+  g_injected_test_argvs = argvs;
+}
+
+const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
+  if (g_injected_test_argvs != NULL) {
+    return *g_injected_test_argvs;
+  }
+  return GetArgvs();
+}
+#endif  // GTEST_HAS_DEATH_TEST
+
+#if GTEST_OS_WINDOWS_MOBILE
+namespace posix {
+void Abort() {
+  DebugBreak();
+  TerminateProcess(GetCurrentProcess(), 1);
+}
+}  // namespace posix
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+// Returns the name of the environment variable corresponding to the
+// given flag. For example, FlagToEnvVar("foo") will return
+// "GTEST_FOO" in the open-source version.
+static std::string FlagToEnvVar(const char* flag) {
+  const std::string full_flag =
+      (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
+
+  Message env_var;
+  for (size_t i = 0; i != full_flag.length(); i++) {
+    env_var << ToUpper(full_flag.c_str()[i]);
+  }
+
+  return env_var.GetString();
+}
+
+// Parses 'str' for a 32-bit signed integer. If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged and returns false.
+bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
+  // Parses the environment variable as a decimal integer.
+  char* end = NULL;
+  const long long_value = strtol(str, &end, 10);  // NOLINT
+
+  // Has strtol() consumed all characters in the string?
+  if (*end != '\0') {
+    // No - an invalid character was encountered.
+    Message msg;
+    msg << "WARNING: " << src_text
+        << " is expected to be a 32-bit integer, but actually"
+        << " has value \"" << str << "\".\n";
+    printf("%s", msg.GetString().c_str());
+    fflush(stdout);
+    return false;
+  }
+
+  // Is the parsed value in the range of an Int32?
+  const Int32 result = static_cast<Int32>(long_value);
+  if (long_value == LONG_MAX || long_value == LONG_MIN ||
+      // The parsed value overflows as a long. (strtol() returns
+      // LONG_MAX or LONG_MIN when the input overflows.)
+      result != long_value
+      // The parsed value overflows as an Int32.
+ ) { + Message msg; + msg << "WARNING: " << src_text + << " is expected to be a 32-bit integer, but actually" + << " has value " << str << ", which overflows.\n"; + printf("%s", msg.GetString().c_str()); + fflush(stdout); + return false; + } + + *value = result; + return true; +} + +// Reads and returns the Boolean environment variable corresponding to +// the given flag; if it's not set, returns default_value. +// +// The value is considered true iff it's not "0". +bool BoolFromGTestEnv(const char* flag, bool default_value) { +#if defined(GTEST_GET_BOOL_FROM_ENV_) + return GTEST_GET_BOOL_FROM_ENV_(flag, default_value); +#endif // defined(GTEST_GET_BOOL_FROM_ENV_) + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = posix::GetEnv(env_var.c_str()); + return string_value == NULL ? + default_value : strcmp(string_value, "0") != 0; +} + +// Reads and returns a 32-bit integer stored in the environment +// variable corresponding to the given flag; if it isn't set or +// doesn't represent a valid 32-bit integer, returns default_value. +Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) { +#if defined(GTEST_GET_INT32_FROM_ENV_) + return GTEST_GET_INT32_FROM_ENV_(flag, default_value); +#endif // defined(GTEST_GET_INT32_FROM_ENV_) + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = posix::GetEnv(env_var.c_str()); + if (string_value == NULL) { + // The environment variable is not set. + return default_value; + } + + Int32 result = default_value; + if (!ParseInt32(Message() << "Environment variable " << env_var, + string_value, &result)) { + printf("The default value %s is used.\n", + (Message() << default_value).GetString().c_str()); + fflush(stdout); + return default_value; + } + + return result; +} + +// Reads and returns the string environment variable corresponding to +// the given flag; if it's not set, returns default_value. +std::string StringFromGTestEnv(const char* flag, const char* default_value) { +#if defined(GTEST_GET_STRING_FROM_ENV_) + return GTEST_GET_STRING_FROM_ENV_(flag, default_value); +#endif // defined(GTEST_GET_STRING_FROM_ENV_) + const std::string env_var = FlagToEnvVar(flag); + const char* value = posix::GetEnv(env_var.c_str()); + if (value != NULL) { + return value; + } + + // As a special case for the 'output' flag, if GTEST_OUTPUT is not + // set, we look for XML_OUTPUT_FILE, which is set by the Bazel build + // system. The value of XML_OUTPUT_FILE is a filename without the + // "xml:" prefix of GTEST_OUTPUT. + // + // The net priority order after flag processing is thus: + // --gtest_output command line flag + // GTEST_OUTPUT environment variable + // XML_OUTPUT_FILE environment variable + // 'default_value' + if (strcmp(flag, "output") == 0) { + value = posix::GetEnv("XML_OUTPUT_FILE"); + if (value != NULL) { + return std::string("xml:") + value; + } + } + return default_value; +} + +} // namespace internal +} // namespace testing +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Test - The Google C++ Testing Framework +// +// This file implements a universal value printer that can print a +// value of any type T: +// +// void ::testing::internal::UniversalPrinter::Print(value, ostream_ptr); +// +// It uses the << operator when possible, and prints the bytes in the +// object otherwise. A user can override its behavior for a class +// type Foo by defining either operator<<(::std::ostream&, const Foo&) +// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that +// defines Foo. + +#include "gtest/gtest-printers.h" +#include +#include +#include +#include // NOLINT +#include +#include "gtest/internal/gtest-port.h" + +namespace testing { + +namespace { + +using ::std::ostream; + +// Prints a segment of bytes in the given object. +GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ +GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ +void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start, + size_t count, ostream* os) { + char text[5] = ""; + for (size_t i = 0; i != count; i++) { + const size_t j = start + i; + if (i != 0) { + // Organizes the bytes into groups of 2 for easy parsing by + // human. + if ((j % 2) == 0) + *os << ' '; + else + *os << '-'; + } + GTEST_SNPRINTF_(text, sizeof(text), "%02X", obj_bytes[j]); + *os << text; + } +} + +// Prints the bytes in the given value to the given ostream. +void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count, + ostream* os) { + // Tells the user how big the object is. + *os << count << "-byte object <"; + + const size_t kThreshold = 132; + const size_t kChunkSize = 64; + // If the object size is bigger than kThreshold, we'll have to omit + // some details by printing only the first and the last kChunkSize + // bytes. + // TODO(wan): let the user control the threshold using a flag. + if (count < kThreshold) { + PrintByteSegmentInObjectTo(obj_bytes, 0, count, os); + } else { + PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os); + *os << " ... "; + // Rounds up to 2-byte boundary. 
+ const size_t resume_pos = (count - kChunkSize + 1)/2*2; + PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os); + } + *os << ">"; +} + +} // namespace + +namespace internal2 { + +// Delegates to PrintBytesInObjectToImpl() to print the bytes in the +// given object. The delegation simplifies the implementation, which +// uses the << operator and thus is easier done outside of the +// ::testing::internal namespace, which contains a << operator that +// sometimes conflicts with the one in STL. +void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count, + ostream* os) { + PrintBytesInObjectToImpl(obj_bytes, count, os); +} + +} // namespace internal2 + +namespace internal { + +// Depending on the value of a char (or wchar_t), we print it in one +// of three formats: +// - as is if it's a printable ASCII (e.g. 'a', '2', ' '), +// - as a hexidecimal escape sequence (e.g. '\x7F'), or +// - as a special escape sequence (e.g. '\r', '\n'). +enum CharFormat { + kAsIs, + kHexEscape, + kSpecialEscape +}; + +// Returns true if c is a printable ASCII character. We test the +// value of c directly instead of calling isprint(), which is buggy on +// Windows Mobile. +inline bool IsPrintableAscii(wchar_t c) { + return 0x20 <= c && c <= 0x7E; +} + +// Prints a wide or narrow char c as a character literal without the +// quotes, escaping it when necessary; returns how c was formatted. +// The template argument UnsignedChar is the unsigned version of Char, +// which is the type of c. +template +static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) { + switch (static_cast(c)) { + case L'\0': + *os << "\\0"; + break; + case L'\'': + *os << "\\'"; + break; + case L'\\': + *os << "\\\\"; + break; + case L'\a': + *os << "\\a"; + break; + case L'\b': + *os << "\\b"; + break; + case L'\f': + *os << "\\f"; + break; + case L'\n': + *os << "\\n"; + break; + case L'\r': + *os << "\\r"; + break; + case L'\t': + *os << "\\t"; + break; + case L'\v': + *os << "\\v"; + break; + default: + if (IsPrintableAscii(c)) { + *os << static_cast(c); + return kAsIs; + } else { + *os << "\\x" + String::FormatHexInt(static_cast(c)); + return kHexEscape; + } + } + return kSpecialEscape; +} + +// Prints a wchar_t c as if it's part of a string literal, escaping it when +// necessary; returns how c was formatted. +static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) { + switch (c) { + case L'\'': + *os << "'"; + return kAsIs; + case L'"': + *os << "\\\""; + return kSpecialEscape; + default: + return PrintAsCharLiteralTo(c, os); + } +} + +// Prints a char c as if it's part of a string literal, escaping it when +// necessary; returns how c was formatted. +static CharFormat PrintAsStringLiteralTo(char c, ostream* os) { + return PrintAsStringLiteralTo( + static_cast(static_cast(c)), os); +} + +// Prints a wide or narrow character c and its code. '\0' is printed +// as "'\\0'", other unprintable characters are also properly escaped +// using the standard C++ escape sequence. The template argument +// UnsignedChar is the unsigned version of Char, which is the type of c. +template +void PrintCharAndCodeTo(Char c, ostream* os) { + // First, print c as a literal in the most readable form we can find. + *os << ((sizeof(c) > 1) ? "L'" : "'"); + const CharFormat format = PrintAsCharLiteralTo(c, os); + *os << "'"; + + // To aid user debugging, we also print c's code in decimal, unless + // it's 0 (in which case c was printed as '\\0', making the code + // obvious). 
+ if (c == 0) + return; + *os << " (" << static_cast(c); + + // For more convenience, we print c's code again in hexidecimal, + // unless c was already printed in the form '\x##' or the code is in + // [1, 9]. + if (format == kHexEscape || (1 <= c && c <= 9)) { + // Do nothing. + } else { + *os << ", 0x" << String::FormatHexInt(static_cast(c)); + } + *os << ")"; +} + +void PrintTo(unsigned char c, ::std::ostream* os) { + PrintCharAndCodeTo(c, os); +} +void PrintTo(signed char c, ::std::ostream* os) { + PrintCharAndCodeTo(c, os); +} + +// Prints a wchar_t as a symbol if it is printable or as its internal +// code otherwise and also as its code. L'\0' is printed as "L'\\0'". +void PrintTo(wchar_t wc, ostream* os) { + PrintCharAndCodeTo(wc, os); +} + +// Prints the given array of characters to the ostream. CharType must be either +// char or wchar_t. +// The array starts at begin, the length is len, it may include '\0' characters +// and may not be NUL-terminated. +template +GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ +GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ +static void PrintCharsAsStringTo( + const CharType* begin, size_t len, ostream* os) { + const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\""; + *os << kQuoteBegin; + bool is_previous_hex = false; + for (size_t index = 0; index < len; ++index) { + const CharType cur = begin[index]; + if (is_previous_hex && IsXDigit(cur)) { + // Previous character is of '\x..' form and this character can be + // interpreted as another hexadecimal digit in its number. Break string to + // disambiguate. + *os << "\" " << kQuoteBegin; + } + is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape; + } + *os << "\""; +} + +// Prints a (const) char/wchar_t array of 'len' elements, starting at address +// 'begin'. CharType must be either char or wchar_t. +template +GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ +GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ +static void UniversalPrintCharArray( + const CharType* begin, size_t len, ostream* os) { + // The code + // const char kFoo[] = "foo"; + // generates an array of 4, not 3, elements, with the last one being '\0'. + // + // Therefore when printing a char array, we don't print the last element if + // it's '\0', such that the output matches the string literal as it's + // written in the source code. + if (len > 0 && begin[len - 1] == '\0') { + PrintCharsAsStringTo(begin, len - 1, os); + return; + } + + // If, however, the last element in the array is not '\0', e.g. + // const char kFoo[] = { 'f', 'o', 'o' }; + // we must print the entire array. We also print a message to indicate + // that the array is not NUL-terminated. + PrintCharsAsStringTo(begin, len, os); + *os << " (no terminating NUL)"; +} + +// Prints a (const) char array of 'len' elements, starting at address 'begin'. +void UniversalPrintArray(const char* begin, size_t len, ostream* os) { + UniversalPrintCharArray(begin, len, os); +} + +// Prints a (const) wchar_t array of 'len' elements, starting at address +// 'begin'. +void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) { + UniversalPrintCharArray(begin, len, os); +} + +// Prints the given C string to the ostream. +void PrintTo(const char* s, ostream* os) { + if (s == NULL) { + *os << "NULL"; + } else { + *os << ImplicitCast_(s) << " pointing to "; + PrintCharsAsStringTo(s, strlen(s), os); + } +} + +// MSVC compiler can be configured to define whar_t as a typedef +// of unsigned short. 
Defining an overload for const wchar_t* in that case +// would cause pointers to unsigned shorts be printed as wide strings, +// possibly accessing more memory than intended and causing invalid +// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when +// wchar_t is implemented as a native type. +#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) +// Prints the given wide C string to the ostream. +void PrintTo(const wchar_t* s, ostream* os) { + if (s == NULL) { + *os << "NULL"; + } else { + *os << ImplicitCast_(s) << " pointing to "; + PrintCharsAsStringTo(s, std::wcslen(s), os); + } +} +#endif // wchar_t is native + +// Prints a ::string object. +#if GTEST_HAS_GLOBAL_STRING +void PrintStringTo(const ::string& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_GLOBAL_STRING + +void PrintStringTo(const ::std::string& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} + +// Prints a ::wstring object. +#if GTEST_HAS_GLOBAL_WSTRING +void PrintWideStringTo(const ::wstring& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +#if GTEST_HAS_STD_WSTRING +void PrintWideStringTo(const ::std::wstring& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_STD_WSTRING + +} // namespace internal + +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// +// The Google C++ Testing Framework (Google Test) + +#include "gtest/gtest-test-part.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick exists to +// prevent the accidental inclusion of gtest-internal-inl.h in the +// user's code. 
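The comment above describes the include handshake used throughout these sources: only translation units that define GTEST_IMPLEMENTATION_ may pull in gtest-internal-inl.h. A hedged sketch of what the receiving side of such a guard looks like; the actual error wording in gtest-internal-inl.h may differ:

// Receiving side of a translation-unit-only include guard (illustrative).
#if !GTEST_IMPLEMENTATION_
# error "this header is internal to Google Test and must not be included by user code"
#endif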
+#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +namespace testing { + +using internal::GetUnitTestImpl; + +// Gets the summary of the failure message by omitting the stack trace +// in it. +std::string TestPartResult::ExtractSummary(const char* message) { + const char* const stack_trace = strstr(message, internal::kStackTraceMarker); + return stack_trace == NULL ? message : + std::string(message, stack_trace); +} + +// Prints a TestPartResult object. +std::ostream& operator<<(std::ostream& os, const TestPartResult& result) { + return os + << result.file_name() << ":" << result.line_number() << ": " + << (result.type() == TestPartResult::kSuccess ? "Success" : + result.type() == TestPartResult::kFatalFailure ? "Fatal failure" : + "Non-fatal failure") << ":\n" + << result.message() << std::endl; +} + +// Appends a TestPartResult to the array. +void TestPartResultArray::Append(const TestPartResult& result) { + array_.push_back(result); +} + +// Returns the TestPartResult at the given index (0-based). +const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const { + if (index < 0 || index >= size()) { + printf("\nInvalid index (%d) into TestPartResultArray.\n", index); + internal::posix::Abort(); + } + + return array_[index]; +} + +// Returns the number of TestPartResult objects in the array. +int TestPartResultArray::size() const { + return static_cast(array_.size()); +} + +namespace internal { + +HasNewFatalFailureHelper::HasNewFatalFailureHelper() + : has_new_fatal_failure_(false), + original_reporter_(GetUnitTestImpl()-> + GetTestPartResultReporterForCurrentThread()) { + GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this); +} + +HasNewFatalFailureHelper::~HasNewFatalFailureHelper() { + GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( + original_reporter_); +} + +void HasNewFatalFailureHelper::ReportTestPartResult( + const TestPartResult& result) { + if (result.fatally_failed()) + has_new_fatal_failure_ = true; + original_reporter_->ReportTestPartResult(result); +} + +} // namespace internal + +} // namespace testing +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/gtest-typed-test.h" +#include "gtest/gtest.h" + +namespace testing { +namespace internal { + +#if GTEST_HAS_TYPED_TEST_P + +// Skips to the first non-space char in str. Returns an empty string if str +// contains only whitespace characters. +static const char* SkipSpaces(const char* str) { + while (IsSpace(*str)) + str++; + return str; +} + +static std::vector SplitIntoTestNames(const char* src) { + std::vector name_vec; + src = SkipSpaces(src); + for (; src != NULL; src = SkipComma(src)) { + name_vec.push_back(StripTrailingSpaces(GetPrefixUntilComma(src))); + } + return name_vec; +} + +// Verifies that registered_tests match the test names in +// registered_tests_; returns registered_tests if successful, or +// aborts the program otherwise. +const char* TypedTestCasePState::VerifyRegisteredTestNames( + const char* file, int line, const char* registered_tests) { + typedef RegisteredTestsMap::const_iterator RegisteredTestIter; + registered_ = true; + + std::vector name_vec = SplitIntoTestNames(registered_tests); + + Message errors; + + std::set tests; + for (std::vector::const_iterator name_it = name_vec.begin(); + name_it != name_vec.end(); ++name_it) { + const std::string& name = *name_it; + if (tests.count(name) != 0) { + errors << "Test " << name << " is listed more than once.\n"; + continue; + } + + bool found = false; + for (RegisteredTestIter it = registered_tests_.begin(); + it != registered_tests_.end(); + ++it) { + if (name == it->first) { + found = true; + break; + } + } + + if (found) { + tests.insert(name); + } else { + errors << "No test named " << name + << " can be found in this test case.\n"; + } + } + + for (RegisteredTestIter it = registered_tests_.begin(); + it != registered_tests_.end(); + ++it) { + if (tests.count(it->first) == 0) { + errors << "You forgot to list test " << it->first << ".\n"; + } + } + + const std::string& errors_str = errors.GetString(); + if (errors_str != "") { + fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(), + errors_str.c_str()); + fflush(stderr); + posix::Abort(); + } + + return registered_tests; +} + +#endif // GTEST_HAS_TYPED_TEST_P + +} // namespace internal +} // namespace testing +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) + +#include "gtest/gtest.h" +#include "gtest/internal/custom/gtest.h" +#include "gtest/gtest-spi.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include // NOLINT +#include +#include + +#if GTEST_OS_LINUX + +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +# define GTEST_HAS_GETTIMEOFDAY_ 1 + +# include // NOLINT +# include // NOLINT +# include // NOLINT +// Declares vsnprintf(). This header is not available on Windows. +# include // NOLINT +# include // NOLINT +# include // NOLINT +# include // NOLINT +# include + +#elif GTEST_OS_SYMBIAN +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include // NOLINT + +#elif GTEST_OS_ZOS +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include // NOLINT + +// On z/OS we additionally need strings.h for strcasecmp. +# include // NOLINT + +#elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE. + +# include // NOLINT +# undef min + +#elif GTEST_OS_WINDOWS // We are on Windows proper. + +# include // NOLINT +# include // NOLINT +# include // NOLINT +# include // NOLINT + +# if GTEST_OS_WINDOWS_MINGW +// MinGW has gettimeofday() but not _ftime64(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +// TODO(kenton@google.com): There are other ways to get the time on +// Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW +// supports these. consider using them instead. +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include // NOLINT +# endif // GTEST_OS_WINDOWS_MINGW + +// cpplint thinks that the header is already included, so we want to +// silence it. +# include // NOLINT +# undef min + +#else + +// Assume other platforms have gettimeofday(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +# define GTEST_HAS_GETTIMEOFDAY_ 1 + +// cpplint thinks that the header is already included, so we want to +// silence it. +# include // NOLINT +# include // NOLINT + +#endif // GTEST_OS_LINUX + +#if GTEST_HAS_EXCEPTIONS +# include +#endif + +#if GTEST_CAN_STREAM_RESULTS_ +# include // NOLINT +# include // NOLINT +# include // NOLINT +# include // NOLINT +#endif + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. 
+#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +#if GTEST_OS_WINDOWS +# define vsnprintf _vsnprintf +#endif // GTEST_OS_WINDOWS + +namespace testing { + +using internal::CountIf; +using internal::ForEach; +using internal::GetElementOr; +using internal::Shuffle; + +// Constants. + +// A test whose test case name or test name matches this filter is +// disabled and not run. +static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*"; + +// A test case whose name matches this filter is considered a death +// test case and will be run before test cases whose name doesn't +// match this filter. +static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*"; + +// A test filter that matches everything. +static const char kUniversalFilter[] = "*"; + +// The default output file for XML output. +static const char kDefaultOutputFile[] = "test_detail.xml"; + +// The environment variable name for the test shard index. +static const char kTestShardIndex[] = "GTEST_SHARD_INDEX"; +// The environment variable name for the total number of test shards. +static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS"; +// The environment variable name for the test shard status file. +static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE"; + +namespace internal { + +// The text used in failure messages to indicate the start of the +// stack trace. +const char kStackTraceMarker[] = "\nStack trace:\n"; + +// g_help_flag is true iff the --help flag or an equivalent form is +// specified on the command line. +bool g_help_flag = false; + +} // namespace internal + +static const char* GetDefaultFilter() { +#ifdef GTEST_TEST_FILTER_ENV_VAR_ + const char* const testbridge_test_only = getenv(GTEST_TEST_FILTER_ENV_VAR_); + if (testbridge_test_only != NULL) { + return testbridge_test_only; + } +#endif // GTEST_TEST_FILTER_ENV_VAR_ + return kUniversalFilter; +} + +GTEST_DEFINE_bool_( + also_run_disabled_tests, + internal::BoolFromGTestEnv("also_run_disabled_tests", false), + "Run disabled tests too, in addition to the tests normally being run."); + +GTEST_DEFINE_bool_( + break_on_failure, + internal::BoolFromGTestEnv("break_on_failure", false), + "True iff a failed assertion should be a debugger break-point."); + +GTEST_DEFINE_bool_( + catch_exceptions, + internal::BoolFromGTestEnv("catch_exceptions", true), + "True iff " GTEST_NAME_ + " should catch exceptions and treat them as test failures."); + +GTEST_DEFINE_string_( + color, + internal::StringFromGTestEnv("color", "auto"), + "Whether to use colors in the output. Valid values: yes, no, " + "and auto. 'auto' means to use colors if the output is " + "being sent to a terminal and the TERM environment variable " + "is set to a terminal type that supports colors."); + +GTEST_DEFINE_string_( + filter, + internal::StringFromGTestEnv("filter", GetDefaultFilter()), + "A colon-separated list of glob (not regex) patterns " + "for filtering the tests to run, optionally followed by a " + "'-' and a : separated list of negative patterns (tests to " + "exclude). A test is run if it matches one of the positive " + "patterns and does not match any of the negative patterns."); + +GTEST_DEFINE_bool_(list_tests, false, + "List all tests without running them."); + +GTEST_DEFINE_string_( + output, + internal::StringFromGTestEnv("output", ""), + "A format (currently must be \"xml\"), optionally followed " + "by a colon and an output file name or directory. 
A directory " + "is indicated by a trailing pathname separator. " + "Examples: \"xml:filename.xml\", \"xml::directoryname/\". " + "If a directory is specified, output files will be created " + "within that directory, with file-names based on the test " + "executable's name and, if necessary, made unique by adding " + "digits."); + +GTEST_DEFINE_bool_( + print_time, + internal::BoolFromGTestEnv("print_time", true), + "True iff " GTEST_NAME_ + " should display elapsed time in text output."); + +GTEST_DEFINE_int32_( + random_seed, + internal::Int32FromGTestEnv("random_seed", 0), + "Random number seed to use when shuffling test orders. Must be in range " + "[1, 99999], or 0 to use a seed based on the current time."); + +GTEST_DEFINE_int32_( + repeat, + internal::Int32FromGTestEnv("repeat", 1), + "How many times to repeat each test. Specify a negative number " + "for repeating forever. Useful for shaking out flaky tests."); + +GTEST_DEFINE_bool_( + show_internal_stack_frames, false, + "True iff " GTEST_NAME_ " should include internal stack frames when " + "printing test failure stack traces."); + +GTEST_DEFINE_bool_( + shuffle, + internal::BoolFromGTestEnv("shuffle", false), + "True iff " GTEST_NAME_ + " should randomize tests' order on every run."); + +GTEST_DEFINE_int32_( + stack_trace_depth, + internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth), + "The maximum number of stack frames to print when an " + "assertion fails. The valid range is 0 through 100, inclusive."); + +GTEST_DEFINE_string_( + stream_result_to, + internal::StringFromGTestEnv("stream_result_to", ""), + "This flag specifies the host name and the port number on which to stream " + "test results. Example: \"localhost:555\". The flag is effective only on " + "Linux."); + +GTEST_DEFINE_bool_( + throw_on_failure, + internal::BoolFromGTestEnv("throw_on_failure", false), + "When this flag is specified, a failed assertion will throw an exception " + "if exceptions are enabled or exit the program with a non-zero code " + "otherwise."); + +#if GTEST_USE_OWN_FLAGFILE_FLAG_ +GTEST_DEFINE_string_( + flagfile, + internal::StringFromGTestEnv("flagfile", ""), + "This flag specifies the flagfile to read command-line flags from."); +#endif // GTEST_USE_OWN_FLAGFILE_FLAG_ + +namespace internal { + +// Generates a random number from [0, range), using a Linear +// Congruential Generator (LCG). Crashes if 'range' is 0 or greater +// than kMaxRange. +UInt32 Random::Generate(UInt32 range) { + // These constants are the same as are used in glibc's rand(3). + state_ = (1103515245U*state_ + 12345U) % kMaxRange; + + GTEST_CHECK_(range > 0) + << "Cannot generate a number in the range [0, 0)."; + GTEST_CHECK_(range <= kMaxRange) + << "Generation of a number in [0, " << range << ") was requested, " + << "but this can only generate numbers in [0, " << kMaxRange << ")."; + + // Converting via modulus introduces a bit of downward bias, but + // it's simple, and a linear congruential generator isn't too good + // to begin with. + return state_ % range; +} + +// GTestIsInitialized() returns true iff the user has initialized +// Google Test. Useful for catching the user mistake of not initializing +// Google Test before calling RUN_ALL_TESTS(). +static bool GTestIsInitialized() { return GetArgvs().size() > 0; } + +// Iterates over a vector of TestCases, keeping a running sum of the +// results of calling a given int-returning method on each. +// Returns the sum. 
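As an aside before the test-case bookkeeping that follows, the recurrence in Random::Generate() above is a plain glibc-style linear congruential generator. A standalone sketch (seed and range chosen arbitrarily here) of how successive states reduce to small indices of the kind used when shuffling test order:

#include <cstdint>
#include <cstdio>

// Illustrative only: the same LCG recurrence as Random::Generate() above.
int main() {
  const uint32_t kMaxRange = 1u << 31;  // mirrors Random's upper bound
  uint32_t state = 42;                  // e.g. the value of --gtest_random_seed
  for (int i = 0; i < 5; ++i) {
    state = (1103515245u * state + 12345u) % kMaxRange;
    std::printf("%u\n", state % 10u);   // a small index, as when picking a test to swap
  }
  return 0;
}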
+static int SumOverTestCaseList(const std::vector& case_list, + int (TestCase::*method)() const) { + int sum = 0; + for (size_t i = 0; i < case_list.size(); i++) { + sum += (case_list[i]->*method)(); + } + return sum; +} + +// Returns true iff the test case passed. +static bool TestCasePassed(const TestCase* test_case) { + return test_case->should_run() && test_case->Passed(); +} + +// Returns true iff the test case failed. +static bool TestCaseFailed(const TestCase* test_case) { + return test_case->should_run() && test_case->Failed(); +} + +// Returns true iff test_case contains at least one test that should +// run. +static bool ShouldRunTestCase(const TestCase* test_case) { + return test_case->should_run(); +} + +// AssertHelper constructor. +AssertHelper::AssertHelper(TestPartResult::Type type, + const char* file, + int line, + const char* message) + : data_(new AssertHelperData(type, file, line, message)) { +} + +AssertHelper::~AssertHelper() { + delete data_; +} + +// Message assignment, for assertion streaming support. +void AssertHelper::operator=(const Message& message) const { + UnitTest::GetInstance()-> + AddTestPartResult(data_->type, data_->file, data_->line, + AppendUserMessage(data_->message, message), + UnitTest::GetInstance()->impl() + ->CurrentOsStackTraceExceptTop(1) + // Skips the stack frame for this function itself. + ); // NOLINT +} + +// Mutex for linked pointers. +GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex); + +// A copy of all command line arguments. Set by InitGoogleTest(). +::std::vector g_argvs; + +const ::std::vector& GetArgvs() { +#if defined(GTEST_CUSTOM_GET_ARGVS_) + return GTEST_CUSTOM_GET_ARGVS_(); +#else // defined(GTEST_CUSTOM_GET_ARGVS_) + return g_argvs; +#endif // defined(GTEST_CUSTOM_GET_ARGVS_) +} + +// Returns the current application's name, removing directory path if that +// is present. +FilePath GetCurrentExecutableName() { + FilePath result; + +#if GTEST_OS_WINDOWS + result.Set(FilePath(GetArgvs()[0]).RemoveExtension("exe")); +#else + result.Set(FilePath(GetArgvs()[0])); +#endif // GTEST_OS_WINDOWS + + return result.RemoveDirectoryName(); +} + +// Functions for processing the gtest_output flag. + +// Returns the output format, or "" for normal printed output. +std::string UnitTestOptions::GetOutputFormat() { + const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); + if (gtest_output_flag == NULL) return std::string(""); + + const char* const colon = strchr(gtest_output_flag, ':'); + return (colon == NULL) ? + std::string(gtest_output_flag) : + std::string(gtest_output_flag, colon - gtest_output_flag); +} + +// Returns the name of the requested output file, or the default if none +// was explicitly specified. +std::string UnitTestOptions::GetAbsolutePathToOutputFile() { + const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); + if (gtest_output_flag == NULL) + return ""; + + const char* const colon = strchr(gtest_output_flag, ':'); + if (colon == NULL) + return internal::FilePath::ConcatPaths( + internal::FilePath( + UnitTest::GetInstance()->original_working_dir()), + internal::FilePath(kDefaultOutputFile)).string(); + + internal::FilePath output_name(colon + 1); + if (!output_name.IsAbsolutePath()) + // TODO(wan@google.com): on Windows \some\path is not an absolute + // path (as its meaning depends on the current drive), yet the + // following logic for turning it into an absolute path is wrong. + // Fix it. 
+ output_name = internal::FilePath::ConcatPaths( + internal::FilePath(UnitTest::GetInstance()->original_working_dir()), + internal::FilePath(colon + 1)); + + if (!output_name.IsDirectory()) + return output_name.string(); + + internal::FilePath result(internal::FilePath::GenerateUniqueFileName( + output_name, internal::GetCurrentExecutableName(), + GetOutputFormat().c_str())); + return result.string(); +} + +// Returns true iff the wildcard pattern matches the string. The +// first ':' or '\0' character in pattern marks the end of it. +// +// This recursive algorithm isn't very efficient, but is clear and +// works well enough for matching test names, which are short. +bool UnitTestOptions::PatternMatchesString(const char *pattern, + const char *str) { + switch (*pattern) { + case '\0': + case ':': // Either ':' or '\0' marks the end of the pattern. + return *str == '\0'; + case '?': // Matches any single character. + return *str != '\0' && PatternMatchesString(pattern + 1, str + 1); + case '*': // Matches any string (possibly empty) of characters. + return (*str != '\0' && PatternMatchesString(pattern, str + 1)) || + PatternMatchesString(pattern + 1, str); + default: // Non-special character. Matches itself. + return *pattern == *str && + PatternMatchesString(pattern + 1, str + 1); + } +} + +bool UnitTestOptions::MatchesFilter( + const std::string& name, const char* filter) { + const char *cur_pattern = filter; + for (;;) { + if (PatternMatchesString(cur_pattern, name.c_str())) { + return true; + } + + // Finds the next pattern in the filter. + cur_pattern = strchr(cur_pattern, ':'); + + // Returns if no more pattern can be found. + if (cur_pattern == NULL) { + return false; + } + + // Skips the pattern separater (the ':' character). + cur_pattern++; + } +} + +// Returns true iff the user-specified filter matches the test case +// name and the test name. +bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name, + const std::string &test_name) { + const std::string& full_name = test_case_name + "." + test_name.c_str(); + + // Split --gtest_filter at '-', if there is one, to separate into + // positive filter and negative filter portions + const char* const p = GTEST_FLAG(filter).c_str(); + const char* const dash = strchr(p, '-'); + std::string positive; + std::string negative; + if (dash == NULL) { + positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter + negative = ""; + } else { + positive = std::string(p, dash); // Everything up to the dash + negative = std::string(dash + 1); // Everything after the dash + if (positive.empty()) { + // Treat '-test1' as the same as '*-test1' + positive = kUniversalFilter; + } + } + + // A filter is a colon-separated list of patterns. It matches a + // test if any pattern in it matches the test. + return (MatchesFilter(full_name, positive.c_str()) && + !MatchesFilter(full_name, negative.c_str())); +} + +#if GTEST_HAS_SEH +// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the +// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. +// This function is useful as an __except condition. +int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) { + // Google Test should handle a SEH exception if: + // 1. the user wants it to, AND + // 2. this is not a breakpoint exception, AND + // 3. this is not a C++ exception (VC++ implements them via SEH, + // apparently). + // + // SEH exception code for C++ exceptions. + // (see http://support.microsoft.com/kb/185294 for more information). 
+ const DWORD kCxxExceptionCode = 0xe06d7363; + + bool should_handle = true; + + if (!GTEST_FLAG(catch_exceptions)) + should_handle = false; + else if (exception_code == EXCEPTION_BREAKPOINT) + should_handle = false; + else if (exception_code == kCxxExceptionCode) + should_handle = false; + + return should_handle ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH; +} +#endif // GTEST_HAS_SEH + +} // namespace internal + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. Intercepts only failures from the current thread. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + TestPartResultArray* result) + : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD), + result_(result) { + Init(); +} + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + InterceptMode intercept_mode, TestPartResultArray* result) + : intercept_mode_(intercept_mode), + result_(result) { + Init(); +} + +void ScopedFakeTestPartResultReporter::Init() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + if (intercept_mode_ == INTERCEPT_ALL_THREADS) { + old_reporter_ = impl->GetGlobalTestPartResultReporter(); + impl->SetGlobalTestPartResultReporter(this); + } else { + old_reporter_ = impl->GetTestPartResultReporterForCurrentThread(); + impl->SetTestPartResultReporterForCurrentThread(this); + } +} + +// The d'tor restores the test part result reporter used by Google Test +// before. +ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + if (intercept_mode_ == INTERCEPT_ALL_THREADS) { + impl->SetGlobalTestPartResultReporter(old_reporter_); + } else { + impl->SetTestPartResultReporterForCurrentThread(old_reporter_); + } +} + +// Increments the test part result count and remembers the result. +// This method is from the TestPartResultReporterInterface interface. +void ScopedFakeTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + result_->Append(result); +} + +namespace internal { + +// Returns the type ID of ::testing::Test. We should always call this +// instead of GetTypeId< ::testing::Test>() to get the type ID of +// testing::Test. This is to work around a suspected linker bug when +// using Google Test as a framework on Mac OS X. The bug causes +// GetTypeId< ::testing::Test>() to return different values depending +// on whether the call is from the Google Test framework itself or +// from user test code. GetTestTypeId() is guaranteed to always +// return the same value, as it always calls GetTypeId<>() from the +// gtest.cc, which is within the Google Test framework. +TypeId GetTestTypeId() { + return GetTypeId(); +} + +// The value of GetTestTypeId() as seen from within the Google Test +// library. This is solely for testing GetTestTypeId(). +extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId(); + +// This predicate-formatter checks that 'results' contains a test part +// failure of the given type and that the failure message contains the +// given substring. 
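ScopedFakeTestPartResultReporter above, together with HasOneFailure() and SingleFailureChecker below, is the plumbing behind the gtest-spi.h macros. A small usage sketch; the failing assertion inside the macro is intentional:

#include "gtest/gtest.h"
#include "gtest/gtest-spi.h"

// Illustrative only: checks that a statement yields exactly one non-fatal
// failure whose message contains the given substring.
TEST(SpiSketch, DetectsASingleNonFatalFailure) {
  EXPECT_NONFATAL_FAILURE(
      EXPECT_EQ(1, 2) << "deliberate mismatch",
      "deliberate mismatch");
}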
+AssertionResult HasOneFailure(const char* /* results_expr */, + const char* /* type_expr */, + const char* /* substr_expr */, + const TestPartResultArray& results, + TestPartResult::Type type, + const string& substr) { + const std::string expected(type == TestPartResult::kFatalFailure ? + "1 fatal failure" : + "1 non-fatal failure"); + Message msg; + if (results.size() != 1) { + msg << "Expected: " << expected << "\n" + << " Actual: " << results.size() << " failures"; + for (int i = 0; i < results.size(); i++) { + msg << "\n" << results.GetTestPartResult(i); + } + return AssertionFailure() << msg; + } + + const TestPartResult& r = results.GetTestPartResult(0); + if (r.type() != type) { + return AssertionFailure() << "Expected: " << expected << "\n" + << " Actual:\n" + << r; + } + + if (strstr(r.message(), substr.c_str()) == NULL) { + return AssertionFailure() << "Expected: " << expected << " containing \"" + << substr << "\"\n" + << " Actual:\n" + << r; + } + + return AssertionSuccess(); +} + +// The constructor of SingleFailureChecker remembers where to look up +// test part results, what type of failure we expect, and what +// substring the failure message should contain. +SingleFailureChecker:: SingleFailureChecker( + const TestPartResultArray* results, + TestPartResult::Type type, + const string& substr) + : results_(results), + type_(type), + substr_(substr) {} + +// The destructor of SingleFailureChecker verifies that the given +// TestPartResultArray contains exactly one failure that has the given +// type and contains the given substring. If that's not the case, a +// non-fatal failure will be generated. +SingleFailureChecker::~SingleFailureChecker() { + EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_); +} + +DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter( + UnitTestImpl* unit_test) : unit_test_(unit_test) {} + +void DefaultGlobalTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + unit_test_->current_test_result()->AddTestPartResult(result); + unit_test_->listeners()->repeater()->OnTestPartResult(result); +} + +DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter( + UnitTestImpl* unit_test) : unit_test_(unit_test) {} + +void DefaultPerThreadTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result); +} + +// Returns the global test part result reporter. +TestPartResultReporterInterface* +UnitTestImpl::GetGlobalTestPartResultReporter() { + internal::MutexLock lock(&global_test_part_result_reporter_mutex_); + return global_test_part_result_repoter_; +} + +// Sets the global test part result reporter. +void UnitTestImpl::SetGlobalTestPartResultReporter( + TestPartResultReporterInterface* reporter) { + internal::MutexLock lock(&global_test_part_result_reporter_mutex_); + global_test_part_result_repoter_ = reporter; +} + +// Returns the test part result reporter for the current thread. +TestPartResultReporterInterface* +UnitTestImpl::GetTestPartResultReporterForCurrentThread() { + return per_thread_test_part_result_reporter_.get(); +} + +// Sets the test part result reporter for the current thread. +void UnitTestImpl::SetTestPartResultReporterForCurrentThread( + TestPartResultReporterInterface* reporter) { + per_thread_test_part_result_reporter_.set(reporter); +} + +// Gets the number of successful test cases. 
+int UnitTestImpl::successful_test_case_count() const { + return CountIf(test_cases_, TestCasePassed); +} + +// Gets the number of failed test cases. +int UnitTestImpl::failed_test_case_count() const { + return CountIf(test_cases_, TestCaseFailed); +} + +// Gets the number of all test cases. +int UnitTestImpl::total_test_case_count() const { + return static_cast(test_cases_.size()); +} + +// Gets the number of all test cases that contain at least one test +// that should run. +int UnitTestImpl::test_case_to_run_count() const { + return CountIf(test_cases_, ShouldRunTestCase); +} + +// Gets the number of successful tests. +int UnitTestImpl::successful_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count); +} + +// Gets the number of failed tests. +int UnitTestImpl::failed_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count); +} + +// Gets the number of disabled tests that will be reported in the XML report. +int UnitTestImpl::reportable_disabled_test_count() const { + return SumOverTestCaseList(test_cases_, + &TestCase::reportable_disabled_test_count); +} + +// Gets the number of disabled tests. +int UnitTestImpl::disabled_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count); +} + +// Gets the number of tests to be printed in the XML report. +int UnitTestImpl::reportable_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count); +} + +// Gets the number of all tests. +int UnitTestImpl::total_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::total_test_count); +} + +// Gets the number of tests that should run. +int UnitTestImpl::test_to_run_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count); +} + +// Returns the current OS stack trace as an std::string. +// +// The maximum number of stack frames to be included is specified by +// the gtest_stack_trace_depth flag. The skip_count parameter +// specifies the number of top frames to be skipped, which doesn't +// count against the number of frames to be included. +// +// For example, if Foo() calls Bar(), which in turn calls +// CurrentOsStackTraceExceptTop(1), Foo() will be included in the +// trace but Bar() and CurrentOsStackTraceExceptTop() won't. +std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) { + return os_stack_trace_getter()->CurrentStackTrace( + static_cast(GTEST_FLAG(stack_trace_depth)), + skip_count + 1 + // Skips the user-specified number of frames plus this function + // itself. + ); // NOLINT +} + +// Returns the current time in milliseconds. +TimeInMillis GetTimeInMillis() { +#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__) + // Difference between 1970-01-01 and 1601-01-01 in milliseconds. + // http://analogous.blogspot.com/2005/04/epoch.html + const TimeInMillis kJavaEpochToWinFileTimeDelta = + static_cast(116444736UL) * 100000UL; + const DWORD kTenthMicrosInMilliSecond = 10000; + + SYSTEMTIME now_systime; + FILETIME now_filetime; + ULARGE_INTEGER now_int64; + // TODO(kenton@google.com): Shouldn't this just use + // GetSystemTimeAsFileTime()? 
+ GetSystemTime(&now_systime); + if (SystemTimeToFileTime(&now_systime, &now_filetime)) { + now_int64.LowPart = now_filetime.dwLowDateTime; + now_int64.HighPart = now_filetime.dwHighDateTime; + now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) - + kJavaEpochToWinFileTimeDelta; + return now_int64.QuadPart; + } + return 0; +#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_ + __timeb64 now; + + // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996 + // (deprecated function) there. + // TODO(kenton@google.com): Use GetTickCount()? Or use + // SystemTimeToFileTime() + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996) + _ftime64(&now); + GTEST_DISABLE_MSC_WARNINGS_POP_() + + return static_cast(now.time) * 1000 + now.millitm; +#elif GTEST_HAS_GETTIMEOFDAY_ + struct timeval now; + gettimeofday(&now, NULL); + return static_cast(now.tv_sec) * 1000 + now.tv_usec / 1000; +#else +# error "Don't know how to get the current time on your system." +#endif +} + +// Utilities + +// class String. + +#if GTEST_OS_WINDOWS_MOBILE +// Creates a UTF-16 wide string from the given ANSI string, allocating +// memory using new. The caller is responsible for deleting the return +// value using delete[]. Returns the wide string, or NULL if the +// input is NULL. +LPCWSTR String::AnsiToUtf16(const char* ansi) { + if (!ansi) return NULL; + const int length = strlen(ansi); + const int unicode_length = + MultiByteToWideChar(CP_ACP, 0, ansi, length, + NULL, 0); + WCHAR* unicode = new WCHAR[unicode_length + 1]; + MultiByteToWideChar(CP_ACP, 0, ansi, length, + unicode, unicode_length); + unicode[unicode_length] = 0; + return unicode; +} + +// Creates an ANSI string from the given wide string, allocating +// memory using new. The caller is responsible for deleting the return +// value using delete[]. Returns the ANSI string, or NULL if the +// input is NULL. +const char* String::Utf16ToAnsi(LPCWSTR utf16_str) { + if (!utf16_str) return NULL; + const int ansi_length = + WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, + NULL, 0, NULL, NULL); + char* ansi = new char[ansi_length + 1]; + WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, + ansi, ansi_length, NULL, NULL); + ansi[ansi_length] = 0; + return ansi; +} + +#endif // GTEST_OS_WINDOWS_MOBILE + +// Compares two C strings. Returns true iff they have the same content. +// +// Unlike strcmp(), this function can handle NULL argument(s). A NULL +// C string is considered different to any non-NULL C string, +// including the empty string. +bool String::CStringEquals(const char * lhs, const char * rhs) { + if ( lhs == NULL ) return rhs == NULL; + + if ( rhs == NULL ) return false; + + return strcmp(lhs, rhs) == 0; +} + +#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +// Converts an array of wide chars to a narrow string using the UTF-8 +// encoding, and streams the result to the given Message object. 
+static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length, + Message* msg) { + for (size_t i = 0; i != length; ) { // NOLINT + if (wstr[i] != L'\0') { + *msg << WideStringToUtf8(wstr + i, static_cast(length - i)); + while (i != length && wstr[i] != L'\0') + i++; + } else { + *msg << '\0'; + i++; + } + } +} + +#endif // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +void SplitString(const ::std::string& str, char delimiter, + ::std::vector< ::std::string>* dest) { + ::std::vector< ::std::string> parsed; + ::std::string::size_type pos = 0; + while (::testing::internal::AlwaysTrue()) { + const ::std::string::size_type colon = str.find(delimiter, pos); + if (colon == ::std::string::npos) { + parsed.push_back(str.substr(pos)); + break; + } else { + parsed.push_back(str.substr(pos, colon - pos)); + pos = colon + 1; + } + } + dest->swap(parsed); +} + +} // namespace internal + +// Constructs an empty Message. +// We allocate the stringstream separately because otherwise each use of +// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's +// stack frame leading to huge stack frames in some cases; gcc does not reuse +// the stack space. +Message::Message() : ss_(new ::std::stringstream) { + // By default, we want there to be enough precision when printing + // a double to a Message. + *ss_ << std::setprecision(std::numeric_limits::digits10 + 2); +} + +// These two overloads allow streaming a wide C string to a Message +// using the UTF-8 encoding. +Message& Message::operator <<(const wchar_t* wide_c_str) { + return *this << internal::String::ShowWideCString(wide_c_str); +} +Message& Message::operator <<(wchar_t* wide_c_str) { + return *this << internal::String::ShowWideCString(wide_c_str); +} + +#if GTEST_HAS_STD_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. +Message& Message::operator <<(const ::std::wstring& wstr) { + internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); + return *this; +} +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. +Message& Message::operator <<(const ::wstring& wstr) { + internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); + return *this; +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +// Gets the text streamed to this object so far as an std::string. +// Each '\0' character in the buffer is replaced with "\\0". +std::string Message::GetString() const { + return internal::StringStreamToString(ss_.get()); +} + +// AssertionResult constructors. +// Used in EXPECT_TRUE/FALSE(assertion_result). +AssertionResult::AssertionResult(const AssertionResult& other) + : success_(other.success_), + message_(other.message_.get() != NULL ? + new ::std::string(*other.message_) : + static_cast< ::std::string*>(NULL)) { +} + +// Swaps two AssertionResults. +void AssertionResult::swap(AssertionResult& other) { + using std::swap; + swap(success_, other.success_); + swap(message_, other.message_); +} + +// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. +AssertionResult AssertionResult::operator!() const { + AssertionResult negation(!success_); + if (message_.get() != NULL) + negation << *message_; + return negation; +} + +// Makes a successful assertion result. +AssertionResult AssertionSuccess() { + return AssertionResult(true); +} + +// Makes a failed assertion result. 
+AssertionResult AssertionFailure() {
+  return AssertionResult(false);
+}
+
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << message.
+AssertionResult AssertionFailure(const Message& message) {
+  return AssertionFailure() << message;
+}
+
+namespace internal {
+
+namespace edit_distance {
+std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
+                                            const std::vector<size_t>& right) {
+  std::vector<std::vector<double> > costs(
+      left.size() + 1, std::vector<double>(right.size() + 1));
+  std::vector<std::vector<EditType> > best_move(
+      left.size() + 1, std::vector<EditType>(right.size() + 1));
+
+  // Populate for empty right.
+  for (size_t l_i = 0; l_i < costs.size(); ++l_i) {
+    costs[l_i][0] = static_cast<double>(l_i);
+    best_move[l_i][0] = kRemove;
+  }
+  // Populate for empty left.
+  for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) {
+    costs[0][r_i] = static_cast<double>(r_i);
+    best_move[0][r_i] = kAdd;
+  }
+
+  for (size_t l_i = 0; l_i < left.size(); ++l_i) {
+    for (size_t r_i = 0; r_i < right.size(); ++r_i) {
+      if (left[l_i] == right[r_i]) {
+        // Found a match. Consume it.
+        costs[l_i + 1][r_i + 1] = costs[l_i][r_i];
+        best_move[l_i + 1][r_i + 1] = kMatch;
+        continue;
+      }
+
+      const double add = costs[l_i + 1][r_i];
+      const double remove = costs[l_i][r_i + 1];
+      const double replace = costs[l_i][r_i];
+      if (add < remove && add < replace) {
+        costs[l_i + 1][r_i + 1] = add + 1;
+        best_move[l_i + 1][r_i + 1] = kAdd;
+      } else if (remove < add && remove < replace) {
+        costs[l_i + 1][r_i + 1] = remove + 1;
+        best_move[l_i + 1][r_i + 1] = kRemove;
+      } else {
+        // We make replace a little more expensive than add/remove to lower
+        // their priority.
+        costs[l_i + 1][r_i + 1] = replace + 1.00001;
+        best_move[l_i + 1][r_i + 1] = kReplace;
+      }
+    }
+  }
+
+  // Reconstruct the best path. We do it in reverse order.
+  std::vector<EditType> best_path;
+  for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) {
+    EditType move = best_move[l_i][r_i];
+    best_path.push_back(move);
+    l_i -= move != kAdd;
+    r_i -= move != kRemove;
+  }
+  std::reverse(best_path.begin(), best_path.end());
+  return best_path;
+}
+
+namespace {
+
+// Helper class to convert string into ids with deduplication.
+class InternalStrings {
+ public:
+  size_t GetId(const std::string& str) {
+    IdMap::iterator it = ids_.find(str);
+    if (it != ids_.end()) return it->second;
+    size_t id = ids_.size();
+    return ids_[str] = id;
+  }
+
+ private:
+  typedef std::map<std::string, size_t> IdMap;
+  IdMap ids_;
+};
+
+}  // namespace
+
+std::vector<EditType> CalculateOptimalEdits(
+    const std::vector<std::string>& left,
+    const std::vector<std::string>& right) {
+  std::vector<size_t> left_ids, right_ids;
+  {
+    InternalStrings intern_table;
+    for (size_t i = 0; i < left.size(); ++i) {
+      left_ids.push_back(intern_table.GetId(left[i]));
+    }
+    for (size_t i = 0; i < right.size(); ++i) {
+      right_ids.push_back(intern_table.GetId(right[i]));
+    }
+  }
+  return CalculateOptimalEdits(left_ids, right_ids);
+}
+
+namespace {
+
+// Helper class that holds the state for one hunk and prints it out to the
+// stream.
+// It reorders adds/removes when possible to group all removes before all
+// adds. It also adds the hunk header before printing into the stream.
+class Hunk {
+ public:
+  Hunk(size_t left_start, size_t right_start)
+      : left_start_(left_start),
+        right_start_(right_start),
+        adds_(),
+        removes_(),
+        common_() {}
+
+  void PushLine(char edit, const char* line) {
+    switch (edit) {
+      case ' ':
+        ++common_;
+        FlushEdits();
+        hunk_.push_back(std::make_pair(' ', line));
+        break;
+      case '-':
+        ++removes_;
+        hunk_removes_.push_back(std::make_pair('-', line));
+        break;
+      case '+':
+        ++adds_;
+        hunk_adds_.push_back(std::make_pair('+', line));
+        break;
+    }
+  }
+
+  void PrintTo(std::ostream* os) {
+    PrintHeader(os);
+    FlushEdits();
+    for (std::list<std::pair<char, const char*> >::const_iterator it =
+             hunk_.begin();
+         it != hunk_.end(); ++it) {
+      *os << it->first << it->second << "\n";
+    }
+  }
+
+  bool has_edits() const { return adds_ || removes_; }
+
+ private:
+  void FlushEdits() {
+    hunk_.splice(hunk_.end(), hunk_removes_);
+    hunk_.splice(hunk_.end(), hunk_adds_);
+  }
+
+  // Print a unified diff header for one hunk.
+  // The format is
+  //   "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
+  // where the left/right parts are omitted if unnecessary.
+  void PrintHeader(std::ostream* ss) const {
+    *ss << "@@ ";
+    if (removes_) {
+      *ss << "-" << left_start_ << "," << (removes_ + common_);
+    }
+    if (removes_ && adds_) {
+      *ss << " ";
+    }
+    if (adds_) {
+      *ss << "+" << right_start_ << "," << (adds_ + common_);
+    }
+    *ss << " @@\n";
+  }
+
+  size_t left_start_, right_start_;
+  size_t adds_, removes_, common_;
+  std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
+};
+
+}  // namespace
+
+// Create a list of diff hunks in Unified diff format.
+// Each hunk has a header generated by PrintHeader above plus a body with
+// lines prefixed with ' ' for no change, '-' for deletion and '+' for
+// addition.
+// 'context' represents the desired unchanged prefix/suffix around the diff.
+// If two hunks are close enough that their contexts overlap, then they are
+// joined into one hunk.
+std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+                              const std::vector<std::string>& right,
+                              size_t context) {
+  const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
+
+  size_t l_i = 0, r_i = 0, edit_i = 0;
+  std::stringstream ss;
+  while (edit_i < edits.size()) {
+    // Find first edit.
+    while (edit_i < edits.size() && edits[edit_i] == kMatch) {
+      ++l_i;
+      ++r_i;
+      ++edit_i;
+    }
+
+    // Find the first line to include in the hunk.
+    const size_t prefix_context = std::min(l_i, context);
+    Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
+    for (size_t i = prefix_context; i > 0; --i) {
+      hunk.PushLine(' ', left[l_i - i].c_str());
+    }
+
+    // Iterate the edits until we found enough suffix for the hunk or the input
+    // is over.
+    size_t n_suffix = 0;
+    for (; edit_i < edits.size(); ++edit_i) {
+      if (n_suffix >= context) {
+        // Continue only if the next hunk is very close.
+        std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
+        while (it != edits.end() && *it == kMatch) ++it;
+        if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
+          // There is no next edit or it is too far away.
+          break;
+        }
+      }
+
+      EditType edit = edits[edit_i];
+      // Reset count when a non match is found.
+      n_suffix = edit == kMatch ? n_suffix + 1 : 0;
+
+      if (edit == kMatch || edit == kRemove || edit == kReplace) {
+        hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str());
+      }
+      if (edit == kAdd || edit == kReplace) {
+        hunk.PushLine('+', right[r_i].c_str());
+      }
+
+      // Advance indices, depending on edit type.
+      l_i += edit != kAdd;
+      r_i += edit != kRemove;
+    }
+
+    if (!hunk.has_edits()) {
+      // We are done. We don't want this hunk.
+      break;
+    }
+
+    hunk.PrintTo(&ss);
+  }
+  return ss.str();
+}
+
+}  // namespace edit_distance
+
+namespace {
+
+// The string representation of the values received in EqFailure() are already
+// escaped. Split them on escaped '\n' boundaries. Leave all other escaped
+// characters the same.
+std::vector<std::string> SplitEscapedString(const std::string& str) {
+  std::vector<std::string> lines;
+  size_t start = 0, end = str.size();
+  if (end > 2 && str[0] == '"' && str[end - 1] == '"') {
+    ++start;
+    --end;
+  }
+  bool escaped = false;
+  for (size_t i = start; i + 1 < end; ++i) {
+    if (escaped) {
+      escaped = false;
+      if (str[i] == 'n') {
+        lines.push_back(str.substr(start, i - start - 1));
+        start = i + 1;
+      }
+    } else {
+      escaped = str[i] == '\\';
+    }
+  }
+  lines.push_back(str.substr(start, end - start));
+  return lines;
+}
+
+}  // namespace
+
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
+//
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
+//
+//   lhs_expression: "foo"
+//   rhs_expression: "bar"
+//   lhs_value:      "5"
+//   rhs_value:      "6"
+//
+// The ignoring_case parameter is true iff the assertion is a
+// *_STRCASEEQ*.  When it's true, the string "Ignoring case" will
+// be inserted into the message.
+AssertionResult EqFailure(const char* lhs_expression,
+                          const char* rhs_expression,
+                          const std::string& lhs_value,
+                          const std::string& rhs_value,
+                          bool ignoring_case) {
+  Message msg;
+  msg << "      Expected: " << lhs_expression;
+  if (lhs_value != lhs_expression) {
+    msg << "\n      Which is: " << lhs_value;
+  }
+  msg << "\nTo be equal to: " << rhs_expression;
+  if (rhs_value != rhs_expression) {
+    msg << "\n      Which is: " << rhs_value;
+  }
+
+  if (ignoring_case) {
+    msg << "\nIgnoring case";
+  }
+
+  if (!lhs_value.empty() && !rhs_value.empty()) {
+    const std::vector<std::string> lhs_lines =
+        SplitEscapedString(lhs_value);
+    const std::vector<std::string> rhs_lines =
+        SplitEscapedString(rhs_value);
+    if (lhs_lines.size() > 1 || rhs_lines.size() > 1) {
+      msg << "\nWith diff:\n"
+          << edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines);
+    }
+  }
+
+  return AssertionFailure() << msg;
+}
+
+// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
+std::string GetBoolAssertionFailureMessage(
+    const AssertionResult& assertion_result,
+    const char* expression_text,
+    const char* actual_predicate_value,
+    const char* expected_predicate_value) {
+  const char* actual_message = assertion_result.message();
+  Message msg;
+  msg << "Value of: " << expression_text
+      << "\n  Actual: " << actual_predicate_value;
+  if (actual_message[0] != '\0')
+    msg << " (" << actual_message << ")";
+  msg << "\nExpected: " << expected_predicate_value;
+  return msg.GetString();
+}
+
+// Helper function for implementing ASSERT_NEAR.
+AssertionResult DoubleNearPredFormat(const char* expr1,
+                                     const char* expr2,
+                                     const char* abs_error_expr,
+                                     double val1,
+                                     double val2,
+                                     double abs_error) {
+  const double diff = fabs(val1 - val2);
+  if (diff <= abs_error) return AssertionSuccess();
+
+  // TODO(wan): do not print the value of an expression if it's
+  // already a literal.
+  return AssertionFailure()
+      << "The difference between " << expr1 << " and " << expr2
+      << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
+      << expr1 << " evaluates to " << val1 << ",\n"
+      << expr2 << " evaluates to " << val2 << ", and\n"
+      << abs_error_expr << " evaluates to " << abs_error << ".";
+}
+
+
+// Helper template for implementing FloatLE() and DoubleLE().
+template <typename RawType>
+AssertionResult FloatingPointLE(const char* expr1,
+                                const char* expr2,
+                                RawType val1,
+                                RawType val2) {
+  // Returns success if val1 is less than val2,
+  if (val1 < val2) {
+    return AssertionSuccess();
+  }
+
+  // or if val1 is almost equal to val2.
+  const FloatingPoint<RawType> lhs(val1), rhs(val2);
+  if (lhs.AlmostEquals(rhs)) {
+    return AssertionSuccess();
+  }
+
+  // Note that the above two checks will both fail if either val1 or
+  // val2 is NaN, as the IEEE floating-point standard requires that
+  // any predicate involving a NaN must return false.
+
+  ::std::stringstream val1_ss;
+  val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+          << val1;
+
+  ::std::stringstream val2_ss;
+  val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+          << val2;
+
+  return AssertionFailure()
+      << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
+      << "  Actual: " << StringStreamToString(&val1_ss) << " vs "
+      << StringStreamToString(&val2_ss);
+}
+
+}  // namespace internal
+
+// Asserts that val1 is less than, or almost equal to, val2.  Fails
+// otherwise.  In particular, it fails if either val1 or val2 is NaN.
+AssertionResult FloatLE(const char* expr1, const char* expr2,
+                        float val1, float val2) {
+  return internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
+}
+
+// Asserts that val1 is less than, or almost equal to, val2.  Fails
+// otherwise.  In particular, it fails if either val1 or val2 is NaN.
+AssertionResult DoubleLE(const char* expr1, const char* expr2,
+                         double val1, double val2) {
+  return internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
+}
+
+namespace internal {
+
+// The helper function for {ASSERT|EXPECT}_EQ with int or enum
+// arguments.
+AssertionResult CmpHelperEQ(const char* lhs_expression,
+                            const char* rhs_expression,
+                            BiggestInt lhs,
+                            BiggestInt rhs) {
+  if (lhs == rhs) {
+    return AssertionSuccess();
+  }
+
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   FormatForComparisonFailureMessage(lhs, rhs),
+                   FormatForComparisonFailureMessage(rhs, lhs),
+                   false);
+}
+
+// A macro for implementing the helper functions needed to implement
+// ASSERT_?? and EXPECT_?? with integer or enum arguments.  It is here
+// just to avoid copy-and-paste of similar code.
+#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
+AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
+                                   BiggestInt val1, BiggestInt val2) {\
+  if (val1 op val2) {\
+    return AssertionSuccess();\
+  } else {\
+    return AssertionFailure() \
+        << "Expected: (" << expr1 << ") " #op " (" << expr2\
+        << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
+        << " vs " << FormatForComparisonFailureMessage(val2, val1);\
+  }\
+}
+
+// Implements the helper function for {ASSERT|EXPECT}_NE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(NE, !=)
+// Implements the helper function for {ASSERT|EXPECT}_LE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(LE, <=)
+// Implements the helper function for {ASSERT|EXPECT}_LT with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(LT, < )
+// Implements the helper function for {ASSERT|EXPECT}_GE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(GE, >=)
+// Implements the helper function for {ASSERT|EXPECT}_GT with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(GT, > )
+
+#undef GTEST_IMPL_CMP_HELPER_
+
+// The helper function for {ASSERT|EXPECT}_STREQ.
+AssertionResult CmpHelperSTREQ(const char* lhs_expression,
+                               const char* rhs_expression,
+                               const char* lhs,
+                               const char* rhs) {
+  if (String::CStringEquals(lhs, rhs)) {
+    return AssertionSuccess();
+  }
+
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   PrintToString(lhs),
+                   PrintToString(rhs),
+                   false);
+}
+
+// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
+AssertionResult CmpHelperSTRCASEEQ(const char* lhs_expression,
+                                   const char* rhs_expression,
+                                   const char* lhs,
+                                   const char* rhs) {
+  if (String::CaseInsensitiveCStringEquals(lhs, rhs)) {
+    return AssertionSuccess();
+  }
+
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   PrintToString(lhs),
+                   PrintToString(rhs),
+                   true);
+}
+
+// The helper function for {ASSERT|EXPECT}_STRNE.
+AssertionResult CmpHelperSTRNE(const char* s1_expression,
+                               const char* s2_expression,
+                               const char* s1,
+                               const char* s2) {
+  if (!String::CStringEquals(s1, s2)) {
+    return AssertionSuccess();
+  } else {
+    return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
+                              << s2_expression << "), actual: \""
+                              << s1 << "\" vs \"" << s2 << "\"";
+  }
+}
+
+// The helper function for {ASSERT|EXPECT}_STRCASENE.
+AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+                                   const char* s2_expression,
+                                   const char* s1,
+                                   const char* s2) {
+  if (!String::CaseInsensitiveCStringEquals(s1, s2)) {
+    return AssertionSuccess();
+  } else {
+    return AssertionFailure()
+        << "Expected: (" << s1_expression << ") != ("
+        << s2_expression << ") (ignoring case), actual: \""
+        << s1 << "\" vs \"" << s2 << "\"";
+  }
+}
+
+}  // namespace internal
+
+namespace {
+
+// Helper functions for implementing IsSubString() and IsNotSubstring().
+
+// This group of overloaded functions return true iff needle is a
+// substring of haystack.  NULL is considered a substring of itself
+// only.
+
+bool IsSubstringPred(const char* needle, const char* haystack) {
+  if (needle == NULL || haystack == NULL)
+    return needle == haystack;
+
+  return strstr(haystack, needle) != NULL;
+}
+
+bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
+  if (needle == NULL || haystack == NULL)
+    return needle == haystack;
+
+  return wcsstr(haystack, needle) != NULL;
+}
+
+// StringType here can be either ::std::string or ::std::wstring.
+template <typename StringType>
+bool IsSubstringPred(const StringType& needle,
+                     const StringType& haystack) {
+  return haystack.find(needle) != StringType::npos;
+}
+
+// This function implements either IsSubstring() or IsNotSubstring(),
+// depending on the value of the expected_to_be_substring parameter.
+// StringType here can be const char*, const wchar_t*, ::std::string,
+// or ::std::wstring.
+template <typename StringType>
+AssertionResult IsSubstringImpl(
+    bool expected_to_be_substring,
+    const char* needle_expr, const char* haystack_expr,
+    const StringType& needle, const StringType& haystack) {
+  if (IsSubstringPred(needle, haystack) == expected_to_be_substring)
+    return AssertionSuccess();
+
+  const bool is_wide_string = sizeof(needle[0]) > 1;
+  const char* const begin_string_quote = is_wide_string ? "L\"" : "\"";
+  return AssertionFailure()
+      << "Value of: " << needle_expr << "\n"
+      << "  Actual: " << begin_string_quote << needle << "\"\n"
+      << "Expected: " << (expected_to_be_substring ?
"" : "not ") + << "a substring of " << haystack_expr << "\n" + << "Which is: " << begin_string_quote << haystack << "\""; +} + +} // namespace + +// IsSubstring() and IsNotSubstring() check whether needle is a +// substring of haystack (NULL is considered a substring of itself +// only), and return an appropriate error message when they fail. + +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} + +#if GTEST_HAS_STD_WSTRING +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} +#endif // GTEST_HAS_STD_WSTRING + +namespace internal { + +#if GTEST_OS_WINDOWS + +namespace { + +// Helper function for IsHRESULT{SuccessFailure} predicates +AssertionResult HRESULTFailureHelper(const char* expr, + const char* expected, + long hr) { // NOLINT +# if GTEST_OS_WINDOWS_MOBILE + + // Windows CE doesn't support FormatMessage. + const char error_text[] = ""; + +# else + + // Looks up the human-readable system message for the HRESULT code + // and since we're not passing any params to FormatMessage, we don't + // want inserts expanded. + const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS; + const DWORD kBufSize = 4096; + // Gets the system's human readable message string for this HRESULT. 
+  char error_text[kBufSize] = { '\0' };
+  DWORD message_length = ::FormatMessageA(kFlags,
+                                          0,   // no source, we're asking system
+                                          hr,  // the error
+                                          0,   // no line width restrictions
+                                          error_text,  // output buffer
+                                          kBufSize,    // buf size
+                                          NULL);  // no arguments for inserts
+  // Trims tailing white space (FormatMessage leaves a trailing CR-LF)
+  for (; message_length && IsSpace(error_text[message_length - 1]);
+          --message_length) {
+    error_text[message_length - 1] = '\0';
+  }
+
+# endif  // GTEST_OS_WINDOWS_MOBILE
+
+  const std::string error_hex("0x" + String::FormatHexInt(hr));
+  return ::testing::AssertionFailure()
+      << "Expected: " << expr << " " << expected << ".\n"
+      << "  Actual: " << error_hex << " " << error_text << "\n";
+}
+
+}  // namespace
+
+AssertionResult IsHRESULTSuccess(const char* expr, long hr) {  // NOLINT
+  if (SUCCEEDED(hr)) {
+    return AssertionSuccess();
+  }
+  return HRESULTFailureHelper(expr, "succeeds", hr);
+}
+
+AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
+  if (FAILED(hr)) {
+    return AssertionSuccess();
+  }
+  return HRESULTFailureHelper(expr, "fails", hr);
+}
+
+#endif  // GTEST_OS_WINDOWS
+
+// Utility functions for encoding Unicode text (wide strings) in
+// UTF-8.
+
+// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
+// like this:
+//
+// Code-point length   Encoding
+//   0 -  7 bits       0xxxxxxx
+//   8 - 11 bits       110xxxxx 10xxxxxx
+//  12 - 16 bits       1110xxxx 10xxxxxx 10xxxxxx
+//  17 - 21 bits       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+// The maximum code-point a one-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) <<  7) - 1;
+
+// The maximum code-point a two-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;
+
+// The maximum code-point a three-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1;
+
+// The maximum code-point a four-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1;
+
+// Chops off the n lowest bits from a bit pattern.  Returns the n
+// lowest bits.  As a side effect, the original bit pattern will be
+// shifted to the right by n bits.
+inline UInt32 ChopLowBits(UInt32* bits, int n) {
+  const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1);
+  *bits >>= n;
+  return low_bits;
+}
+
+// Converts a Unicode code point to a narrow string in UTF-8 encoding.
+// code_point parameter is of type UInt32 because wchar_t may not be
+// wide enough to contain a code point.
+// If the code_point is not a valid Unicode code point
+// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
+// to "(Invalid Unicode 0xXXXXXXXX)".
+std::string CodePointToUtf8(UInt32 code_point) {
+  if (code_point > kMaxCodePoint4) {
+    return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
+  }
+
+  char str[5];  // Big enough for the largest valid code point.
+  if (code_point <= kMaxCodePoint1) {
+    str[1] = '\0';
+    str[0] = static_cast<char>(code_point);                          // 0xxxxxxx
+  } else if (code_point <= kMaxCodePoint2) {
+    str[2] = '\0';
+    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[0] = static_cast<char>(0xC0 | code_point);                   // 110xxxxx
+  } else if (code_point <= kMaxCodePoint3) {
+    str[3] = '\0';
+    str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[0] = static_cast<char>(0xE0 | code_point);                   // 1110xxxx
+  } else {  // code_point <= kMaxCodePoint4
+    str[4] = '\0';
+    str[3] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[0] = static_cast<char>(0xF0 | code_point);                   // 11110xxx
+  }
+  return str;
+}
+
+// The following two functions only make sense if the system
+// uses UTF-16 for wide string encoding. All supported systems
+// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.
+
+// Determines if the arguments constitute UTF-16 surrogate pair
+// and thus should be combined into a single Unicode code point
+// using CreateCodePointFromUtf16SurrogatePair.
+inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
+  return sizeof(wchar_t) == 2 &&
+      (first & 0xFC00) == 0xD800 && (second & 0xFC00) == 0xDC00;
+}
+
+// Creates a Unicode code point from UTF16 surrogate pair.
+inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
+                                                    wchar_t second) {
+  const UInt32 mask = (1 << 10) - 1;
+  return (sizeof(wchar_t) == 2) ?
+      (((first & mask) << 10) | (second & mask)) + 0x10000 :
+      // This function should not be called when the condition is
+      // false, but we provide a sensible default in case it is.
+      static_cast<UInt32>(first);
+}
+
+// Converts a wide string to a narrow string in UTF-8 encoding.
+// The wide string is assumed to have the following encoding:
+//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
+//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
+// Parameter str points to a null-terminated wide string.
+// Parameter num_chars may additionally limit the number
+// of wchar_t characters processed. -1 is used when the entire string
+// should be processed.
+// If the string contains code points that are not valid Unicode code points
+// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
+// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
+// and contains invalid UTF-16 surrogate pairs, values in those pairs
+// will be encoded as individual Unicode characters from Basic Normal Plane.
+std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
+  if (num_chars == -1)
+    num_chars = static_cast<int>(wcslen(str));
+
+  ::std::stringstream stream;
+  for (int i = 0; i < num_chars; ++i) {
+    UInt32 unicode_code_point;
+
+    if (str[i] == L'\0') {
+      break;
+    } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) {
+      unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i],
+                                                                 str[i + 1]);
+      i++;
+    } else {
+      unicode_code_point = static_cast<UInt32>(str[i]);
+    }
+
+    stream << CodePointToUtf8(unicode_code_point);
+  }
+  return StringStreamToString(&stream);
+}
+
+// Converts a wide C string to an std::string using the UTF-8 encoding.
+// NULL will be converted to "(null)".
+std::string String::ShowWideCString(const wchar_t * wide_c_str) { + if (wide_c_str == NULL) return "(null)"; + + return internal::WideStringToUtf8(wide_c_str, -1); +} + +// Compares two wide C strings. Returns true iff they have the same +// content. +// +// Unlike wcscmp(), this function can handle NULL argument(s). A NULL +// C string is considered different to any non-NULL C string, +// including the empty string. +bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) { + if (lhs == NULL) return rhs == NULL; + + if (rhs == NULL) return false; + + return wcscmp(lhs, rhs) == 0; +} + +// Helper function for *_STREQ on wide strings. +AssertionResult CmpHelperSTREQ(const char* lhs_expression, + const char* rhs_expression, + const wchar_t* lhs, + const wchar_t* rhs) { + if (String::WideCStringEquals(lhs, rhs)) { + return AssertionSuccess(); + } + + return EqFailure(lhs_expression, + rhs_expression, + PrintToString(lhs), + PrintToString(rhs), + false); +} + +// Helper function for *_STRNE on wide strings. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const wchar_t* s1, + const wchar_t* s2) { + if (!String::WideCStringEquals(s1, s2)) { + return AssertionSuccess(); + } + + return AssertionFailure() << "Expected: (" << s1_expression << ") != (" + << s2_expression << "), actual: " + << PrintToString(s1) + << " vs " << PrintToString(s2); +} + +// Compares two C strings, ignoring case. Returns true iff they have +// the same content. +// +// Unlike strcasecmp(), this function can handle NULL argument(s). A +// NULL C string is considered different to any non-NULL C string, +// including the empty string. +bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) { + if (lhs == NULL) + return rhs == NULL; + if (rhs == NULL) + return false; + return posix::StrCaseCmp(lhs, rhs) == 0; +} + + // Compares two wide C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike wcscasecmp(), this function can handle NULL argument(s). + // A NULL C string is considered different to any non-NULL wide C string, + // including the empty string. + // NB: The implementations on different platforms slightly differ. + // On windows, this method uses _wcsicmp which compares according to LC_CTYPE + // environment variable. On GNU platform this method uses wcscasecmp + // which compares according to LC_CTYPE category of the current locale. + // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the + // current locale. +bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs, + const wchar_t* rhs) { + if (lhs == NULL) return rhs == NULL; + + if (rhs == NULL) return false; + +#if GTEST_OS_WINDOWS + return _wcsicmp(lhs, rhs) == 0; +#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID + return wcscasecmp(lhs, rhs) == 0; +#else + // Android, Mac OS X and Cygwin don't define wcscasecmp. + // Other unknown OSes may not define it either. + wint_t left, right; + do { + left = towlower(*lhs++); + right = towlower(*rhs++); + } while (left && left == right); + return left == right; +#endif // OS selector +} + +// Returns true iff str ends with the given suffix, ignoring case. +// Any string is considered to end with an empty suffix. 
+bool String::EndsWithCaseInsensitive( + const std::string& str, const std::string& suffix) { + const size_t str_len = str.length(); + const size_t suffix_len = suffix.length(); + return (str_len >= suffix_len) && + CaseInsensitiveCStringEquals(str.c_str() + str_len - suffix_len, + suffix.c_str()); +} + +// Formats an int value as "%02d". +std::string String::FormatIntWidth2(int value) { + std::stringstream ss; + ss << std::setfill('0') << std::setw(2) << value; + return ss.str(); +} + +// Formats an int value as "%X". +std::string String::FormatHexInt(int value) { + std::stringstream ss; + ss << std::hex << std::uppercase << value; + return ss.str(); +} + +// Formats a byte as "%02X". +std::string String::FormatByte(unsigned char value) { + std::stringstream ss; + ss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase + << static_cast(value); + return ss.str(); +} + +// Converts the buffer in a stringstream to an std::string, converting NUL +// bytes to "\\0" along the way. +std::string StringStreamToString(::std::stringstream* ss) { + const ::std::string& str = ss->str(); + const char* const start = str.c_str(); + const char* const end = start + str.length(); + + std::string result; + result.reserve(2 * (end - start)); + for (const char* ch = start; ch != end; ++ch) { + if (*ch == '\0') { + result += "\\0"; // Replaces NUL with "\\0"; + } else { + result += *ch; + } + } + + return result; +} + +// Appends the user-supplied message to the Google-Test-generated message. +std::string AppendUserMessage(const std::string& gtest_msg, + const Message& user_msg) { + // Appends the user message if it's non-empty. + const std::string user_msg_string = user_msg.GetString(); + if (user_msg_string.empty()) { + return gtest_msg; + } + + return gtest_msg + "\n" + user_msg_string; +} + +} // namespace internal + +// class TestResult + +// Creates an empty TestResult. +TestResult::TestResult() + : death_test_count_(0), + elapsed_time_(0) { +} + +// D'tor. +TestResult::~TestResult() { +} + +// Returns the i-th test part result among all the results. i can +// range from 0 to total_part_count() - 1. If i is not in that range, +// aborts the program. +const TestPartResult& TestResult::GetTestPartResult(int i) const { + if (i < 0 || i >= total_part_count()) + internal::posix::Abort(); + return test_part_results_.at(i); +} + +// Returns the i-th test property. i can range from 0 to +// test_property_count() - 1. If i is not in that range, aborts the +// program. +const TestProperty& TestResult::GetTestProperty(int i) const { + if (i < 0 || i >= test_property_count()) + internal::posix::Abort(); + return test_properties_.at(i); +} + +// Clears the test part results. +void TestResult::ClearTestPartResults() { + test_part_results_.clear(); +} + +// Adds a test part result to the list. +void TestResult::AddTestPartResult(const TestPartResult& test_part_result) { + test_part_results_.push_back(test_part_result); +} + +// Adds a test property to the list. If a property with the same key as the +// supplied property is already represented, the value of this test_property +// replaces the old value for that key. 
+void TestResult::RecordProperty(const std::string& xml_element,
+                                const TestProperty& test_property) {
+  if (!ValidateTestProperty(xml_element, test_property)) {
+    return;
+  }
+  internal::MutexLock lock(&test_properites_mutex_);
+  const std::vector<TestProperty>::iterator property_with_matching_key =
+      std::find_if(test_properties_.begin(), test_properties_.end(),
+                   internal::TestPropertyKeyIs(test_property.key()));
+  if (property_with_matching_key == test_properties_.end()) {
+    test_properties_.push_back(test_property);
+    return;
+  }
+  property_with_matching_key->SetValue(test_property.value());
+}
+
+// The list of reserved attributes used in the <testsuites> element of XML
+// output.
+static const char* const kReservedTestSuitesAttributes[] = {
+  "disabled",
+  "errors",
+  "failures",
+  "name",
+  "random_seed",
+  "tests",
+  "time",
+  "timestamp"
+};
+
+// The list of reserved attributes used in the <testsuite> element of XML
+// output.
+static const char* const kReservedTestSuiteAttributes[] = {
+  "disabled",
+  "errors",
+  "failures",
+  "name",
+  "tests",
+  "time"
+};
+
+// The list of reserved attributes used in the <testcase> element of XML output.
+static const char* const kReservedTestCaseAttributes[] = {
+  "classname",
+  "name",
+  "status",
+  "time",
+  "type_param",
+  "value_param"
+};
+
+template <int kSize>
+std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
+  return std::vector<std::string>(array, array + kSize);
+}
+
+static std::vector<std::string> GetReservedAttributesForElement(
+    const std::string& xml_element) {
+  if (xml_element == "testsuites") {
+    return ArrayAsVector(kReservedTestSuitesAttributes);
+  } else if (xml_element == "testsuite") {
+    return ArrayAsVector(kReservedTestSuiteAttributes);
+  } else if (xml_element == "testcase") {
+    return ArrayAsVector(kReservedTestCaseAttributes);
+  } else {
+    GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
+  }
+  // This code is unreachable but some compilers may not realize that.
+  return std::vector<std::string>();
+}
+
+static std::string FormatWordList(const std::vector<std::string>& words) {
+  Message word_list;
+  for (size_t i = 0; i < words.size(); ++i) {
+    if (i > 0 && words.size() > 2) {
+      word_list << ", ";
+    }
+    if (i == words.size() - 1) {
+      word_list << "and ";
+    }
+    word_list << "'" << words[i] << "'";
+  }
+  return word_list.GetString();
+}
+
+bool ValidateTestPropertyName(const std::string& property_name,
+                              const std::vector<std::string>& reserved_names) {
+  if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
+          reserved_names.end()) {
+    ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
+                  << " (" << FormatWordList(reserved_names)
+                  << " are reserved by " << GTEST_NAME_ << ")";
+    return false;
+  }
+  return true;
+}
+
+// Adds a failure if the key is a reserved attribute of the element named
+// xml_element.  Returns true if the property is valid.
+bool TestResult::ValidateTestProperty(const std::string& xml_element,
+                                      const TestProperty& test_property) {
+  return ValidateTestPropertyName(test_property.key(),
+                                  GetReservedAttributesForElement(xml_element));
+}
+
+// Clears the object.
+void TestResult::Clear() {
+  test_part_results_.clear();
+  test_properties_.clear();
+  death_test_count_ = 0;
+  elapsed_time_ = 0;
+}
+
+// Returns true iff the test failed.
+bool TestResult::Failed() const {
+  for (int i = 0; i < total_part_count(); ++i) {
+    if (GetTestPartResult(i).failed())
+      return true;
+  }
+  return false;
+}
+
+// Returns true iff the test part fatally failed.
+static bool TestPartFatallyFailed(const TestPartResult& result) { + return result.fatally_failed(); +} + +// Returns true iff the test fatally failed. +bool TestResult::HasFatalFailure() const { + return CountIf(test_part_results_, TestPartFatallyFailed) > 0; +} + +// Returns true iff the test part non-fatally failed. +static bool TestPartNonfatallyFailed(const TestPartResult& result) { + return result.nonfatally_failed(); +} + +// Returns true iff the test has a non-fatal failure. +bool TestResult::HasNonfatalFailure() const { + return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0; +} + +// Gets the number of all test parts. This is the sum of the number +// of successful test parts and the number of failed test parts. +int TestResult::total_part_count() const { + return static_cast(test_part_results_.size()); +} + +// Returns the number of the test properties. +int TestResult::test_property_count() const { + return static_cast(test_properties_.size()); +} + +// class Test + +// Creates a Test object. + +// The c'tor saves the states of all flags. +Test::Test() + : gtest_flag_saver_(new GTEST_FLAG_SAVER_) { +} + +// The d'tor restores the states of all flags. The actual work is +// done by the d'tor of the gtest_flag_saver_ field, and thus not +// visible here. +Test::~Test() { +} + +// Sets up the test fixture. +// +// A sub-class may override this. +void Test::SetUp() { +} + +// Tears down the test fixture. +// +// A sub-class may override this. +void Test::TearDown() { +} + +// Allows user supplied key value pairs to be recorded for later output. +void Test::RecordProperty(const std::string& key, const std::string& value) { + UnitTest::GetInstance()->RecordProperty(key, value); +} + +// Allows user supplied key value pairs to be recorded for later output. +void Test::RecordProperty(const std::string& key, int value) { + Message value_message; + value_message << value; + RecordProperty(key, value_message.GetString().c_str()); +} + +namespace internal { + +void ReportFailureInUnknownLocation(TestPartResult::Type result_type, + const std::string& message) { + // This function is a friend of UnitTest and as such has access to + // AddTestPartResult. + UnitTest::GetInstance()->AddTestPartResult( + result_type, + NULL, // No info about the source file where the exception occurred. + -1, // We have no info on which line caused the exception. + message, + ""); // No stack trace, either. +} + +} // namespace internal + +// Google Test requires all tests in the same test case to use the same test +// fixture class. This function checks if the current test has the +// same fixture class as the first test in the current test case. If +// yes, it returns true; otherwise it generates a Google Test failure and +// returns false. +bool Test::HasSameFixtureClass() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + const TestCase* const test_case = impl->current_test_case(); + + // Info about the first test in the current test case. + const TestInfo* const first_test_info = test_case->test_info_list()[0]; + const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_; + const char* const first_test_name = first_test_info->name(); + + // Info about the current test. 
+ const TestInfo* const this_test_info = impl->current_test_info(); + const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_; + const char* const this_test_name = this_test_info->name(); + + if (this_fixture_id != first_fixture_id) { + // Is the first test defined using TEST? + const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId(); + // Is this test defined using TEST? + const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId(); + + if (first_is_TEST || this_is_TEST) { + // Both TEST and TEST_F appear in same test case, which is incorrect. + // Tell the user how to fix this. + + // Gets the name of the TEST and the name of the TEST_F. Note + // that first_is_TEST and this_is_TEST cannot both be true, as + // the fixture IDs are different for the two tests. + const char* const TEST_name = + first_is_TEST ? first_test_name : this_test_name; + const char* const TEST_F_name = + first_is_TEST ? this_test_name : first_test_name; + + ADD_FAILURE() + << "All tests in the same test case must use the same test fixture\n" + << "class, so mixing TEST_F and TEST in the same test case is\n" + << "illegal. In test case " << this_test_info->test_case_name() + << ",\n" + << "test " << TEST_F_name << " is defined using TEST_F but\n" + << "test " << TEST_name << " is defined using TEST. You probably\n" + << "want to change the TEST to TEST_F or move it to another test\n" + << "case."; + } else { + // Two fixture classes with the same name appear in two different + // namespaces, which is not allowed. Tell the user how to fix this. + ADD_FAILURE() + << "All tests in the same test case must use the same test fixture\n" + << "class. However, in test case " + << this_test_info->test_case_name() << ",\n" + << "you defined test " << first_test_name + << " and test " << this_test_name << "\n" + << "using two different test fixture classes. This can happen if\n" + << "the two classes are from different namespaces or translation\n" + << "units and have the same name. You should probably rename one\n" + << "of the classes to put the tests into different test cases."; + } + return false; + } + + return true; +} + +#if GTEST_HAS_SEH + +// Adds an "exception thrown" fatal failure to the current test. This +// function returns its result via an output parameter pointer because VC++ +// prohibits creation of objects with destructors on stack in functions +// using __try (see error C2712). +static std::string* FormatSehExceptionMessage(DWORD exception_code, + const char* location) { + Message message; + message << "SEH exception with code 0x" << std::setbase(16) << + exception_code << std::setbase(10) << " thrown in " << location << "."; + + return new std::string(message.GetString()); +} + +#endif // GTEST_HAS_SEH + +namespace internal { + +#if GTEST_HAS_EXCEPTIONS + +// Adds an "exception thrown" fatal failure to the current test. 
+static std::string FormatCxxExceptionMessage(const char* description,
+                                             const char* location) {
+  Message message;
+  if (description != NULL) {
+    message << "C++ exception with description \"" << description << "\"";
+  } else {
+    message << "Unknown C++ exception";
+  }
+  message << " thrown in " << location << ".";
+
+  return message.GetString();
+}
+
+static std::string PrintTestPartResultToString(
+    const TestPartResult& test_part_result);
+
+GoogleTestFailureException::GoogleTestFailureException(
+    const TestPartResult& failure)
+    : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}
+
+#endif  // GTEST_HAS_EXCEPTIONS
+
+// We put these helper functions in the internal namespace as IBM's xlC
+// compiler rejects the code if they were declared static.
+
+// Runs the given method and handles SEH exceptions it throws, when
+// SEH is supported; returns the 0-value for type Result in case of an
+// SEH exception.  (Microsoft compilers cannot handle SEH and C++
+// exceptions in the same function.  Therefore, we provide a separate
+// wrapper function for handling SEH exceptions.)
+template <class T, typename Result>
+Result HandleSehExceptionsInMethodIfSupported(
+    T* object, Result (T::*method)(), const char* location) {
+#if GTEST_HAS_SEH
+  __try {
+    return (object->*method)();
+  } __except (internal::UnitTestOptions::GTestShouldProcessSEH(  // NOLINT
+      GetExceptionCode())) {
+    // We create the exception message on the heap because VC++ prohibits
+    // creation of objects with destructors on stack in functions using __try
+    // (see error C2712).
+    std::string* exception_message = FormatSehExceptionMessage(
+        GetExceptionCode(), location);
+    internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
+                                             *exception_message);
+    delete exception_message;
+    return static_cast<Result>(0);
+  }
+#else
+  (void)location;
+  return (object->*method)();
+#endif  // GTEST_HAS_SEH
+}
+
+// Runs the given method and catches and reports C++ and/or SEH-style
+// exceptions, if they are supported; returns the 0-value for type
+// Result in case of an SEH exception.
+template <class T, typename Result>
+Result HandleExceptionsInMethodIfSupported(
+    T* object, Result (T::*method)(), const char* location) {
+  // NOTE: The user code can affect the way in which Google Test handles
+  // exceptions by setting GTEST_FLAG(catch_exceptions), but only before
+  // RUN_ALL_TESTS() starts. It is technically possible to check the flag
+  // after the exception is caught and either report or re-throw the
+  // exception based on the flag's value:
+  //
+  // try {
+  //   // Perform the test method.
+  // } catch (...) {
+  //   if (GTEST_FLAG(catch_exceptions))
+  //     // Report the exception as failure.
+  //   else
+  //     throw;  // Re-throws the original exception.
+  // }
+  //
+  // However, the purpose of this flag is to allow the program to drop into
+  // the debugger when the exception is thrown. On most platforms, once the
+  // control enters the catch block, the exception origin information is
+  // lost and the debugger will stop the program at the point of the
+  // re-throw in this function -- instead of at the point of the original
+  // throw statement in the code under test.  For this reason, we perform
+  // the check early, sacrificing the ability to affect Google Test's
+  // exception handling in the method where the exception is thrown.
+ if (internal::GetUnitTestImpl()->catch_exceptions()) { +#if GTEST_HAS_EXCEPTIONS + try { + return HandleSehExceptionsInMethodIfSupported(object, method, location); + } catch (const internal::GoogleTestFailureException&) { // NOLINT + // This exception type can only be thrown by a failed Google + // Test assertion with the intention of letting another testing + // framework catch it. Therefore we just re-throw it. + throw; + } catch (const std::exception& e) { // NOLINT + internal::ReportFailureInUnknownLocation( + TestPartResult::kFatalFailure, + FormatCxxExceptionMessage(e.what(), location)); + } catch (...) { // NOLINT + internal::ReportFailureInUnknownLocation( + TestPartResult::kFatalFailure, + FormatCxxExceptionMessage(NULL, location)); + } + return static_cast(0); +#else + return HandleSehExceptionsInMethodIfSupported(object, method, location); +#endif // GTEST_HAS_EXCEPTIONS + } else { + return (object->*method)(); + } +} + +} // namespace internal + +// Runs the test and updates the test result. +void Test::Run() { + if (!HasSameFixtureClass()) return; + + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()"); + // We will run the test only if SetUp() was successful. + if (!HasFatalFailure()) { + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &Test::TestBody, "the test body"); + } + + // However, we want to clean up as much as possible. Hence we will + // always call TearDown(), even if SetUp() or the test body has + // failed. + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &Test::TearDown, "TearDown()"); +} + +// Returns true iff the current test has a fatal failure. +bool Test::HasFatalFailure() { + return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure(); +} + +// Returns true iff the current test has a non-fatal failure. +bool Test::HasNonfatalFailure() { + return internal::GetUnitTestImpl()->current_test_result()-> + HasNonfatalFailure(); +} + +// class TestInfo + +// Constructs a TestInfo object. It assumes ownership of the test factory +// object. +TestInfo::TestInfo(const std::string& a_test_case_name, + const std::string& a_name, + const char* a_type_param, + const char* a_value_param, + internal::CodeLocation a_code_location, + internal::TypeId fixture_class_id, + internal::TestFactoryBase* factory) + : test_case_name_(a_test_case_name), + name_(a_name), + type_param_(a_type_param ? new std::string(a_type_param) : NULL), + value_param_(a_value_param ? new std::string(a_value_param) : NULL), + location_(a_code_location), + fixture_class_id_(fixture_class_id), + should_run_(false), + is_disabled_(false), + matches_filter_(false), + factory_(factory), + result_() {} + +// Destructs a TestInfo object. +TestInfo::~TestInfo() { delete factory_; } + +namespace internal { + +// Creates a new TestInfo object and registers it with Google Test; +// returns the created object. +// +// Arguments: +// +// test_case_name: name of the test case +// name: name of the test +// type_param: the name of the test's type parameter, or NULL if +// this is not a typed or a type-parameterized test. +// value_param: text representation of the test's value parameter, +// or NULL if this is not a value-parameterized test. 
+// code_location: code location where the test is defined +// fixture_class_id: ID of the test fixture class +// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +// factory: pointer to the factory that creates a test object. +// The newly created TestInfo instance will assume +// ownership of the factory object. +TestInfo* MakeAndRegisterTestInfo( + const char* test_case_name, + const char* name, + const char* type_param, + const char* value_param, + CodeLocation code_location, + TypeId fixture_class_id, + SetUpTestCaseFunc set_up_tc, + TearDownTestCaseFunc tear_down_tc, + TestFactoryBase* factory) { + TestInfo* const test_info = + new TestInfo(test_case_name, name, type_param, value_param, + code_location, fixture_class_id, factory); + GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info); + return test_info; +} + +#if GTEST_HAS_PARAM_TEST +void ReportInvalidTestCaseType(const char* test_case_name, + CodeLocation code_location) { + Message errors; + errors + << "Attempted redefinition of test case " << test_case_name << ".\n" + << "All tests in the same test case must use the same test fixture\n" + << "class. However, in test case " << test_case_name << ", you tried\n" + << "to define a test using a fixture class different from the one\n" + << "used earlier. This can happen if the two fixture classes are\n" + << "from different namespaces and have the same name. You should\n" + << "probably rename one of the classes to put the tests into different\n" + << "test cases."; + + fprintf(stderr, "%s %s", + FormatFileLocation(code_location.file.c_str(), + code_location.line).c_str(), + errors.GetString().c_str()); +} +#endif // GTEST_HAS_PARAM_TEST + +} // namespace internal + +namespace { + +// A predicate that checks the test name of a TestInfo against a known +// value. +// +// This is used for implementation of the TestCase class only. We put +// it in the anonymous namespace to prevent polluting the outer +// namespace. +// +// TestNameIs is copyable. +class TestNameIs { + public: + // Constructor. + // + // TestNameIs has NO default constructor. + explicit TestNameIs(const char* name) + : name_(name) {} + + // Returns true iff the test name of test_info matches name_. + bool operator()(const TestInfo * test_info) const { + return test_info && test_info->name() == name_; + } + + private: + std::string name_; +}; + +} // namespace + +namespace internal { + +// This method expands all parameterized tests registered with macros TEST_P +// and INSTANTIATE_TEST_CASE_P into regular tests and registers those. +// This will be done just once during the program runtime. +void UnitTestImpl::RegisterParameterizedTests() { +#if GTEST_HAS_PARAM_TEST + if (!parameterized_tests_registered_) { + parameterized_test_registry_.RegisterTests(); + parameterized_tests_registered_ = true; + } +#endif +} + +} // namespace internal + +// Creates the test object, runs it, records its result, and then +// deletes it. +void TestInfo::Run() { + if (!should_run_) return; + + // Tells UnitTest where to store test result. + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + impl->set_current_test_info(this); + + TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); + + // Notifies the unit test event listeners that a test is about to start. 
+ repeater->OnTestStart(*this); + + const TimeInMillis start = internal::GetTimeInMillis(); + + impl->os_stack_trace_getter()->UponLeavingGTest(); + + // Creates the test object. + Test* const test = internal::HandleExceptionsInMethodIfSupported( + factory_, &internal::TestFactoryBase::CreateTest, + "the test fixture's constructor"); + + // Runs the test only if the test object was created and its + // constructor didn't generate a fatal failure. + if ((test != NULL) && !Test::HasFatalFailure()) { + // This doesn't throw as all user code that can throw are wrapped into + // exception handling code. + test->Run(); + } + + // Deletes the test object. + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + test, &Test::DeleteSelf_, "the test fixture's destructor"); + + result_.set_elapsed_time(internal::GetTimeInMillis() - start); + + // Notifies the unit test event listener that a test has just finished. + repeater->OnTestEnd(*this); + + // Tells UnitTest to stop associating assertion results to this + // test. + impl->set_current_test_info(NULL); +} + +// class TestCase + +// Gets the number of successful tests in this test case. +int TestCase::successful_test_count() const { + return CountIf(test_info_list_, TestPassed); +} + +// Gets the number of failed tests in this test case. +int TestCase::failed_test_count() const { + return CountIf(test_info_list_, TestFailed); +} + +// Gets the number of disabled tests that will be reported in the XML report. +int TestCase::reportable_disabled_test_count() const { + return CountIf(test_info_list_, TestReportableDisabled); +} + +// Gets the number of disabled tests in this test case. +int TestCase::disabled_test_count() const { + return CountIf(test_info_list_, TestDisabled); +} + +// Gets the number of tests to be printed in the XML report. +int TestCase::reportable_test_count() const { + return CountIf(test_info_list_, TestReportable); +} + +// Get the number of tests in this test case that should run. +int TestCase::test_to_run_count() const { + return CountIf(test_info_list_, ShouldRunTest); +} + +// Gets the number of all tests. +int TestCase::total_test_count() const { + return static_cast(test_info_list_.size()); +} + +// Creates a TestCase with the given name. +// +// Arguments: +// +// name: name of the test case +// a_type_param: the name of the test case's type parameter, or NULL if +// this is not a typed or a type-parameterized test case. +// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +TestCase::TestCase(const char* a_name, const char* a_type_param, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc) + : name_(a_name), + type_param_(a_type_param ? new std::string(a_type_param) : NULL), + set_up_tc_(set_up_tc), + tear_down_tc_(tear_down_tc), + should_run_(false), + elapsed_time_(0) { +} + +// Destructor of TestCase. +TestCase::~TestCase() { + // Deletes every Test in the collection. + ForEach(test_info_list_, internal::Delete); +} + +// Returns the i-th test among all the tests. i can range from 0 to +// total_test_count() - 1. If i is not in that range, returns NULL. +const TestInfo* TestCase::GetTestInfo(int i) const { + const int index = GetElementOr(test_indices_, i, -1); + return index < 0 ? NULL : test_info_list_[index]; +} + +// Returns the i-th test among all the tests. i can range from 0 to +// total_test_count() - 1. If i is not in that range, returns NULL. 
+TestInfo* TestCase::GetMutableTestInfo(int i) { + const int index = GetElementOr(test_indices_, i, -1); + return index < 0 ? NULL : test_info_list_[index]; +} + +// Adds a test to this test case. Will delete the test upon +// destruction of the TestCase object. +void TestCase::AddTestInfo(TestInfo * test_info) { + test_info_list_.push_back(test_info); + test_indices_.push_back(static_cast(test_indices_.size())); +} + +// Runs every test in this TestCase. +void TestCase::Run() { + if (!should_run_) return; + + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + impl->set_current_test_case(this); + + TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); + + repeater->OnTestCaseStart(*this); + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &TestCase::RunSetUpTestCase, "SetUpTestCase()"); + + const internal::TimeInMillis start = internal::GetTimeInMillis(); + for (int i = 0; i < total_test_count(); i++) { + GetMutableTestInfo(i)->Run(); + } + elapsed_time_ = internal::GetTimeInMillis() - start; + + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &TestCase::RunTearDownTestCase, "TearDownTestCase()"); + + repeater->OnTestCaseEnd(*this); + impl->set_current_test_case(NULL); +} + +// Clears the results of all tests in this test case. +void TestCase::ClearResult() { + ad_hoc_test_result_.Clear(); + ForEach(test_info_list_, TestInfo::ClearTestResult); +} + +// Shuffles the tests in this test case. +void TestCase::ShuffleTests(internal::Random* random) { + Shuffle(random, &test_indices_); +} + +// Restores the test order to before the first shuffle. +void TestCase::UnshuffleTests() { + for (size_t i = 0; i < test_indices_.size(); i++) { + test_indices_[i] = static_cast(i); + } +} + +// Formats a countable noun. Depending on its quantity, either the +// singular form or the plural form is used. e.g. +// +// FormatCountableNoun(1, "formula", "formuli") returns "1 formula". +// FormatCountableNoun(5, "book", "books") returns "5 books". +static std::string FormatCountableNoun(int count, + const char * singular_form, + const char * plural_form) { + return internal::StreamableToString(count) + " " + + (count == 1 ? singular_form : plural_form); +} + +// Formats the count of tests. +static std::string FormatTestCount(int test_count) { + return FormatCountableNoun(test_count, "test", "tests"); +} + +// Formats the count of test cases. +static std::string FormatTestCaseCount(int test_case_count) { + return FormatCountableNoun(test_case_count, "test case", "test cases"); +} + +// Converts a TestPartResult::Type enum to human-friendly string +// representation. Both kNonFatalFailure and kFatalFailure are translated +// to "Failure", as the user usually doesn't care about the difference +// between the two when viewing the test result. +static const char * TestPartResultTypeToString(TestPartResult::Type type) { + switch (type) { + case TestPartResult::kSuccess: + return "Success"; + + case TestPartResult::kNonFatalFailure: + case TestPartResult::kFatalFailure: +#ifdef _MSC_VER + return "error: "; +#else + return "Failure\n"; +#endif + default: + return "Unknown result type"; + } +} + +namespace internal { + +// Prints a TestPartResult to an std::string. 
+static std::string PrintTestPartResultToString( + const TestPartResult& test_part_result) { + return (Message() + << internal::FormatFileLocation(test_part_result.file_name(), + test_part_result.line_number()) + << " " << TestPartResultTypeToString(test_part_result.type()) + << test_part_result.message()).GetString(); +} + +// Prints a TestPartResult. +static void PrintTestPartResult(const TestPartResult& test_part_result) { + const std::string& result = + PrintTestPartResultToString(test_part_result); + printf("%s\n", result.c_str()); + fflush(stdout); + // If the test program runs in Visual Studio or a debugger, the + // following statements add the test part result message to the Output + // window such that the user can double-click on it to jump to the + // corresponding source code location; otherwise they do nothing. +#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE + // We don't call OutputDebugString*() on Windows Mobile, as printing + // to stdout is done by OutputDebugString() there already - we don't + // want the same message printed twice. + ::OutputDebugStringA(result.c_str()); + ::OutputDebugStringA("\n"); +#endif +} + +// class PrettyUnitTestResultPrinter + +enum GTestColor { + COLOR_DEFAULT, + COLOR_RED, + COLOR_GREEN, + COLOR_YELLOW +}; + +#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \ + !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT + +// Returns the character attribute for the given color. +WORD GetColorAttribute(GTestColor color) { + switch (color) { + case COLOR_RED: return FOREGROUND_RED; + case COLOR_GREEN: return FOREGROUND_GREEN; + case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN; + default: return 0; + } +} + +#else + +// Returns the ANSI color code for the given color. COLOR_DEFAULT is +// an invalid input. +const char* GetAnsiColorCode(GTestColor color) { + switch (color) { + case COLOR_RED: return "1"; + case COLOR_GREEN: return "2"; + case COLOR_YELLOW: return "3"; + default: return NULL; + }; +} + +#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE + +// Returns true iff Google Test should use colors in the output. +bool ShouldUseColor(bool stdout_is_tty) { + const char* const gtest_color = GTEST_FLAG(color).c_str(); + + if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) { +#if GTEST_OS_WINDOWS + // On Windows the TERM variable is usually not set, but the + // console there does support colors. + return stdout_is_tty; +#else + // On non-Windows platforms, we rely on the TERM variable. + const char* const term = posix::GetEnv("TERM"); + const bool term_supports_color = + String::CStringEquals(term, "xterm") || + String::CStringEquals(term, "xterm-color") || + String::CStringEquals(term, "xterm-256color") || + String::CStringEquals(term, "screen") || + String::CStringEquals(term, "screen-256color") || + String::CStringEquals(term, "tmux") || + String::CStringEquals(term, "tmux-256color") || + String::CStringEquals(term, "rxvt-unicode") || + String::CStringEquals(term, "rxvt-unicode-256color") || + String::CStringEquals(term, "linux") || + String::CStringEquals(term, "cygwin"); + return stdout_is_tty && term_supports_color; +#endif // GTEST_OS_WINDOWS + } + + return String::CaseInsensitiveCStringEquals(gtest_color, "yes") || + String::CaseInsensitiveCStringEquals(gtest_color, "true") || + String::CaseInsensitiveCStringEquals(gtest_color, "t") || + String::CStringEquals(gtest_color, "1"); + // We take "yes", "true", "t", and "1" as meaning "yes". 
If the + // value is neither one of these nor "auto", we treat it as "no" to + // be conservative. +} + +// Helpers for printing colored strings to stdout. Note that on Windows, we +// cannot simply emit special characters and have the terminal change colors. +// This routine must actually emit the characters rather than return a string +// that would be colored when printed, as can be done on Linux. +void ColoredPrintf(GTestColor color, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + +#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || \ + GTEST_OS_IOS || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT + const bool use_color = AlwaysFalse(); +#else + static const bool in_color_mode = + ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0); + const bool use_color = in_color_mode && (color != COLOR_DEFAULT); +#endif // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS + // The '!= 0' comparison is necessary to satisfy MSVC 7.1. + + if (!use_color) { + vprintf(fmt, args); + va_end(args); + return; + } + +#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \ + !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT + const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); + + // Gets the current text color. + CONSOLE_SCREEN_BUFFER_INFO buffer_info; + GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); + const WORD old_color_attrs = buffer_info.wAttributes; + + // We need to flush the stream buffers into the console before each + // SetConsoleTextAttribute call lest it affect the text that is already + // printed but has not yet reached the console. + fflush(stdout); + SetConsoleTextAttribute(stdout_handle, + GetColorAttribute(color) | FOREGROUND_INTENSITY); + vprintf(fmt, args); + + fflush(stdout); + // Restores the text color. + SetConsoleTextAttribute(stdout_handle, old_color_attrs); +#else + printf("\033[0;3%sm", GetAnsiColorCode(color)); + vprintf(fmt, args); + printf("\033[m"); // Resets the terminal to default. +#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE + va_end(args); +} + +// Text printed in Google Test's text output and --gunit_list_tests +// output to label the type parameter and value parameter for a test. +static const char kTypeParamLabel[] = "TypeParam"; +static const char kValueParamLabel[] = "GetParam()"; + +void PrintFullTestCommentIfPresent(const TestInfo& test_info) { + const char* const type_param = test_info.type_param(); + const char* const value_param = test_info.value_param(); + + if (type_param != NULL || value_param != NULL) { + printf(", where "); + if (type_param != NULL) { + printf("%s = %s", kTypeParamLabel, type_param); + if (value_param != NULL) + printf(" and "); + } + if (value_param != NULL) { + printf("%s = %s", kValueParamLabel, value_param); + } + } +} + +// This class implements the TestEventListener interface. +// +// Class PrettyUnitTestResultPrinter is copyable. +class PrettyUnitTestResultPrinter : public TestEventListener { + public: + PrettyUnitTestResultPrinter() {} + static void PrintTestName(const char * test_case, const char * test) { + printf("%s.%s", test_case, test); + } + + // The following methods override what's in the TestEventListener class. 
+ virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration); + virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test); + virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestCaseStart(const TestCase& test_case); + virtual void OnTestStart(const TestInfo& test_info); + virtual void OnTestPartResult(const TestPartResult& result); + virtual void OnTestEnd(const TestInfo& test_info); + virtual void OnTestCaseEnd(const TestCase& test_case); + virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test); + virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); + virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} + + private: + static void PrintFailedTests(const UnitTest& unit_test); +}; + + // Fired before each iteration of tests starts. +void PrettyUnitTestResultPrinter::OnTestIterationStart( + const UnitTest& unit_test, int iteration) { + if (GTEST_FLAG(repeat) != 1) + printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1); + + const char* const filter = GTEST_FLAG(filter).c_str(); + + // Prints the filter if it's not *. This reminds the user that some + // tests may be skipped. + if (!String::CStringEquals(filter, kUniversalFilter)) { + ColoredPrintf(COLOR_YELLOW, + "Note: %s filter = %s\n", GTEST_NAME_, filter); + } + + if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) { + const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1); + ColoredPrintf(COLOR_YELLOW, + "Note: This is test shard %d of %s.\n", + static_cast(shard_index) + 1, + internal::posix::GetEnv(kTestTotalShards)); + } + + if (GTEST_FLAG(shuffle)) { + ColoredPrintf(COLOR_YELLOW, + "Note: Randomizing tests' orders with a seed of %d .\n", + unit_test.random_seed()); + } + + ColoredPrintf(COLOR_GREEN, "[==========] "); + printf("Running %s from %s.\n", + FormatTestCount(unit_test.test_to_run_count()).c_str(), + FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart( + const UnitTest& /*unit_test*/) { + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("Global test environment set-up.\n"); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) { + const std::string counts = + FormatCountableNoun(test_case.test_to_run_count(), "test", "tests"); + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("%s from %s", counts.c_str(), test_case.name()); + if (test_case.type_param() == NULL) { + printf("\n"); + } else { + printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param()); + } + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) { + ColoredPrintf(COLOR_GREEN, "[ RUN ] "); + PrintTestName(test_info.test_case_name(), test_info.name()); + printf("\n"); + fflush(stdout); +} + +// Called after an assertion failure. +void PrettyUnitTestResultPrinter::OnTestPartResult( + const TestPartResult& result) { + // If the test part succeeded, we don't need to do anything. + if (result.type() == TestPartResult::kSuccess) + return; + + // Print failure message from the assertion (e.g. expected this and got that). 
+ PrintTestPartResult(result); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) { + if (test_info.result()->Passed()) { + ColoredPrintf(COLOR_GREEN, "[ OK ] "); + } else { + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + } + PrintTestName(test_info.test_case_name(), test_info.name()); + if (test_info.result()->Failed()) + PrintFullTestCommentIfPresent(test_info); + + if (GTEST_FLAG(print_time)) { + printf(" (%s ms)\n", internal::StreamableToString( + test_info.result()->elapsed_time()).c_str()); + } else { + printf("\n"); + } + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) { + if (!GTEST_FLAG(print_time)) return; + + const std::string counts = + FormatCountableNoun(test_case.test_to_run_count(), "test", "tests"); + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("%s from %s (%s ms total)\n\n", + counts.c_str(), test_case.name(), + internal::StreamableToString(test_case.elapsed_time()).c_str()); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart( + const UnitTest& /*unit_test*/) { + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("Global test environment tear-down\n"); + fflush(stdout); +} + +// Internal helper for printing the list of failed tests. +void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) { + const int failed_test_count = unit_test.failed_test_count(); + if (failed_test_count == 0) { + return; + } + + for (int i = 0; i < unit_test.total_test_case_count(); ++i) { + const TestCase& test_case = *unit_test.GetTestCase(i); + if (!test_case.should_run() || (test_case.failed_test_count() == 0)) { + continue; + } + for (int j = 0; j < test_case.total_test_count(); ++j) { + const TestInfo& test_info = *test_case.GetTestInfo(j); + if (!test_info.should_run() || test_info.result()->Passed()) { + continue; + } + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + printf("%s.%s", test_case.name(), test_info.name()); + PrintFullTestCommentIfPresent(test_info); + printf("\n"); + } + } +} + +void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, + int /*iteration*/) { + ColoredPrintf(COLOR_GREEN, "[==========] "); + printf("%s from %s ran.", + FormatTestCount(unit_test.test_to_run_count()).c_str(), + FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); + if (GTEST_FLAG(print_time)) { + printf(" (%s ms total)", + internal::StreamableToString(unit_test.elapsed_time()).c_str()); + } + printf("\n"); + ColoredPrintf(COLOR_GREEN, "[ PASSED ] "); + printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str()); + + int num_failures = unit_test.failed_test_count(); + if (!unit_test.Passed()) { + const int failed_test_count = unit_test.failed_test_count(); + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str()); + PrintFailedTests(unit_test); + printf("\n%2d FAILED %s\n", num_failures, + num_failures == 1 ? "TEST" : "TESTS"); + } + + int num_disabled = unit_test.reportable_disabled_test_count(); + if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) { + if (!num_failures) { + printf("\n"); // Add a spacer if no FAILURE banner is displayed. + } + ColoredPrintf(COLOR_YELLOW, + " YOU HAVE %d DISABLED %s\n\n", + num_disabled, + num_disabled == 1 ? "TEST" : "TESTS"); + } + // Ensure that Google Test output is printed before, e.g., heapchecker output. 
+  fflush(stdout);
+}
+
+// End PrettyUnitTestResultPrinter
+
+// class TestEventRepeater
+//
+// This class forwards events to other event listeners.
+class TestEventRepeater : public TestEventListener {
+ public:
+  TestEventRepeater() : forwarding_enabled_(true) {}
+  virtual ~TestEventRepeater();
+  void Append(TestEventListener *listener);
+  TestEventListener* Release(TestEventListener* listener);
+
+  // Controls whether events will be forwarded to listeners_. Set to false
+  // in death test child processes.
+  bool forwarding_enabled() const { return forwarding_enabled_; }
+  void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; }
+
+  virtual void OnTestProgramStart(const UnitTest& unit_test);
+  virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration);
+  virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test);
+  virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test);
+  virtual void OnTestCaseStart(const TestCase& test_case);
+  virtual void OnTestStart(const TestInfo& test_info);
+  virtual void OnTestPartResult(const TestPartResult& result);
+  virtual void OnTestEnd(const TestInfo& test_info);
+  virtual void OnTestCaseEnd(const TestCase& test_case);
+  virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test);
+  virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test);
+  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
+  virtual void OnTestProgramEnd(const UnitTest& unit_test);
+
+ private:
+  // Controls whether events will be forwarded to listeners_. Set to false
+  // in death test child processes.
+  bool forwarding_enabled_;
+  // The list of listeners that receive events.
+  std::vector<TestEventListener*> listeners_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater);
+};
+
+TestEventRepeater::~TestEventRepeater() {
+  ForEach(listeners_, Delete);
+}
+
+void TestEventRepeater::Append(TestEventListener *listener) {
+  listeners_.push_back(listener);
+}
+
+// TODO(vladl@google.com): Factor the search functionality into Vector::Find.
+TestEventListener* TestEventRepeater::Release(TestEventListener *listener) {
+  for (size_t i = 0; i < listeners_.size(); ++i) {
+    if (listeners_[i] == listener) {
+      listeners_.erase(listeners_.begin() + i);
+      return listener;
+    }
+  }
+
+  return NULL;
+}
+
+// Since most methods are very similar, use macros to reduce boilerplate.
+// This defines a member that forwards the call to all listeners.
+#define GTEST_REPEATER_METHOD_(Name, Type) \
+void TestEventRepeater::Name(const Type& parameter) { \
+  if (forwarding_enabled_) { \
+    for (size_t i = 0; i < listeners_.size(); i++) { \
+      listeners_[i]->Name(parameter); \
+    } \
+  } \
+}
+// This defines a member that forwards the call to all listeners in reverse
+// order.
+#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
+void TestEventRepeater::Name(const Type& parameter) { \
+  if (forwarding_enabled_) { \
+    for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { \
+      listeners_[i]->Name(parameter); \
+    } \
+  } \
+}
+
+GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest)
+GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest)
+GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase)
+GTEST_REPEATER_METHOD_(OnTestStart, TestInfo)
+GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult)
+GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest)
+GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo)
+GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase)
+GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest)
+
+#undef GTEST_REPEATER_METHOD_
+#undef GTEST_REVERSE_REPEATER_METHOD_
+
+void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test,
+                                             int iteration) {
+  if (forwarding_enabled_) {
+    for (size_t i = 0; i < listeners_.size(); i++) {
+      listeners_[i]->OnTestIterationStart(unit_test, iteration);
+    }
+  }
+}
+
+void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
+                                           int iteration) {
+  if (forwarding_enabled_) {
+    for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) {
+      listeners_[i]->OnTestIterationEnd(unit_test, iteration);
+    }
+  }
+}
+
+// End TestEventRepeater
+
+// This class generates an XML output file.
+class XmlUnitTestResultPrinter : public EmptyTestEventListener {
+ public:
+  explicit XmlUnitTestResultPrinter(const char* output_file);
+
+  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);
+
+ private:
+  // Is c a whitespace character that is normalized to a space character
+  // when it appears in an XML attribute value?
+  static bool IsNormalizableWhitespace(char c) {
+    return c == 0x9 || c == 0xA || c == 0xD;
+  }
+
+  // May c appear in a well-formed XML document?
+  static bool IsValidXmlCharacter(char c) {
+    return IsNormalizableWhitespace(c) || c >= 0x20;
+  }
+
+  // Returns an XML-escaped copy of the input string str.  If
+  // is_attribute is true, the text is meant to appear as an attribute
+  // value, and normalizable whitespace is preserved by replacing it
+  // with character references.
+  static std::string EscapeXml(const std::string& str, bool is_attribute);
+
+  // Returns the given string with all characters invalid in XML removed.
+  static std::string RemoveInvalidXmlCharacters(const std::string& str);
+
+  // Convenience wrapper around EscapeXml when str is an attribute value.
+  static std::string EscapeXmlAttribute(const std::string& str) {
+    return EscapeXml(str, true);
+  }
+
+  // Convenience wrapper around EscapeXml when str is not an attribute value.
+  static std::string EscapeXmlText(const char* str) {
+    return EscapeXml(str, false);
+  }
+
+  // Verifies that the given attribute belongs to the given element and
+  // streams the attribute as XML.
+  static void OutputXmlAttribute(std::ostream* stream,
+                                 const std::string& element_name,
+                                 const std::string& name,
+                                 const std::string& value);
+
+  // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
+  static void OutputXmlCDataSection(::std::ostream* stream, const char* data);
+
+  // Streams an XML representation of a TestInfo object.
+ static void OutputXmlTestInfo(::std::ostream* stream, + const char* test_case_name, + const TestInfo& test_info); + + // Prints an XML representation of a TestCase object + static void PrintXmlTestCase(::std::ostream* stream, + const TestCase& test_case); + + // Prints an XML summary of unit_test to output stream out. + static void PrintXmlUnitTest(::std::ostream* stream, + const UnitTest& unit_test); + + // Produces a string representing the test properties in a result as space + // delimited XML attributes based on the property key="value" pairs. + // When the std::string is not empty, it includes a space at the beginning, + // to delimit this attribute from prior attributes. + static std::string TestPropertiesAsXmlAttributes(const TestResult& result); + + // The output file. + const std::string output_file_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter); +}; + +// Creates a new XmlUnitTestResultPrinter. +XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file) + : output_file_(output_file) { + if (output_file_.c_str() == NULL || output_file_.empty()) { + fprintf(stderr, "XML output file may not be null\n"); + fflush(stderr); + exit(EXIT_FAILURE); + } +} + +// Called after the unit test ends. +void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, + int /*iteration*/) { + FILE* xmlout = NULL; + FilePath output_file(output_file_); + FilePath output_dir(output_file.RemoveFileName()); + + if (output_dir.CreateDirectoriesRecursively()) { + xmlout = posix::FOpen(output_file_.c_str(), "w"); + } + if (xmlout == NULL) { + // TODO(wan): report the reason of the failure. + // + // We don't do it for now as: + // + // 1. There is no urgent need for it. + // 2. It's a bit involved to make the errno variable thread-safe on + // all three operating systems (Linux, Windows, and Mac OS). + // 3. To interpret the meaning of errno in a thread-safe way, + // we need the strerror_r() function, which is not available on + // Windows. + fprintf(stderr, + "Unable to open file \"%s\"\n", + output_file_.c_str()); + fflush(stderr); + exit(EXIT_FAILURE); + } + std::stringstream stream; + PrintXmlUnitTest(&stream, unit_test); + fprintf(xmlout, "%s", StringStreamToString(&stream).c_str()); + fclose(xmlout); +} + +// Returns an XML-escaped copy of the input string str. If is_attribute +// is true, the text is meant to appear as an attribute value, and +// normalizable whitespace is preserved by replacing it with character +// references. +// +// Invalid XML characters in str, if any, are stripped from the output. +// It is expected that most, if not all, of the text processed by this +// module will consist of ordinary English text. +// If this module is ever modified to produce version 1.1 XML output, +// most invalid characters can be retained using character references. +// TODO(wan): It might be nice to have a minimally invasive, human-readable +// escaping scheme for invalid characters, rather than dropping them. 
+std::string XmlUnitTestResultPrinter::EscapeXml(
+    const std::string& str, bool is_attribute) {
+  Message m;
+
+  for (size_t i = 0; i < str.size(); ++i) {
+    const char ch = str[i];
+    switch (ch) {
+      case '<':
+        m << "&lt;";
+        break;
+      case '>':
+        m << "&gt;";
+        break;
+      case '&':
+        m << "&amp;";
+        break;
+      case '\'':
+        if (is_attribute)
+          m << "&apos;";
+        else
+          m << '\'';
+        break;
+      case '"':
+        if (is_attribute)
+          m << "&quot;";
+        else
+          m << '"';
+        break;
+      default:
+        if (IsValidXmlCharacter(ch)) {
+          if (is_attribute && IsNormalizableWhitespace(ch))
+            m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch))
+              << ";";
+          else
+            m << ch;
+        }
+        break;
+    }
+  }
+
+  return m.GetString();
+}
+
+// Returns the given string with all characters invalid in XML removed.
+// Currently invalid characters are dropped from the string.  An
+// alternative is to replace them with certain characters such as . or ?.
+std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
+    const std::string& str) {
+  std::string output;
+  output.reserve(str.size());
+  for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
+    if (IsValidXmlCharacter(*it))
+      output.push_back(*it);
+
+  return output;
+}
+
+// The following routines generate an XML representation of a UnitTest
+// object.
+//
+// This is how Google Test concepts map to the DTD:
+//
+// <testsuites name="AllTests">        <-- corresponds to a UnitTest object
+//   <testsuite name="testcase-name">  <-- corresponds to a TestCase object
+//     <testcase name="test-name">     <-- corresponds to a TestInfo object
+//       <failure message="...">...</failure>
+//       <failure message="...">...</failure>
+//       <failure message="...">...</failure>
+//                                     <-- individual assertion failures
+//     </testcase>
+//   </testsuite>
+// </testsuites>
+
+// Formats the given time in milliseconds as seconds.
+std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
+  ::std::stringstream ss;
+  ss << (static_cast<double>(ms) * 1e-3);
+  return ss.str();
+}
+
+static bool PortableLocaltime(time_t seconds, struct tm* out) {
+#if defined(_MSC_VER)
+  return localtime_s(out, &seconds) == 0;
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+  // MINGW <time.h> provides neither localtime_r nor localtime_s, but uses
+  // Windows' localtime(), which has a thread-local tm buffer.
+  struct tm* tm_ptr = localtime(&seconds);  // NOLINT
+  if (tm_ptr == NULL)
+    return false;
+  *out = *tm_ptr;
+  return true;
+#else
+  return localtime_r(&seconds, out) != NULL;
+#endif
+}
+
+// Converts the given epoch time in milliseconds to a date string in the ISO
+// 8601 format, without the timezone information.
+std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
+  struct tm time_struct;
+  if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
+    return "";
+  // YYYY-MM-DDThh:mm:ss
+  return StreamableToString(time_struct.tm_year + 1900) + "-" +
+      String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
+      String::FormatIntWidth2(time_struct.tm_mday) + "T" +
+      String::FormatIntWidth2(time_struct.tm_hour) + ":" +
+      String::FormatIntWidth2(time_struct.tm_min) + ":" +
+      String::FormatIntWidth2(time_struct.tm_sec);
+}
+
+// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
+void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
+                                                     const char* data) {
+  const char* segment = data;
+  *stream << "<![CDATA[";
+  for (;;) {
+    const char* const next_segment = strstr(segment, "]]>");
+    if (next_segment != NULL) {
+      stream->write(
+          segment, static_cast<std::streamsize>(next_segment - segment));
+      *stream << "]]>]]&gt;<![CDATA[";
+      segment = next_segment + strlen("]]>");
+    } else {
+      *stream << segment;
+      break;
+    }
+  }
+  *stream << "]]>";
+}
+
+void XmlUnitTestResultPrinter::OutputXmlAttribute(
+    std::ostream* stream,
+    const std::string& element_name,
+    const std::string& name,
+    const std::string& value) {
+  const std::vector<std::string>& allowed_names =
+      GetReservedAttributesForElement(element_name);
+
+  GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+                   allowed_names.end())
+      << "Attribute " << name << " is not allowed for element <" << element_name
+      << ">.";
+
+  *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
+}
+
+// Prints an XML representation of a TestInfo object.
+// TODO(wan): There is also value in printing properties with the plain printer.
+void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
+                                                 const char* test_case_name,
+                                                 const TestInfo& test_info) {
+  const TestResult& result = *test_info.result();
+  const std::string kTestcase = "testcase";
+
+  *stream << "    <testcase";
+  OutputXmlAttribute(stream, kTestcase, "name", test_info.name());
+
+  if (test_info.value_param() != NULL) {
+    OutputXmlAttribute(stream, kTestcase, "value_param",
+                       test_info.value_param());
+  }
+  if (test_info.type_param() != NULL) {
+    OutputXmlAttribute(stream, kTestcase, "type_param", test_info.type_param());
+  }
+
+  OutputXmlAttribute(stream, kTestcase, "status",
+                     test_info.should_run() ? "run" : "notrun");
+  OutputXmlAttribute(stream, kTestcase, "time",
+                     FormatTimeInMillisAsSeconds(result.elapsed_time()));
+  OutputXmlAttribute(stream, kTestcase, "classname", test_case_name);
+  *stream << TestPropertiesAsXmlAttributes(result);
+
+  int failures = 0;
+  for (int i = 0; i < result.total_part_count(); ++i) {
+    const TestPartResult& part = result.GetTestPartResult(i);
+    if (part.failed()) {
+      if (++failures == 1) {
+        *stream << ">\n";
+      }
+      const string location = internal::FormatCompilerIndependentFileLocation(
+          part.file_name(), part.line_number());
+      const string summary = location + "\n" + part.summary();
+      *stream << "      <failure message=\""
+              << EscapeXmlAttribute(summary.c_str())
+              << "\" type=\"\">";
+      const string detail = location + "\n" + part.message();
+      OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
+      *stream << "</failure>\n";
+    }
+  }
+
+  if (failures == 0)
+    *stream << " />\n";
+  else
+    *stream << "    </testcase>\n";
+}
+
+// Prints an XML representation of a TestCase object
+void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream,
+                                                const TestCase& test_case) {
+  const std::string kTestsuite = "testsuite";
+  *stream << "  <" << kTestsuite;
+  OutputXmlAttribute(stream, kTestsuite, "name", test_case.name());
+  OutputXmlAttribute(stream, kTestsuite, "tests",
+                     StreamableToString(test_case.reportable_test_count()));
+  OutputXmlAttribute(stream, kTestsuite, "failures",
+                     StreamableToString(test_case.failed_test_count()));
+  OutputXmlAttribute(
+      stream, kTestsuite, "disabled",
+      StreamableToString(test_case.reportable_disabled_test_count()));
+  OutputXmlAttribute(stream, kTestsuite, "errors", "0");
+  OutputXmlAttribute(stream, kTestsuite, "time",
+                     FormatTimeInMillisAsSeconds(test_case.elapsed_time()));
+  *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result())
+          << ">\n";
+
+  for (int i = 0; i < test_case.total_test_count(); ++i) {
+    if (test_case.GetTestInfo(i)->is_reportable())
+      OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i));
+  }
+  *stream << "  </" << kTestsuite << ">\n";
+}
+
+// Prints an XML summary of unit_test to output stream out.
+void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
+                                                const UnitTest& unit_test) {
+  const std::string kTestsuites = "testsuites";
+
+  *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+  *stream << "<" << kTestsuites;
+
+  OutputXmlAttribute(stream, kTestsuites, "tests",
+                     StreamableToString(unit_test.reportable_test_count()));
+  OutputXmlAttribute(stream, kTestsuites, "failures",
+                     StreamableToString(unit_test.failed_test_count()));
+  OutputXmlAttribute(
+      stream, kTestsuites, "disabled",
+      StreamableToString(unit_test.reportable_disabled_test_count()));
+  OutputXmlAttribute(stream, kTestsuites, "errors", "0");
+  OutputXmlAttribute(
+      stream, kTestsuites, "timestamp",
+      FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
+  OutputXmlAttribute(stream, kTestsuites, "time",
+                     FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
+
+  if (GTEST_FLAG(shuffle)) {
+    OutputXmlAttribute(stream, kTestsuites, "random_seed",
+                       StreamableToString(unit_test.random_seed()));
+  }
+
+  *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());
+
+  OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
+  *stream << ">\n";
+
+  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
+    if (unit_test.GetTestCase(i)->reportable_test_count() > 0)
+      PrintXmlTestCase(stream, *unit_test.GetTestCase(i));
+  }
+  *stream << "</" << kTestsuites << ">\n";
+}
+
+// Produces a string representing the test properties in a result as space
+// delimited XML attributes based on the property key="value" pairs.
+std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
+    const TestResult& result) {
+  Message attributes;
+  for (int i = 0; i < result.test_property_count(); ++i) {
+    const TestProperty& property = result.GetTestProperty(i);
+    attributes << " " << property.key() << "="
+        << "\"" << EscapeXmlAttribute(property.value()) << "\"";
+  }
+  return attributes.GetString();
+}
+
+// End XmlUnitTestResultPrinter
+
+#if GTEST_CAN_STREAM_RESULTS_
+
+// Checks if str contains '=', '&', '%' or '\n' characters. If yes,
+// replaces them by "%xx" where xx is their hexadecimal value. For
+// example, replaces "=" with "%3D". This algorithm is O(strlen(str))
+// in both time and space -- important as the input str may contain an
+// arbitrarily long test failure message and stack trace.
+string StreamingListener::UrlEncode(const char* str) {
+  string result;
+  result.reserve(strlen(str) + 1);
+  for (char ch = *str; ch != '\0'; ch = *++str) {
+    switch (ch) {
+      case '%':
+      case '=':
+      case '&':
+      case '\n':
+        result.append("%" + String::FormatByte(static_cast<unsigned char>(ch)));
+        break;
+      default:
+        result.push_back(ch);
+        break;
+    }
+  }
+  return result;
+}
+
+void StreamingListener::SocketWriter::MakeConnection() {
+  GTEST_CHECK_(sockfd_ == -1)
+      << "MakeConnection() can't be called when there is already a connection.";
+
+  addrinfo hints;
+  memset(&hints, 0, sizeof(hints));
+  hints.ai_family = AF_UNSPEC;    // To allow both IPv4 and IPv6 addresses.
+  hints.ai_socktype = SOCK_STREAM;
+  addrinfo* servinfo = NULL;
+
+  // Use the getaddrinfo() to get a linked list of IP addresses for
+  // the given host name.
+  const int error_num = getaddrinfo(
+      host_name_.c_str(), port_num_.c_str(), &hints, &servinfo);
+  if (error_num != 0) {
+    GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: "
+                        << gai_strerror(error_num);
+  }
+
+  // Loop through all the results and connect to the first we can.
+ for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL; + cur_addr = cur_addr->ai_next) { + sockfd_ = socket( + cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol); + if (sockfd_ != -1) { + // Connect the client socket to the server socket. + if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) { + close(sockfd_); + sockfd_ = -1; + } + } + } + + freeaddrinfo(servinfo); // all done with this structure + + if (sockfd_ == -1) { + GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to " + << host_name_ << ":" << port_num_; + } +} + +// End of class Streaming Listener +#endif // GTEST_CAN_STREAM_RESULTS__ + +// Class ScopedTrace + +// Pushes the given source file location and message onto a per-thread +// trace stack maintained by Google Test. +ScopedTrace::ScopedTrace(const char* file, int line, const Message& message) + GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) { + TraceInfo trace; + trace.file = file; + trace.line = line; + trace.message = message.GetString(); + + UnitTest::GetInstance()->PushGTestTrace(trace); +} + +// Pops the info pushed by the c'tor. +ScopedTrace::~ScopedTrace() + GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) { + UnitTest::GetInstance()->PopGTestTrace(); +} + + +// class OsStackTraceGetter + +const char* const OsStackTraceGetterInterface::kElidedFramesMarker = + "... " GTEST_NAME_ " internal frames ..."; + +string OsStackTraceGetter::CurrentStackTrace(int /*max_depth*/, + int /*skip_count*/) { + return ""; +} + +void OsStackTraceGetter::UponLeavingGTest() {} + +// A helper class that creates the premature-exit file in its +// constructor and deletes the file in its destructor. +class ScopedPrematureExitFile { + public: + explicit ScopedPrematureExitFile(const char* premature_exit_filepath) + : premature_exit_filepath_(premature_exit_filepath) { + // If a path to the premature-exit file is specified... + if (premature_exit_filepath != NULL && *premature_exit_filepath != '\0') { + // create the file with a single "0" character in it. I/O + // errors are ignored as there's nothing better we can do and we + // don't want to fail the test because of this. + FILE* pfile = posix::FOpen(premature_exit_filepath, "w"); + fwrite("0", 1, 1, pfile); + fclose(pfile); + } + } + + ~ScopedPrematureExitFile() { + if (premature_exit_filepath_ != NULL && *premature_exit_filepath_ != '\0') { + remove(premature_exit_filepath_); + } + } + + private: + const char* const premature_exit_filepath_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile); +}; + +} // namespace internal + +// class TestEventListeners + +TestEventListeners::TestEventListeners() + : repeater_(new internal::TestEventRepeater()), + default_result_printer_(NULL), + default_xml_generator_(NULL) { +} + +TestEventListeners::~TestEventListeners() { delete repeater_; } + +// Returns the standard listener responsible for the default console +// output. Can be removed from the listeners list to shut down default +// console output. Note that removing this object from the listener list +// with Release transfers its ownership to the user. +void TestEventListeners::Append(TestEventListener* listener) { + repeater_->Append(listener); +} + +// Removes the given event listener from the list and returns it. It then +// becomes the caller's responsibility to delete the listener. Returns +// NULL if the listener is not found in the list. 
+TestEventListener* TestEventListeners::Release(TestEventListener* listener) { + if (listener == default_result_printer_) + default_result_printer_ = NULL; + else if (listener == default_xml_generator_) + default_xml_generator_ = NULL; + return repeater_->Release(listener); +} + +// Returns repeater that broadcasts the TestEventListener events to all +// subscribers. +TestEventListener* TestEventListeners::repeater() { return repeater_; } + +// Sets the default_result_printer attribute to the provided listener. +// The listener is also added to the listener list and previous +// default_result_printer is removed from it and deleted. The listener can +// also be NULL in which case it will not be added to the list. Does +// nothing if the previous and the current listener objects are the same. +void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) { + if (default_result_printer_ != listener) { + // It is an error to pass this method a listener that is already in the + // list. + delete Release(default_result_printer_); + default_result_printer_ = listener; + if (listener != NULL) + Append(listener); + } +} + +// Sets the default_xml_generator attribute to the provided listener. The +// listener is also added to the listener list and previous +// default_xml_generator is removed from it and deleted. The listener can +// also be NULL in which case it will not be added to the list. Does +// nothing if the previous and the current listener objects are the same. +void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) { + if (default_xml_generator_ != listener) { + // It is an error to pass this method a listener that is already in the + // list. + delete Release(default_xml_generator_); + default_xml_generator_ = listener; + if (listener != NULL) + Append(listener); + } +} + +// Controls whether events will be forwarded by the repeater to the +// listeners in the list. +bool TestEventListeners::EventForwardingEnabled() const { + return repeater_->forwarding_enabled(); +} + +void TestEventListeners::SuppressEventForwarding() { + repeater_->set_forwarding_enabled(false); +} + +// class UnitTest + +// Gets the singleton UnitTest object. The first time this method is +// called, a UnitTest object is constructed and returned. Consecutive +// calls will return the same object. +// +// We don't protect this under mutex_ as a user is not supposed to +// call this before main() starts, from which point on the return +// value will never change. +UnitTest* UnitTest::GetInstance() { + // When compiled with MSVC 7.1 in optimized mode, destroying the + // UnitTest object upon exiting the program messes up the exit code, + // causing successful tests to appear failed. We have to use a + // different implementation in this case to bypass the compiler bug. + // This implementation makes the compiler happy, at the cost of + // leaking the UnitTest object. + + // CodeGear C++Builder insists on a public destructor for the + // default implementation. Use this implementation to keep good OO + // design with private destructor. + +#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) + static UnitTest* const instance = new UnitTest; + return instance; +#else + static UnitTest instance; + return &instance; +#endif // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) +} + +// Gets the number of successful test cases. 
+int UnitTest::successful_test_case_count() const { + return impl()->successful_test_case_count(); +} + +// Gets the number of failed test cases. +int UnitTest::failed_test_case_count() const { + return impl()->failed_test_case_count(); +} + +// Gets the number of all test cases. +int UnitTest::total_test_case_count() const { + return impl()->total_test_case_count(); +} + +// Gets the number of all test cases that contain at least one test +// that should run. +int UnitTest::test_case_to_run_count() const { + return impl()->test_case_to_run_count(); +} + +// Gets the number of successful tests. +int UnitTest::successful_test_count() const { + return impl()->successful_test_count(); +} + +// Gets the number of failed tests. +int UnitTest::failed_test_count() const { return impl()->failed_test_count(); } + +// Gets the number of disabled tests that will be reported in the XML report. +int UnitTest::reportable_disabled_test_count() const { + return impl()->reportable_disabled_test_count(); +} + +// Gets the number of disabled tests. +int UnitTest::disabled_test_count() const { + return impl()->disabled_test_count(); +} + +// Gets the number of tests to be printed in the XML report. +int UnitTest::reportable_test_count() const { + return impl()->reportable_test_count(); +} + +// Gets the number of all tests. +int UnitTest::total_test_count() const { return impl()->total_test_count(); } + +// Gets the number of tests that should run. +int UnitTest::test_to_run_count() const { return impl()->test_to_run_count(); } + +// Gets the time of the test program start, in ms from the start of the +// UNIX epoch. +internal::TimeInMillis UnitTest::start_timestamp() const { + return impl()->start_timestamp(); +} + +// Gets the elapsed time, in milliseconds. +internal::TimeInMillis UnitTest::elapsed_time() const { + return impl()->elapsed_time(); +} + +// Returns true iff the unit test passed (i.e. all test cases passed). +bool UnitTest::Passed() const { return impl()->Passed(); } + +// Returns true iff the unit test failed (i.e. some test case failed +// or something outside of all tests failed). +bool UnitTest::Failed() const { return impl()->Failed(); } + +// Gets the i-th test case among all the test cases. i can range from 0 to +// total_test_case_count() - 1. If i is not in that range, returns NULL. +const TestCase* UnitTest::GetTestCase(int i) const { + return impl()->GetTestCase(i); +} + +// Returns the TestResult containing information on test failures and +// properties logged outside of individual test cases. +const TestResult& UnitTest::ad_hoc_test_result() const { + return *impl()->ad_hoc_test_result(); +} + +// Gets the i-th test case among all the test cases. i can range from 0 to +// total_test_case_count() - 1. If i is not in that range, returns NULL. +TestCase* UnitTest::GetMutableTestCase(int i) { + return impl()->GetMutableTestCase(i); +} + +// Returns the list of event listeners that can be used to track events +// inside Google Test. +TestEventListeners& UnitTest::listeners() { + return *impl()->listeners(); +} + +// Registers and returns a global test environment. When a test +// program is run, all global test environments will be set-up in the +// order they were registered. After all tests in the program have +// finished, all global test environments will be torn-down in the +// *reverse* order they were registered. +// +// The UnitTest object takes ownership of the given environment. 
+//
+// We don't protect this under mutex_, as we only support calling it
+// from the main thread.
+Environment* UnitTest::AddEnvironment(Environment* env) {
+  if (env == NULL) {
+    return NULL;
+  }
+
+  impl_->environments().push_back(env);
+  return env;
+}
+
+// Adds a TestPartResult to the current TestResult object.  All Google Test
+// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
+// this to report their results.  The user code should use the
+// assertion macros instead of calling this directly.
+void UnitTest::AddTestPartResult(
+    TestPartResult::Type result_type,
+    const char* file_name,
+    int line_number,
+    const std::string& message,
+    const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_) {
+  Message msg;
+  msg << message;
+
+  internal::MutexLock lock(&mutex_);
+  if (impl_->gtest_trace_stack().size() > 0) {
+    msg << "\n" << GTEST_NAME_ << " trace:";
+
+    for (int i = static_cast<int>(impl_->gtest_trace_stack().size());
+         i > 0; --i) {
+      const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
+      msg << "\n" << internal::FormatFileLocation(trace.file, trace.line)
+          << " " << trace.message;
+    }
+  }
+
+  if (os_stack_trace.c_str() != NULL && !os_stack_trace.empty()) {
+    msg << internal::kStackTraceMarker << os_stack_trace;
+  }
+
+  const TestPartResult result =
+    TestPartResult(result_type, file_name, line_number,
+                   msg.GetString().c_str());
+  impl_->GetTestPartResultReporterForCurrentThread()->
+      ReportTestPartResult(result);
+
+  if (result_type != TestPartResult::kSuccess) {
+    // gtest_break_on_failure takes precedence over
+    // gtest_throw_on_failure.  This allows a user to set the latter
+    // in the code (perhaps in order to use Google Test assertions
+    // with another testing framework) and specify the former on the
+    // command line for debugging.
+    if (GTEST_FLAG(break_on_failure)) {
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
+      // Using DebugBreak on Windows allows gtest to still break into a debugger
+      // when a failure happens and both the --gtest_break_on_failure and
+      // the --gtest_catch_exceptions flags are specified.
+      DebugBreak();
+#else
+      // Dereference NULL through a volatile pointer to prevent the compiler
+      // from removing. We use this rather than abort() or __builtin_trap() for
+      // portability: Symbian doesn't implement abort() well, and some debuggers
+      // don't correctly trap abort().
+      *static_cast<volatile int*>(NULL) = 1;
+#endif  // GTEST_OS_WINDOWS
+    } else if (GTEST_FLAG(throw_on_failure)) {
+#if GTEST_HAS_EXCEPTIONS
+      throw internal::GoogleTestFailureException(result);
+#else
+      // We cannot call abort() as it generates a pop-up in debug mode
+      // that cannot be suppressed in VC 7.1 or below.
+      exit(1);
+#endif
+    }
+  }
+}
+
+// Adds a TestProperty to the current TestResult object when invoked from
+// inside a test, to current TestCase's ad_hoc_test_result_ when invoked
+// from SetUpTestCase or TearDownTestCase, or to the global property set
+// when invoked elsewhere.  If the result already contains a property with
+// the same key, the value will be updated.
+void UnitTest::RecordProperty(const std::string& key,
+                              const std::string& value) {
+  impl_->RecordProperty(TestProperty(key, value));
+}
+
+// Runs all tests in this UnitTest object and prints the result.
+// Returns 0 if successful, or 1 otherwise.
+//
+// We don't protect this under mutex_, as we only support calling it
+// from the main thread.
+int UnitTest::Run() { + const bool in_death_test_child_process = + internal::GTEST_FLAG(internal_run_death_test).length() > 0; + + // Google Test implements this protocol for catching that a test + // program exits before returning control to Google Test: + // + // 1. Upon start, Google Test creates a file whose absolute path + // is specified by the environment variable + // TEST_PREMATURE_EXIT_FILE. + // 2. When Google Test has finished its work, it deletes the file. + // + // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before + // running a Google-Test-based test program and check the existence + // of the file at the end of the test execution to see if it has + // exited prematurely. + + // If we are in the child process of a death test, don't + // create/delete the premature exit file, as doing so is unnecessary + // and will confuse the parent process. Otherwise, create/delete + // the file upon entering/leaving this function. If the program + // somehow exits before this function has a chance to return, the + // premature-exit file will be left undeleted, causing a test runner + // that understands the premature-exit-file protocol to report the + // test as having failed. + const internal::ScopedPrematureExitFile premature_exit_file( + in_death_test_child_process ? + NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE")); + + // Captures the value of GTEST_FLAG(catch_exceptions). This value will be + // used for the duration of the program. + impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions)); + +#if GTEST_HAS_SEH + // Either the user wants Google Test to catch exceptions thrown by the + // tests or this is executing in the context of death test child + // process. In either case the user does not want to see pop-up dialogs + // about crashes - they are expected. + if (impl()->catch_exceptions() || in_death_test_child_process) { +# if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT + // SetErrorMode doesn't exist on CE. + SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT | + SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX); +# endif // !GTEST_OS_WINDOWS_MOBILE + +# if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE + // Death test children can be terminated with _abort(). On Windows, + // _abort() can show a dialog with a warning message. This forces the + // abort message to go to stderr instead. + _set_error_mode(_OUT_TO_STDERR); +# endif + +# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE + // In the debug version, Visual Studio pops up a separate dialog + // offering a choice to debug the aborted program. We need to suppress + // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement + // executed. Google Test will notify the user of any unexpected + // failure via stderr. + // + // VC++ doesn't define _set_abort_behavior() prior to the version 8.0. + // Users of prior VC versions shall suffer the agony and pain of + // clicking through the countless debug dialogs. + // TODO(vladl@google.com): find a way to suppress the abort dialog() in the + // debug mode when compiled with VC 7.1 or lower. + if (!GTEST_FLAG(break_on_failure)) + _set_abort_behavior( + 0x0, // Clear the following flags: + _WRITE_ABORT_MSG | _CALL_REPORTFAULT); // pop-up window, core dump. +# endif + } +#endif // GTEST_HAS_SEH + + return internal::HandleExceptionsInMethodIfSupported( + impl(), + &internal::UnitTestImpl::RunAllTests, + "auxiliary test code (environments or event listeners)") ? 
0 : 1; +} + +// Returns the working directory when the first TEST() or TEST_F() was +// executed. +const char* UnitTest::original_working_dir() const { + return impl_->original_working_dir_.c_str(); +} + +// Returns the TestCase object for the test that's currently running, +// or NULL if no test is running. +const TestCase* UnitTest::current_test_case() const + GTEST_LOCK_EXCLUDED_(mutex_) { + internal::MutexLock lock(&mutex_); + return impl_->current_test_case(); +} + +// Returns the TestInfo object for the test that's currently running, +// or NULL if no test is running. +const TestInfo* UnitTest::current_test_info() const + GTEST_LOCK_EXCLUDED_(mutex_) { + internal::MutexLock lock(&mutex_); + return impl_->current_test_info(); +} + +// Returns the random seed used at the start of the current test run. +int UnitTest::random_seed() const { return impl_->random_seed(); } + +#if GTEST_HAS_PARAM_TEST +// Returns ParameterizedTestCaseRegistry object used to keep track of +// value-parameterized tests and instantiate and register them. +internal::ParameterizedTestCaseRegistry& + UnitTest::parameterized_test_registry() + GTEST_LOCK_EXCLUDED_(mutex_) { + return impl_->parameterized_test_registry(); +} +#endif // GTEST_HAS_PARAM_TEST + +// Creates an empty UnitTest. +UnitTest::UnitTest() { + impl_ = new internal::UnitTestImpl(this); +} + +// Destructor of UnitTest. +UnitTest::~UnitTest() { + delete impl_; +} + +// Pushes a trace defined by SCOPED_TRACE() on to the per-thread +// Google Test trace stack. +void UnitTest::PushGTestTrace(const internal::TraceInfo& trace) + GTEST_LOCK_EXCLUDED_(mutex_) { + internal::MutexLock lock(&mutex_); + impl_->gtest_trace_stack().push_back(trace); +} + +// Pops a trace from the per-thread Google Test trace stack. +void UnitTest::PopGTestTrace() + GTEST_LOCK_EXCLUDED_(mutex_) { + internal::MutexLock lock(&mutex_); + impl_->gtest_trace_stack().pop_back(); +} + +namespace internal { + +UnitTestImpl::UnitTestImpl(UnitTest* parent) + : parent_(parent), + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4355 /* using this in initializer */) + default_global_test_part_result_reporter_(this), + default_per_thread_test_part_result_reporter_(this), + GTEST_DISABLE_MSC_WARNINGS_POP_() + global_test_part_result_repoter_( + &default_global_test_part_result_reporter_), + per_thread_test_part_result_reporter_( + &default_per_thread_test_part_result_reporter_), +#if GTEST_HAS_PARAM_TEST + parameterized_test_registry_(), + parameterized_tests_registered_(false), +#endif // GTEST_HAS_PARAM_TEST + last_death_test_case_(-1), + current_test_case_(NULL), + current_test_info_(NULL), + ad_hoc_test_result_(), + os_stack_trace_getter_(NULL), + post_flag_parse_init_performed_(false), + random_seed_(0), // Will be overridden by the flag before first use. + random_(0), // Will be reseeded before first use. + start_timestamp_(0), + elapsed_time_(0), +#if GTEST_HAS_DEATH_TEST + death_test_factory_(new DefaultDeathTestFactory), +#endif + // Will be overridden by the flag before first use. + catch_exceptions_(false) { + listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter); +} + +UnitTestImpl::~UnitTestImpl() { + // Deletes every TestCase. + ForEach(test_cases_, internal::Delete); + + // Deletes every Environment. 
+ ForEach(environments_, internal::Delete); + + delete os_stack_trace_getter_; +} + +// Adds a TestProperty to the current TestResult object when invoked in a +// context of a test, to current test case's ad_hoc_test_result when invoke +// from SetUpTestCase/TearDownTestCase, or to the global property set +// otherwise. If the result already contains a property with the same key, +// the value will be updated. +void UnitTestImpl::RecordProperty(const TestProperty& test_property) { + std::string xml_element; + TestResult* test_result; // TestResult appropriate for property recording. + + if (current_test_info_ != NULL) { + xml_element = "testcase"; + test_result = &(current_test_info_->result_); + } else if (current_test_case_ != NULL) { + xml_element = "testsuite"; + test_result = &(current_test_case_->ad_hoc_test_result_); + } else { + xml_element = "testsuites"; + test_result = &ad_hoc_test_result_; + } + test_result->RecordProperty(xml_element, test_property); +} + +#if GTEST_HAS_DEATH_TEST +// Disables event forwarding if the control is currently in a death test +// subprocess. Must not be called before InitGoogleTest. +void UnitTestImpl::SuppressTestEventsIfInSubprocess() { + if (internal_run_death_test_flag_.get() != NULL) + listeners()->SuppressEventForwarding(); +} +#endif // GTEST_HAS_DEATH_TEST + +// Initializes event listeners performing XML output as specified by +// UnitTestOptions. Must not be called before InitGoogleTest. +void UnitTestImpl::ConfigureXmlOutput() { + const std::string& output_format = UnitTestOptions::GetOutputFormat(); + if (output_format == "xml") { + listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter( + UnitTestOptions::GetAbsolutePathToOutputFile().c_str())); + } else if (output_format != "") { + printf("WARNING: unrecognized output format \"%s\" ignored.\n", + output_format.c_str()); + fflush(stdout); + } +} + +#if GTEST_CAN_STREAM_RESULTS_ +// Initializes event listeners for streaming test results in string form. +// Must not be called before InitGoogleTest. +void UnitTestImpl::ConfigureStreamingOutput() { + const std::string& target = GTEST_FLAG(stream_result_to); + if (!target.empty()) { + const size_t pos = target.find(':'); + if (pos != std::string::npos) { + listeners()->Append(new StreamingListener(target.substr(0, pos), + target.substr(pos+1))); + } else { + printf("WARNING: unrecognized streaming target \"%s\" ignored.\n", + target.c_str()); + fflush(stdout); + } + } +} +#endif // GTEST_CAN_STREAM_RESULTS_ + +// Performs initialization dependent upon flag values obtained in +// ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to +// ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest +// this function is also called from RunAllTests. Since this function can be +// called more than once, it has to be idempotent. +void UnitTestImpl::PostFlagParsingInit() { + // Ensures that this function does not execute more than once. + if (!post_flag_parse_init_performed_) { + post_flag_parse_init_performed_ = true; + +#if defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_) + // Register to send notifications about key process state changes. + listeners()->Append(new GTEST_CUSTOM_TEST_EVENT_LISTENER_()); +#endif // defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_) + +#if GTEST_HAS_DEATH_TEST + InitDeathTestSubprocessControlInfo(); + SuppressTestEventsIfInSubprocess(); +#endif // GTEST_HAS_DEATH_TEST + + // Registers parameterized tests. 
This makes parameterized tests
+  // available to the UnitTest reflection API without running
+  // RUN_ALL_TESTS.
+  RegisterParameterizedTests();
+
+  // Configures listeners for XML output. This makes it possible for users
+  // to shut down the default XML output before invoking RUN_ALL_TESTS.
+  ConfigureXmlOutput();
+
+#if GTEST_CAN_STREAM_RESULTS_
+    // Configures listeners for streaming test results to the specified server.
+    ConfigureStreamingOutput();
+#endif  // GTEST_CAN_STREAM_RESULTS_
+  }
+}
+
+// A predicate that checks the name of a TestCase against a known
+// value.
+//
+// This is used for implementation of the UnitTest class only. We put
+// it in the anonymous namespace to prevent polluting the outer
+// namespace.
+//
+// TestCaseNameIs is copyable.
+class TestCaseNameIs {
+ public:
+  // Constructor.
+  explicit TestCaseNameIs(const std::string& name)
+      : name_(name) {}
+
+  // Returns true iff the name of test_case matches name_.
+  bool operator()(const TestCase* test_case) const {
+    return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0;
+  }
+
+ private:
+  std::string name_;
+};
+
+// Finds and returns a TestCase with the given name. If one doesn't
+// exist, creates one and returns it. It's the CALLER'S
+// RESPONSIBILITY to ensure that this function is only called WHEN THE
+// TESTS ARE NOT SHUFFLED.
+//
+// Arguments:
+//
+//   test_case_name: name of the test case
+//   type_param:     the name of the test case's type parameter, or NULL if
+//                   this is not a typed or a type-parameterized test case.
+//   set_up_tc:      pointer to the function that sets up the test case
+//   tear_down_tc:   pointer to the function that tears down the test case
+TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
+                                    const char* type_param,
+                                    Test::SetUpTestCaseFunc set_up_tc,
+                                    Test::TearDownTestCaseFunc tear_down_tc) {
+  // Can we find a TestCase with the given name?
+  const std::vector<TestCase*>::const_iterator test_case =
+      std::find_if(test_cases_.begin(), test_cases_.end(),
+                   TestCaseNameIs(test_case_name));
+
+  if (test_case != test_cases_.end())
+    return *test_case;
+
+  // No. Let's create one.
+  TestCase* const new_test_case =
+      new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc);
+
+  // Is this a death test case?
+  if (internal::UnitTestOptions::MatchesFilter(test_case_name,
+                                               kDeathTestCaseFilter)) {
+    // Yes. Inserts the test case after the last death test case
+    // defined so far. This only works when the test cases haven't
+    // been shuffled. Otherwise we may end up running a death test
+    // after a non-death test.
+    ++last_death_test_case_;
+    test_cases_.insert(test_cases_.begin() + last_death_test_case_,
+                       new_test_case);
+  } else {
+    // No. Appends to the end of the list.
+    test_cases_.push_back(new_test_case);
+  }
+
+  test_case_indices_.push_back(static_cast<int>(test_case_indices_.size()));
+  return new_test_case;
+}
+
+// Helpers for setting up / tearing down the given environment. They
+// are for use in the ForEach() function.
+static void SetUpEnvironment(Environment* env) { env->SetUp(); }
+static void TearDownEnvironment(Environment* env) { env->TearDown(); }
+
+// Runs all tests in this UnitTest object, prints the result, and
+// returns true if all tests are successful. If any exception is
+// thrown during a test, the test is considered to be failed, but the
+// rest of the tests will still be run.
+//
+// When parameterized tests are enabled, it expands and registers
+// parameterized tests first in RegisterParameterizedTests().
+// All other functions called from RunAllTests() may safely assume that +// parameterized tests are ready to be counted and run. +bool UnitTestImpl::RunAllTests() { + // Makes sure InitGoogleTest() was called. + if (!GTestIsInitialized()) { + printf("%s", + "\nThis test program did NOT call ::testing::InitGoogleTest " + "before calling RUN_ALL_TESTS(). Please fix it.\n"); + return false; + } + + // Do not run any test if the --help flag was specified. + if (g_help_flag) + return true; + + // Repeats the call to the post-flag parsing initialization in case the + // user didn't call InitGoogleTest. + PostFlagParsingInit(); + + // Even if sharding is not on, test runners may want to use the + // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding + // protocol. + internal::WriteToShardStatusFileIfNeeded(); + + // True iff we are in a subprocess for running a thread-safe-style + // death test. + bool in_subprocess_for_death_test = false; + +#if GTEST_HAS_DEATH_TEST + in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL); +# if defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_) + if (in_subprocess_for_death_test) { + GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_(); + } +# endif // defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_) +#endif // GTEST_HAS_DEATH_TEST + + const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex, + in_subprocess_for_death_test); + + // Compares the full test names with the filter to decide which + // tests to run. + const bool has_tests_to_run = FilterTests(should_shard + ? HONOR_SHARDING_PROTOCOL + : IGNORE_SHARDING_PROTOCOL) > 0; + + // Lists the tests and exits if the --gtest_list_tests flag was specified. + if (GTEST_FLAG(list_tests)) { + // This must be called *after* FilterTests() has been called. + ListTestsMatchingFilter(); + return true; + } + + random_seed_ = GTEST_FLAG(shuffle) ? + GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0; + + // True iff at least one test has failed. + bool failed = false; + + TestEventListener* repeater = listeners()->repeater(); + + start_timestamp_ = GetTimeInMillis(); + repeater->OnTestProgramStart(*parent_); + + // How many times to repeat the tests? We don't want to repeat them + // when we are inside the subprocess of a death test. + const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat); + // Repeats forever if the repeat count is negative. + const bool forever = repeat < 0; + for (int i = 0; forever || i != repeat; i++) { + // We want to preserve failures generated by ad-hoc test + // assertions executed before RUN_ALL_TESTS(). + ClearNonAdHocTestResult(); + + const TimeInMillis start = GetTimeInMillis(); + + // Shuffles test cases and tests if requested. + if (has_tests_to_run && GTEST_FLAG(shuffle)) { + random()->Reseed(random_seed_); + // This should be done before calling OnTestIterationStart(), + // such that a test event listener can see the actual test order + // in the event. + ShuffleTests(); + } + + // Tells the unit test event listeners that the tests are about to start. + repeater->OnTestIterationStart(*parent_, i); + + // Runs each test case if there is at least one test to run. + if (has_tests_to_run) { + // Sets up all environments beforehand. + repeater->OnEnvironmentsSetUpStart(*parent_); + ForEach(environments_, SetUpEnvironment); + repeater->OnEnvironmentsSetUpEnd(*parent_); + + // Runs the tests only if there was no fatal failure during global + // set-up. 
+ if (!Test::HasFatalFailure()) { + for (int test_index = 0; test_index < total_test_case_count(); + test_index++) { + GetMutableTestCase(test_index)->Run(); + } + } + + // Tears down all environments in reverse order afterwards. + repeater->OnEnvironmentsTearDownStart(*parent_); + std::for_each(environments_.rbegin(), environments_.rend(), + TearDownEnvironment); + repeater->OnEnvironmentsTearDownEnd(*parent_); + } + + elapsed_time_ = GetTimeInMillis() - start; + + // Tells the unit test event listener that the tests have just finished. + repeater->OnTestIterationEnd(*parent_, i); + + // Gets the result and clears it. + if (!Passed()) { + failed = true; + } + + // Restores the original test order after the iteration. This + // allows the user to quickly repro a failure that happens in the + // N-th iteration without repeating the first (N - 1) iterations. + // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in + // case the user somehow changes the value of the flag somewhere + // (it's always safe to unshuffle the tests). + UnshuffleTests(); + + if (GTEST_FLAG(shuffle)) { + // Picks a new random seed for each iteration. + random_seed_ = GetNextRandomSeed(random_seed_); + } + } + + repeater->OnTestProgramEnd(*parent_); + + return !failed; +} + +// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file +// if the variable is present. If a file already exists at this location, this +// function will write over it. If the variable is present, but the file cannot +// be created, prints an error and exits. +void WriteToShardStatusFileIfNeeded() { + const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile); + if (test_shard_file != NULL) { + FILE* const file = posix::FOpen(test_shard_file, "w"); + if (file == NULL) { + ColoredPrintf(COLOR_RED, + "Could not write to the test shard status file \"%s\" " + "specified by the %s environment variable.\n", + test_shard_file, kTestShardStatusFile); + fflush(stdout); + exit(EXIT_FAILURE); + } + fclose(file); + } +} + +// Checks whether sharding is enabled by examining the relevant +// environment variable values. If the variables are present, +// but inconsistent (i.e., shard_index >= total_shards), prints +// an error and exits. If in_subprocess_for_death_test, sharding is +// disabled because it must only be applied to the original test +// process. Otherwise, we could filter out death tests we intended to execute. 
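The sharding contract documented in the comment above is driven entirely by environment variables, so any external test runner can use it without linking against Google Test. The sketch below is an editorial illustration, not part of the vendored googletest sources: the binary path ./unit_tests and the shard count are placeholders, setenv() assumes a POSIX platform, and the variable names are the ones googletest conventionally documents (GTEST_TOTAL_SHARDS / GTEST_SHARD_INDEX, referred to via kTestTotalShards and kTestShardIndex in the code below).

// Hypothetical runner that fans a googletest binary out over N shards.
#include <stdlib.h>   // setenv(), system() -- POSIX assumed
#include <string>     // std::to_string

int main() {
  const int kTotalShards = 4;  // placeholder shard count
  int failed_shards = 0;
  for (int shard = 0; shard < kTotalShards; ++shard) {
    // Environment variables consulted by the sharding logic in this file.
    setenv("GTEST_TOTAL_SHARDS", std::to_string(kTotalShards).c_str(), 1);
    setenv("GTEST_SHARD_INDEX", std::to_string(shard).c_str(), 1);
    // Each invocation runs only the slice of tests assigned to this shard.
    if (system("./unit_tests") != 0) ++failed_shards;
  }
  return failed_shards == 0 ? 0 : 1;
}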
+bool ShouldShard(const char* total_shards_env, + const char* shard_index_env, + bool in_subprocess_for_death_test) { + if (in_subprocess_for_death_test) { + return false; + } + + const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1); + const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1); + + if (total_shards == -1 && shard_index == -1) { + return false; + } else if (total_shards == -1 && shard_index != -1) { + const Message msg = Message() + << "Invalid environment variables: you have " + << kTestShardIndex << " = " << shard_index + << ", but have left " << kTestTotalShards << " unset.\n"; + ColoredPrintf(COLOR_RED, msg.GetString().c_str()); + fflush(stdout); + exit(EXIT_FAILURE); + } else if (total_shards != -1 && shard_index == -1) { + const Message msg = Message() + << "Invalid environment variables: you have " + << kTestTotalShards << " = " << total_shards + << ", but have left " << kTestShardIndex << " unset.\n"; + ColoredPrintf(COLOR_RED, msg.GetString().c_str()); + fflush(stdout); + exit(EXIT_FAILURE); + } else if (shard_index < 0 || shard_index >= total_shards) { + const Message msg = Message() + << "Invalid environment variables: we require 0 <= " + << kTestShardIndex << " < " << kTestTotalShards + << ", but you have " << kTestShardIndex << "=" << shard_index + << ", " << kTestTotalShards << "=" << total_shards << ".\n"; + ColoredPrintf(COLOR_RED, msg.GetString().c_str()); + fflush(stdout); + exit(EXIT_FAILURE); + } + + return total_shards > 1; +} + +// Parses the environment variable var as an Int32. If it is unset, +// returns default_val. If it is not an Int32, prints an error +// and aborts. +Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) { + const char* str_val = posix::GetEnv(var); + if (str_val == NULL) { + return default_val; + } + + Int32 result; + if (!ParseInt32(Message() << "The value of environment variable " << var, + str_val, &result)) { + exit(EXIT_FAILURE); + } + return result; +} + +// Given the total number of shards, the shard index, and the test id, +// returns true iff the test should be run on this shard. The test id is +// some arbitrary but unique non-negative integer assigned to each test +// method. Assumes that 0 <= shard_index < total_shards. +bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) { + return (test_id % total_shards) == shard_index; +} + +// Compares the name of each test with the user-specified filter to +// decide whether the test should be run, then records the result in +// each TestCase and TestInfo object. +// If shard_tests == true, further filters tests based on sharding +// variables in the environment - see +// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide. +// Returns the number of tests that should run. +int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) { + const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ? + Int32FromEnvOrDie(kTestTotalShards, -1) : -1; + const Int32 shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ? + Int32FromEnvOrDie(kTestShardIndex, -1) : -1; + + // num_runnable_tests are the number of tests that will + // run across all shards (i.e., match filter and are not disabled). + // num_selected_tests are the number of tests to be run on + // this shard. 
+ int num_runnable_tests = 0; + int num_selected_tests = 0; + for (size_t i = 0; i < test_cases_.size(); i++) { + TestCase* const test_case = test_cases_[i]; + const std::string &test_case_name = test_case->name(); + test_case->set_should_run(false); + + for (size_t j = 0; j < test_case->test_info_list().size(); j++) { + TestInfo* const test_info = test_case->test_info_list()[j]; + const std::string test_name(test_info->name()); + // A test is disabled if test case name or test name matches + // kDisableTestFilter. + const bool is_disabled = + internal::UnitTestOptions::MatchesFilter(test_case_name, + kDisableTestFilter) || + internal::UnitTestOptions::MatchesFilter(test_name, + kDisableTestFilter); + test_info->is_disabled_ = is_disabled; + + const bool matches_filter = + internal::UnitTestOptions::FilterMatchesTest(test_case_name, + test_name); + test_info->matches_filter_ = matches_filter; + + const bool is_runnable = + (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) && + matches_filter; + + const bool is_selected = is_runnable && + (shard_tests == IGNORE_SHARDING_PROTOCOL || + ShouldRunTestOnShard(total_shards, shard_index, + num_runnable_tests)); + + num_runnable_tests += is_runnable; + num_selected_tests += is_selected; + + test_info->should_run_ = is_selected; + test_case->set_should_run(test_case->should_run() || is_selected); + } + } + return num_selected_tests; +} + +// Prints the given C-string on a single line by replacing all '\n' +// characters with string "\\n". If the output takes more than +// max_length characters, only prints the first max_length characters +// and "...". +static void PrintOnOneLine(const char* str, int max_length) { + if (str != NULL) { + for (int i = 0; *str != '\0'; ++str) { + if (i >= max_length) { + printf("..."); + break; + } + if (*str == '\n') { + printf("\\n"); + i += 2; + } else { + printf("%c", *str); + ++i; + } + } + } +} + +// Prints the names of the tests matching the user-specified filter flag. +void UnitTestImpl::ListTestsMatchingFilter() { + // Print at most this many characters for each type/value parameter. + const int kMaxParamLength = 250; + + for (size_t i = 0; i < test_cases_.size(); i++) { + const TestCase* const test_case = test_cases_[i]; + bool printed_test_case_name = false; + + for (size_t j = 0; j < test_case->test_info_list().size(); j++) { + const TestInfo* const test_info = + test_case->test_info_list()[j]; + if (test_info->matches_filter_) { + if (!printed_test_case_name) { + printed_test_case_name = true; + printf("%s.", test_case->name()); + if (test_case->type_param() != NULL) { + printf(" # %s = ", kTypeParamLabel); + // We print the type parameter on a single line to make + // the output easy to parse by a program. + PrintOnOneLine(test_case->type_param(), kMaxParamLength); + } + printf("\n"); + } + printf(" %s", test_info->name()); + if (test_info->value_param() != NULL) { + printf(" # %s = ", kValueParamLabel); + // We print the value parameter on a single line to make the + // output easy to parse by a program. + PrintOnOneLine(test_info->value_param(), kMaxParamLength); + } + printf("\n"); + } + } + } + fflush(stdout); +} + +// Sets the OS stack trace getter. +// +// Does nothing if the input and the current OS stack trace getter are +// the same; otherwise, deletes the old getter and makes the input the +// current getter. 
+void UnitTestImpl::set_os_stack_trace_getter( + OsStackTraceGetterInterface* getter) { + if (os_stack_trace_getter_ != getter) { + delete os_stack_trace_getter_; + os_stack_trace_getter_ = getter; + } +} + +// Returns the current OS stack trace getter if it is not NULL; +// otherwise, creates an OsStackTraceGetter, makes it the current +// getter, and returns it. +OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() { + if (os_stack_trace_getter_ == NULL) { +#ifdef GTEST_OS_STACK_TRACE_GETTER_ + os_stack_trace_getter_ = new GTEST_OS_STACK_TRACE_GETTER_; +#else + os_stack_trace_getter_ = new OsStackTraceGetter; +#endif // GTEST_OS_STACK_TRACE_GETTER_ + } + + return os_stack_trace_getter_; +} + +// Returns the TestResult for the test that's currently running, or +// the TestResult for the ad hoc test if no test is running. +TestResult* UnitTestImpl::current_test_result() { + return current_test_info_ ? + &(current_test_info_->result_) : &ad_hoc_test_result_; +} + +// Shuffles all test cases, and the tests within each test case, +// making sure that death tests are still run first. +void UnitTestImpl::ShuffleTests() { + // Shuffles the death test cases. + ShuffleRange(random(), 0, last_death_test_case_ + 1, &test_case_indices_); + + // Shuffles the non-death test cases. + ShuffleRange(random(), last_death_test_case_ + 1, + static_cast(test_cases_.size()), &test_case_indices_); + + // Shuffles the tests inside each test case. + for (size_t i = 0; i < test_cases_.size(); i++) { + test_cases_[i]->ShuffleTests(random()); + } +} + +// Restores the test cases and tests to their order before the first shuffle. +void UnitTestImpl::UnshuffleTests() { + for (size_t i = 0; i < test_cases_.size(); i++) { + // Unshuffles the tests in each test case. + test_cases_[i]->UnshuffleTests(); + // Resets the index of each test case. + test_case_indices_[i] = static_cast(i); + } +} + +// Returns the current OS stack trace as an std::string. +// +// The maximum number of stack frames to be included is specified by +// the gtest_stack_trace_depth flag. The skip_count parameter +// specifies the number of top frames to be skipped, which doesn't +// count against the number of frames to be included. +// +// For example, if Foo() calls Bar(), which in turn calls +// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in +// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't. +std::string GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/, + int skip_count) { + // We pass skip_count + 1 to skip this wrapper function in addition + // to what the user really wants to skip. + return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1); +} + +// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to +// suppress unreachable code warnings. +namespace { +class ClassUniqueToAlwaysTrue {}; +} + +bool IsTrue(bool condition) { return condition; } + +bool AlwaysTrue() { +#if GTEST_HAS_EXCEPTIONS + // This condition is always false so AlwaysTrue() never actually throws, + // but it makes the compiler think that it may throw. + if (IsTrue(false)) + throw ClassUniqueToAlwaysTrue(); +#endif // GTEST_HAS_EXCEPTIONS + return true; +} + +// If *pstr starts with the given prefix, modifies *pstr to be right +// past the prefix and returns true; otherwise leaves *pstr unchanged +// and returns false. None of pstr, *pstr, and prefix can be NULL. 
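SkipPrefix, defined next, only advances the caller's pointer when the literal prefix matches, and HasGoogleTestFlagPrefix later in this file relies on exactly that behaviour. A minimal self-contained illustration follows; it is an editorial sketch rather than part of the vendored sources, and SkipPrefixDemo is a stand-in with the same contract (the flag string is made up).

#include <cassert>
#include <cstring>

// Stand-in with the same contract as the SkipPrefix described above.
static bool SkipPrefixDemo(const char* prefix, const char** pstr) {
  const std::size_t prefix_len = std::strlen(prefix);
  if (std::strncmp(*pstr, prefix, prefix_len) == 0) {
    *pstr += prefix_len;  // advance past the matched prefix
    return true;
  }
  return false;  // no match: *pstr is left unchanged
}

int main() {
  const char* arg = "--gtest_filter=Foo.*";  // made-up flag string
  assert(SkipPrefixDemo("--", &arg));        // arg now starts at "gtest_filter=..."
  assert(!SkipPrefixDemo("/", &arg));        // no match, arg unchanged
  return 0;
}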
+bool SkipPrefix(const char* prefix, const char** pstr) { + const size_t prefix_len = strlen(prefix); + if (strncmp(*pstr, prefix, prefix_len) == 0) { + *pstr += prefix_len; + return true; + } + return false; +} + +// Parses a string as a command line flag. The string should have +// the format "--flag=value". When def_optional is true, the "=value" +// part can be omitted. +// +// Returns the value of the flag, or NULL if the parsing failed. +const char* ParseFlagValue(const char* str, + const char* flag, + bool def_optional) { + // str and flag must not be NULL. + if (str == NULL || flag == NULL) return NULL; + + // The flag must start with "--" followed by GTEST_FLAG_PREFIX_. + const std::string flag_str = std::string("--") + GTEST_FLAG_PREFIX_ + flag; + const size_t flag_len = flag_str.length(); + if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL; + + // Skips the flag name. + const char* flag_end = str + flag_len; + + // When def_optional is true, it's OK to not have a "=value" part. + if (def_optional && (flag_end[0] == '\0')) { + return flag_end; + } + + // If def_optional is true and there are more characters after the + // flag name, or if def_optional is false, there must be a '=' after + // the flag name. + if (flag_end[0] != '=') return NULL; + + // Returns the string after "=". + return flag_end + 1; +} + +// Parses a string for a bool flag, in the form of either +// "--flag=value" or "--flag". +// +// In the former case, the value is taken as true as long as it does +// not start with '0', 'f', or 'F'. +// +// In the latter case, the value is taken as true. +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseBoolFlag(const char* str, const char* flag, bool* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, true); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Converts the string value to a bool. + *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); + return true; +} + +// Parses a string for an Int32 flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseInt32Flag(const char* str, const char* flag, Int32* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Sets *value to the value of the flag. + return ParseInt32(Message() << "The value of flag --" << flag, + value_str, value); +} + +// Parses a string for a string flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseStringFlag(const char* str, const char* flag, std::string* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Sets *value to the value of the flag. + *value = value_str; + return true; +} + +// Determines whether a string has a prefix that Google Test uses for its +// flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_. 
+// If Google Test detects that a command line flag has its prefix but is not +// recognized, it will print its help message. Flags starting with +// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test +// internal flags and do not trigger the help message. +static bool HasGoogleTestFlagPrefix(const char* str) { + return (SkipPrefix("--", &str) || + SkipPrefix("-", &str) || + SkipPrefix("/", &str)) && + !SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) && + (SkipPrefix(GTEST_FLAG_PREFIX_, &str) || + SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str)); +} + +// Prints a string containing code-encoded text. The following escape +// sequences can be used in the string to control the text color: +// +// @@ prints a single '@' character. +// @R changes the color to red. +// @G changes the color to green. +// @Y changes the color to yellow. +// @D changes to the default terminal text color. +// +// TODO(wan@google.com): Write tests for this once we add stdout +// capturing to Google Test. +static void PrintColorEncoded(const char* str) { + GTestColor color = COLOR_DEFAULT; // The current color. + + // Conceptually, we split the string into segments divided by escape + // sequences. Then we print one segment at a time. At the end of + // each iteration, the str pointer advances to the beginning of the + // next segment. + for (;;) { + const char* p = strchr(str, '@'); + if (p == NULL) { + ColoredPrintf(color, "%s", str); + return; + } + + ColoredPrintf(color, "%s", std::string(str, p).c_str()); + + const char ch = p[1]; + str = p + 2; + if (ch == '@') { + ColoredPrintf(color, "@"); + } else if (ch == 'D') { + color = COLOR_DEFAULT; + } else if (ch == 'R') { + color = COLOR_RED; + } else if (ch == 'G') { + color = COLOR_GREEN; + } else if (ch == 'Y') { + color = COLOR_YELLOW; + } else { + --str; + } + } +} + +static const char kColorEncodedHelpMessage[] = +"This program contains tests written using " GTEST_NAME_ ". You can use the\n" +"following command line flags to control its behavior:\n" +"\n" +"Test Selection:\n" +" @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n" +" List the names of all tests instead of running them. The name of\n" +" TEST(Foo, Bar) is \"Foo.Bar\".\n" +" @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSTIVE_PATTERNS" + "[@G-@YNEGATIVE_PATTERNS]@D\n" +" Run only the tests whose name matches one of the positive patterns but\n" +" none of the negative patterns. '?' matches any single character; '*'\n" +" matches any substring; ':' separates two patterns.\n" +" @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n" +" Run all disabled tests too.\n" +"\n" +"Test Execution:\n" +" @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n" +" Run the tests repeatedly; use a negative count to repeat forever.\n" +" @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n" +" Randomize tests' orders on every iteration.\n" +" @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n" +" Random number seed to use for shuffling test orders (between 1 and\n" +" 99999, or 0 to use a seed based on the current time).\n" +"\n" +"Test Output:\n" +" @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n" +" Enable/disable colored output. The default is @Gauto@D.\n" +" -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n" +" Don't print the elapsed time of each test.\n" +" @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G" + GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n" +" Generate an XML report in the given directory or with the given file\n" +" name. 
@YFILE_PATH@D defaults to @Gtest_details.xml@D.\n"
+#if GTEST_CAN_STREAM_RESULTS_
+"  @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n"
+"      Stream test results to the given server.\n"
+#endif  // GTEST_CAN_STREAM_RESULTS_
+"\n"
+"Assertion Behavior:\n"
+#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+"  @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
+"      Set the default death test style.\n"
+#endif  // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
+"  @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n"
+"      Turn assertion failures into debugger break-points.\n"
+"  @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n"
+"      Turn assertion failures into C++ exceptions.\n"
+"  @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n"
+"      Do not report exceptions as test failures. Instead, allow them\n"
+"      to crash the program or throw a pop-up (on Windows).\n"
+"\n"
+"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set "
+    "the corresponding\n"
+"environment variable of a flag (all letters in upper-case). For example, to\n"
+"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_
+    "color=no@D or set\n"
+"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n"
+"\n"
+"For more information, please read the " GTEST_NAME_ " documentation at\n"
+"@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n"
+"(not one in your own code or tests), please report it to\n"
+"@G<" GTEST_DEV_EMAIL_ ">@D.\n";
+
+bool ParseGoogleTestFlag(const char* const arg) {
+  return ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
+                       &GTEST_FLAG(also_run_disabled_tests)) ||
+      ParseBoolFlag(arg, kBreakOnFailureFlag,
+                    &GTEST_FLAG(break_on_failure)) ||
+      ParseBoolFlag(arg, kCatchExceptionsFlag,
+                    &GTEST_FLAG(catch_exceptions)) ||
+      ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
+      ParseStringFlag(arg, kDeathTestStyleFlag,
+                      &GTEST_FLAG(death_test_style)) ||
+      ParseBoolFlag(arg, kDeathTestUseFork,
+                    &GTEST_FLAG(death_test_use_fork)) ||
+      ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
+      ParseStringFlag(arg, kInternalRunDeathTestFlag,
+                      &GTEST_FLAG(internal_run_death_test)) ||
+      ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
+      ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
+      ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
+      ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
+      ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
+      ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
+      ParseInt32Flag(arg, kStackTraceDepthFlag,
+                     &GTEST_FLAG(stack_trace_depth)) ||
+      ParseStringFlag(arg, kStreamResultToFlag,
+                      &GTEST_FLAG(stream_result_to)) ||
+      ParseBoolFlag(arg, kThrowOnFailureFlag,
+                    &GTEST_FLAG(throw_on_failure));
+}
+
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+void LoadFlagsFromFile(const std::string& path) {
+  FILE* flagfile = posix::FOpen(path.c_str(), "r");
+  if (!flagfile) {
+    fprintf(stderr,
+            "Unable to open file \"%s\"\n",
+            GTEST_FLAG(flagfile).c_str());
+    fflush(stderr);
+    exit(EXIT_FAILURE);
+  }
+  std::string contents(ReadEntireFile(flagfile));
+  posix::FClose(flagfile);
+  std::vector<std::string> lines;
+  SplitString(contents, '\n', &lines);
+  for (size_t i = 0; i < lines.size(); ++i) {
+    if (lines[i].empty())
+      continue;
+    if (!ParseGoogleTestFlag(lines[i].c_str()))
+      g_help_flag = true;
+  }
+}
+#endif  // GTEST_USE_OWN_FLAGFILE_FLAG_
+
+// Parses the command line for Google Test flags, without initializing
+// other parts of Google Test.
The type parameter CharType can be
+// instantiated to either char or wchar_t.
+template <typename CharType>
+void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
+  for (int i = 1; i < *argc; i++) {
+    const std::string arg_string = StreamableToString(argv[i]);
+    const char* const arg = arg_string.c_str();
+
+    using internal::ParseBoolFlag;
+    using internal::ParseInt32Flag;
+    using internal::ParseStringFlag;
+
+    bool remove_flag = false;
+    if (ParseGoogleTestFlag(arg)) {
+      remove_flag = true;
+#if GTEST_USE_OWN_FLAGFILE_FLAG_
+    } else if (ParseStringFlag(arg, kFlagfileFlag, &GTEST_FLAG(flagfile))) {
+      LoadFlagsFromFile(GTEST_FLAG(flagfile));
+      remove_flag = true;
+#endif  // GTEST_USE_OWN_FLAGFILE_FLAG_
+    } else if (arg_string == "--help" || arg_string == "-h" ||
+               arg_string == "-?" || arg_string == "/?" ||
+               HasGoogleTestFlagPrefix(arg)) {
+      // Both help flag and unrecognized Google Test flags (excluding
+      // internal ones) trigger help display.
+      g_help_flag = true;
+    }
+
+    if (remove_flag) {
+      // Shift the remainder of the argv list left by one. Note
+      // that argv has (*argc + 1) elements, the last one always being
+      // NULL. The following loop moves the trailing NULL element as
+      // well.
+      for (int j = i; j != *argc; j++) {
+        argv[j] = argv[j + 1];
+      }
+
+      // Decrements the argument count.
+      (*argc)--;
+
+      // We also need to decrement the iterator as we just removed
+      // an element.
+      i--;
+    }
+  }
+
+  if (g_help_flag) {
+    // We print the help here instead of in RUN_ALL_TESTS(), as the
+    // latter may not be called at all if the user is using Google
+    // Test with another testing framework.
+    PrintColorEncoded(kColorEncodedHelpMessage);
+  }
+}
+
+// Parses the command line for Google Test flags, without initializing
+// other parts of Google Test.
+void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
+  ParseGoogleTestFlagsOnlyImpl(argc, argv);
+}
+void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
+  ParseGoogleTestFlagsOnlyImpl(argc, argv);
+}
+
+// The internal implementation of InitGoogleTest().
+//
+// The type parameter CharType can be instantiated to either char or
+// wchar_t.
+template <typename CharType>
+void InitGoogleTestImpl(int* argc, CharType** argv) {
+  // We don't want to run the initialization code twice.
+  if (GTestIsInitialized()) return;
+
+  if (*argc <= 0) return;
+
+  g_argvs.clear();
+  for (int i = 0; i != *argc; i++) {
+    g_argvs.push_back(StreamableToString(argv[i]));
+  }
+
+  ParseGoogleTestFlagsOnly(argc, argv);
+  GetUnitTestImpl()->PostFlagParsingInit();
+}
+
+}  // namespace internal
+
+// Initializes Google Test. This must be called before calling
+// RUN_ALL_TESTS(). In particular, it parses a command line for the
+// flags that Google Test recognizes. Whenever a Google Test flag is
+// seen, it is removed from argv, and *argc is decremented.
+//
+// No value is returned. Instead, the Google Test flag variables are
+// updated.
+//
+// Calling the function for the second time has no user-visible effect.
+void InitGoogleTest(int* argc, char** argv) {
+#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+  GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
+#else  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+  internal::InitGoogleTestImpl(argc, argv);
+#endif  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+}
+
+// This overloaded version can be used in Windows programs compiled in
+// UNICODE mode.
+void InitGoogleTest(int* argc, wchar_t** argv) {
+#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+  GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
+#else  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+  internal::InitGoogleTestImpl(argc, argv);
+#endif  // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
+}
+
+}  // namespace testing
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+
+GTEST_API_ int main(int argc, char **argv) {
+  printf("Running main() from gtest_main.cc\n");
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+// Copyright 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) +// +// Tests that verify interaction of exceptions and death tests. + +#include "gtest/gtest-death-test.h" +#include "gtest/gtest.h" + +#if GTEST_HAS_DEATH_TEST + +# if GTEST_HAS_SEH +# include // For RaiseException(). +# endif + +# include "gtest/gtest-spi.h" + +# if GTEST_HAS_EXCEPTIONS + +# include // For std::exception. + +// Tests that death tests report thrown exceptions as failures and that the +// exceptions do not escape death test macros. +TEST(CxxExceptionDeathTest, ExceptionIsFailure) { + try { + EXPECT_NONFATAL_FAILURE(EXPECT_DEATH(throw 1, ""), "threw an exception"); + } catch (...) { // NOLINT + FAIL() << "An exception escaped a death test macro invocation " + << "with catch_exceptions " + << (testing::GTEST_FLAG(catch_exceptions) ? "enabled" : "disabled"); + } +} + +class TestException : public std::exception { + public: + virtual const char* what() const throw() { return "exceptional message"; } +}; + +TEST(CxxExceptionDeathTest, PrintsMessageForStdExceptions) { + // Verifies that the exception message is quoted in the failure text. + EXPECT_NONFATAL_FAILURE(EXPECT_DEATH(throw TestException(), ""), + "exceptional message"); + // Verifies that the location is mentioned in the failure text. + EXPECT_NONFATAL_FAILURE(EXPECT_DEATH(throw TestException(), ""), + "gtest-death-test_ex_test.cc"); +} +# endif // GTEST_HAS_EXCEPTIONS + +# if GTEST_HAS_SEH +// Tests that enabling interception of SEH exceptions with the +// catch_exceptions flag does not interfere with SEH exceptions being +// treated as death by death tests. +TEST(SehExceptionDeasTest, CatchExceptionsDoesNotInterfere) { + EXPECT_DEATH(RaiseException(42, 0x0, 0, NULL), "") + << "with catch_exceptions " + << (testing::GTEST_FLAG(catch_exceptions) ? "enabled" : "disabled"); +} +# endif + +#endif // GTEST_HAS_DEATH_TEST + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + testing::GTEST_FLAG(catch_exceptions) = GTEST_ENABLE_CATCH_EXCEPTIONS_ != 0; + return RUN_ALL_TESTS(); +} +// Copyright 2003, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Dan Egnor (egnor@google.com)
+// Ported to Windows: Vadim Berman (vadimb@google.com)
+
+#include "gtest/internal/gtest-linked_ptr.h"
+
+#include <stdlib.h>
+#include "gtest/gtest.h"
+
+namespace {
+
+using testing::Message;
+using testing::internal::linked_ptr;
+
+int num;
+Message* history = NULL;
+
+// Class which tracks allocation/deallocation
+class A {
+ public:
+  A(): mynum(num++) { *history << "A" << mynum << " ctor\n"; }
+  virtual ~A() { *history << "A" << mynum << " dtor\n"; }
+  virtual void Use() { *history << "A" << mynum << " use\n"; }
+ protected:
+  int mynum;
+};
+
+// Subclass
+class B : public A {
+ public:
+  B() { *history << "B" << mynum << " ctor\n"; }
+  ~B() { *history << "B" << mynum << " dtor\n"; }
+  virtual void Use() { *history << "B" << mynum << " use\n"; }
+};
+
+class LinkedPtrTest : public testing::Test {
+ public:
+  LinkedPtrTest() {
+    num = 0;
+    history = new Message;
+  }
+
+  virtual ~LinkedPtrTest() {
+    delete history;
+    history = NULL;
+  }
+};
+
+TEST_F(LinkedPtrTest, GeneralTest) {
+  {
+    linked_ptr<A> a0, a1, a2;
+    // Use explicit function call notation here to suppress self-assign warning.
+    a0.operator=(a0);
+    a1 = a2;
+    ASSERT_EQ(a0.get(), static_cast<A*>(NULL));
+    ASSERT_EQ(a1.get(), static_cast<A*>(NULL));
+    ASSERT_EQ(a2.get(), static_cast<A*>(NULL));
+    ASSERT_TRUE(a0 == NULL);
+    ASSERT_TRUE(a1 == NULL);
+    ASSERT_TRUE(a2 == NULL);
+
+    {
+      linked_ptr<A> a3(new A);
+      a0 = a3;
+      ASSERT_TRUE(a0 == a3);
+      ASSERT_TRUE(a0 != NULL);
+      ASSERT_TRUE(a0.get() == a3);
+      ASSERT_TRUE(a0 == a3.get());
+      linked_ptr<A> a4(a0);
+      a1 = a4;
+      linked_ptr<A> a5(new A);
+      ASSERT_TRUE(a5.get() != a3);
+      ASSERT_TRUE(a5 != a3.get());
+      a2 = a5;
+      linked_ptr<B> b0(new B);
+      linked_ptr<A> a6(b0);
+      ASSERT_TRUE(b0 == a6);
+      ASSERT_TRUE(a6 == b0);
+      ASSERT_TRUE(b0 != NULL);
+      a5 = b0;
+      a5 = b0;
+      a3->Use();
+      a4->Use();
+      a5->Use();
+      a6->Use();
+      b0->Use();
+      (*b0).Use();
+      b0.get()->Use();
+    }
+
+    a0->Use();
+    a1->Use();
+    a2->Use();
+
+    a1 = a2;
+    a2.reset(new A);
+    a0.reset();
+
+    linked_ptr<A> a7;
+  }
+
+  ASSERT_STREQ(
+    "A0 ctor\n"
+    "A1 ctor\n"
+    "A2 ctor\n"
+    "B2 ctor\n"
+    "A0 use\n"
+    "A0 use\n"
+    "B2 use\n"
+    "B2 use\n"
+    "B2 use\n"
+    "B2 use\n"
+    "B2 use\n"
+    "B2 dtor\n"
+    "A2 dtor\n"
+    "A0 use\n"
+    "A0 use\n"
+    "A1 use\n"
+    "A3 ctor\n"
+    "A0 dtor\n"
+    "A3 dtor\n"
+    "A1 dtor\n",
+    history->GetString().c_str());
+}
+
+}  // Unnamed namespace
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Tests for the Message class. + +#include "gtest/gtest-message.h" + +#include "gtest/gtest.h" + +namespace { + +using ::testing::Message; + +// Tests the testing::Message class + +// Tests the default constructor. +TEST(MessageTest, DefaultConstructor) { + const Message msg; + EXPECT_EQ("", msg.GetString()); +} + +// Tests the copy constructor. +TEST(MessageTest, CopyConstructor) { + const Message msg1("Hello"); + const Message msg2(msg1); + EXPECT_EQ("Hello", msg2.GetString()); +} + +// Tests constructing a Message from a C-string. +TEST(MessageTest, ConstructsFromCString) { + Message msg("Hello"); + EXPECT_EQ("Hello", msg.GetString()); +} + +// Tests streaming a float. +TEST(MessageTest, StreamsFloat) { + const std::string s = (Message() << 1.23456F << " " << 2.34567F).GetString(); + // Both numbers should be printed with enough precision. + EXPECT_PRED_FORMAT2(testing::IsSubstring, "1.234560", s.c_str()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, " 2.345669", s.c_str()); +} + +// Tests streaming a double. +TEST(MessageTest, StreamsDouble) { + const std::string s = (Message() << 1260570880.4555497 << " " + << 1260572265.1954534).GetString(); + // Both numbers should be printed with enough precision. + EXPECT_PRED_FORMAT2(testing::IsSubstring, "1260570880.45", s.c_str()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, " 1260572265.19", s.c_str()); +} + +// Tests streaming a non-char pointer. +TEST(MessageTest, StreamsPointer) { + int n = 0; + int* p = &n; + EXPECT_NE("(null)", (Message() << p).GetString()); +} + +// Tests streaming a NULL non-char pointer. +TEST(MessageTest, StreamsNullPointer) { + int* p = NULL; + EXPECT_EQ("(null)", (Message() << p).GetString()); +} + +// Tests streaming a C string. +TEST(MessageTest, StreamsCString) { + EXPECT_EQ("Foo", (Message() << "Foo").GetString()); +} + +// Tests streaming a NULL C string. +TEST(MessageTest, StreamsNullCString) { + char* p = NULL; + EXPECT_EQ("(null)", (Message() << p).GetString()); +} + +// Tests streaming std::string. +TEST(MessageTest, StreamsString) { + const ::std::string str("Hello"); + EXPECT_EQ("Hello", (Message() << str).GetString()); +} + +// Tests that we can output strings containing embedded NULs. 
+TEST(MessageTest, StreamsStringWithEmbeddedNUL) { + const char char_array_with_nul[] = + "Here's a NUL\0 and some more string"; + const ::std::string string_with_nul(char_array_with_nul, + sizeof(char_array_with_nul) - 1); + EXPECT_EQ("Here's a NUL\\0 and some more string", + (Message() << string_with_nul).GetString()); +} + +// Tests streaming a NUL char. +TEST(MessageTest, StreamsNULChar) { + EXPECT_EQ("\\0", (Message() << '\0').GetString()); +} + +// Tests streaming int. +TEST(MessageTest, StreamsInt) { + EXPECT_EQ("123", (Message() << 123).GetString()); +} + +// Tests that basic IO manipulators (endl, ends, and flush) can be +// streamed to Message. +TEST(MessageTest, StreamsBasicIoManip) { + EXPECT_EQ("Line 1.\nA NUL char \\0 in line 2.", + (Message() << "Line 1." << std::endl + << "A NUL char " << std::ends << std::flush + << " in line 2.").GetString()); +} + +// Tests Message::GetString() +TEST(MessageTest, GetString) { + Message msg; + msg << 1 << " lamb"; + EXPECT_EQ("1 lamb", msg.GetString()); +} + +// Tests streaming a Message object to an ostream. +TEST(MessageTest, StreamsToOStream) { + Message msg("Hello"); + ::std::stringstream ss; + ss << msg; + EXPECT_EQ("Hello", testing::internal::StringStreamToString(&ss)); +} + +// Tests that a Message object doesn't take up too much stack space. +TEST(MessageTest, DoesNotTakeUpMuchStackSpace) { + EXPECT_LE(sizeof(Message), 16U); +} + +} // namespace +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/gtest.h" + +TEST(DummyTest, Dummy) { + // This test doesn't verify anything. We just need it to create a + // realistic stage for testing the behavior of Google Test when + // RUN_ALL_TESTS() is called without testing::InitGoogleTest() being + // called first. +} + +int main() { + return RUN_ALL_TESTS(); +} +// Copyright 2008 Google Inc. +// All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// + +#include "gtest/gtest-test-part.h" + +#include "gtest/gtest.h" + +using testing::Message; +using testing::Test; +using testing::TestPartResult; +using testing::TestPartResultArray; + +namespace { + +// Tests the TestPartResult class. + +// The test fixture for testing TestPartResult. 
+class TestPartResultTest : public Test { + protected: + TestPartResultTest() + : r1_(TestPartResult::kSuccess, "foo/bar.cc", 10, "Success!"), + r2_(TestPartResult::kNonFatalFailure, "foo/bar.cc", -1, "Failure!"), + r3_(TestPartResult::kFatalFailure, NULL, -1, "Failure!") {} + + TestPartResult r1_, r2_, r3_; +}; + + +TEST_F(TestPartResultTest, ConstructorWorks) { + Message message; + message << "something is terribly wrong"; + message << static_cast(testing::internal::kStackTraceMarker); + message << "some unimportant stack trace"; + + const TestPartResult result(TestPartResult::kNonFatalFailure, + "some_file.cc", + 42, + message.GetString().c_str()); + + EXPECT_EQ(TestPartResult::kNonFatalFailure, result.type()); + EXPECT_STREQ("some_file.cc", result.file_name()); + EXPECT_EQ(42, result.line_number()); + EXPECT_STREQ(message.GetString().c_str(), result.message()); + EXPECT_STREQ("something is terribly wrong", result.summary()); +} + +TEST_F(TestPartResultTest, ResultAccessorsWork) { + const TestPartResult success(TestPartResult::kSuccess, + "file.cc", + 42, + "message"); + EXPECT_TRUE(success.passed()); + EXPECT_FALSE(success.failed()); + EXPECT_FALSE(success.nonfatally_failed()); + EXPECT_FALSE(success.fatally_failed()); + + const TestPartResult nonfatal_failure(TestPartResult::kNonFatalFailure, + "file.cc", + 42, + "message"); + EXPECT_FALSE(nonfatal_failure.passed()); + EXPECT_TRUE(nonfatal_failure.failed()); + EXPECT_TRUE(nonfatal_failure.nonfatally_failed()); + EXPECT_FALSE(nonfatal_failure.fatally_failed()); + + const TestPartResult fatal_failure(TestPartResult::kFatalFailure, + "file.cc", + 42, + "message"); + EXPECT_FALSE(fatal_failure.passed()); + EXPECT_TRUE(fatal_failure.failed()); + EXPECT_FALSE(fatal_failure.nonfatally_failed()); + EXPECT_TRUE(fatal_failure.fatally_failed()); +} + +// Tests TestPartResult::type(). +TEST_F(TestPartResultTest, type) { + EXPECT_EQ(TestPartResult::kSuccess, r1_.type()); + EXPECT_EQ(TestPartResult::kNonFatalFailure, r2_.type()); + EXPECT_EQ(TestPartResult::kFatalFailure, r3_.type()); +} + +// Tests TestPartResult::file_name(). +TEST_F(TestPartResultTest, file_name) { + EXPECT_STREQ("foo/bar.cc", r1_.file_name()); + EXPECT_STREQ(NULL, r3_.file_name()); +} + +// Tests TestPartResult::line_number(). +TEST_F(TestPartResultTest, line_number) { + EXPECT_EQ(10, r1_.line_number()); + EXPECT_EQ(-1, r2_.line_number()); +} + +// Tests TestPartResult::message(). +TEST_F(TestPartResultTest, message) { + EXPECT_STREQ("Success!", r1_.message()); +} + +// Tests TestPartResult::passed(). +TEST_F(TestPartResultTest, Passed) { + EXPECT_TRUE(r1_.passed()); + EXPECT_FALSE(r2_.passed()); + EXPECT_FALSE(r3_.passed()); +} + +// Tests TestPartResult::failed(). +TEST_F(TestPartResultTest, Failed) { + EXPECT_FALSE(r1_.failed()); + EXPECT_TRUE(r2_.failed()); + EXPECT_TRUE(r3_.failed()); +} + +// Tests TestPartResult::fatally_failed(). +TEST_F(TestPartResultTest, FatallyFailed) { + EXPECT_FALSE(r1_.fatally_failed()); + EXPECT_FALSE(r2_.fatally_failed()); + EXPECT_TRUE(r3_.fatally_failed()); +} + +// Tests TestPartResult::nonfatally_failed(). +TEST_F(TestPartResultTest, NonfatallyFailed) { + EXPECT_FALSE(r1_.nonfatally_failed()); + EXPECT_TRUE(r2_.nonfatally_failed()); + EXPECT_FALSE(r3_.nonfatally_failed()); +} + +// Tests the TestPartResultArray class. 
+ +class TestPartResultArrayTest : public Test { + protected: + TestPartResultArrayTest() + : r1_(TestPartResult::kNonFatalFailure, "foo/bar.cc", -1, "Failure 1"), + r2_(TestPartResult::kFatalFailure, "foo/bar.cc", -1, "Failure 2") {} + + const TestPartResult r1_, r2_; +}; + +// Tests that TestPartResultArray initially has size 0. +TEST_F(TestPartResultArrayTest, InitialSizeIsZero) { + TestPartResultArray results; + EXPECT_EQ(0, results.size()); +} + +// Tests that TestPartResultArray contains the given TestPartResult +// after one Append() operation. +TEST_F(TestPartResultArrayTest, ContainsGivenResultAfterAppend) { + TestPartResultArray results; + results.Append(r1_); + EXPECT_EQ(1, results.size()); + EXPECT_STREQ("Failure 1", results.GetTestPartResult(0).message()); +} + +// Tests that TestPartResultArray contains the given TestPartResults +// after two Append() operations. +TEST_F(TestPartResultArrayTest, ContainsGivenResultsAfterTwoAppends) { + TestPartResultArray results; + results.Append(r1_); + results.Append(r2_); + EXPECT_EQ(2, results.size()); + EXPECT_STREQ("Failure 1", results.GetTestPartResult(0).message()); + EXPECT_STREQ("Failure 2", results.GetTestPartResult(1).message()); +} + +typedef TestPartResultArrayTest TestPartResultArrayDeathTest; + +// Tests that the program dies when GetTestPartResult() is called with +// an invalid index. +TEST_F(TestPartResultArrayDeathTest, DiesWhenIndexIsOutOfBound) { + TestPartResultArray results; + results.Append(r1_); + + EXPECT_DEATH_IF_SUPPORTED(results.GetTestPartResult(-1), ""); + EXPECT_DEATH_IF_SUPPORTED(results.GetTestPartResult(1), ""); +} + +// TODO(mheule@google.com): Add a test for the class HasNewFatalFailureHelper. + +} // namespace +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/internal/gtest-tuple.h" +#include +#include "gtest/gtest.h" + +namespace { + +using ::std::tr1::get; +using ::std::tr1::make_tuple; +using ::std::tr1::tuple; +using ::std::tr1::tuple_element; +using ::std::tr1::tuple_size; +using ::testing::StaticAssertTypeEq; + +// Tests that tuple_element >::type returns TK. +TEST(tuple_element_Test, ReturnsElementType) { + StaticAssertTypeEq >::type>(); + StaticAssertTypeEq >::type>(); + StaticAssertTypeEq >::type>(); +} + +// Tests that tuple_size::value gives the number of fields in tuple +// type T. +TEST(tuple_size_Test, ReturnsNumberOfFields) { + EXPECT_EQ(0, +tuple_size >::value); + EXPECT_EQ(1, +tuple_size >::value); + EXPECT_EQ(1, +tuple_size >::value); + EXPECT_EQ(1, +(tuple_size > >::value)); + EXPECT_EQ(2, +(tuple_size >::value)); + EXPECT_EQ(3, +(tuple_size >::value)); +} + +// Tests comparing a tuple with itself. +TEST(ComparisonTest, ComparesWithSelf) { + const tuple a(5, 'a', false); + + EXPECT_TRUE(a == a); + EXPECT_FALSE(a != a); +} + +// Tests comparing two tuples with the same value. +TEST(ComparisonTest, ComparesEqualTuples) { + const tuple a(5, true), b(5, true); + + EXPECT_TRUE(a == b); + EXPECT_FALSE(a != b); +} + +// Tests comparing two different tuples that have no reference fields. +TEST(ComparisonTest, ComparesUnequalTuplesWithoutReferenceFields) { + typedef tuple FooTuple; + + const FooTuple a(0, 'x'); + const FooTuple b(1, 'a'); + + EXPECT_TRUE(a != b); + EXPECT_FALSE(a == b); + + const FooTuple c(1, 'b'); + + EXPECT_TRUE(b != c); + EXPECT_FALSE(b == c); +} + +// Tests comparing two different tuples that have reference fields. +TEST(ComparisonTest, ComparesUnequalTuplesWithReferenceFields) { + typedef tuple FooTuple; + + int i = 5; + const char ch = 'a'; + const FooTuple a(i, ch); + + int j = 6; + const FooTuple b(j, ch); + + EXPECT_TRUE(a != b); + EXPECT_FALSE(a == b); + + j = 5; + const char ch2 = 'b'; + const FooTuple c(j, ch2); + + EXPECT_TRUE(b != c); + EXPECT_FALSE(b == c); +} + +// Tests that a tuple field with a reference type is an alias of the +// variable it's supposed to reference. +TEST(ReferenceFieldTest, IsAliasOfReferencedVariable) { + int n = 0; + tuple t(true, n); + + n = 1; + EXPECT_EQ(n, get<1>(t)) + << "Changing a underlying variable should update the reference field."; + + // Makes sure that the implementation doesn't do anything funny with + // the & operator for the return type of get<>(). + EXPECT_EQ(&n, &(get<1>(t))) + << "The address of a reference field should equal the address of " + << "the underlying variable."; + + get<1>(t) = 2; + EXPECT_EQ(2, n) + << "Changing a reference field should update the underlying variable."; +} + +// Tests that tuple's default constructor default initializes each field. +// This test needs to compile without generating warnings. +TEST(TupleConstructorTest, DefaultConstructorDefaultInitializesEachField) { + // The TR1 report requires that tuple's default constructor default + // initializes each field, even if it's a primitive type. If the + // implementation forgets to do this, this test will catch it by + // generating warnings about using uninitialized variables (assuming + // a decent compiler). 
+ + tuple<> empty; + + tuple a1, b1; + b1 = a1; + EXPECT_EQ(0, get<0>(b1)); + + tuple a2, b2; + b2 = a2; + EXPECT_EQ(0, get<0>(b2)); + EXPECT_EQ(0.0, get<1>(b2)); + + tuple a3, b3; + b3 = a3; + EXPECT_EQ(0.0, get<0>(b3)); + EXPECT_EQ('\0', get<1>(b3)); + EXPECT_TRUE(get<2>(b3) == NULL); + + tuple a10, b10; + b10 = a10; + EXPECT_EQ(0, get<0>(b10)); + EXPECT_EQ(0, get<1>(b10)); + EXPECT_EQ(0, get<2>(b10)); + EXPECT_EQ(0, get<3>(b10)); + EXPECT_EQ(0, get<4>(b10)); + EXPECT_EQ(0, get<5>(b10)); + EXPECT_EQ(0, get<6>(b10)); + EXPECT_EQ(0, get<7>(b10)); + EXPECT_EQ(0, get<8>(b10)); + EXPECT_EQ(0, get<9>(b10)); +} + +// Tests constructing a tuple from its fields. +TEST(TupleConstructorTest, ConstructsFromFields) { + int n = 1; + // Reference field. + tuple a(n); + EXPECT_EQ(&n, &(get<0>(a))); + + // Non-reference fields. + tuple b(5, 'a'); + EXPECT_EQ(5, get<0>(b)); + EXPECT_EQ('a', get<1>(b)); + + // Const reference field. + const int m = 2; + tuple c(true, m); + EXPECT_TRUE(get<0>(c)); + EXPECT_EQ(&m, &(get<1>(c))); +} + +// Tests tuple's copy constructor. +TEST(TupleConstructorTest, CopyConstructor) { + tuple a(0.0, true); + tuple b(a); + + EXPECT_DOUBLE_EQ(0.0, get<0>(b)); + EXPECT_TRUE(get<1>(b)); +} + +// Tests constructing a tuple from another tuple that has a compatible +// but different type. +TEST(TupleConstructorTest, ConstructsFromDifferentTupleType) { + tuple a(0, 1, 'a'); + tuple b(a); + + EXPECT_DOUBLE_EQ(0.0, get<0>(b)); + EXPECT_EQ(1, get<1>(b)); + EXPECT_EQ('a', get<2>(b)); +} + +// Tests constructing a 2-tuple from an std::pair. +TEST(TupleConstructorTest, ConstructsFromPair) { + ::std::pair a(1, 'a'); + tuple b(a); + tuple c(a); +} + +// Tests assigning a tuple to another tuple with the same type. +TEST(TupleAssignmentTest, AssignsToSameTupleType) { + const tuple a(5, 7L); + tuple b; + b = a; + EXPECT_EQ(5, get<0>(b)); + EXPECT_EQ(7L, get<1>(b)); +} + +// Tests assigning a tuple to another tuple with a different but +// compatible type. +TEST(TupleAssignmentTest, AssignsToDifferentTupleType) { + const tuple a(1, 7L, true); + tuple b; + b = a; + EXPECT_EQ(1L, get<0>(b)); + EXPECT_EQ(7, get<1>(b)); + EXPECT_TRUE(get<2>(b)); +} + +// Tests assigning an std::pair to a 2-tuple. +TEST(TupleAssignmentTest, AssignsFromPair) { + const ::std::pair a(5, true); + tuple b; + b = a; + EXPECT_EQ(5, get<0>(b)); + EXPECT_TRUE(get<1>(b)); + + tuple c; + c = a; + EXPECT_EQ(5L, get<0>(c)); + EXPECT_TRUE(get<1>(c)); +} + +// A fixture for testing big tuples. +class BigTupleTest : public testing::Test { + protected: + typedef tuple BigTuple; + + BigTupleTest() : + a_(1, 0, 0, 0, 0, 0, 0, 0, 0, 2), + b_(1, 0, 0, 0, 0, 0, 0, 0, 0, 3) {} + + BigTuple a_, b_; +}; + +// Tests constructing big tuples. +TEST_F(BigTupleTest, Construction) { + BigTuple a; + BigTuple b(b_); +} + +// Tests that get(t) returns the N-th (0-based) field of tuple t. +TEST_F(BigTupleTest, get) { + EXPECT_EQ(1, get<0>(a_)); + EXPECT_EQ(2, get<9>(a_)); + + // Tests that get() works on a const tuple too. + const BigTuple a(a_); + EXPECT_EQ(1, get<0>(a)); + EXPECT_EQ(2, get<9>(a)); +} + +// Tests comparing big tuples. 
+TEST_F(BigTupleTest, Comparisons) {
+  EXPECT_TRUE(a_ == a_);
+  EXPECT_FALSE(a_ != a_);
+
+  EXPECT_TRUE(a_ != b_);
+  EXPECT_FALSE(a_ == b_);
+}
+
+TEST(MakeTupleTest, WorksForScalarTypes) {
+  tuple<bool, int> a;
+  a = make_tuple(true, 5);
+  EXPECT_TRUE(get<0>(a));
+  EXPECT_EQ(5, get<1>(a));
+
+  tuple<char, char, int> b;
+  b = make_tuple('a', 'b', 5);
+  EXPECT_EQ('a', get<0>(b));
+  EXPECT_EQ('b', get<1>(b));
+  EXPECT_EQ(5, get<2>(b));
+}
+
+TEST(MakeTupleTest, WorksForPointers) {
+  int a[] = { 1, 2, 3, 4 };
+  const char* const str = "hi";
+  int* const p = a;
+
+  tuple<const char*, int*> t;
+  t = make_tuple(str, p);
+  EXPECT_EQ(str, get<0>(t));
+  EXPECT_EQ(p, get<1>(t));
+}
+
+}  // namespace
+// Copyright 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command
+// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND!
+
+// Regression test for gtest_pred_impl.h
+//
+// This file is generated by a script and quite long. If you intend to
+// learn how Google Test works by reading its unit tests, read
+// gtest_unittest.cc instead.
+//
+// This is intended as a regression test for the Google Test predicate
+// assertions. We compile it as part of the gtest_unittest target
+// only to keep the implementation tidy and compact, as it is quite
+// involved to set up the stage for testing Google Test using Google
+// Test itself.
+//
+// Currently, gtest_unittest takes ~11 seconds to run in the testing
+// daemon. In the future, if it grows too large and needs much more
+// time to finish, we should consider separating this file into a
+// stand-alone regression test.
+
+#include <iostream>
+
+#include "gtest/gtest.h"
+#include "gtest/gtest-spi.h"
+
+// A user-defined data type.
+struct Bool {
+  explicit Bool(int val) : value(val != 0) {}
+
+  bool operator>(int n) const { return value > Bool(n).value; }
+
+  Bool operator+(const Bool& rhs) const { return Bool(value + rhs.value); }
+
+  bool operator==(const Bool& rhs) const { return value == rhs.value; }
+
+  bool value;
+};
+
+// Enables Bool to be used in assertions.
+std::ostream& operator<<(std::ostream& os, const Bool& x) {
+  return os << (x.value ? "true" : "false");
+}
+
+// Sample functions/functors for testing unary predicate assertions.
+
+// A unary predicate function.
+template <typename T1>
+bool PredFunction1(T1 v1) {
+  return v1 > 0;
+}
+
+// The following two functions are needed to circumvent a bug in
+// gcc 2.95.3, which sometimes has problem with the above template
+// function.
+bool PredFunction1Int(int v1) {
+  return v1 > 0;
+}
+bool PredFunction1Bool(Bool v1) {
+  return v1 > 0;
+}
+
+// A unary predicate functor.
+struct PredFunctor1 {
+  template <typename T1>
+  bool operator()(const T1& v1) {
+    return v1 > 0;
+  }
+};
+
+// A unary predicate-formatter function.
+template <typename T1>
+testing::AssertionResult PredFormatFunction1(const char* e1,
+                                             const T1& v1) {
+  if (PredFunction1(v1))
+    return testing::AssertionSuccess();
+
+  return testing::AssertionFailure()
+      << e1
+      << " is expected to be positive, but evaluates to "
+      << v1 << ".";
+}
+
+// A unary predicate-formatter functor.
+struct PredFormatFunctor1 {
+  template <typename T1>
+  testing::AssertionResult operator()(const char* e1,
+                                      const T1& v1) const {
+    return PredFormatFunction1(e1, v1);
+  }
+};
+
+// Tests for {EXPECT|ASSERT}_PRED_FORMAT1.
+
+class Predicate1Test : public testing::Test {
+ protected:
+  virtual void SetUp() {
+    expected_to_finish_ = true;
+    finished_ = false;
+    n1_ = 0;
+  }
+
+  virtual void TearDown() {
+    // Verifies that each of the predicate's arguments was evaluated
+    // exactly once.
+    EXPECT_EQ(1, n1_) <<
+        "The predicate assertion didn't evaluate argument 2 "
+        "exactly once.";
+
+    // Verifies that the control flow in the test function is expected.
+    if (expected_to_finish_ && !finished_) {
+      FAIL() << "The predicate assertion unexpectedly aborted the test.";
+    } else if (!expected_to_finish_ && finished_) {
+      FAIL() << "The failed predicate assertion didn't abort the test "
+                "as expected.";
+    }
+  }
+
+  // true iff the test function is expected to run to finish.
+  static bool expected_to_finish_;
+
+  // true iff the test function did run to finish.
+  static bool finished_;
+
+  static int n1_;
+};
+
+bool Predicate1Test::expected_to_finish_;
+bool Predicate1Test::finished_;
+int Predicate1Test::n1_;
+
+typedef Predicate1Test EXPECT_PRED_FORMAT1Test;
+typedef Predicate1Test ASSERT_PRED_FORMAT1Test;
+typedef Predicate1Test EXPECT_PRED1Test;
+typedef Predicate1Test ASSERT_PRED1Test;
+
+// Tests a successful EXPECT_PRED1 where the
+// predicate-formatter is a function on a built-in type (int).
+TEST_F(EXPECT_PRED1Test, FunctionOnBuiltInTypeSuccess) {
+  EXPECT_PRED1(PredFunction1Int,
+               ++n1_);
+  finished_ = true;
+}
+
+// Tests a successful EXPECT_PRED1 where the
+// predicate-formatter is a function on a user-defined type (Bool).
+TEST_F(EXPECT_PRED1Test, FunctionOnUserTypeSuccess) {
+  EXPECT_PRED1(PredFunction1Bool,
+               Bool(++n1_));
+  finished_ = true;
+}
+
+// Tests a successful EXPECT_PRED1 where the
+// predicate-formatter is a functor on a built-in type (int).
+TEST_F(EXPECT_PRED1Test, FunctorOnBuiltInTypeSuccess) {
+  EXPECT_PRED1(PredFunctor1(),
+               ++n1_);
+  finished_ = true;
+}
+
+// Tests a successful EXPECT_PRED1 where the
+// predicate-formatter is a functor on a user-defined type (Bool).
+TEST_F(EXPECT_PRED1Test, FunctorOnUserTypeSuccess) {
+  EXPECT_PRED1(PredFunctor1(),
+               Bool(++n1_));
+  finished_ = true;
+}
+
+// Tests a failed EXPECT_PRED1 where the
+// predicate-formatter is a function on a built-in type (int).
+TEST_F(EXPECT_PRED1Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED1(PredFunction1Int, + n1_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED1 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED1Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED1(PredFunction1Bool, + Bool(n1_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED1 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED1Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED1(PredFunctor1(), + n1_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED1 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED1Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED1(PredFunctor1(), + Bool(n1_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED1 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED1Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED1(PredFunction1Int, + ++n1_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED1 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED1Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED1(PredFunction1Bool, + Bool(++n1_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED1 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED1Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED1(PredFunctor1(), + ++n1_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED1 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED1Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED1(PredFunctor1(), + Bool(++n1_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED1 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED1Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED1(PredFunction1Int, + n1_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED1 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED1Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED1(PredFunction1Bool, + Bool(n1_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED1 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED1Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED1(PredFunctor1(), + n1_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED1 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED1Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED1(PredFunctor1(), + Bool(n1_++)); + finished_ = true; + }, ""); +} + +// Tests a successful EXPECT_PRED_FORMAT1 where the +// predicate-formatter is a function on a built-in type (int). 
+TEST_F(EXPECT_PRED_FORMAT1Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT1(PredFormatFunction1, + ++n1_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT1 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT1Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED_FORMAT1(PredFormatFunction1, + Bool(++n1_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT1 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT1Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT1(PredFormatFunctor1(), + ++n1_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT1 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT1Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED_FORMAT1(PredFormatFunctor1(), + Bool(++n1_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED_FORMAT1 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT1Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT1(PredFormatFunction1, + n1_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT1 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT1Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT1(PredFormatFunction1, + Bool(n1_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT1 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT1Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT1(PredFormatFunctor1(), + n1_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT1 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT1Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT1(PredFormatFunctor1(), + Bool(n1_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED_FORMAT1 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT1Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT1(PredFormatFunction1, + ++n1_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT1 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT1Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED_FORMAT1(PredFormatFunction1, + Bool(++n1_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT1 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT1Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT1(PredFormatFunctor1(), + ++n1_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT1 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT1Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED_FORMAT1(PredFormatFunctor1(), + Bool(++n1_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED_FORMAT1 where the +// predicate-formatter is a function on a built-in type (int). 
+TEST_F(ASSERT_PRED_FORMAT1Test, FunctionOnBuiltInTypeFailure) {
+  expected_to_finish_ = false;
+  EXPECT_FATAL_FAILURE({  // NOLINT
+    ASSERT_PRED_FORMAT1(PredFormatFunction1,
+                        n1_++);
+    finished_ = true;
+  }, "");
+}
+
+// Tests a failed ASSERT_PRED_FORMAT1 where the
+// predicate-formatter is a function on a user-defined type (Bool).
+TEST_F(ASSERT_PRED_FORMAT1Test, FunctionOnUserTypeFailure) {
+  expected_to_finish_ = false;
+  EXPECT_FATAL_FAILURE({  // NOLINT
+    ASSERT_PRED_FORMAT1(PredFormatFunction1,
+                        Bool(n1_++));
+    finished_ = true;
+  }, "");
+}
+
+// Tests a failed ASSERT_PRED_FORMAT1 where the
+// predicate-formatter is a functor on a built-in type (int).
+TEST_F(ASSERT_PRED_FORMAT1Test, FunctorOnBuiltInTypeFailure) {
+  expected_to_finish_ = false;
+  EXPECT_FATAL_FAILURE({  // NOLINT
+    ASSERT_PRED_FORMAT1(PredFormatFunctor1(),
+                        n1_++);
+    finished_ = true;
+  }, "");
+}
+
+// Tests a failed ASSERT_PRED_FORMAT1 where the
+// predicate-formatter is a functor on a user-defined type (Bool).
+TEST_F(ASSERT_PRED_FORMAT1Test, FunctorOnUserTypeFailure) {
+  expected_to_finish_ = false;
+  EXPECT_FATAL_FAILURE({  // NOLINT
+    ASSERT_PRED_FORMAT1(PredFormatFunctor1(),
+                        Bool(n1_++));
+    finished_ = true;
+  }, "");
+}
+// Sample functions/functors for testing binary predicate assertions.
+
+// A binary predicate function.
+template <typename T1, typename T2>
+bool PredFunction2(T1 v1, T2 v2) {
+  return v1 + v2 > 0;
+}
+
+// The following two functions are needed to circumvent a bug in
+// gcc 2.95.3, which sometimes has problem with the above template
+// function.
+bool PredFunction2Int(int v1, int v2) {
+  return v1 + v2 > 0;
+}
+bool PredFunction2Bool(Bool v1, Bool v2) {
+  return v1 + v2 > 0;
+}
+
+// A binary predicate functor.
+struct PredFunctor2 {
+  template <typename T1, typename T2>
+  bool operator()(const T1& v1,
+                  const T2& v2) {
+    return v1 + v2 > 0;
+  }
+};
+
+// A binary predicate-formatter function.
+template <typename T1, typename T2>
+testing::AssertionResult PredFormatFunction2(const char* e1,
+                                             const char* e2,
+                                             const T1& v1,
+                                             const T2& v2) {
+  if (PredFunction2(v1, v2))
+    return testing::AssertionSuccess();
+
+  return testing::AssertionFailure()
+      << e1 << " + " << e2
+      << " is expected to be positive, but evaluates to "
+      << v1 + v2 << ".";
+}
+
+// A binary predicate-formatter functor.
+struct PredFormatFunctor2 {
+  template <typename T1, typename T2>
+  testing::AssertionResult operator()(const char* e1,
+                                      const char* e2,
+                                      const T1& v1,
+                                      const T2& v2) const {
+    return PredFormatFunction2(e1, e2, v1, v2);
+  }
+};
+
+// Tests for {EXPECT|ASSERT}_PRED_FORMAT2.
+
+class Predicate2Test : public testing::Test {
+ protected:
+  virtual void SetUp() {
+    expected_to_finish_ = true;
+    finished_ = false;
+    n1_ = n2_ = 0;
+  }
+
+  virtual void TearDown() {
+    // Verifies that each of the predicate's arguments was evaluated
+    // exactly once.
+    EXPECT_EQ(1, n1_) <<
+        "The predicate assertion didn't evaluate argument 2 "
+        "exactly once.";
+    EXPECT_EQ(1, n2_) <<
+        "The predicate assertion didn't evaluate argument 3 "
+        "exactly once.";
+
+    // Verifies that the control flow in the test function is expected.
+    if (expected_to_finish_ && !finished_) {
+      FAIL() << "The predicate assertion unexpectedly aborted the test.";
+    } else if (!expected_to_finish_ && finished_) {
+      FAIL() << "The failed predicate assertion didn't abort the test "
+                "as expected.";
+    }
+  }
+
+  // true iff the test function is expected to run to finish.
+  static bool expected_to_finish_;
+
+  // true iff the test function did run to finish.
+ static bool finished_; + + static int n1_; + static int n2_; +}; + +bool Predicate2Test::expected_to_finish_; +bool Predicate2Test::finished_; +int Predicate2Test::n1_; +int Predicate2Test::n2_; + +typedef Predicate2Test EXPECT_PRED_FORMAT2Test; +typedef Predicate2Test ASSERT_PRED_FORMAT2Test; +typedef Predicate2Test EXPECT_PRED2Test; +typedef Predicate2Test ASSERT_PRED2Test; + +// Tests a successful EXPECT_PRED2 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED2Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED2(PredFunction2Int, + ++n1_, + ++n2_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED2 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED2Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED2(PredFunction2Bool, + Bool(++n1_), + Bool(++n2_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED2 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED2Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED2(PredFunctor2(), + ++n1_, + ++n2_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED2 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED2Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED2(PredFunctor2(), + Bool(++n1_), + Bool(++n2_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED2 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED2Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED2(PredFunction2Int, + n1_++, + n2_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED2 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED2Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED2(PredFunction2Bool, + Bool(n1_++), + Bool(n2_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED2 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED2Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED2(PredFunctor2(), + n1_++, + n2_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED2 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED2Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED2(PredFunctor2(), + Bool(n1_++), + Bool(n2_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED2 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED2Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED2(PredFunction2Int, + ++n1_, + ++n2_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED2 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED2Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED2(PredFunction2Bool, + Bool(++n1_), + Bool(++n2_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED2 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED2Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED2(PredFunctor2(), + ++n1_, + ++n2_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED2 where the +// predicate-formatter is a functor on a user-defined type (Bool). 
+TEST_F(ASSERT_PRED2Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED2(PredFunctor2(), + Bool(++n1_), + Bool(++n2_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED2 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED2Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED2(PredFunction2Int, + n1_++, + n2_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED2 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED2Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED2(PredFunction2Bool, + Bool(n1_++), + Bool(n2_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED2 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED2Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED2(PredFunctor2(), + n1_++, + n2_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED2 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED2Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED2(PredFunctor2(), + Bool(n1_++), + Bool(n2_++)); + finished_ = true; + }, ""); +} + +// Tests a successful EXPECT_PRED_FORMAT2 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT2Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT2(PredFormatFunction2, + ++n1_, + ++n2_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT2 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT2Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED_FORMAT2(PredFormatFunction2, + Bool(++n1_), + Bool(++n2_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT2 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT2Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT2(PredFormatFunctor2(), + ++n1_, + ++n2_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT2 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT2Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED_FORMAT2(PredFormatFunctor2(), + Bool(++n1_), + Bool(++n2_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED_FORMAT2 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT2Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(PredFormatFunction2, + n1_++, + n2_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT2 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT2Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(PredFormatFunction2, + Bool(n1_++), + Bool(n2_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT2 where the +// predicate-formatter is a functor on a built-in type (int). 
+TEST_F(EXPECT_PRED_FORMAT2Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(PredFormatFunctor2(), + n1_++, + n2_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT2 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT2Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(PredFormatFunctor2(), + Bool(n1_++), + Bool(n2_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED_FORMAT2 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT2Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT2(PredFormatFunction2, + ++n1_, + ++n2_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT2 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT2Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED_FORMAT2(PredFormatFunction2, + Bool(++n1_), + Bool(++n2_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT2 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT2Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT2(PredFormatFunctor2(), + ++n1_, + ++n2_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT2 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT2Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED_FORMAT2(PredFormatFunctor2(), + Bool(++n1_), + Bool(++n2_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED_FORMAT2 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT2Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT2(PredFormatFunction2, + n1_++, + n2_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT2 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT2Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT2(PredFormatFunction2, + Bool(n1_++), + Bool(n2_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT2 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT2Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT2(PredFormatFunctor2(), + n1_++, + n2_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT2 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT2Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT2(PredFormatFunctor2(), + Bool(n1_++), + Bool(n2_++)); + finished_ = true; + }, ""); +} +// Sample functions/functors for testing ternary predicate assertions. + +// A ternary predicate function. +template +bool PredFunction3(T1 v1, T2 v2, T3 v3) { + return v1 + v2 + v3 > 0; +} + +// The following two functions are needed to circumvent a bug in +// gcc 2.95.3, which sometimes has problem with the above template +// function. +bool PredFunction3Int(int v1, int v2, int v3) { + return v1 + v2 + v3 > 0; +} +bool PredFunction3Bool(Bool v1, Bool v2, Bool v3) { + return v1 + v2 + v3 > 0; +} + +// A ternary predicate functor. 
+struct PredFunctor3 { + template + bool operator()(const T1& v1, + const T2& v2, + const T3& v3) { + return v1 + v2 + v3 > 0; + } +}; + +// A ternary predicate-formatter function. +template +testing::AssertionResult PredFormatFunction3(const char* e1, + const char* e2, + const char* e3, + const T1& v1, + const T2& v2, + const T3& v3) { + if (PredFunction3(v1, v2, v3)) + return testing::AssertionSuccess(); + + return testing::AssertionFailure() + << e1 << " + " << e2 << " + " << e3 + << " is expected to be positive, but evaluates to " + << v1 + v2 + v3 << "."; +} + +// A ternary predicate-formatter functor. +struct PredFormatFunctor3 { + template + testing::AssertionResult operator()(const char* e1, + const char* e2, + const char* e3, + const T1& v1, + const T2& v2, + const T3& v3) const { + return PredFormatFunction3(e1, e2, e3, v1, v2, v3); + } +}; + +// Tests for {EXPECT|ASSERT}_PRED_FORMAT3. + +class Predicate3Test : public testing::Test { + protected: + virtual void SetUp() { + expected_to_finish_ = true; + finished_ = false; + n1_ = n2_ = n3_ = 0; + } + + virtual void TearDown() { + // Verifies that each of the predicate's arguments was evaluated + // exactly once. + EXPECT_EQ(1, n1_) << + "The predicate assertion didn't evaluate argument 2 " + "exactly once."; + EXPECT_EQ(1, n2_) << + "The predicate assertion didn't evaluate argument 3 " + "exactly once."; + EXPECT_EQ(1, n3_) << + "The predicate assertion didn't evaluate argument 4 " + "exactly once."; + + // Verifies that the control flow in the test function is expected. + if (expected_to_finish_ && !finished_) { + FAIL() << "The predicate assertion unexpactedly aborted the test."; + } else if (!expected_to_finish_ && finished_) { + FAIL() << "The failed predicate assertion didn't abort the test " + "as expected."; + } + } + + // true iff the test function is expected to run to finish. + static bool expected_to_finish_; + + // true iff the test function did run to finish. + static bool finished_; + + static int n1_; + static int n2_; + static int n3_; +}; + +bool Predicate3Test::expected_to_finish_; +bool Predicate3Test::finished_; +int Predicate3Test::n1_; +int Predicate3Test::n2_; +int Predicate3Test::n3_; + +typedef Predicate3Test EXPECT_PRED_FORMAT3Test; +typedef Predicate3Test ASSERT_PRED_FORMAT3Test; +typedef Predicate3Test EXPECT_PRED3Test; +typedef Predicate3Test ASSERT_PRED3Test; + +// Tests a successful EXPECT_PRED3 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED3Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED3(PredFunction3Int, + ++n1_, + ++n2_, + ++n3_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED3 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED3Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED3(PredFunction3Bool, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED3 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED3Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED3(PredFunctor3(), + ++n1_, + ++n2_, + ++n3_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED3 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED3Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED3(PredFunctor3(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED3 where the +// predicate-formatter is a function on a built-in type (int). 
+TEST_F(EXPECT_PRED3Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED3(PredFunction3Int, + n1_++, + n2_++, + n3_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED3 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED3Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED3(PredFunction3Bool, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED3 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED3Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED3(PredFunctor3(), + n1_++, + n2_++, + n3_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED3 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED3Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED3(PredFunctor3(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED3 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED3Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED3(PredFunction3Int, + ++n1_, + ++n2_, + ++n3_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED3 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED3Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED3(PredFunction3Bool, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED3 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED3Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED3(PredFunctor3(), + ++n1_, + ++n2_, + ++n3_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED3 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED3Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED3(PredFunctor3(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED3 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED3Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED3(PredFunction3Int, + n1_++, + n2_++, + n3_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED3 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED3Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED3(PredFunction3Bool, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED3 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED3Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED3(PredFunctor3(), + n1_++, + n2_++, + n3_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED3 where the +// predicate-formatter is a functor on a user-defined type (Bool). 
+TEST_F(ASSERT_PRED3Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED3(PredFunctor3(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++)); + finished_ = true; + }, ""); +} + +// Tests a successful EXPECT_PRED_FORMAT3 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT3Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT3(PredFormatFunction3, + ++n1_, + ++n2_, + ++n3_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT3 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT3Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED_FORMAT3(PredFormatFunction3, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT3 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT3Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT3(PredFormatFunctor3(), + ++n1_, + ++n2_, + ++n3_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT3 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT3Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED_FORMAT3(PredFormatFunctor3(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED_FORMAT3 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT3Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT3(PredFormatFunction3, + n1_++, + n2_++, + n3_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT3 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT3Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT3(PredFormatFunction3, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT3 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT3Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT3(PredFormatFunctor3(), + n1_++, + n2_++, + n3_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT3 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT3Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT3(PredFormatFunctor3(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED_FORMAT3 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT3Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT3(PredFormatFunction3, + ++n1_, + ++n2_, + ++n3_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT3 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT3Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED_FORMAT3(PredFormatFunction3, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT3 where the +// predicate-formatter is a functor on a built-in type (int). 
+TEST_F(ASSERT_PRED_FORMAT3Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT3(PredFormatFunctor3(), + ++n1_, + ++n2_, + ++n3_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT3 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT3Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED_FORMAT3(PredFormatFunctor3(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED_FORMAT3 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT3Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT3(PredFormatFunction3, + n1_++, + n2_++, + n3_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT3 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT3Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT3(PredFormatFunction3, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT3 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT3Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT3(PredFormatFunctor3(), + n1_++, + n2_++, + n3_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT3 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT3Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT3(PredFormatFunctor3(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++)); + finished_ = true; + }, ""); +} +// Sample functions/functors for testing 4-ary predicate assertions. + +// A 4-ary predicate function. +template +bool PredFunction4(T1 v1, T2 v2, T3 v3, T4 v4) { + return v1 + v2 + v3 + v4 > 0; +} + +// The following two functions are needed to circumvent a bug in +// gcc 2.95.3, which sometimes has problem with the above template +// function. +bool PredFunction4Int(int v1, int v2, int v3, int v4) { + return v1 + v2 + v3 + v4 > 0; +} +bool PredFunction4Bool(Bool v1, Bool v2, Bool v3, Bool v4) { + return v1 + v2 + v3 + v4 > 0; +} + +// A 4-ary predicate functor. +struct PredFunctor4 { + template + bool operator()(const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4) { + return v1 + v2 + v3 + v4 > 0; + } +}; + +// A 4-ary predicate-formatter function. +template +testing::AssertionResult PredFormatFunction4(const char* e1, + const char* e2, + const char* e3, + const char* e4, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4) { + if (PredFunction4(v1, v2, v3, v4)) + return testing::AssertionSuccess(); + + return testing::AssertionFailure() + << e1 << " + " << e2 << " + " << e3 << " + " << e4 + << " is expected to be positive, but evaluates to " + << v1 + v2 + v3 + v4 << "."; +} + +// A 4-ary predicate-formatter functor. +struct PredFormatFunctor4 { + template + testing::AssertionResult operator()(const char* e1, + const char* e2, + const char* e3, + const char* e4, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4) const { + return PredFormatFunction4(e1, e2, e3, e4, v1, v2, v3, v4); + } +}; + +// Tests for {EXPECT|ASSERT}_PRED_FORMAT4. 
+ +class Predicate4Test : public testing::Test { + protected: + virtual void SetUp() { + expected_to_finish_ = true; + finished_ = false; + n1_ = n2_ = n3_ = n4_ = 0; + } + + virtual void TearDown() { + // Verifies that each of the predicate's arguments was evaluated + // exactly once. + EXPECT_EQ(1, n1_) << + "The predicate assertion didn't evaluate argument 2 " + "exactly once."; + EXPECT_EQ(1, n2_) << + "The predicate assertion didn't evaluate argument 3 " + "exactly once."; + EXPECT_EQ(1, n3_) << + "The predicate assertion didn't evaluate argument 4 " + "exactly once."; + EXPECT_EQ(1, n4_) << + "The predicate assertion didn't evaluate argument 5 " + "exactly once."; + + // Verifies that the control flow in the test function is expected. + if (expected_to_finish_ && !finished_) { + FAIL() << "The predicate assertion unexpactedly aborted the test."; + } else if (!expected_to_finish_ && finished_) { + FAIL() << "The failed predicate assertion didn't abort the test " + "as expected."; + } + } + + // true iff the test function is expected to run to finish. + static bool expected_to_finish_; + + // true iff the test function did run to finish. + static bool finished_; + + static int n1_; + static int n2_; + static int n3_; + static int n4_; +}; + +bool Predicate4Test::expected_to_finish_; +bool Predicate4Test::finished_; +int Predicate4Test::n1_; +int Predicate4Test::n2_; +int Predicate4Test::n3_; +int Predicate4Test::n4_; + +typedef Predicate4Test EXPECT_PRED_FORMAT4Test; +typedef Predicate4Test ASSERT_PRED_FORMAT4Test; +typedef Predicate4Test EXPECT_PRED4Test; +typedef Predicate4Test ASSERT_PRED4Test; + +// Tests a successful EXPECT_PRED4 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED4Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED4(PredFunction4Int, + ++n1_, + ++n2_, + ++n3_, + ++n4_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED4 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED4Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED4(PredFunction4Bool, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED4 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED4Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED4(PredFunctor4(), + ++n1_, + ++n2_, + ++n3_, + ++n4_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED4 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED4Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED4(PredFunctor4(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED4 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED4Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED4(PredFunction4Int, + n1_++, + n2_++, + n3_++, + n4_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED4 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED4Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED4(PredFunction4Bool, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED4 where the +// predicate-formatter is a functor on a built-in type (int). 
+TEST_F(EXPECT_PRED4Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED4(PredFunctor4(), + n1_++, + n2_++, + n3_++, + n4_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED4 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED4Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED4(PredFunctor4(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED4 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED4Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED4(PredFunction4Int, + ++n1_, + ++n2_, + ++n3_, + ++n4_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED4 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED4Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED4(PredFunction4Bool, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED4 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED4Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED4(PredFunctor4(), + ++n1_, + ++n2_, + ++n3_, + ++n4_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED4 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED4Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED4(PredFunctor4(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED4 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED4Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED4(PredFunction4Int, + n1_++, + n2_++, + n3_++, + n4_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED4 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED4Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED4(PredFunction4Bool, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED4 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED4Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED4(PredFunctor4(), + n1_++, + n2_++, + n3_++, + n4_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED4 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED4Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED4(PredFunctor4(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++)); + finished_ = true; + }, ""); +} + +// Tests a successful EXPECT_PRED_FORMAT4 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT4Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT4(PredFormatFunction4, + ++n1_, + ++n2_, + ++n3_, + ++n4_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT4 where the +// predicate-formatter is a function on a user-defined type (Bool). 
+TEST_F(EXPECT_PRED_FORMAT4Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED_FORMAT4(PredFormatFunction4, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT4 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT4Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT4(PredFormatFunctor4(), + ++n1_, + ++n2_, + ++n3_, + ++n4_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT4 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT4Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED_FORMAT4(PredFormatFunctor4(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED_FORMAT4 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT4Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT4(PredFormatFunction4, + n1_++, + n2_++, + n3_++, + n4_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT4 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT4Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT4(PredFormatFunction4, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT4 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT4Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT4(PredFormatFunctor4(), + n1_++, + n2_++, + n3_++, + n4_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT4 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT4Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT4(PredFormatFunctor4(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED_FORMAT4 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT4Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT4(PredFormatFunction4, + ++n1_, + ++n2_, + ++n3_, + ++n4_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT4 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT4Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED_FORMAT4(PredFormatFunction4, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT4 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT4Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT4(PredFormatFunctor4(), + ++n1_, + ++n2_, + ++n3_, + ++n4_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT4 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT4Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED_FORMAT4(PredFormatFunctor4(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED_FORMAT4 where the +// predicate-formatter is a function on a built-in type (int). 
+TEST_F(ASSERT_PRED_FORMAT4Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT4(PredFormatFunction4, + n1_++, + n2_++, + n3_++, + n4_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT4 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT4Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT4(PredFormatFunction4, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT4 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT4Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT4(PredFormatFunctor4(), + n1_++, + n2_++, + n3_++, + n4_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT4 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT4Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT4(PredFormatFunctor4(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++)); + finished_ = true; + }, ""); +} +// Sample functions/functors for testing 5-ary predicate assertions. + +// A 5-ary predicate function. +template +bool PredFunction5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) { + return v1 + v2 + v3 + v4 + v5 > 0; +} + +// The following two functions are needed to circumvent a bug in +// gcc 2.95.3, which sometimes has problem with the above template +// function. +bool PredFunction5Int(int v1, int v2, int v3, int v4, int v5) { + return v1 + v2 + v3 + v4 + v5 > 0; +} +bool PredFunction5Bool(Bool v1, Bool v2, Bool v3, Bool v4, Bool v5) { + return v1 + v2 + v3 + v4 + v5 > 0; +} + +// A 5-ary predicate functor. +struct PredFunctor5 { + template + bool operator()(const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4, + const T5& v5) { + return v1 + v2 + v3 + v4 + v5 > 0; + } +}; + +// A 5-ary predicate-formatter function. +template +testing::AssertionResult PredFormatFunction5(const char* e1, + const char* e2, + const char* e3, + const char* e4, + const char* e5, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4, + const T5& v5) { + if (PredFunction5(v1, v2, v3, v4, v5)) + return testing::AssertionSuccess(); + + return testing::AssertionFailure() + << e1 << " + " << e2 << " + " << e3 << " + " << e4 << " + " << e5 + << " is expected to be positive, but evaluates to " + << v1 + v2 + v3 + v4 + v5 << "."; +} + +// A 5-ary predicate-formatter functor. +struct PredFormatFunctor5 { + template + testing::AssertionResult operator()(const char* e1, + const char* e2, + const char* e3, + const char* e4, + const char* e5, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4, + const T5& v5) const { + return PredFormatFunction5(e1, e2, e3, e4, e5, v1, v2, v3, v4, v5); + } +}; + +// Tests for {EXPECT|ASSERT}_PRED_FORMAT5. + +class Predicate5Test : public testing::Test { + protected: + virtual void SetUp() { + expected_to_finish_ = true; + finished_ = false; + n1_ = n2_ = n3_ = n4_ = n5_ = 0; + } + + virtual void TearDown() { + // Verifies that each of the predicate's arguments was evaluated + // exactly once. 
+ EXPECT_EQ(1, n1_) << + "The predicate assertion didn't evaluate argument 2 " + "exactly once."; + EXPECT_EQ(1, n2_) << + "The predicate assertion didn't evaluate argument 3 " + "exactly once."; + EXPECT_EQ(1, n3_) << + "The predicate assertion didn't evaluate argument 4 " + "exactly once."; + EXPECT_EQ(1, n4_) << + "The predicate assertion didn't evaluate argument 5 " + "exactly once."; + EXPECT_EQ(1, n5_) << + "The predicate assertion didn't evaluate argument 6 " + "exactly once."; + + // Verifies that the control flow in the test function is expected. + if (expected_to_finish_ && !finished_) { + FAIL() << "The predicate assertion unexpactedly aborted the test."; + } else if (!expected_to_finish_ && finished_) { + FAIL() << "The failed predicate assertion didn't abort the test " + "as expected."; + } + } + + // true iff the test function is expected to run to finish. + static bool expected_to_finish_; + + // true iff the test function did run to finish. + static bool finished_; + + static int n1_; + static int n2_; + static int n3_; + static int n4_; + static int n5_; +}; + +bool Predicate5Test::expected_to_finish_; +bool Predicate5Test::finished_; +int Predicate5Test::n1_; +int Predicate5Test::n2_; +int Predicate5Test::n3_; +int Predicate5Test::n4_; +int Predicate5Test::n5_; + +typedef Predicate5Test EXPECT_PRED_FORMAT5Test; +typedef Predicate5Test ASSERT_PRED_FORMAT5Test; +typedef Predicate5Test EXPECT_PRED5Test; +typedef Predicate5Test ASSERT_PRED5Test; + +// Tests a successful EXPECT_PRED5 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED5Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED5(PredFunction5Int, + ++n1_, + ++n2_, + ++n3_, + ++n4_, + ++n5_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED5 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED5Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED5(PredFunction5Bool, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_), + Bool(++n5_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED5 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED5Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED5(PredFunctor5(), + ++n1_, + ++n2_, + ++n3_, + ++n4_, + ++n5_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED5 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED5Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED5(PredFunctor5(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_), + Bool(++n5_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED5 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED5Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED5(PredFunction5Int, + n1_++, + n2_++, + n3_++, + n4_++, + n5_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED5 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED5Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED5(PredFunction5Bool, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++), + Bool(n5_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED5 where the +// predicate-formatter is a functor on a built-in type (int). 
+TEST_F(EXPECT_PRED5Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED5(PredFunctor5(), + n1_++, + n2_++, + n3_++, + n4_++, + n5_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED5 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED5Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED5(PredFunctor5(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++), + Bool(n5_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED5 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED5Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED5(PredFunction5Int, + ++n1_, + ++n2_, + ++n3_, + ++n4_, + ++n5_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED5 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED5Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED5(PredFunction5Bool, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_), + Bool(++n5_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED5 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED5Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED5(PredFunctor5(), + ++n1_, + ++n2_, + ++n3_, + ++n4_, + ++n5_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED5 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED5Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED5(PredFunctor5(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_), + Bool(++n5_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED5 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED5Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED5(PredFunction5Int, + n1_++, + n2_++, + n3_++, + n4_++, + n5_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED5 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED5Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED5(PredFunction5Bool, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++), + Bool(n5_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED5 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED5Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED5(PredFunctor5(), + n1_++, + n2_++, + n3_++, + n4_++, + n5_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED5 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED5Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED5(PredFunctor5(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++), + Bool(n5_++)); + finished_ = true; + }, ""); +} + +// Tests a successful EXPECT_PRED_FORMAT5 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT5Test, FunctionOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT5(PredFormatFunction5, + ++n1_, + ++n2_, + ++n3_, + ++n4_, + ++n5_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT5 where the +// predicate-formatter is a function on a user-defined type (Bool). 
+TEST_F(EXPECT_PRED_FORMAT5Test, FunctionOnUserTypeSuccess) { + EXPECT_PRED_FORMAT5(PredFormatFunction5, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_), + Bool(++n5_)); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT5 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT5Test, FunctorOnBuiltInTypeSuccess) { + EXPECT_PRED_FORMAT5(PredFormatFunctor5(), + ++n1_, + ++n2_, + ++n3_, + ++n4_, + ++n5_); + finished_ = true; +} + +// Tests a successful EXPECT_PRED_FORMAT5 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT5Test, FunctorOnUserTypeSuccess) { + EXPECT_PRED_FORMAT5(PredFormatFunctor5(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_), + Bool(++n5_)); + finished_ = true; +} + +// Tests a failed EXPECT_PRED_FORMAT5 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT5Test, FunctionOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT5(PredFormatFunction5, + n1_++, + n2_++, + n3_++, + n4_++, + n5_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT5 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT5Test, FunctionOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT5(PredFormatFunction5, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++), + Bool(n5_++)); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT5 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(EXPECT_PRED_FORMAT5Test, FunctorOnBuiltInTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT5(PredFormatFunctor5(), + n1_++, + n2_++, + n3_++, + n4_++, + n5_++); + finished_ = true; + }, ""); +} + +// Tests a failed EXPECT_PRED_FORMAT5 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(EXPECT_PRED_FORMAT5Test, FunctorOnUserTypeFailure) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT5(PredFormatFunctor5(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++), + Bool(n5_++)); + finished_ = true; + }, ""); +} + +// Tests a successful ASSERT_PRED_FORMAT5 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT5Test, FunctionOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT5(PredFormatFunction5, + ++n1_, + ++n2_, + ++n3_, + ++n4_, + ++n5_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT5 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT5Test, FunctionOnUserTypeSuccess) { + ASSERT_PRED_FORMAT5(PredFormatFunction5, + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_), + Bool(++n5_)); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT5 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT5Test, FunctorOnBuiltInTypeSuccess) { + ASSERT_PRED_FORMAT5(PredFormatFunctor5(), + ++n1_, + ++n2_, + ++n3_, + ++n4_, + ++n5_); + finished_ = true; +} + +// Tests a successful ASSERT_PRED_FORMAT5 where the +// predicate-formatter is a functor on a user-defined type (Bool). 
+TEST_F(ASSERT_PRED_FORMAT5Test, FunctorOnUserTypeSuccess) { + ASSERT_PRED_FORMAT5(PredFormatFunctor5(), + Bool(++n1_), + Bool(++n2_), + Bool(++n3_), + Bool(++n4_), + Bool(++n5_)); + finished_ = true; +} + +// Tests a failed ASSERT_PRED_FORMAT5 where the +// predicate-formatter is a function on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT5Test, FunctionOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT5(PredFormatFunction5, + n1_++, + n2_++, + n3_++, + n4_++, + n5_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT5 where the +// predicate-formatter is a function on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT5Test, FunctionOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT5(PredFormatFunction5, + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++), + Bool(n5_++)); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT5 where the +// predicate-formatter is a functor on a built-in type (int). +TEST_F(ASSERT_PRED_FORMAT5Test, FunctorOnBuiltInTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT5(PredFormatFunctor5(), + n1_++, + n2_++, + n3_++, + n4_++, + n5_++); + finished_ = true; + }, ""); +} + +// Tests a failed ASSERT_PRED_FORMAT5 where the +// predicate-formatter is a functor on a user-defined type (Bool). +TEST_F(ASSERT_PRED_FORMAT5Test, FunctorOnUserTypeFailure) { + expected_to_finish_ = false; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT5(PredFormatFunctor5(), + Bool(n1_++), + Bool(n2_++), + Bool(n3_++), + Bool(n4_++), + Bool(n5_++)); + finished_ = true; + }, ""); +} +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Tests the --gtest_repeat=number flag. + +#include +#include +#include "gtest/gtest.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. 
It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +namespace testing { + +GTEST_DECLARE_string_(death_test_style); +GTEST_DECLARE_string_(filter); +GTEST_DECLARE_int32_(repeat); + +} // namespace testing + +using testing::GTEST_FLAG(death_test_style); +using testing::GTEST_FLAG(filter); +using testing::GTEST_FLAG(repeat); + +namespace { + +// We need this when we are testing Google Test itself and therefore +// cannot use Google Test assertions. +#define GTEST_CHECK_INT_EQ_(expected, actual) \ + do {\ + const int expected_val = (expected);\ + const int actual_val = (actual);\ + if (::testing::internal::IsTrue(expected_val != actual_val)) {\ + ::std::cout << "Value of: " #actual "\n"\ + << " Actual: " << actual_val << "\n"\ + << "Expected: " #expected "\n"\ + << "Which is: " << expected_val << "\n";\ + ::testing::internal::posix::Abort();\ + }\ + } while (::testing::internal::AlwaysFalse()) + + +// Used for verifying that global environment set-up and tear-down are +// inside the gtest_repeat loop. + +int g_environment_set_up_count = 0; +int g_environment_tear_down_count = 0; + +class MyEnvironment : public testing::Environment { + public: + MyEnvironment() {} + virtual void SetUp() { g_environment_set_up_count++; } + virtual void TearDown() { g_environment_tear_down_count++; } +}; + +// A test that should fail. + +int g_should_fail_count = 0; + +TEST(FooTest, ShouldFail) { + g_should_fail_count++; + EXPECT_EQ(0, 1) << "Expected failure."; +} + +// A test that should pass. + +int g_should_pass_count = 0; + +TEST(FooTest, ShouldPass) { + g_should_pass_count++; +} + +// A test that contains a thread-safe death test and a fast death +// test. It should pass. + +int g_death_test_count = 0; + +TEST(BarDeathTest, ThreadSafeAndFast) { + g_death_test_count++; + + GTEST_FLAG(death_test_style) = "threadsafe"; + EXPECT_DEATH_IF_SUPPORTED(::testing::internal::posix::Abort(), ""); + + GTEST_FLAG(death_test_style) = "fast"; + EXPECT_DEATH_IF_SUPPORTED(::testing::internal::posix::Abort(), ""); +} + +#if GTEST_HAS_PARAM_TEST +int g_param_test_count = 0; + +const int kNumberOfParamTests = 10; + +class MyParamTest : public testing::TestWithParam {}; + +TEST_P(MyParamTest, ShouldPass) { + // TODO(vladl@google.com): Make parameter value checking robust + // WRT order of tests. + GTEST_CHECK_INT_EQ_(g_param_test_count % kNumberOfParamTests, GetParam()); + g_param_test_count++; +} +INSTANTIATE_TEST_CASE_P(MyParamSequence, + MyParamTest, + testing::Range(0, kNumberOfParamTests)); +#endif // GTEST_HAS_PARAM_TEST + +// Resets the count for each test. +void ResetCounts() { + g_environment_set_up_count = 0; + g_environment_tear_down_count = 0; + g_should_fail_count = 0; + g_should_pass_count = 0; + g_death_test_count = 0; +#if GTEST_HAS_PARAM_TEST + g_param_test_count = 0; +#endif // GTEST_HAS_PARAM_TEST +} + +// Checks that the count for each test is expected. 
+void CheckCounts(int expected) { + GTEST_CHECK_INT_EQ_(expected, g_environment_set_up_count); + GTEST_CHECK_INT_EQ_(expected, g_environment_tear_down_count); + GTEST_CHECK_INT_EQ_(expected, g_should_fail_count); + GTEST_CHECK_INT_EQ_(expected, g_should_pass_count); + GTEST_CHECK_INT_EQ_(expected, g_death_test_count); +#if GTEST_HAS_PARAM_TEST + GTEST_CHECK_INT_EQ_(expected * kNumberOfParamTests, g_param_test_count); +#endif // GTEST_HAS_PARAM_TEST +} + +// Tests the behavior of Google Test when --gtest_repeat is not specified. +void TestRepeatUnspecified() { + ResetCounts(); + GTEST_CHECK_INT_EQ_(1, RUN_ALL_TESTS()); + CheckCounts(1); +} + +// Tests the behavior of Google Test when --gtest_repeat has the given value. +void TestRepeat(int repeat) { + GTEST_FLAG(repeat) = repeat; + + ResetCounts(); + GTEST_CHECK_INT_EQ_(repeat > 0 ? 1 : 0, RUN_ALL_TESTS()); + CheckCounts(repeat); +} + +// Tests using --gtest_repeat when --gtest_filter specifies an empty +// set of tests. +void TestRepeatWithEmptyFilter(int repeat) { + GTEST_FLAG(repeat) = repeat; + GTEST_FLAG(filter) = "None"; + + ResetCounts(); + GTEST_CHECK_INT_EQ_(0, RUN_ALL_TESTS()); + CheckCounts(0); +} + +// Tests using --gtest_repeat when --gtest_filter specifies a set of +// successful tests. +void TestRepeatWithFilterForSuccessfulTests(int repeat) { + GTEST_FLAG(repeat) = repeat; + GTEST_FLAG(filter) = "*-*ShouldFail"; + + ResetCounts(); + GTEST_CHECK_INT_EQ_(0, RUN_ALL_TESTS()); + GTEST_CHECK_INT_EQ_(repeat, g_environment_set_up_count); + GTEST_CHECK_INT_EQ_(repeat, g_environment_tear_down_count); + GTEST_CHECK_INT_EQ_(0, g_should_fail_count); + GTEST_CHECK_INT_EQ_(repeat, g_should_pass_count); + GTEST_CHECK_INT_EQ_(repeat, g_death_test_count); +#if GTEST_HAS_PARAM_TEST + GTEST_CHECK_INT_EQ_(repeat * kNumberOfParamTests, g_param_test_count); +#endif // GTEST_HAS_PARAM_TEST +} + +// Tests using --gtest_repeat when --gtest_filter specifies a set of +// failed tests. +void TestRepeatWithFilterForFailedTests(int repeat) { + GTEST_FLAG(repeat) = repeat; + GTEST_FLAG(filter) = "*ShouldFail"; + + ResetCounts(); + GTEST_CHECK_INT_EQ_(1, RUN_ALL_TESTS()); + GTEST_CHECK_INT_EQ_(repeat, g_environment_set_up_count); + GTEST_CHECK_INT_EQ_(repeat, g_environment_tear_down_count); + GTEST_CHECK_INT_EQ_(repeat, g_should_fail_count); + GTEST_CHECK_INT_EQ_(0, g_should_pass_count); + GTEST_CHECK_INT_EQ_(0, g_death_test_count); +#if GTEST_HAS_PARAM_TEST + GTEST_CHECK_INT_EQ_(0, g_param_test_count); +#endif // GTEST_HAS_PARAM_TEST +} + +} // namespace + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + testing::AddGlobalTestEnvironment(new MyEnvironment); + + TestRepeatUnspecified(); + TestRepeat(0); + TestRepeat(1); + TestRepeat(5); + + TestRepeatWithEmptyFilter(2); + TestRepeatWithEmptyFilter(3); + + TestRepeatWithFilterForSuccessfulTests(3); + + TestRepeatWithFilterForFailedTests(4); + + // It would be nice to verify that the tests indeed loop forever + // when GTEST_FLAG(repeat) is negative, but this test will be quite + // complicated to write. Since this flag is for interactive + // debugging only and doesn't affect the normal test result, such a + // test would be an overkill. + + printf("PASS\n"); + return 0; +} +// Copyright 2007, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Tests that SCOPED_TRACE() and various Google Test assertions can be +// used in a large number of threads concurrently. + +#include "gtest/gtest.h" + +#include +#include + +// We must define this macro in order to #include +// gtest-internal-inl.h. This is how Google Test prevents a user from +// accidentally depending on its internal implementation. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +#if GTEST_IS_THREADSAFE + +namespace testing { +namespace { + +using internal::Notification; +using internal::TestPropertyKeyIs; +using internal::ThreadWithParam; +using internal::scoped_ptr; + +// In order to run tests in this file, for platforms where Google Test is +// thread safe, implement ThreadWithParam. See the description of its API +// in gtest-port.h, where it is defined for already supported platforms. + +// How many threads to create? +const int kThreadCount = 50; + +std::string IdToKey(int id, const char* suffix) { + Message key; + key << "key_" << id << "_" << suffix; + return key.GetString(); +} + +std::string IdToString(int id) { + Message id_message; + id_message << id; + return id_message.GetString(); +} + +void ExpectKeyAndValueWereRecordedForId( + const std::vector& properties, + int id, const char* suffix) { + TestPropertyKeyIs matches_key(IdToKey(id, suffix).c_str()); + const std::vector::const_iterator property = + std::find_if(properties.begin(), properties.end(), matches_key); + ASSERT_TRUE(property != properties.end()) + << "expecting " << suffix << " value for id " << id; + EXPECT_STREQ(IdToString(id).c_str(), property->value()); +} + +// Calls a large number of Google Test assertions, where exactly one of them +// will fail. 
+void ManyAsserts(int id) {
+  GTEST_LOG_(INFO) << "Thread #" << id << " running...";
+
+  SCOPED_TRACE(Message() << "Thread #" << id);
+
+  for (int i = 0; i < kThreadCount; i++) {
+    SCOPED_TRACE(Message() << "Iteration #" << i);
+
+    // A bunch of assertions that should succeed.
+    EXPECT_TRUE(true);
+    ASSERT_FALSE(false) << "This shouldn't fail.";
+    EXPECT_STREQ("a", "a");
+    ASSERT_LE(5, 6);
+    EXPECT_EQ(i, i) << "This shouldn't fail.";
+
+    // RecordProperty() should interact safely with other threads as well.
+    // The shared_key forces property updates.
+    Test::RecordProperty(IdToKey(id, "string").c_str(), IdToString(id).c_str());
+    Test::RecordProperty(IdToKey(id, "int").c_str(), id);
+    Test::RecordProperty("shared_key", IdToString(id).c_str());
+
+    // This assertion should fail kThreadCount times per thread. It
+    // is for testing whether Google Test can handle failed assertions in a
+    // multi-threaded context.
+    EXPECT_LT(i, 0) << "This should always fail.";
+  }
+}
+
+void CheckTestFailureCount(int expected_failures) {
+  const TestInfo* const info = UnitTest::GetInstance()->current_test_info();
+  const TestResult* const result = info->result();
+  GTEST_CHECK_(expected_failures == result->total_part_count())
+      << "Logged " << result->total_part_count() << " failures "
+      << " vs. " << expected_failures << " expected";
+}
+
+// Tests using SCOPED_TRACE() and Google Test assertions in many threads
+// concurrently.
+TEST(StressTest, CanUseScopedTraceAndAssertionsInManyThreads) {
+  {
+    scoped_ptr<ThreadWithParam<int> > threads[kThreadCount];
+    Notification threads_can_start;
+    for (int i = 0; i != kThreadCount; i++)
+      threads[i].reset(new ThreadWithParam<int>(&ManyAsserts,
+                                                i,
+                                                &threads_can_start));
+
+    threads_can_start.Notify();
+
+    // Blocks until all the threads are done.
+    for (int i = 0; i != kThreadCount; i++)
+      threads[i]->Join();
+  }
+
+  // Ensures that kThreadCount*kThreadCount failures have been reported.
+  const TestInfo* const info = UnitTest::GetInstance()->current_test_info();
+  const TestResult* const result = info->result();
+
+  std::vector<TestProperty> properties;
+  // We have no access to the TestResult's list of properties but we can
+  // copy them one by one.
+  for (int i = 0; i < result->test_property_count(); ++i)
+    properties.push_back(result->GetTestProperty(i));
+
+  EXPECT_EQ(kThreadCount * 2 + 1, result->test_property_count())
+      << "String and int values recorded on each thread, "
+      << "as well as one shared_key";
+  for (int i = 0; i < kThreadCount; ++i) {
+    ExpectKeyAndValueWereRecordedForId(properties, i, "string");
+    ExpectKeyAndValueWereRecordedForId(properties, i, "int");
+  }
+  CheckTestFailureCount(kThreadCount*kThreadCount);
+}
+
+void FailingThread(bool is_fatal) {
+  if (is_fatal)
+    FAIL() << "Fatal failure in some other thread. "
+           << "(This failure is expected.)";
+  else
+    ADD_FAILURE() << "Non-fatal failure in some other thread. "
+                  << "(This failure is expected.)";
+}
+
+void GenerateFatalFailureInAnotherThread(bool is_fatal) {
+  ThreadWithParam<bool> thread(&FailingThread, is_fatal, NULL);
+  thread.Join();
+}
+
+TEST(NoFatalFailureTest, ExpectNoFatalFailureIgnoresFailuresInOtherThreads) {
+  EXPECT_NO_FATAL_FAILURE(GenerateFatalFailureInAnotherThread(true));
+  // We should only have one failure (the one from
+  // GenerateFatalFailureInAnotherThread()), since the EXPECT_NO_FATAL_FAILURE
+  // should succeed.
+ CheckTestFailureCount(1); +} + +void AssertNoFatalFailureIgnoresFailuresInOtherThreads() { + ASSERT_NO_FATAL_FAILURE(GenerateFatalFailureInAnotherThread(true)); +} +TEST(NoFatalFailureTest, AssertNoFatalFailureIgnoresFailuresInOtherThreads) { + // Using a subroutine, to make sure, that the test continues. + AssertNoFatalFailureIgnoresFailuresInOtherThreads(); + // We should only have one failure (the one from + // GenerateFatalFailureInAnotherThread()), since the EXPECT_NO_FATAL_FAILURE + // should succeed. + CheckTestFailureCount(1); +} + +TEST(FatalFailureTest, ExpectFatalFailureIgnoresFailuresInOtherThreads) { + // This statement should fail, since the current thread doesn't generate a + // fatal failure, only another one does. + EXPECT_FATAL_FAILURE(GenerateFatalFailureInAnotherThread(true), "expected"); + CheckTestFailureCount(2); +} + +TEST(FatalFailureOnAllThreadsTest, ExpectFatalFailureOnAllThreads) { + // This statement should succeed, because failures in all threads are + // considered. + EXPECT_FATAL_FAILURE_ON_ALL_THREADS( + GenerateFatalFailureInAnotherThread(true), "expected"); + CheckTestFailureCount(0); + // We need to add a failure, because main() checks that there are failures. + // But when only this test is run, we shouldn't have any failures. + ADD_FAILURE() << "This is an expected non-fatal failure."; +} + +TEST(NonFatalFailureTest, ExpectNonFatalFailureIgnoresFailuresInOtherThreads) { + // This statement should fail, since the current thread doesn't generate a + // fatal failure, only another one does. + EXPECT_NONFATAL_FAILURE(GenerateFatalFailureInAnotherThread(false), + "expected"); + CheckTestFailureCount(2); +} + +TEST(NonFatalFailureOnAllThreadsTest, ExpectNonFatalFailureOnAllThreads) { + // This statement should succeed, because failures in all threads are + // considered. + EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS( + GenerateFatalFailureInAnotherThread(false), "expected"); + CheckTestFailureCount(0); + // We need to add a failure, because main() checks that there are failures, + // But when only this test is run, we shouldn't have any failures. + ADD_FAILURE() << "This is an expected non-fatal failure."; +} + +} // namespace +} // namespace testing + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + + const int result = RUN_ALL_TESTS(); // Expected to fail. + GTEST_CHECK_(result == 1) << "RUN_ALL_TESTS() did not fail as expected"; + + printf("\nPASS\n"); + return 0; +} + +#else +TEST(StressTest, + DISABLED_ThreadSafetyTestsAreSkippedWhenGoogleTestIsNotThreadSafe) { +} + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} +#endif // GTEST_IS_THREADSAFE +// Copyright 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// This program is meant to be run by gtest_help_test.py. Do not run +// it directly. + +#include "gtest/gtest.h" + +// When a help flag is specified, this program should skip the tests +// and exit with 0; otherwise the following test will be executed, +// causing this program to exit with a non-zero code. +TEST(HelpFlagTest, ShouldNotBeRun) { + ASSERT_TRUE(false) << "Tests shouldn't be run when --help is specified."; +} + +#if GTEST_HAS_DEATH_TEST +TEST(DeathTest, UsedByPythonScriptToDetectSupportForDeathTestsInThisBinary) {} +#endif +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: phanna@google.com (Patrick Hanna) + +// Unit test for Google Test's --gtest_list_tests flag. +// +// A user can ask Google Test to list all tests that will run +// so that when using a filter, a user will know what +// tests to look for. The tests will not be run after listing. +// +// This program will be invoked from a Python unit test. +// Don't run it directly. + +#include "gtest/gtest.h" + +// Several different test cases and tests that will be listed. 
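As context for the test cases that follow: running the binary built from this file with --gtest_list_tests makes Google Test print each test case name followed by its indented test names and exit without running anything (the companion Python test mentioned above drives and checks this). For the simple cases defined below, the listing would look roughly like this (illustrative sketch; the binary name and the exact treatment of disabled, value-parameterized, and typed tests are not shown here):

    $ ./gtest_list_tests_binary --gtest_list_tests
    Foo.
      Bar1
      Bar2
    Abc.
      Xyz
      Def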
+TEST(Foo, Bar1) { +} + +TEST(Foo, Bar2) { +} + +TEST(Foo, DISABLED_Bar3) { +} + +TEST(Abc, Xyz) { +} + +TEST(Abc, Def) { +} + +TEST(FooBar, Baz) { +} + +class FooTest : public testing::Test { +}; + +TEST_F(FooTest, Test1) { +} + +TEST_F(FooTest, DISABLED_Test2) { +} + +TEST_F(FooTest, Test3) { +} + +TEST(FooDeathTest, Test1) { +} + +// A group of value-parameterized tests. + +class MyType { + public: + explicit MyType(const std::string& a_value) : value_(a_value) {} + + const std::string& value() const { return value_; } + + private: + std::string value_; +}; + +// Teaches Google Test how to print a MyType. +void PrintTo(const MyType& x, std::ostream* os) { + *os << x.value(); +} + +class ValueParamTest : public testing::TestWithParam { +}; + +TEST_P(ValueParamTest, TestA) { +} + +TEST_P(ValueParamTest, TestB) { +} + +INSTANTIATE_TEST_CASE_P( + MyInstantiation, ValueParamTest, + testing::Values(MyType("one line"), + MyType("two\nlines"), + MyType("a very\nloooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong line"))); // NOLINT + +// A group of typed tests. + +// A deliberately long type name for testing the line-truncating +// behavior when printing a type parameter. +class VeryLoooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooogName { // NOLINT +}; + +template +class TypedTest : public testing::Test { +}; + +template +class MyArray { +}; + +typedef testing::Types > MyTypes; + +TYPED_TEST_CASE(TypedTest, MyTypes); + +TYPED_TEST(TypedTest, TestA) { +} + +TYPED_TEST(TypedTest, TestB) { +} + +// A group of type-parameterized tests. + +template +class TypeParamTest : public testing::Test { +}; + +TYPED_TEST_CASE_P(TypeParamTest); + +TYPED_TEST_P(TypeParamTest, TestA) { +} + +TYPED_TEST_P(TypeParamTest, TestB) { +} + +REGISTER_TYPED_TEST_CASE_P(TypeParamTest, TestA, TestB); + +INSTANTIATE_TYPED_TEST_CASE_P(My, TypeParamTest, MyTypes); + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Tests that a Google Test program that has no test defined can run +// successfully. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/gtest.h" + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + + // An ad-hoc assertion outside of all tests. + // + // This serves three purposes: + // + // 1. It verifies that an ad-hoc assertion can be executed even if + // no test is defined. + // 2. It verifies that a failed ad-hoc assertion causes the test + // program to fail. + // 3. We had a bug where the XML output won't be generated if an + // assertion is executed before RUN_ALL_TESTS() is called, even + // though --gtest_output=xml is specified. This makes sure the + // bug is fixed and doesn't regress. + EXPECT_EQ(1, 2); + + // The above EXPECT_EQ() should cause RUN_ALL_TESTS() to return non-zero. + return RUN_ALL_TESTS() ? 0 : 1; +} +// Copyright 2013, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Tests that Google Test manipulates the premature-exit-detection +// file correctly. + +#include + +#include "gtest/gtest.h" + +using ::testing::InitGoogleTest; +using ::testing::Test; +using ::testing::internal::posix::GetEnv; +using ::testing::internal::posix::Stat; +using ::testing::internal::posix::StatStruct; + +namespace { + +class PrematureExitTest : public Test { + public: + // Returns true iff the given file exists. 
+ static bool FileExists(const char* filepath) { + StatStruct stat; + return Stat(filepath, &stat) == 0; + } + + protected: + PrematureExitTest() { + premature_exit_file_path_ = GetEnv("TEST_PREMATURE_EXIT_FILE"); + + // Normalize NULL to "" for ease of handling. + if (premature_exit_file_path_ == NULL) { + premature_exit_file_path_ = ""; + } + } + + // Returns true iff the premature-exit file exists. + bool PrematureExitFileExists() const { + return FileExists(premature_exit_file_path_); + } + + const char* premature_exit_file_path_; +}; + +typedef PrematureExitTest PrematureExitDeathTest; + +// Tests that: +// - the premature-exit file exists during the execution of a +// death test (EXPECT_DEATH*), and +// - a death test doesn't interfere with the main test process's +// handling of the premature-exit file. +TEST_F(PrematureExitDeathTest, FileExistsDuringExecutionOfDeathTest) { + if (*premature_exit_file_path_ == '\0') { + return; + } + + EXPECT_DEATH_IF_SUPPORTED({ + // If the file exists, crash the process such that the main test + // process will catch the (expected) crash and report a success; + // otherwise don't crash, which will cause the main test process + // to report that the death test has failed. + if (PrematureExitFileExists()) { + exit(1); + } + }, ""); +} + +// Tests that the premature-exit file exists during the execution of a +// normal (non-death) test. +TEST_F(PrematureExitTest, PrematureExitFileExistsDuringTestExecution) { + if (*premature_exit_file_path_ == '\0') { + return; + } + + EXPECT_TRUE(PrematureExitFileExists()) + << " file " << premature_exit_file_path_ + << " should exist during test execution, but doesn't."; +} + +} // namespace + +int main(int argc, char **argv) { + InitGoogleTest(&argc, argv); + const int exit_code = RUN_ALL_TESTS(); + + // Test that the premature-exit file is deleted upon return from + // RUN_ALL_TESTS(). + const char* const filepath = GetEnv("TEST_PREMATURE_EXIT_FILE"); + if (filepath != NULL && *filepath != '\0') { + if (PrematureExitTest::FileExists(filepath)) { + printf( + "File %s shouldn't exist after the test program finishes, but does.", + filepath); + return 1; + } + } + + return exit_code; +} +// Copyright 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Verifies that test shuffling works. + +#include "gtest/gtest.h" + +namespace { + +using ::testing::EmptyTestEventListener; +using ::testing::InitGoogleTest; +using ::testing::Message; +using ::testing::Test; +using ::testing::TestEventListeners; +using ::testing::TestInfo; +using ::testing::UnitTest; +using ::testing::internal::scoped_ptr; + +// The test methods are empty, as the sole purpose of this program is +// to print the test names before/after shuffling. + +class A : public Test {}; +TEST_F(A, A) {} +TEST_F(A, B) {} + +TEST(ADeathTest, A) {} +TEST(ADeathTest, B) {} +TEST(ADeathTest, C) {} + +TEST(B, A) {} +TEST(B, B) {} +TEST(B, C) {} +TEST(B, DISABLED_D) {} +TEST(B, DISABLED_E) {} + +TEST(BDeathTest, A) {} +TEST(BDeathTest, B) {} + +TEST(C, A) {} +TEST(C, B) {} +TEST(C, C) {} +TEST(C, DISABLED_D) {} + +TEST(CDeathTest, A) {} + +TEST(DISABLED_D, A) {} +TEST(DISABLED_D, DISABLED_B) {} + +// This printer prints the full test names only, starting each test +// iteration with a "----" marker. +class TestNamePrinter : public EmptyTestEventListener { + public: + virtual void OnTestIterationStart(const UnitTest& /* unit_test */, + int /* iteration */) { + printf("----\n"); + } + + virtual void OnTestStart(const TestInfo& test_info) { + printf("%s.%s\n", test_info.test_case_name(), test_info.name()); + } +}; + +} // namespace + +int main(int argc, char **argv) { + InitGoogleTest(&argc, argv); + + // Replaces the default printer with TestNamePrinter, which prints + // the test name only. + TestEventListeners& listeners = UnitTest::GetInstance()->listeners(); + delete listeners.Release(listeners.default_result_printer()); + listeners.Append(new TestNamePrinter); + + return RUN_ALL_TESTS(); +} +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// +// This test verifies that it's possible to use Google Test by including +// the gtest.h header file alone. + +#include "gtest/gtest.h" + +namespace { + +void Subroutine() { + EXPECT_EQ(42, 42); +} + +TEST(NoFatalFailureTest, ExpectNoFatalFailure) { + EXPECT_NO_FATAL_FAILURE(;); + EXPECT_NO_FATAL_FAILURE(SUCCEED()); + EXPECT_NO_FATAL_FAILURE(Subroutine()); + EXPECT_NO_FATAL_FAILURE({ SUCCEED(); }); +} + +TEST(NoFatalFailureTest, AssertNoFatalFailure) { + ASSERT_NO_FATAL_FAILURE(;); + ASSERT_NO_FATAL_FAILURE(SUCCEED()); + ASSERT_NO_FATAL_FAILURE(Subroutine()); + ASSERT_NO_FATAL_FAILURE({ SUCCEED(); }); +} + +} // namespace +// Copyright 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Tests Google Test's throw-on-failure mode with exceptions enabled. + +#include "gtest/gtest.h" + +#include +#include +#include +#include + +// Prints the given failure message and exits the program with +// non-zero. We use this instead of a Google Test assertion to +// indicate a failure, as the latter is been tested and cannot be +// relied on. +void Fail(const char* msg) { + printf("FAILURE: %s\n", msg); + fflush(stdout); + exit(1); +} + +// Tests that an assertion failure throws a subclass of +// std::runtime_error. +void TestFailureThrowsRuntimeError() { + testing::GTEST_FLAG(throw_on_failure) = true; + + // A successful assertion shouldn't throw. + try { + EXPECT_EQ(3, 3); + } catch(...) 
{ + Fail("A successful assertion wrongfully threw."); + } + + // A failed assertion should throw a subclass of std::runtime_error. + try { + EXPECT_EQ(2, 3) << "Expected failure"; + } catch(const std::runtime_error& e) { + if (strstr(e.what(), "Expected failure") != NULL) + return; + + printf("%s", + "A failed assertion did throw an exception of the right type, " + "but the message is incorrect. Instead of containing \"Expected " + "failure\", it is:\n"); + Fail(e.what()); + } catch(...) { + Fail("A failed assertion threw the wrong type of exception."); + } + Fail("A failed assertion should've thrown but didn't."); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + + // We want to ensure that people can use Google Test assertions in + // other testing frameworks, as long as they initialize Google Test + // properly and set the thrown-on-failure mode. Therefore, we don't + // use Google Test's constructs for defining and running tests + // (e.g. TEST and RUN_ALL_TESTS) here. + + TestFailureThrowsRuntimeError(); + return 0; +} +// Copyright 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Tests Google Test's throw-on-failure mode with exceptions disabled. +// +// This program must be compiled with exceptions disabled. It will be +// invoked by gtest_throw_on_failure_test.py, and is expected to exit +// with non-zero in the throw-on-failure mode or 0 otherwise. + +#include "gtest/gtest.h" + +#include // for fflush, fprintf, NULL, etc. +#include // for exit +#include // for set_terminate + +// This terminate handler aborts the program using exit() rather than abort(). +// This avoids showing pop-ups on Windows systems and core dumps on Unix-like +// ones. 
+void TerminateHandler() { + fprintf(stderr, "%s\n", "Unhandled C++ exception terminating the program."); + fflush(NULL); + exit(1); +} + +int main(int argc, char** argv) { +#if GTEST_HAS_EXCEPTIONS + std::set_terminate(&TerminateHandler); +#endif + testing::InitGoogleTest(&argc, argv); + + // We want to ensure that people can use Google Test assertions in + // other testing frameworks, as long as they initialize Google Test + // properly and set the throw-on-failure mode. Therefore, we don't + // use Google Test's constructs for defining and running tests + // (e.g. TEST and RUN_ALL_TESTS) here. + + // In the throw-on-failure mode with exceptions disabled, this + // assertion will cause the program to exit with a non-zero code. + EXPECT_EQ(2, 3); + + // When not in the throw-on-failure mode, the control will reach + // here. + return 0; +} +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Tests using global test environments. + +#include +#include +#include "gtest/gtest.h" + +#define GTEST_IMPLEMENTATION_ 1 // Required for the next #include. +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +namespace testing { +GTEST_DECLARE_string_(filter); +} + +namespace { + +enum FailureType { + NO_FAILURE, NON_FATAL_FAILURE, FATAL_FAILURE +}; + +// For testing using global test environments. +class MyEnvironment : public testing::Environment { + public: + MyEnvironment() { Reset(); } + + // Depending on the value of failure_in_set_up_, SetUp() will + // generate a non-fatal failure, generate a fatal failure, or + // succeed. + virtual void SetUp() { + set_up_was_run_ = true; + + switch (failure_in_set_up_) { + case NON_FATAL_FAILURE: + ADD_FAILURE() << "Expected non-fatal failure in global set-up."; + break; + case FATAL_FAILURE: + FAIL() << "Expected fatal failure in global set-up."; + break; + default: + break; + } + } + + // Generates a non-fatal failure. 
+ virtual void TearDown() { + tear_down_was_run_ = true; + ADD_FAILURE() << "Expected non-fatal failure in global tear-down."; + } + + // Resets the state of the environment s.t. it can be reused. + void Reset() { + failure_in_set_up_ = NO_FAILURE; + set_up_was_run_ = false; + tear_down_was_run_ = false; + } + + // We call this function to set the type of failure SetUp() should + // generate. + void set_failure_in_set_up(FailureType type) { + failure_in_set_up_ = type; + } + + // Was SetUp() run? + bool set_up_was_run() const { return set_up_was_run_; } + + // Was TearDown() run? + bool tear_down_was_run() const { return tear_down_was_run_; } + + private: + FailureType failure_in_set_up_; + bool set_up_was_run_; + bool tear_down_was_run_; +}; + +// Was the TEST run? +bool test_was_run; + +// The sole purpose of this TEST is to enable us to check whether it +// was run. +TEST(FooTest, Bar) { + test_was_run = true; +} + +// Prints the message and aborts the program if condition is false. +void Check(bool condition, const char* msg) { + if (!condition) { + printf("FAILED: %s\n", msg); + testing::internal::posix::Abort(); + } +} + +// Runs the tests. Return true iff successful. +// +// The 'failure' parameter specifies the type of failure that should +// be generated by the global set-up. +int RunAllTests(MyEnvironment* env, FailureType failure) { + env->Reset(); + env->set_failure_in_set_up(failure); + test_was_run = false; + testing::internal::GetUnitTestImpl()->ClearAdHocTestResult(); + return RUN_ALL_TESTS(); +} + +} // namespace + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + + // Registers a global test environment, and verifies that the + // registration function returns its argument. + MyEnvironment* const env = new MyEnvironment; + Check(testing::AddGlobalTestEnvironment(env) == env, + "AddGlobalTestEnvironment() should return its argument."); + + // Verifies that RUN_ALL_TESTS() runs the tests when the global + // set-up is successful. + Check(RunAllTests(env, NO_FAILURE) != 0, + "RUN_ALL_TESTS() should return non-zero, as the global tear-down " + "should generate a failure."); + Check(test_was_run, + "The tests should run, as the global set-up should generate no " + "failure"); + Check(env->tear_down_was_run(), + "The global tear-down should run, as the global set-up was run."); + + // Verifies that RUN_ALL_TESTS() runs the tests when the global + // set-up generates no fatal failure. + Check(RunAllTests(env, NON_FATAL_FAILURE) != 0, + "RUN_ALL_TESTS() should return non-zero, as both the global set-up " + "and the global tear-down should generate a non-fatal failure."); + Check(test_was_run, + "The tests should run, as the global set-up should generate no " + "fatal failure."); + Check(env->tear_down_was_run(), + "The global tear-down should run, as the global set-up was run."); + + // Verifies that RUN_ALL_TESTS() runs no test when the global set-up + // generates a fatal failure. + Check(RunAllTests(env, FATAL_FAILURE) != 0, + "RUN_ALL_TESTS() should return non-zero, as the global set-up " + "should generate a fatal failure."); + Check(!test_was_run, + "The tests should not run, as the global set-up should generate " + "a fatal failure."); + Check(env->tear_down_was_run(), + "The global tear-down should run, as the global set-up was run."); + + // Verifies that RUN_ALL_TESTS() doesn't do global set-up or + // tear-down when there is no test to run. 
+ testing::GTEST_FLAG(filter) = "-*"; + Check(RunAllTests(env, NO_FAILURE) == 0, + "RUN_ALL_TESTS() should return zero, as there is no test to run."); + Check(!env->set_up_was_run(), + "The global set-up should not run, as there is no test to run."); + Check(!env->tear_down_was_run(), + "The global tear-down should not run, " + "as the global set-up was not run."); + + printf("PASS\n"); + return 0; +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Unit test for Google Test test filters. +// +// A user can specify which test(s) in a Google Test program to run via +// either the GTEST_FILTER environment variable or the --gtest_filter +// flag. This is used for testing such functionality. +// +// The program will be invoked from a Python unit test. Don't run it +// directly. + +#include "gtest/gtest.h" + +namespace { + +// Test case FooTest. + +class FooTest : public testing::Test { +}; + +TEST_F(FooTest, Abc) { +} + +TEST_F(FooTest, Xyz) { + FAIL() << "Expected failure."; +} + +// Test case BarTest. + +TEST(BarTest, TestOne) { +} + +TEST(BarTest, TestTwo) { +} + +TEST(BarTest, TestThree) { +} + +TEST(BarTest, DISABLED_TestFour) { + FAIL() << "Expected failure."; +} + +TEST(BarTest, DISABLED_TestFive) { + FAIL() << "Expected failure."; +} + +// Test case BazTest. + +TEST(BazTest, TestOne) { + FAIL() << "Expected failure."; +} + +TEST(BazTest, TestA) { +} + +TEST(BazTest, TestB) { +} + +TEST(BazTest, DISABLED_TestC) { + FAIL() << "Expected failure."; +} + +// Test case HasDeathTest + +TEST(HasDeathTest, Test1) { + EXPECT_DEATH_IF_SUPPORTED(exit(1), ".*"); +} + +// We need at least two death tests to make sure that the all death tests +// aren't on the first shard. 
+TEST(HasDeathTest, Test2) { + EXPECT_DEATH_IF_SUPPORTED(exit(1), ".*"); +} + +// Test case FoobarTest + +TEST(DISABLED_FoobarTest, Test1) { + FAIL() << "Expected failure."; +} + +TEST(DISABLED_FoobarTest, DISABLED_Test2) { + FAIL() << "Expected failure."; +} + +// Test case FoobarbazTest + +TEST(DISABLED_FoobarbazTest, TestA) { + FAIL() << "Expected failure."; +} + +#if GTEST_HAS_PARAM_TEST +class ParamTest : public testing::TestWithParam { +}; + +TEST_P(ParamTest, TestX) { +} + +TEST_P(ParamTest, TestY) { +} + +INSTANTIATE_TEST_CASE_P(SeqP, ParamTest, testing::Values(1, 2)); +INSTANTIATE_TEST_CASE_P(SeqQ, ParamTest, testing::Values(5, 6)); +#endif // GTEST_HAS_PARAM_TEST + +} // namespace + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Unit test for include/gtest/gtest_prod.h. + +#include "gtest/gtest.h" +#include "test/production.h" + +// Tests that private members can be accessed from a TEST declared as +// a friend of the class. +TEST(PrivateCodeTest, CanAccessPrivateMembers) { + PrivateCode a; + EXPECT_EQ(0, a.x_); + + a.set_x(1); + EXPECT_EQ(1, a.x_); +} + +typedef testing::Test PrivateCodeFixtureTest; + +// Tests that private members can be accessed from a TEST_F declared +// as a friend of the class. +TEST_F(PrivateCodeFixtureTest, CanAccessPrivateMembers) { + PrivateCode a; + EXPECT_EQ(0, a.x_); + + a.set_x(2); + EXPECT_EQ(2, a.x_); +} +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/gtest.h" + +// Tests that we don't have to define main() when we link to +// gtest_main instead of gtest. + +namespace { + +TEST(GTestMainTest, ShouldSucceed) { +} + +} // namespace + +// We are using the main() function defined in src/gtest_main.cc, so +// we don't define it here. +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// The purpose of this file is to generate Google Test output under +// various conditions. The output will then be verified by +// gtest_output_test.py to ensure that Google Test generates the +// desired messages. Therefore, most tests in this file are MEANT TO +// FAIL. 
+// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/gtest-spi.h" +#include "gtest/gtest.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +#include + +#if GTEST_IS_THREADSAFE +using testing::ScopedFakeTestPartResultReporter; +using testing::TestPartResultArray; + +using testing::internal::Notification; +using testing::internal::ThreadWithParam; +#endif + +namespace posix = ::testing::internal::posix; + +// Tests catching fatal failures. + +// A subroutine used by the following test. +void TestEq1(int x) { + ASSERT_EQ(1, x); +} + +// This function calls a test subroutine, catches the fatal failure it +// generates, and then returns early. +void TryTestSubroutine() { + // Calls a subrountine that yields a fatal failure. + TestEq1(2); + + // Catches the fatal failure and aborts the test. + // + // The testing::Test:: prefix is necessary when calling + // HasFatalFailure() outside of a TEST, TEST_F, or test fixture. + if (testing::Test::HasFatalFailure()) return; + + // If we get here, something is wrong. + FAIL() << "This should never be reached."; +} + +TEST(PassingTest, PassingTest1) { +} + +TEST(PassingTest, PassingTest2) { +} + +// Tests that parameters of failing parameterized tests are printed in the +// failing test summary. +class FailingParamTest : public testing::TestWithParam {}; + +TEST_P(FailingParamTest, Fails) { + EXPECT_EQ(1, GetParam()); +} + +// This generates a test which will fail. Google Test is expected to print +// its parameter when it outputs the list of all failed tests. +INSTANTIATE_TEST_CASE_P(PrintingFailingParams, + FailingParamTest, + testing::Values(2)); + +static const char kGoldenString[] = "\"Line\0 1\"\nLine 2"; + +TEST(NonfatalFailureTest, EscapesStringOperands) { + std::string actual = "actual \"string\""; + EXPECT_EQ(kGoldenString, actual); + + const char* golden = kGoldenString; + EXPECT_EQ(golden, actual); +} + +TEST(NonfatalFailureTest, DiffForLongStrings) { + std::string golden_str(kGoldenString, sizeof(kGoldenString) - 1); + EXPECT_EQ(golden_str, "Line 2"); +} + +// Tests catching a fatal failure in a subroutine. +TEST(FatalFailureTest, FatalFailureInSubroutine) { + printf("(expecting a failure that x should be 1)\n"); + + TryTestSubroutine(); +} + +// Tests catching a fatal failure in a nested subroutine. +TEST(FatalFailureTest, FatalFailureInNestedSubroutine) { + printf("(expecting a failure that x should be 1)\n"); + + // Calls a subrountine that yields a fatal failure. + TryTestSubroutine(); + + // Catches the fatal failure and aborts the test. + // + // When calling HasFatalFailure() inside a TEST, TEST_F, or test + // fixture, the testing::Test:: prefix is not needed. + if (HasFatalFailure()) return; + + // If we get here, something is wrong. + FAIL() << "This should never be reached."; +} + +// Tests HasFatalFailure() after a failed EXPECT check. +TEST(FatalFailureTest, NonfatalFailureInSubroutine) { + printf("(expecting a failure on false)\n"); + EXPECT_TRUE(false); // Generates a nonfatal failure + ASSERT_FALSE(HasFatalFailure()); // This should succeed. +} + +// Tests interleaving user logging and Google Test assertions. 
+TEST(LoggingTest, InterleavingLoggingAndAssertions) { + static const int a[4] = { + 3, 9, 2, 6 + }; + + printf("(expecting 2 failures on (3) >= (a[i]))\n"); + for (int i = 0; i < static_cast(sizeof(a)/sizeof(*a)); i++) { + printf("i == %d\n", i); + EXPECT_GE(3, a[i]); + } +} + +// Tests the SCOPED_TRACE macro. + +// A helper function for testing SCOPED_TRACE. +void SubWithoutTrace(int n) { + EXPECT_EQ(1, n); + ASSERT_EQ(2, n); +} + +// Another helper function for testing SCOPED_TRACE. +void SubWithTrace(int n) { + SCOPED_TRACE(testing::Message() << "n = " << n); + + SubWithoutTrace(n); +} + +// Tests that SCOPED_TRACE() obeys lexical scopes. +TEST(SCOPED_TRACETest, ObeysScopes) { + printf("(expected to fail)\n"); + + // There should be no trace before SCOPED_TRACE() is invoked. + ADD_FAILURE() << "This failure is expected, and shouldn't have a trace."; + + { + SCOPED_TRACE("Expected trace"); + // After SCOPED_TRACE(), a failure in the current scope should contain + // the trace. + ADD_FAILURE() << "This failure is expected, and should have a trace."; + } + + // Once the control leaves the scope of the SCOPED_TRACE(), there + // should be no trace again. + ADD_FAILURE() << "This failure is expected, and shouldn't have a trace."; +} + +// Tests that SCOPED_TRACE works inside a loop. +TEST(SCOPED_TRACETest, WorksInLoop) { + printf("(expected to fail)\n"); + + for (int i = 1; i <= 2; i++) { + SCOPED_TRACE(testing::Message() << "i = " << i); + + SubWithoutTrace(i); + } +} + +// Tests that SCOPED_TRACE works in a subroutine. +TEST(SCOPED_TRACETest, WorksInSubroutine) { + printf("(expected to fail)\n"); + + SubWithTrace(1); + SubWithTrace(2); +} + +// Tests that SCOPED_TRACE can be nested. +TEST(SCOPED_TRACETest, CanBeNested) { + printf("(expected to fail)\n"); + + SCOPED_TRACE(""); // A trace without a message. + + SubWithTrace(2); +} + +// Tests that multiple SCOPED_TRACEs can be used in the same scope. +TEST(SCOPED_TRACETest, CanBeRepeated) { + printf("(expected to fail)\n"); + + SCOPED_TRACE("A"); + ADD_FAILURE() + << "This failure is expected, and should contain trace point A."; + + SCOPED_TRACE("B"); + ADD_FAILURE() + << "This failure is expected, and should contain trace point A and B."; + + { + SCOPED_TRACE("C"); + ADD_FAILURE() << "This failure is expected, and should " + << "contain trace point A, B, and C."; + } + + SCOPED_TRACE("D"); + ADD_FAILURE() << "This failure is expected, and should " + << "contain trace point A, B, and D."; +} + +#if GTEST_IS_THREADSAFE +// Tests that SCOPED_TRACE()s can be used concurrently from multiple +// threads. Namely, an assertion should be affected by +// SCOPED_TRACE()s in its own thread only. 
+ +// Here's the sequence of actions that happen in the test: +// +// Thread A (main) | Thread B (spawned) +// ===============================|================================ +// spawns thread B | +// -------------------------------+-------------------------------- +// waits for n1 | SCOPED_TRACE("Trace B"); +// | generates failure #1 +// | notifies n1 +// -------------------------------+-------------------------------- +// SCOPED_TRACE("Trace A"); | waits for n2 +// generates failure #2 | +// notifies n2 | +// -------------------------------|-------------------------------- +// waits for n3 | generates failure #3 +// | trace B dies +// | generates failure #4 +// | notifies n3 +// -------------------------------|-------------------------------- +// generates failure #5 | finishes +// trace A dies | +// generates failure #6 | +// -------------------------------|-------------------------------- +// waits for thread B to finish | + +struct CheckPoints { + Notification n1; + Notification n2; + Notification n3; +}; + +static void ThreadWithScopedTrace(CheckPoints* check_points) { + { + SCOPED_TRACE("Trace B"); + ADD_FAILURE() + << "Expected failure #1 (in thread B, only trace B alive)."; + check_points->n1.Notify(); + check_points->n2.WaitForNotification(); + + ADD_FAILURE() + << "Expected failure #3 (in thread B, trace A & B both alive)."; + } // Trace B dies here. + ADD_FAILURE() + << "Expected failure #4 (in thread B, only trace A alive)."; + check_points->n3.Notify(); +} + +TEST(SCOPED_TRACETest, WorksConcurrently) { + printf("(expecting 6 failures)\n"); + + CheckPoints check_points; + ThreadWithParam thread(&ThreadWithScopedTrace, + &check_points, + NULL); + check_points.n1.WaitForNotification(); + + { + SCOPED_TRACE("Trace A"); + ADD_FAILURE() + << "Expected failure #2 (in thread A, trace A & B both alive)."; + check_points.n2.Notify(); + check_points.n3.WaitForNotification(); + + ADD_FAILURE() + << "Expected failure #5 (in thread A, only trace A alive)."; + } // Trace A dies here. + ADD_FAILURE() + << "Expected failure #6 (in thread A, no trace alive)."; + thread.Join(); +} +#endif // GTEST_IS_THREADSAFE + +TEST(DisabledTestsWarningTest, + DISABLED_AlsoRunDisabledTestsFlagSuppressesWarning) { + // This test body is intentionally empty. Its sole purpose is for + // verifying that the --gtest_also_run_disabled_tests flag + // suppresses the "YOU HAVE 12 DISABLED TESTS" warning at the end of + // the test output. +} + +// Tests using assertions outside of TEST and TEST_F. +// +// This function creates two failures intentionally. +void AdHocTest() { + printf("The non-test part of the code is expected to have 2 failures.\n\n"); + EXPECT_TRUE(false); + EXPECT_EQ(2, 3); +} + +// Runs all TESTs, all TEST_Fs, and the ad hoc test. +int RunAllTests() { + AdHocTest(); + return RUN_ALL_TESTS(); +} + +// Tests non-fatal failures in the fixture constructor. 
+class NonFatalFailureInFixtureConstructorTest : public testing::Test { + protected: + NonFatalFailureInFixtureConstructorTest() { + printf("(expecting 5 failures)\n"); + ADD_FAILURE() << "Expected failure #1, in the test fixture c'tor."; + } + + ~NonFatalFailureInFixtureConstructorTest() { + ADD_FAILURE() << "Expected failure #5, in the test fixture d'tor."; + } + + virtual void SetUp() { + ADD_FAILURE() << "Expected failure #2, in SetUp()."; + } + + virtual void TearDown() { + ADD_FAILURE() << "Expected failure #4, in TearDown."; + } +}; + +TEST_F(NonFatalFailureInFixtureConstructorTest, FailureInConstructor) { + ADD_FAILURE() << "Expected failure #3, in the test body."; +} + +// Tests fatal failures in the fixture constructor. +class FatalFailureInFixtureConstructorTest : public testing::Test { + protected: + FatalFailureInFixtureConstructorTest() { + printf("(expecting 2 failures)\n"); + Init(); + } + + ~FatalFailureInFixtureConstructorTest() { + ADD_FAILURE() << "Expected failure #2, in the test fixture d'tor."; + } + + virtual void SetUp() { + ADD_FAILURE() << "UNEXPECTED failure in SetUp(). " + << "We should never get here, as the test fixture c'tor " + << "had a fatal failure."; + } + + virtual void TearDown() { + ADD_FAILURE() << "UNEXPECTED failure in TearDown(). " + << "We should never get here, as the test fixture c'tor " + << "had a fatal failure."; + } + + private: + void Init() { + FAIL() << "Expected failure #1, in the test fixture c'tor."; + } +}; + +TEST_F(FatalFailureInFixtureConstructorTest, FailureInConstructor) { + ADD_FAILURE() << "UNEXPECTED failure in the test body. " + << "We should never get here, as the test fixture c'tor " + << "had a fatal failure."; +} + +// Tests non-fatal failures in SetUp(). +class NonFatalFailureInSetUpTest : public testing::Test { + protected: + virtual ~NonFatalFailureInSetUpTest() { + Deinit(); + } + + virtual void SetUp() { + printf("(expecting 4 failures)\n"); + ADD_FAILURE() << "Expected failure #1, in SetUp()."; + } + + virtual void TearDown() { + FAIL() << "Expected failure #3, in TearDown()."; + } + private: + void Deinit() { + FAIL() << "Expected failure #4, in the test fixture d'tor."; + } +}; + +TEST_F(NonFatalFailureInSetUpTest, FailureInSetUp) { + FAIL() << "Expected failure #2, in the test function."; +} + +// Tests fatal failures in SetUp(). +class FatalFailureInSetUpTest : public testing::Test { + protected: + virtual ~FatalFailureInSetUpTest() { + Deinit(); + } + + virtual void SetUp() { + printf("(expecting 3 failures)\n"); + FAIL() << "Expected failure #1, in SetUp()."; + } + + virtual void TearDown() { + FAIL() << "Expected failure #2, in TearDown()."; + } + private: + void Deinit() { + FAIL() << "Expected failure #3, in the test fixture d'tor."; + } +}; + +TEST_F(FatalFailureInSetUpTest, FailureInSetUp) { + FAIL() << "UNEXPECTED failure in the test function. " + << "We should never get here, as SetUp() failed."; +} + +TEST(AddFailureAtTest, MessageContainsSpecifiedFileAndLineNumber) { + ADD_FAILURE_AT("foo.cc", 42) << "Expected failure in foo.cc"; +} + +#if GTEST_IS_THREADSAFE + +// A unary function that may die. +void DieIf(bool should_die) { + GTEST_CHECK_(!should_die) << " - death inside DieIf()."; +} + +// Tests running death tests in a multi-threaded context. + +// Used for coordination between the main and the spawn thread. 
+struct SpawnThreadNotifications { + SpawnThreadNotifications() {} + + Notification spawn_thread_started; + Notification spawn_thread_ok_to_terminate; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(SpawnThreadNotifications); +}; + +// The function to be executed in the thread spawn by the +// MultipleThreads test (below). +static void ThreadRoutine(SpawnThreadNotifications* notifications) { + // Signals the main thread that this thread has started. + notifications->spawn_thread_started.Notify(); + + // Waits for permission to finish from the main thread. + notifications->spawn_thread_ok_to_terminate.WaitForNotification(); +} + +// This is a death-test test, but it's not named with a DeathTest +// suffix. It starts threads which might interfere with later +// death tests, so it must run after all other death tests. +class DeathTestAndMultiThreadsTest : public testing::Test { + protected: + // Starts a thread and waits for it to begin. + virtual void SetUp() { + thread_.reset(new ThreadWithParam( + &ThreadRoutine, ¬ifications_, NULL)); + notifications_.spawn_thread_started.WaitForNotification(); + } + // Tells the thread to finish, and reaps it. + // Depending on the version of the thread library in use, + // a manager thread might still be left running that will interfere + // with later death tests. This is unfortunate, but this class + // cleans up after itself as best it can. + virtual void TearDown() { + notifications_.spawn_thread_ok_to_terminate.Notify(); + } + + private: + SpawnThreadNotifications notifications_; + testing::internal::scoped_ptr > + thread_; +}; + +#endif // GTEST_IS_THREADSAFE + +// The MixedUpTestCaseTest test case verifies that Google Test will fail a +// test if it uses a different fixture class than what other tests in +// the same test case use. It deliberately contains two fixture +// classes with the same name but defined in different namespaces. + +// The MixedUpTestCaseWithSameTestNameTest test case verifies that +// when the user defines two tests with the same test case name AND +// same test name (but in different namespaces), the second test will +// fail. + +namespace foo { + +class MixedUpTestCaseTest : public testing::Test { +}; + +TEST_F(MixedUpTestCaseTest, FirstTestFromNamespaceFoo) {} +TEST_F(MixedUpTestCaseTest, SecondTestFromNamespaceFoo) {} + +class MixedUpTestCaseWithSameTestNameTest : public testing::Test { +}; + +TEST_F(MixedUpTestCaseWithSameTestNameTest, + TheSecondTestWithThisNameShouldFail) {} + +} // namespace foo + +namespace bar { + +class MixedUpTestCaseTest : public testing::Test { +}; + +// The following two tests are expected to fail. We rely on the +// golden file to check that Google Test generates the right error message. +TEST_F(MixedUpTestCaseTest, ThisShouldFail) {} +TEST_F(MixedUpTestCaseTest, ThisShouldFailToo) {} + +class MixedUpTestCaseWithSameTestNameTest : public testing::Test { +}; + +// Expected to fail. We rely on the golden file to check that Google Test +// generates the right error message. +TEST_F(MixedUpTestCaseWithSameTestNameTest, + TheSecondTestWithThisNameShouldFail) {} + +} // namespace bar + +// The following two test cases verify that Google Test catches the user +// error of mixing TEST and TEST_F in the same test case. The first +// test case checks the scenario where TEST_F appears before TEST, and +// the second one checks where TEST appears before TEST_F. 
+ +class TEST_F_before_TEST_in_same_test_case : public testing::Test { +}; + +TEST_F(TEST_F_before_TEST_in_same_test_case, DefinedUsingTEST_F) {} + +// Expected to fail. We rely on the golden file to check that Google Test +// generates the right error message. +TEST(TEST_F_before_TEST_in_same_test_case, DefinedUsingTESTAndShouldFail) {} + +class TEST_before_TEST_F_in_same_test_case : public testing::Test { +}; + +TEST(TEST_before_TEST_F_in_same_test_case, DefinedUsingTEST) {} + +// Expected to fail. We rely on the golden file to check that Google Test +// generates the right error message. +TEST_F(TEST_before_TEST_F_in_same_test_case, DefinedUsingTEST_FAndShouldFail) { +} + +// Used for testing EXPECT_NONFATAL_FAILURE() and EXPECT_FATAL_FAILURE(). +int global_integer = 0; + +// Tests that EXPECT_NONFATAL_FAILURE() can reference global variables. +TEST(ExpectNonfatalFailureTest, CanReferenceGlobalVariables) { + global_integer = 0; + EXPECT_NONFATAL_FAILURE({ + EXPECT_EQ(1, global_integer) << "Expected non-fatal failure."; + }, "Expected non-fatal failure."); +} + +// Tests that EXPECT_NONFATAL_FAILURE() can reference local variables +// (static or not). +TEST(ExpectNonfatalFailureTest, CanReferenceLocalVariables) { + int m = 0; + static int n; + n = 1; + EXPECT_NONFATAL_FAILURE({ + EXPECT_EQ(m, n) << "Expected non-fatal failure."; + }, "Expected non-fatal failure."); +} + +// Tests that EXPECT_NONFATAL_FAILURE() succeeds when there is exactly +// one non-fatal failure and no fatal failure. +TEST(ExpectNonfatalFailureTest, SucceedsWhenThereIsOneNonfatalFailure) { + EXPECT_NONFATAL_FAILURE({ + ADD_FAILURE() << "Expected non-fatal failure."; + }, "Expected non-fatal failure."); +} + +// Tests that EXPECT_NONFATAL_FAILURE() fails when there is no +// non-fatal failure. +TEST(ExpectNonfatalFailureTest, FailsWhenThereIsNoNonfatalFailure) { + printf("(expecting a failure)\n"); + EXPECT_NONFATAL_FAILURE({ + }, ""); +} + +// Tests that EXPECT_NONFATAL_FAILURE() fails when there are two +// non-fatal failures. +TEST(ExpectNonfatalFailureTest, FailsWhenThereAreTwoNonfatalFailures) { + printf("(expecting a failure)\n"); + EXPECT_NONFATAL_FAILURE({ + ADD_FAILURE() << "Expected non-fatal failure 1."; + ADD_FAILURE() << "Expected non-fatal failure 2."; + }, ""); +} + +// Tests that EXPECT_NONFATAL_FAILURE() fails when there is one fatal +// failure. +TEST(ExpectNonfatalFailureTest, FailsWhenThereIsOneFatalFailure) { + printf("(expecting a failure)\n"); + EXPECT_NONFATAL_FAILURE({ + FAIL() << "Expected fatal failure."; + }, ""); +} + +// Tests that EXPECT_NONFATAL_FAILURE() fails when the statement being +// tested returns. +TEST(ExpectNonfatalFailureTest, FailsWhenStatementReturns) { + printf("(expecting a failure)\n"); + EXPECT_NONFATAL_FAILURE({ + return; + }, ""); +} + +#if GTEST_HAS_EXCEPTIONS + +// Tests that EXPECT_NONFATAL_FAILURE() fails when the statement being +// tested throws. +TEST(ExpectNonfatalFailureTest, FailsWhenStatementThrows) { + printf("(expecting a failure)\n"); + try { + EXPECT_NONFATAL_FAILURE({ + throw 0; + }, ""); + } catch(int) { // NOLINT + } +} + +#endif // GTEST_HAS_EXCEPTIONS + +// Tests that EXPECT_FATAL_FAILURE() can reference global variables. +TEST(ExpectFatalFailureTest, CanReferenceGlobalVariables) { + global_integer = 0; + EXPECT_FATAL_FAILURE({ + ASSERT_EQ(1, global_integer) << "Expected fatal failure."; + }, "Expected fatal failure."); +} + +// Tests that EXPECT_FATAL_FAILURE() can reference local static +// variables. 
+TEST(ExpectFatalFailureTest, CanReferenceLocalStaticVariables) { + static int n; + n = 1; + EXPECT_FATAL_FAILURE({ + ASSERT_EQ(0, n) << "Expected fatal failure."; + }, "Expected fatal failure."); +} + +// Tests that EXPECT_FATAL_FAILURE() succeeds when there is exactly +// one fatal failure and no non-fatal failure. +TEST(ExpectFatalFailureTest, SucceedsWhenThereIsOneFatalFailure) { + EXPECT_FATAL_FAILURE({ + FAIL() << "Expected fatal failure."; + }, "Expected fatal failure."); +} + +// Tests that EXPECT_FATAL_FAILURE() fails when there is no fatal +// failure. +TEST(ExpectFatalFailureTest, FailsWhenThereIsNoFatalFailure) { + printf("(expecting a failure)\n"); + EXPECT_FATAL_FAILURE({ + }, ""); +} + +// A helper for generating a fatal failure. +void FatalFailure() { + FAIL() << "Expected fatal failure."; +} + +// Tests that EXPECT_FATAL_FAILURE() fails when there are two +// fatal failures. +TEST(ExpectFatalFailureTest, FailsWhenThereAreTwoFatalFailures) { + printf("(expecting a failure)\n"); + EXPECT_FATAL_FAILURE({ + FatalFailure(); + FatalFailure(); + }, ""); +} + +// Tests that EXPECT_FATAL_FAILURE() fails when there is one non-fatal +// failure. +TEST(ExpectFatalFailureTest, FailsWhenThereIsOneNonfatalFailure) { + printf("(expecting a failure)\n"); + EXPECT_FATAL_FAILURE({ + ADD_FAILURE() << "Expected non-fatal failure."; + }, ""); +} + +// Tests that EXPECT_FATAL_FAILURE() fails when the statement being +// tested returns. +TEST(ExpectFatalFailureTest, FailsWhenStatementReturns) { + printf("(expecting a failure)\n"); + EXPECT_FATAL_FAILURE({ + return; + }, ""); +} + +#if GTEST_HAS_EXCEPTIONS + +// Tests that EXPECT_FATAL_FAILURE() fails when the statement being +// tested throws. +TEST(ExpectFatalFailureTest, FailsWhenStatementThrows) { + printf("(expecting a failure)\n"); + try { + EXPECT_FATAL_FAILURE({ + throw 0; + }, ""); + } catch(int) { // NOLINT + } +} + +#endif // GTEST_HAS_EXCEPTIONS + +// This #ifdef block tests the output of value-parameterized tests. + +#if GTEST_HAS_PARAM_TEST + +std::string ParamNameFunc(const testing::TestParamInfo& info) { + return info.param; +} + +class ParamTest : public testing::TestWithParam { +}; + +TEST_P(ParamTest, Success) { + EXPECT_EQ("a", GetParam()); +} + +TEST_P(ParamTest, Failure) { + EXPECT_EQ("b", GetParam()) << "Expected failure"; +} + +INSTANTIATE_TEST_CASE_P(PrintingStrings, + ParamTest, + testing::Values(std::string("a")), + ParamNameFunc); + +#endif // GTEST_HAS_PARAM_TEST + +// This #ifdef block tests the output of typed tests. +#if GTEST_HAS_TYPED_TEST + +template +class TypedTest : public testing::Test { +}; + +TYPED_TEST_CASE(TypedTest, testing::Types); + +TYPED_TEST(TypedTest, Success) { + EXPECT_EQ(0, TypeParam()); +} + +TYPED_TEST(TypedTest, Failure) { + EXPECT_EQ(1, TypeParam()) << "Expected failure"; +} + +#endif // GTEST_HAS_TYPED_TEST + +// This #ifdef block tests the output of type-parameterized tests. 
+#if GTEST_HAS_TYPED_TEST_P + +template +class TypedTestP : public testing::Test { +}; + +TYPED_TEST_CASE_P(TypedTestP); + +TYPED_TEST_P(TypedTestP, Success) { + EXPECT_EQ(0U, TypeParam()); +} + +TYPED_TEST_P(TypedTestP, Failure) { + EXPECT_EQ(1U, TypeParam()) << "Expected failure"; +} + +REGISTER_TYPED_TEST_CASE_P(TypedTestP, Success, Failure); + +typedef testing::Types UnsignedTypes; +INSTANTIATE_TYPED_TEST_CASE_P(Unsigned, TypedTestP, UnsignedTypes); + +#endif // GTEST_HAS_TYPED_TEST_P + +#if GTEST_HAS_DEATH_TEST + +// We rely on the golden file to verify that tests whose test case +// name ends with DeathTest are run first. + +TEST(ADeathTest, ShouldRunFirst) { +} + +# if GTEST_HAS_TYPED_TEST + +// We rely on the golden file to verify that typed tests whose test +// case name ends with DeathTest are run first. + +template +class ATypedDeathTest : public testing::Test { +}; + +typedef testing::Types NumericTypes; +TYPED_TEST_CASE(ATypedDeathTest, NumericTypes); + +TYPED_TEST(ATypedDeathTest, ShouldRunFirst) { +} + +# endif // GTEST_HAS_TYPED_TEST + +# if GTEST_HAS_TYPED_TEST_P + + +// We rely on the golden file to verify that type-parameterized tests +// whose test case name ends with DeathTest are run first. + +template +class ATypeParamDeathTest : public testing::Test { +}; + +TYPED_TEST_CASE_P(ATypeParamDeathTest); + +TYPED_TEST_P(ATypeParamDeathTest, ShouldRunFirst) { +} + +REGISTER_TYPED_TEST_CASE_P(ATypeParamDeathTest, ShouldRunFirst); + +INSTANTIATE_TYPED_TEST_CASE_P(My, ATypeParamDeathTest, NumericTypes); + +# endif // GTEST_HAS_TYPED_TEST_P + +#endif // GTEST_HAS_DEATH_TEST + +// Tests various failure conditions of +// EXPECT_{,NON}FATAL_FAILURE{,_ON_ALL_THREADS}. +class ExpectFailureTest : public testing::Test { + public: // Must be public and not protected due to a bug in g++ 3.4.2. + enum FailureMode { + FATAL_FAILURE, + NONFATAL_FAILURE + }; + static void AddFailure(FailureMode failure) { + if (failure == FATAL_FAILURE) { + FAIL() << "Expected fatal failure."; + } else { + ADD_FAILURE() << "Expected non-fatal failure."; + } + } +}; + +TEST_F(ExpectFailureTest, ExpectFatalFailure) { + // Expected fatal failure, but succeeds. + printf("(expecting 1 failure)\n"); + EXPECT_FATAL_FAILURE(SUCCEED(), "Expected fatal failure."); + // Expected fatal failure, but got a non-fatal failure. + printf("(expecting 1 failure)\n"); + EXPECT_FATAL_FAILURE(AddFailure(NONFATAL_FAILURE), "Expected non-fatal " + "failure."); + // Wrong message. + printf("(expecting 1 failure)\n"); + EXPECT_FATAL_FAILURE(AddFailure(FATAL_FAILURE), "Some other fatal failure " + "expected."); +} + +TEST_F(ExpectFailureTest, ExpectNonFatalFailure) { + // Expected non-fatal failure, but succeeds. + printf("(expecting 1 failure)\n"); + EXPECT_NONFATAL_FAILURE(SUCCEED(), "Expected non-fatal failure."); + // Expected non-fatal failure, but got a fatal failure. + printf("(expecting 1 failure)\n"); + EXPECT_NONFATAL_FAILURE(AddFailure(FATAL_FAILURE), "Expected fatal failure."); + // Wrong message. + printf("(expecting 1 failure)\n"); + EXPECT_NONFATAL_FAILURE(AddFailure(NONFATAL_FAILURE), "Some other non-fatal " + "failure."); +} + +#if GTEST_IS_THREADSAFE + +class ExpectFailureWithThreadsTest : public ExpectFailureTest { + protected: + static void AddFailureInOtherThread(FailureMode failure) { + ThreadWithParam thread(&AddFailure, failure, NULL); + thread.Join(); + } +}; + +TEST_F(ExpectFailureWithThreadsTest, ExpectFatalFailure) { + // We only intercept the current thread. 
+ printf("(expecting 2 failures)\n"); + EXPECT_FATAL_FAILURE(AddFailureInOtherThread(FATAL_FAILURE), + "Expected fatal failure."); +} + +TEST_F(ExpectFailureWithThreadsTest, ExpectNonFatalFailure) { + // We only intercept the current thread. + printf("(expecting 2 failures)\n"); + EXPECT_NONFATAL_FAILURE(AddFailureInOtherThread(NONFATAL_FAILURE), + "Expected non-fatal failure."); +} + +typedef ExpectFailureWithThreadsTest ScopedFakeTestPartResultReporterTest; + +// Tests that the ScopedFakeTestPartResultReporter only catches failures from +// the current thread if it is instantiated with INTERCEPT_ONLY_CURRENT_THREAD. +TEST_F(ScopedFakeTestPartResultReporterTest, InterceptOnlyCurrentThread) { + printf("(expecting 2 failures)\n"); + TestPartResultArray results; + { + ScopedFakeTestPartResultReporter reporter( + ScopedFakeTestPartResultReporter::INTERCEPT_ONLY_CURRENT_THREAD, + &results); + AddFailureInOtherThread(FATAL_FAILURE); + AddFailureInOtherThread(NONFATAL_FAILURE); + } + // The two failures should not have been intercepted. + EXPECT_EQ(0, results.size()) << "This shouldn't fail."; +} + +#endif // GTEST_IS_THREADSAFE + +TEST_F(ExpectFailureTest, ExpectFatalFailureOnAllThreads) { + // Expected fatal failure, but succeeds. + printf("(expecting 1 failure)\n"); + EXPECT_FATAL_FAILURE_ON_ALL_THREADS(SUCCEED(), "Expected fatal failure."); + // Expected fatal failure, but got a non-fatal failure. + printf("(expecting 1 failure)\n"); + EXPECT_FATAL_FAILURE_ON_ALL_THREADS(AddFailure(NONFATAL_FAILURE), + "Expected non-fatal failure."); + // Wrong message. + printf("(expecting 1 failure)\n"); + EXPECT_FATAL_FAILURE_ON_ALL_THREADS(AddFailure(FATAL_FAILURE), + "Some other fatal failure expected."); +} + +TEST_F(ExpectFailureTest, ExpectNonFatalFailureOnAllThreads) { + // Expected non-fatal failure, but succeeds. + printf("(expecting 1 failure)\n"); + EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(SUCCEED(), "Expected non-fatal " + "failure."); + // Expected non-fatal failure, but got a fatal failure. + printf("(expecting 1 failure)\n"); + EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(AddFailure(FATAL_FAILURE), + "Expected fatal failure."); + // Wrong message. + printf("(expecting 1 failure)\n"); + EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(AddFailure(NONFATAL_FAILURE), + "Some other non-fatal failure."); +} + + +// Two test environments for testing testing::AddGlobalTestEnvironment(). + +class FooEnvironment : public testing::Environment { + public: + virtual void SetUp() { + printf("%s", "FooEnvironment::SetUp() called.\n"); + } + + virtual void TearDown() { + printf("%s", "FooEnvironment::TearDown() called.\n"); + FAIL() << "Expected fatal failure."; + } +}; + +class BarEnvironment : public testing::Environment { + public: + virtual void SetUp() { + printf("%s", "BarEnvironment::SetUp() called.\n"); + } + + virtual void TearDown() { + printf("%s", "BarEnvironment::TearDown() called.\n"); + ADD_FAILURE() << "Expected non-fatal failure."; + } +}; + +// The main function. +// +// The idea is to use Google Test to run all the tests we have defined (some +// of them are intended to fail), and then compare the test results +// with the "golden" file. +int main(int argc, char **argv) { + testing::GTEST_FLAG(print_time) = false; + + // We just run the tests, knowing some of them are intended to fail. + // We will use a separate Python script to compare the output of + // this program with the golden file. + + // It's hard to test InitGoogleTest() directly, as it has many + // global side effects. 
The following line serves as a sanity test + // for it. + testing::InitGoogleTest(&argc, argv); + bool internal_skip_environment_and_ad_hoc_tests = + std::count(argv, argv + argc, + std::string("internal_skip_environment_and_ad_hoc_tests")) > 0; + +#if GTEST_HAS_DEATH_TEST + if (testing::internal::GTEST_FLAG(internal_run_death_test) != "") { + // Skip the usual output capturing if we're running as the child + // process of an threadsafe-style death test. +# if GTEST_OS_WINDOWS + posix::FReopen("nul:", "w", stdout); +# else + posix::FReopen("/dev/null", "w", stdout); +# endif // GTEST_OS_WINDOWS + return RUN_ALL_TESTS(); + } +#endif // GTEST_HAS_DEATH_TEST + + if (internal_skip_environment_and_ad_hoc_tests) + return RUN_ALL_TESTS(); + + // Registers two global test environments. + // The golden file verifies that they are set up in the order they + // are registered, and torn down in the reverse order. + testing::AddGlobalTestEnvironment(new FooEnvironment); + testing::AddGlobalTestEnvironment(new BarEnvironment); + + return RunAllTests(); +} +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: eefacm@gmail.com (Sean Mcafee) + +// Unit test for Google Test XML output. +// +// A user can specify XML output in a Google Test program to run via +// either the GTEST_OUTPUT environment variable or the --gtest_output +// flag. This is used for testing such functionality. +// +// This program will be invoked from a Python unit test. Don't run it +// directly. 
+ +#include "gtest/gtest.h" + +using ::testing::InitGoogleTest; +using ::testing::TestEventListeners; +using ::testing::TestWithParam; +using ::testing::UnitTest; +using ::testing::Test; +using ::testing::Values; + +class SuccessfulTest : public Test { +}; + +TEST_F(SuccessfulTest, Succeeds) { + SUCCEED() << "This is a success."; + ASSERT_EQ(1, 1); +} + +class FailedTest : public Test { +}; + +TEST_F(FailedTest, Fails) { + ASSERT_EQ(1, 2); +} + +class DisabledTest : public Test { +}; + +TEST_F(DisabledTest, DISABLED_test_not_run) { + FAIL() << "Unexpected failure: Disabled test should not be run"; +} + +TEST(MixedResultTest, Succeeds) { + EXPECT_EQ(1, 1); + ASSERT_EQ(1, 1); +} + +TEST(MixedResultTest, Fails) { + EXPECT_EQ(1, 2); + ASSERT_EQ(2, 3); +} + +TEST(MixedResultTest, DISABLED_test) { + FAIL() << "Unexpected failure: Disabled test should not be run"; +} + +TEST(XmlQuotingTest, OutputsCData) { + FAIL() << "XML output: " + ""; +} + +// Helps to test that invalid characters produced by test code do not make +// it into the XML file. +TEST(InvalidCharactersTest, InvalidCharactersInMessage) { + FAIL() << "Invalid characters in brackets [\x1\x2]"; +} + +class PropertyRecordingTest : public Test { + public: + static void SetUpTestCase() { RecordProperty("SetUpTestCase", "yes"); } + static void TearDownTestCase() { RecordProperty("TearDownTestCase", "aye"); } +}; + +TEST_F(PropertyRecordingTest, OneProperty) { + RecordProperty("key_1", "1"); +} + +TEST_F(PropertyRecordingTest, IntValuedProperty) { + RecordProperty("key_int", 1); +} + +TEST_F(PropertyRecordingTest, ThreeProperties) { + RecordProperty("key_1", "1"); + RecordProperty("key_2", "2"); + RecordProperty("key_3", "3"); +} + +TEST_F(PropertyRecordingTest, TwoValuesForOneKeyUsesLastValue) { + RecordProperty("key_1", "1"); + RecordProperty("key_1", "2"); +} + +TEST(NoFixtureTest, RecordProperty) { + RecordProperty("key", "1"); +} + +void ExternalUtilityThatCallsRecordProperty(const std::string& key, int value) { + testing::Test::RecordProperty(key, value); +} + +void ExternalUtilityThatCallsRecordProperty(const std::string& key, + const std::string& value) { + testing::Test::RecordProperty(key, value); +} + +TEST(NoFixtureTest, ExternalUtilityThatCallsRecordIntValuedProperty) { + ExternalUtilityThatCallsRecordProperty("key_for_utility_int", 1); +} + +TEST(NoFixtureTest, ExternalUtilityThatCallsRecordStringValuedProperty) { + ExternalUtilityThatCallsRecordProperty("key_for_utility_string", "1"); +} + +// Verifies that the test parameter value is output in the 'value_param' +// XML attribute for value-parameterized tests. +class ValueParamTest : public TestWithParam {}; +TEST_P(ValueParamTest, HasValueParamAttribute) {} +TEST_P(ValueParamTest, AnotherTestThatHasValueParamAttribute) {} +INSTANTIATE_TEST_CASE_P(Single, ValueParamTest, Values(33, 42)); + +#if GTEST_HAS_TYPED_TEST +// Verifies that the type parameter name is output in the 'type_param' +// XML attribute for typed tests. +template class TypedTest : public Test {}; +typedef testing::Types TypedTestTypes; +TYPED_TEST_CASE(TypedTest, TypedTestTypes); +TYPED_TEST(TypedTest, HasTypeParamAttribute) {} +#endif + +#if GTEST_HAS_TYPED_TEST_P +// Verifies that the type parameter name is output in the 'type_param' +// XML attribute for type-parameterized tests. 
+template <typename T> class TypeParameterizedTestCase : public Test {}; +TYPED_TEST_CASE_P(TypeParameterizedTestCase); +TYPED_TEST_P(TypeParameterizedTestCase, HasTypeParamAttribute) {} +REGISTER_TYPED_TEST_CASE_P(TypeParameterizedTestCase, HasTypeParamAttribute); +typedef testing::Types TypeParameterizedTestCaseTypes; +INSTANTIATE_TYPED_TEST_CASE_P(Single, + TypeParameterizedTestCase, + TypeParameterizedTestCaseTypes); +#endif + +int main(int argc, char** argv) { + InitGoogleTest(&argc, argv); + + if (argc > 1 && strcmp(argv[1], "--shut_down_xml") == 0) { + TestEventListeners& listeners = UnitTest::GetInstance()->listeners(); + delete listeners.Release(listeners.default_xml_generator()); + } + testing::Test::RecordProperty("ad_hoc_property", "42"); + return RUN_ALL_TESTS(); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Tests for Google Test itself. This verifies that the basic constructs of +// Google Test work. + +#include "gtest/gtest.h" + +// Verifies that the command line flag variables can be accessed +// in code once <gtest/gtest.h> has been #included. +// Do not move it after other #includes. +TEST(CommandLineFlagsTest, CanBeAccessedInCodeOnceGTestHIsIncluded) { + bool dummy = testing::GTEST_FLAG(also_run_disabled_tests) + || testing::GTEST_FLAG(break_on_failure) + || testing::GTEST_FLAG(catch_exceptions) + || testing::GTEST_FLAG(color) != "unknown" + || testing::GTEST_FLAG(filter) != "unknown" + || testing::GTEST_FLAG(list_tests) + || testing::GTEST_FLAG(output) != "unknown" + || testing::GTEST_FLAG(print_time) + || testing::GTEST_FLAG(random_seed) + || testing::GTEST_FLAG(repeat) > 0 + || testing::GTEST_FLAG(show_internal_stack_frames) + || testing::GTEST_FLAG(shuffle) + || testing::GTEST_FLAG(stack_trace_depth) > 0 + || testing::GTEST_FLAG(stream_result_to) != "unknown" + || testing::GTEST_FLAG(throw_on_failure); + EXPECT_TRUE(dummy || !dummy); // Suppresses warning that dummy is unused. +} + +#include <limits.h> // For INT_MAX.
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <map>
+#include <vector>
+#include <ostream>
+
+#include "gtest/gtest-spi.h"
+
+// Indicates that this translation unit is part of Google Test's
+// implementation. It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error. This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION_ 1
+#include "src/gtest-internal-inl.h"
+#undef GTEST_IMPLEMENTATION_
+
+namespace testing {
+namespace internal {
+
+#if GTEST_CAN_STREAM_RESULTS_
+
+class StreamingListenerTest : public Test {
+ public:
+  class FakeSocketWriter : public StreamingListener::AbstractSocketWriter {
+   public:
+    // Sends a string to the socket.
+    virtual void Send(const string& message) { output_ += message; }
+
+    string output_;
+  };
+
+  StreamingListenerTest()
+      : fake_sock_writer_(new FakeSocketWriter),
+        streamer_(fake_sock_writer_),
+        test_info_obj_("FooTest", "Bar", NULL, NULL,
+                       CodeLocation(__FILE__, __LINE__), 0, NULL) {}
+
+ protected:
+  string* output() { return &(fake_sock_writer_->output_); }
+
+  FakeSocketWriter* const fake_sock_writer_;
+  StreamingListener streamer_;
+  UnitTest unit_test_;
+  TestInfo test_info_obj_;  // The name test_info_ was taken by testing::Test.
+};
+
+TEST_F(StreamingListenerTest, OnTestProgramEnd) {
+  *output() = "";
+  streamer_.OnTestProgramEnd(unit_test_);
+  EXPECT_EQ("event=TestProgramEnd&passed=1\n", *output());
+}
+
+TEST_F(StreamingListenerTest, OnTestIterationEnd) {
+  *output() = "";
+  streamer_.OnTestIterationEnd(unit_test_, 42);
+  EXPECT_EQ("event=TestIterationEnd&passed=1&elapsed_time=0ms\n", *output());
+}
+
+TEST_F(StreamingListenerTest, OnTestCaseStart) {
+  *output() = "";
+  streamer_.OnTestCaseStart(TestCase("FooTest", "Bar", NULL, NULL));
+  EXPECT_EQ("event=TestCaseStart&name=FooTest\n", *output());
+}
+
+TEST_F(StreamingListenerTest, OnTestCaseEnd) {
+  *output() = "";
+  streamer_.OnTestCaseEnd(TestCase("FooTest", "Bar", NULL, NULL));
+  EXPECT_EQ("event=TestCaseEnd&passed=1&elapsed_time=0ms\n", *output());
+}
+
+TEST_F(StreamingListenerTest, OnTestStart) {
+  *output() = "";
+  streamer_.OnTestStart(test_info_obj_);
+  EXPECT_EQ("event=TestStart&name=Bar\n", *output());
+}
+
+TEST_F(StreamingListenerTest, OnTestEnd) {
+  *output() = "";
+  streamer_.OnTestEnd(test_info_obj_);
+  EXPECT_EQ("event=TestEnd&passed=1&elapsed_time=0ms\n", *output());
+}
+
+TEST_F(StreamingListenerTest, OnTestPartResult) {
+  *output() = "";
+  streamer_.OnTestPartResult(TestPartResult(
+      TestPartResult::kFatalFailure, "foo.cc", 42, "failed=\n&%"));
+
+  // Meta characters in the failure message should be properly escaped.
+  EXPECT_EQ(
+      "event=TestPartResult&file=foo.cc&line=42&message=failed%3D%0A%26%25\n",
+      *output());
+}
+
+#endif  // GTEST_CAN_STREAM_RESULTS_
+
+// Provides access to otherwise private parts of the TestEventListeners class
+// that are needed to test it.
+class TestEventListenersAccessor { + public: + static TestEventListener* GetRepeater(TestEventListeners* listeners) { + return listeners->repeater(); + } + + static void SetDefaultResultPrinter(TestEventListeners* listeners, + TestEventListener* listener) { + listeners->SetDefaultResultPrinter(listener); + } + static void SetDefaultXmlGenerator(TestEventListeners* listeners, + TestEventListener* listener) { + listeners->SetDefaultXmlGenerator(listener); + } + + static bool EventForwardingEnabled(const TestEventListeners& listeners) { + return listeners.EventForwardingEnabled(); + } + + static void SuppressEventForwarding(TestEventListeners* listeners) { + listeners->SuppressEventForwarding(); + } +}; + +class UnitTestRecordPropertyTestHelper : public Test { + protected: + UnitTestRecordPropertyTestHelper() {} + + // Forwards to UnitTest::RecordProperty() to bypass access controls. + void UnitTestRecordProperty(const char* key, const std::string& value) { + unit_test_.RecordProperty(key, value); + } + + UnitTest unit_test_; +}; + +} // namespace internal +} // namespace testing + +using testing::AssertionFailure; +using testing::AssertionResult; +using testing::AssertionSuccess; +using testing::DoubleLE; +using testing::EmptyTestEventListener; +using testing::Environment; +using testing::FloatLE; +using testing::GTEST_FLAG(also_run_disabled_tests); +using testing::GTEST_FLAG(break_on_failure); +using testing::GTEST_FLAG(catch_exceptions); +using testing::GTEST_FLAG(color); +using testing::GTEST_FLAG(death_test_use_fork); +using testing::GTEST_FLAG(filter); +using testing::GTEST_FLAG(list_tests); +using testing::GTEST_FLAG(output); +using testing::GTEST_FLAG(print_time); +using testing::GTEST_FLAG(random_seed); +using testing::GTEST_FLAG(repeat); +using testing::GTEST_FLAG(show_internal_stack_frames); +using testing::GTEST_FLAG(shuffle); +using testing::GTEST_FLAG(stack_trace_depth); +using testing::GTEST_FLAG(stream_result_to); +using testing::GTEST_FLAG(throw_on_failure); +using testing::IsNotSubstring; +using testing::IsSubstring; +using testing::Message; +using testing::ScopedFakeTestPartResultReporter; +using testing::StaticAssertTypeEq; +using testing::Test; +using testing::TestCase; +using testing::TestEventListeners; +using testing::TestInfo; +using testing::TestPartResult; +using testing::TestPartResultArray; +using testing::TestProperty; +using testing::TestResult; +using testing::TimeInMillis; +using testing::UnitTest; +using testing::internal::AddReference; +using testing::internal::AlwaysFalse; +using testing::internal::AlwaysTrue; +using testing::internal::AppendUserMessage; +using testing::internal::ArrayAwareFind; +using testing::internal::ArrayEq; +using testing::internal::CodePointToUtf8; +using testing::internal::CompileAssertTypesEqual; +using testing::internal::CopyArray; +using testing::internal::CountIf; +using testing::internal::EqFailure; +using testing::internal::FloatingPoint; +using testing::internal::ForEach; +using testing::internal::FormatEpochTimeInMillisAsIso8601; +using testing::internal::FormatTimeInMillisAsSeconds; +using testing::internal::GTestFlagSaver; +using testing::internal::GetCurrentOsStackTraceExceptTop; +using testing::internal::GetElementOr; +using testing::internal::GetNextRandomSeed; +using testing::internal::GetRandomSeedFromFlag; +using testing::internal::GetTestTypeId; +using testing::internal::GetTimeInMillis; +using testing::internal::GetTypeId; +using testing::internal::GetUnitTestImpl; +using testing::internal::ImplicitlyConvertible; 
+using testing::internal::Int32; +using testing::internal::Int32FromEnvOrDie; +using testing::internal::IsAProtocolMessage; +using testing::internal::IsContainer; +using testing::internal::IsContainerTest; +using testing::internal::IsNotContainer; +using testing::internal::NativeArray; +using testing::internal::ParseInt32Flag; +using testing::internal::RelationToSourceCopy; +using testing::internal::RelationToSourceReference; +using testing::internal::RemoveConst; +using testing::internal::RemoveReference; +using testing::internal::ShouldRunTestOnShard; +using testing::internal::ShouldShard; +using testing::internal::ShouldUseColor; +using testing::internal::Shuffle; +using testing::internal::ShuffleRange; +using testing::internal::SkipPrefix; +using testing::internal::StreamableToString; +using testing::internal::String; +using testing::internal::TestEventListenersAccessor; +using testing::internal::TestResultAccessor; +using testing::internal::UInt32; +using testing::internal::WideStringToUtf8; +using testing::internal::edit_distance::CalculateOptimalEdits; +using testing::internal::edit_distance::CreateUnifiedDiff; +using testing::internal::edit_distance::EditType; +using testing::internal::kMaxRandomSeed; +using testing::internal::kTestTypeIdInGoogleTest; +using testing::kMaxStackTraceDepth; + +#if GTEST_HAS_STREAM_REDIRECTION +using testing::internal::CaptureStdout; +using testing::internal::GetCapturedStdout; +#endif + +#if GTEST_IS_THREADSAFE +using testing::internal::ThreadWithParam; +#endif + +class TestingVector : public std::vector { +}; + +::std::ostream& operator<<(::std::ostream& os, + const TestingVector& vector) { + os << "{ "; + for (size_t i = 0; i < vector.size(); i++) { + os << vector[i] << " "; + } + os << "}"; + return os; +} + +// This line tests that we can define tests in an unnamed namespace. +namespace { + +TEST(GetRandomSeedFromFlagTest, HandlesZero) { + const int seed = GetRandomSeedFromFlag(0); + EXPECT_LE(1, seed); + EXPECT_LE(seed, static_cast(kMaxRandomSeed)); +} + +TEST(GetRandomSeedFromFlagTest, PreservesValidSeed) { + EXPECT_EQ(1, GetRandomSeedFromFlag(1)); + EXPECT_EQ(2, GetRandomSeedFromFlag(2)); + EXPECT_EQ(kMaxRandomSeed - 1, GetRandomSeedFromFlag(kMaxRandomSeed - 1)); + EXPECT_EQ(static_cast(kMaxRandomSeed), + GetRandomSeedFromFlag(kMaxRandomSeed)); +} + +TEST(GetRandomSeedFromFlagTest, NormalizesInvalidSeed) { + const int seed1 = GetRandomSeedFromFlag(-1); + EXPECT_LE(1, seed1); + EXPECT_LE(seed1, static_cast(kMaxRandomSeed)); + + const int seed2 = GetRandomSeedFromFlag(kMaxRandomSeed + 1); + EXPECT_LE(1, seed2); + EXPECT_LE(seed2, static_cast(kMaxRandomSeed)); +} + +TEST(GetNextRandomSeedTest, WorksForValidInput) { + EXPECT_EQ(2, GetNextRandomSeed(1)); + EXPECT_EQ(3, GetNextRandomSeed(2)); + EXPECT_EQ(static_cast(kMaxRandomSeed), + GetNextRandomSeed(kMaxRandomSeed - 1)); + EXPECT_EQ(1, GetNextRandomSeed(kMaxRandomSeed)); + + // We deliberately don't test GetNextRandomSeed() with invalid + // inputs, as that requires death tests, which are expensive. This + // is fine as GetNextRandomSeed() is internal and has a + // straightforward definition. +} + +static void ClearCurrentTestPartResults() { + TestResultAccessor::ClearTestPartResults( + GetUnitTestImpl()->current_test_result()); +} + +// Tests GetTypeId. 
+ +TEST(GetTypeIdTest, ReturnsSameValueForSameType) { + EXPECT_EQ(GetTypeId(), GetTypeId()); + EXPECT_EQ(GetTypeId(), GetTypeId()); +} + +class SubClassOfTest : public Test {}; +class AnotherSubClassOfTest : public Test {}; + +TEST(GetTypeIdTest, ReturnsDifferentValuesForDifferentTypes) { + EXPECT_NE(GetTypeId(), GetTypeId()); + EXPECT_NE(GetTypeId(), GetTypeId()); + EXPECT_NE(GetTypeId(), GetTestTypeId()); + EXPECT_NE(GetTypeId(), GetTestTypeId()); + EXPECT_NE(GetTypeId(), GetTestTypeId()); + EXPECT_NE(GetTypeId(), GetTypeId()); +} + +// Verifies that GetTestTypeId() returns the same value, no matter it +// is called from inside Google Test or outside of it. +TEST(GetTestTypeIdTest, ReturnsTheSameValueInsideOrOutsideOfGoogleTest) { + EXPECT_EQ(kTestTypeIdInGoogleTest, GetTestTypeId()); +} + +// Tests FormatTimeInMillisAsSeconds(). + +TEST(FormatTimeInMillisAsSecondsTest, FormatsZero) { + EXPECT_EQ("0", FormatTimeInMillisAsSeconds(0)); +} + +TEST(FormatTimeInMillisAsSecondsTest, FormatsPositiveNumber) { + EXPECT_EQ("0.003", FormatTimeInMillisAsSeconds(3)); + EXPECT_EQ("0.01", FormatTimeInMillisAsSeconds(10)); + EXPECT_EQ("0.2", FormatTimeInMillisAsSeconds(200)); + EXPECT_EQ("1.2", FormatTimeInMillisAsSeconds(1200)); + EXPECT_EQ("3", FormatTimeInMillisAsSeconds(3000)); +} + +TEST(FormatTimeInMillisAsSecondsTest, FormatsNegativeNumber) { + EXPECT_EQ("-0.003", FormatTimeInMillisAsSeconds(-3)); + EXPECT_EQ("-0.01", FormatTimeInMillisAsSeconds(-10)); + EXPECT_EQ("-0.2", FormatTimeInMillisAsSeconds(-200)); + EXPECT_EQ("-1.2", FormatTimeInMillisAsSeconds(-1200)); + EXPECT_EQ("-3", FormatTimeInMillisAsSeconds(-3000)); +} + +// Tests FormatEpochTimeInMillisAsIso8601(). The correctness of conversion +// for particular dates below was verified in Python using +// datetime.datetime.fromutctimestamp(/1000). + +// FormatEpochTimeInMillisAsIso8601 depends on the current timezone, so we +// have to set up a particular timezone to obtain predictable results. +class FormatEpochTimeInMillisAsIso8601Test : public Test { + public: + // On Cygwin, GCC doesn't allow unqualified integer literals to exceed + // 32 bits, even when 64-bit integer types are available. We have to + // force the constants to have a 64-bit type here. + static const TimeInMillis kMillisPerSec = 1000; + + private: + virtual void SetUp() { + saved_tz_ = NULL; + + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996 /* getenv, strdup: deprecated */) + if (getenv("TZ")) + saved_tz_ = strdup(getenv("TZ")); + GTEST_DISABLE_MSC_WARNINGS_POP_() + + // Set up the time zone for FormatEpochTimeInMillisAsIso8601 to use. We + // cannot use the local time zone because the function's output depends + // on the time zone. + SetTimeZone("UTC+00"); + } + + virtual void TearDown() { + SetTimeZone(saved_tz_); + free(const_cast(saved_tz_)); + saved_tz_ = NULL; + } + + static void SetTimeZone(const char* time_zone) { + // tzset() distinguishes between the TZ variable being present and empty + // and not being present, so we have to consider the case of time_zone + // being NULL. +#if _MSC_VER + // ...Unless it's MSVC, whose standard library's _putenv doesn't + // distinguish between an empty and a missing variable. + const std::string env_var = + std::string("TZ=") + (time_zone ? 
time_zone : ""); + _putenv(env_var.c_str()); + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996 /* deprecated function */) + tzset(); + GTEST_DISABLE_MSC_WARNINGS_POP_() +#else + if (time_zone) { + setenv(("TZ"), time_zone, 1); + } else { + unsetenv("TZ"); + } + tzset(); +#endif + } + + const char* saved_tz_; +}; + +const TimeInMillis FormatEpochTimeInMillisAsIso8601Test::kMillisPerSec; + +TEST_F(FormatEpochTimeInMillisAsIso8601Test, PrintsTwoDigitSegments) { + EXPECT_EQ("2011-10-31T18:52:42", + FormatEpochTimeInMillisAsIso8601(1320087162 * kMillisPerSec)); +} + +TEST_F(FormatEpochTimeInMillisAsIso8601Test, MillisecondsDoNotAffectResult) { + EXPECT_EQ( + "2011-10-31T18:52:42", + FormatEpochTimeInMillisAsIso8601(1320087162 * kMillisPerSec + 234)); +} + +TEST_F(FormatEpochTimeInMillisAsIso8601Test, PrintsLeadingZeroes) { + EXPECT_EQ("2011-09-03T05:07:02", + FormatEpochTimeInMillisAsIso8601(1315026422 * kMillisPerSec)); +} + +TEST_F(FormatEpochTimeInMillisAsIso8601Test, Prints24HourTime) { + EXPECT_EQ("2011-09-28T17:08:22", + FormatEpochTimeInMillisAsIso8601(1317229702 * kMillisPerSec)); +} + +TEST_F(FormatEpochTimeInMillisAsIso8601Test, PrintsEpochStart) { + EXPECT_EQ("1970-01-01T00:00:00", FormatEpochTimeInMillisAsIso8601(0)); +} + +#if GTEST_CAN_COMPARE_NULL + +# ifdef __BORLANDC__ +// Silences warnings: "Condition is always true", "Unreachable code" +# pragma option push -w-ccc -w-rch +# endif + +// Tests that GTEST_IS_NULL_LITERAL_(x) is true when x is a null +// pointer literal. +TEST(NullLiteralTest, IsTrueForNullLiterals) { + EXPECT_TRUE(GTEST_IS_NULL_LITERAL_(NULL)); + EXPECT_TRUE(GTEST_IS_NULL_LITERAL_(0)); + EXPECT_TRUE(GTEST_IS_NULL_LITERAL_(0U)); + EXPECT_TRUE(GTEST_IS_NULL_LITERAL_(0L)); +} + +// Tests that GTEST_IS_NULL_LITERAL_(x) is false when x is not a null +// pointer literal. +TEST(NullLiteralTest, IsFalseForNonNullLiterals) { + EXPECT_FALSE(GTEST_IS_NULL_LITERAL_(1)); + EXPECT_FALSE(GTEST_IS_NULL_LITERAL_(0.0)); + EXPECT_FALSE(GTEST_IS_NULL_LITERAL_('a')); + EXPECT_FALSE(GTEST_IS_NULL_LITERAL_(static_cast(NULL))); +} + +# ifdef __BORLANDC__ +// Restores warnings after previous "#pragma option push" suppressed them. +# pragma option pop +# endif + +#endif // GTEST_CAN_COMPARE_NULL +// +// Tests CodePointToUtf8(). + +// Tests that the NUL character L'\0' is encoded correctly. +TEST(CodePointToUtf8Test, CanEncodeNul) { + EXPECT_EQ("", CodePointToUtf8(L'\0')); +} + +// Tests that ASCII characters are encoded correctly. +TEST(CodePointToUtf8Test, CanEncodeAscii) { + EXPECT_EQ("a", CodePointToUtf8(L'a')); + EXPECT_EQ("Z", CodePointToUtf8(L'Z')); + EXPECT_EQ("&", CodePointToUtf8(L'&')); + EXPECT_EQ("\x7F", CodePointToUtf8(L'\x7F')); +} + +// Tests that Unicode code-points that have 8 to 11 bits are encoded +// as 110xxxxx 10xxxxxx. +TEST(CodePointToUtf8Test, CanEncode8To11Bits) { + // 000 1101 0011 => 110-00011 10-010011 + EXPECT_EQ("\xC3\x93", CodePointToUtf8(L'\xD3')); + + // 101 0111 0110 => 110-10101 10-110110 + // Some compilers (e.g., GCC on MinGW) cannot handle non-ASCII codepoints + // in wide strings and wide chars. In order to accomodate them, we have to + // introduce such character constants as integers. + EXPECT_EQ("\xD5\xB6", + CodePointToUtf8(static_cast(0x576))); +} + +// Tests that Unicode code-points that have 12 to 16 bits are encoded +// as 1110xxxx 10xxxxxx 10xxxxxx. 
+TEST(CodePointToUtf8Test, CanEncode12To16Bits) { + // 0000 1000 1101 0011 => 1110-0000 10-100011 10-010011 + EXPECT_EQ("\xE0\xA3\x93", + CodePointToUtf8(static_cast(0x8D3))); + + // 1100 0111 0100 1101 => 1110-1100 10-011101 10-001101 + EXPECT_EQ("\xEC\x9D\x8D", + CodePointToUtf8(static_cast(0xC74D))); +} + +#if !GTEST_WIDE_STRING_USES_UTF16_ +// Tests in this group require a wchar_t to hold > 16 bits, and thus +// are skipped on Windows, Cygwin, and Symbian, where a wchar_t is +// 16-bit wide. This code may not compile on those systems. + +// Tests that Unicode code-points that have 17 to 21 bits are encoded +// as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. +TEST(CodePointToUtf8Test, CanEncode17To21Bits) { + // 0 0001 0000 1000 1101 0011 => 11110-000 10-010000 10-100011 10-010011 + EXPECT_EQ("\xF0\x90\xA3\x93", CodePointToUtf8(L'\x108D3')); + + // 0 0001 0000 0100 0000 0000 => 11110-000 10-010000 10-010000 10-000000 + EXPECT_EQ("\xF0\x90\x90\x80", CodePointToUtf8(L'\x10400')); + + // 1 0000 1000 0110 0011 0100 => 11110-100 10-001000 10-011000 10-110100 + EXPECT_EQ("\xF4\x88\x98\xB4", CodePointToUtf8(L'\x108634')); +} + +// Tests that encoding an invalid code-point generates the expected result. +TEST(CodePointToUtf8Test, CanEncodeInvalidCodePoint) { + EXPECT_EQ("(Invalid Unicode 0x1234ABCD)", CodePointToUtf8(L'\x1234ABCD')); +} + +#endif // !GTEST_WIDE_STRING_USES_UTF16_ + +// Tests WideStringToUtf8(). + +// Tests that the NUL character L'\0' is encoded correctly. +TEST(WideStringToUtf8Test, CanEncodeNul) { + EXPECT_STREQ("", WideStringToUtf8(L"", 0).c_str()); + EXPECT_STREQ("", WideStringToUtf8(L"", -1).c_str()); +} + +// Tests that ASCII strings are encoded correctly. +TEST(WideStringToUtf8Test, CanEncodeAscii) { + EXPECT_STREQ("a", WideStringToUtf8(L"a", 1).c_str()); + EXPECT_STREQ("ab", WideStringToUtf8(L"ab", 2).c_str()); + EXPECT_STREQ("a", WideStringToUtf8(L"a", -1).c_str()); + EXPECT_STREQ("ab", WideStringToUtf8(L"ab", -1).c_str()); +} + +// Tests that Unicode code-points that have 8 to 11 bits are encoded +// as 110xxxxx 10xxxxxx. +TEST(WideStringToUtf8Test, CanEncode8To11Bits) { + // 000 1101 0011 => 110-00011 10-010011 + EXPECT_STREQ("\xC3\x93", WideStringToUtf8(L"\xD3", 1).c_str()); + EXPECT_STREQ("\xC3\x93", WideStringToUtf8(L"\xD3", -1).c_str()); + + // 101 0111 0110 => 110-10101 10-110110 + const wchar_t s[] = { 0x576, '\0' }; + EXPECT_STREQ("\xD5\xB6", WideStringToUtf8(s, 1).c_str()); + EXPECT_STREQ("\xD5\xB6", WideStringToUtf8(s, -1).c_str()); +} + +// Tests that Unicode code-points that have 12 to 16 bits are encoded +// as 1110xxxx 10xxxxxx 10xxxxxx. +TEST(WideStringToUtf8Test, CanEncode12To16Bits) { + // 0000 1000 1101 0011 => 1110-0000 10-100011 10-010011 + const wchar_t s1[] = { 0x8D3, '\0' }; + EXPECT_STREQ("\xE0\xA3\x93", WideStringToUtf8(s1, 1).c_str()); + EXPECT_STREQ("\xE0\xA3\x93", WideStringToUtf8(s1, -1).c_str()); + + // 1100 0111 0100 1101 => 1110-1100 10-011101 10-001101 + const wchar_t s2[] = { 0xC74D, '\0' }; + EXPECT_STREQ("\xEC\x9D\x8D", WideStringToUtf8(s2, 1).c_str()); + EXPECT_STREQ("\xEC\x9D\x8D", WideStringToUtf8(s2, -1).c_str()); +} + +// Tests that the conversion stops when the function encounters \0 character. +TEST(WideStringToUtf8Test, StopsOnNulCharacter) { + EXPECT_STREQ("ABC", WideStringToUtf8(L"ABC\0XYZ", 100).c_str()); +} + +// Tests that the conversion stops when the function reaches the limit +// specified by the 'length' parameter. 
+TEST(WideStringToUtf8Test, StopsWhenLengthLimitReached) { + EXPECT_STREQ("ABC", WideStringToUtf8(L"ABCDEF", 3).c_str()); +} + +#if !GTEST_WIDE_STRING_USES_UTF16_ +// Tests that Unicode code-points that have 17 to 21 bits are encoded +// as 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. This code may not compile +// on the systems using UTF-16 encoding. +TEST(WideStringToUtf8Test, CanEncode17To21Bits) { + // 0 0001 0000 1000 1101 0011 => 11110-000 10-010000 10-100011 10-010011 + EXPECT_STREQ("\xF0\x90\xA3\x93", WideStringToUtf8(L"\x108D3", 1).c_str()); + EXPECT_STREQ("\xF0\x90\xA3\x93", WideStringToUtf8(L"\x108D3", -1).c_str()); + + // 1 0000 1000 0110 0011 0100 => 11110-100 10-001000 10-011000 10-110100 + EXPECT_STREQ("\xF4\x88\x98\xB4", WideStringToUtf8(L"\x108634", 1).c_str()); + EXPECT_STREQ("\xF4\x88\x98\xB4", WideStringToUtf8(L"\x108634", -1).c_str()); +} + +// Tests that encoding an invalid code-point generates the expected result. +TEST(WideStringToUtf8Test, CanEncodeInvalidCodePoint) { + EXPECT_STREQ("(Invalid Unicode 0xABCDFF)", + WideStringToUtf8(L"\xABCDFF", -1).c_str()); +} +#else // !GTEST_WIDE_STRING_USES_UTF16_ +// Tests that surrogate pairs are encoded correctly on the systems using +// UTF-16 encoding in the wide strings. +TEST(WideStringToUtf8Test, CanEncodeValidUtf16SUrrogatePairs) { + const wchar_t s[] = { 0xD801, 0xDC00, '\0' }; + EXPECT_STREQ("\xF0\x90\x90\x80", WideStringToUtf8(s, -1).c_str()); +} + +// Tests that encoding an invalid UTF-16 surrogate pair +// generates the expected result. +TEST(WideStringToUtf8Test, CanEncodeInvalidUtf16SurrogatePair) { + // Leading surrogate is at the end of the string. + const wchar_t s1[] = { 0xD800, '\0' }; + EXPECT_STREQ("\xED\xA0\x80", WideStringToUtf8(s1, -1).c_str()); + // Leading surrogate is not followed by the trailing surrogate. + const wchar_t s2[] = { 0xD800, 'M', '\0' }; + EXPECT_STREQ("\xED\xA0\x80M", WideStringToUtf8(s2, -1).c_str()); + // Trailing surrogate appearas without a leading surrogate. + const wchar_t s3[] = { 0xDC00, 'P', 'Q', 'R', '\0' }; + EXPECT_STREQ("\xED\xB0\x80PQR", WideStringToUtf8(s3, -1).c_str()); +} +#endif // !GTEST_WIDE_STRING_USES_UTF16_ + +// Tests that codepoint concatenation works correctly. +#if !GTEST_WIDE_STRING_USES_UTF16_ +TEST(WideStringToUtf8Test, ConcatenatesCodepointsCorrectly) { + const wchar_t s[] = { 0x108634, 0xC74D, '\n', 0x576, 0x8D3, 0x108634, '\0'}; + EXPECT_STREQ( + "\xF4\x88\x98\xB4" + "\xEC\x9D\x8D" + "\n" + "\xD5\xB6" + "\xE0\xA3\x93" + "\xF4\x88\x98\xB4", + WideStringToUtf8(s, -1).c_str()); +} +#else +TEST(WideStringToUtf8Test, ConcatenatesCodepointsCorrectly) { + const wchar_t s[] = { 0xC74D, '\n', 0x576, 0x8D3, '\0'}; + EXPECT_STREQ( + "\xEC\x9D\x8D" "\n" "\xD5\xB6" "\xE0\xA3\x93", + WideStringToUtf8(s, -1).c_str()); +} +#endif // !GTEST_WIDE_STRING_USES_UTF16_ + +// Tests the Random class. 
+ +TEST(RandomDeathTest, GeneratesCrashesOnInvalidRange) { + testing::internal::Random random(42); + EXPECT_DEATH_IF_SUPPORTED( + random.Generate(0), + "Cannot generate a number in the range \\[0, 0\\)"); + EXPECT_DEATH_IF_SUPPORTED( + random.Generate(testing::internal::Random::kMaxRange + 1), + "Generation of a number in \\[0, 2147483649\\) was requested, " + "but this can only generate numbers in \\[0, 2147483648\\)"); +} + +TEST(RandomTest, GeneratesNumbersWithinRange) { + const UInt32 kRange = 10000; + testing::internal::Random random(12345); + for (int i = 0; i < 10; i++) { + EXPECT_LT(random.Generate(kRange), kRange) << " for iteration " << i; + } + + testing::internal::Random random2(testing::internal::Random::kMaxRange); + for (int i = 0; i < 10; i++) { + EXPECT_LT(random2.Generate(kRange), kRange) << " for iteration " << i; + } +} + +TEST(RandomTest, RepeatsWhenReseeded) { + const int kSeed = 123; + const int kArraySize = 10; + const UInt32 kRange = 10000; + UInt32 values[kArraySize]; + + testing::internal::Random random(kSeed); + for (int i = 0; i < kArraySize; i++) { + values[i] = random.Generate(kRange); + } + + random.Reseed(kSeed); + for (int i = 0; i < kArraySize; i++) { + EXPECT_EQ(values[i], random.Generate(kRange)) << " for iteration " << i; + } +} + +// Tests STL container utilities. + +// Tests CountIf(). + +static bool IsPositive(int n) { return n > 0; } + +TEST(ContainerUtilityTest, CountIf) { + std::vector v; + EXPECT_EQ(0, CountIf(v, IsPositive)); // Works for an empty container. + + v.push_back(-1); + v.push_back(0); + EXPECT_EQ(0, CountIf(v, IsPositive)); // Works when no value satisfies. + + v.push_back(2); + v.push_back(-10); + v.push_back(10); + EXPECT_EQ(2, CountIf(v, IsPositive)); +} + +// Tests ForEach(). + +static int g_sum = 0; +static void Accumulate(int n) { g_sum += n; } + +TEST(ContainerUtilityTest, ForEach) { + std::vector v; + g_sum = 0; + ForEach(v, Accumulate); + EXPECT_EQ(0, g_sum); // Works for an empty container; + + g_sum = 0; + v.push_back(1); + ForEach(v, Accumulate); + EXPECT_EQ(1, g_sum); // Works for a container with one element. + + g_sum = 0; + v.push_back(20); + v.push_back(300); + ForEach(v, Accumulate); + EXPECT_EQ(321, g_sum); +} + +// Tests GetElementOr(). 
+TEST(ContainerUtilityTest, GetElementOr) { + std::vector a; + EXPECT_EQ('x', GetElementOr(a, 0, 'x')); + + a.push_back('a'); + a.push_back('b'); + EXPECT_EQ('a', GetElementOr(a, 0, 'x')); + EXPECT_EQ('b', GetElementOr(a, 1, 'x')); + EXPECT_EQ('x', GetElementOr(a, -2, 'x')); + EXPECT_EQ('x', GetElementOr(a, 2, 'x')); +} + +TEST(ContainerUtilityDeathTest, ShuffleRange) { + std::vector a; + a.push_back(0); + a.push_back(1); + a.push_back(2); + testing::internal::Random random(1); + + EXPECT_DEATH_IF_SUPPORTED( + ShuffleRange(&random, -1, 1, &a), + "Invalid shuffle range start -1: must be in range \\[0, 3\\]"); + EXPECT_DEATH_IF_SUPPORTED( + ShuffleRange(&random, 4, 4, &a), + "Invalid shuffle range start 4: must be in range \\[0, 3\\]"); + EXPECT_DEATH_IF_SUPPORTED( + ShuffleRange(&random, 3, 2, &a), + "Invalid shuffle range finish 2: must be in range \\[3, 3\\]"); + EXPECT_DEATH_IF_SUPPORTED( + ShuffleRange(&random, 3, 4, &a), + "Invalid shuffle range finish 4: must be in range \\[3, 3\\]"); +} + +class VectorShuffleTest : public Test { + protected: + static const int kVectorSize = 20; + + VectorShuffleTest() : random_(1) { + for (int i = 0; i < kVectorSize; i++) { + vector_.push_back(i); + } + } + + static bool VectorIsCorrupt(const TestingVector& vector) { + if (kVectorSize != static_cast(vector.size())) { + return true; + } + + bool found_in_vector[kVectorSize] = { false }; + for (size_t i = 0; i < vector.size(); i++) { + const int e = vector[i]; + if (e < 0 || e >= kVectorSize || found_in_vector[e]) { + return true; + } + found_in_vector[e] = true; + } + + // Vector size is correct, elements' range is correct, no + // duplicate elements. Therefore no corruption has occurred. + return false; + } + + static bool VectorIsNotCorrupt(const TestingVector& vector) { + return !VectorIsCorrupt(vector); + } + + static bool RangeIsShuffled(const TestingVector& vector, int begin, int end) { + for (int i = begin; i < end; i++) { + if (i != vector[i]) { + return true; + } + } + return false; + } + + static bool RangeIsUnshuffled( + const TestingVector& vector, int begin, int end) { + return !RangeIsShuffled(vector, begin, end); + } + + static bool VectorIsShuffled(const TestingVector& vector) { + return RangeIsShuffled(vector, 0, static_cast(vector.size())); + } + + static bool VectorIsUnshuffled(const TestingVector& vector) { + return !VectorIsShuffled(vector); + } + + testing::internal::Random random_; + TestingVector vector_; +}; // class VectorShuffleTest + +const int VectorShuffleTest::kVectorSize; + +TEST_F(VectorShuffleTest, HandlesEmptyRange) { + // Tests an empty range at the beginning... + ShuffleRange(&random_, 0, 0, &vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + ASSERT_PRED1(VectorIsUnshuffled, vector_); + + // ...in the middle... + ShuffleRange(&random_, kVectorSize/2, kVectorSize/2, &vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + ASSERT_PRED1(VectorIsUnshuffled, vector_); + + // ...at the end... + ShuffleRange(&random_, kVectorSize - 1, kVectorSize - 1, &vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + ASSERT_PRED1(VectorIsUnshuffled, vector_); + + // ...and past the end. + ShuffleRange(&random_, kVectorSize, kVectorSize, &vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + ASSERT_PRED1(VectorIsUnshuffled, vector_); +} + +TEST_F(VectorShuffleTest, HandlesRangeOfSizeOne) { + // Tests a size one range at the beginning... 
+ ShuffleRange(&random_, 0, 1, &vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + ASSERT_PRED1(VectorIsUnshuffled, vector_); + + // ...in the middle... + ShuffleRange(&random_, kVectorSize/2, kVectorSize/2 + 1, &vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + ASSERT_PRED1(VectorIsUnshuffled, vector_); + + // ...and at the end. + ShuffleRange(&random_, kVectorSize - 1, kVectorSize, &vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + ASSERT_PRED1(VectorIsUnshuffled, vector_); +} + +// Because we use our own random number generator and a fixed seed, +// we can guarantee that the following "random" tests will succeed. + +TEST_F(VectorShuffleTest, ShufflesEntireVector) { + Shuffle(&random_, &vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + EXPECT_FALSE(VectorIsUnshuffled(vector_)) << vector_; + + // Tests the first and last elements in particular to ensure that + // there are no off-by-one problems in our shuffle algorithm. + EXPECT_NE(0, vector_[0]); + EXPECT_NE(kVectorSize - 1, vector_[kVectorSize - 1]); +} + +TEST_F(VectorShuffleTest, ShufflesStartOfVector) { + const int kRangeSize = kVectorSize/2; + + ShuffleRange(&random_, 0, kRangeSize, &vector_); + + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + EXPECT_PRED3(RangeIsShuffled, vector_, 0, kRangeSize); + EXPECT_PRED3(RangeIsUnshuffled, vector_, kRangeSize, kVectorSize); +} + +TEST_F(VectorShuffleTest, ShufflesEndOfVector) { + const int kRangeSize = kVectorSize / 2; + ShuffleRange(&random_, kRangeSize, kVectorSize, &vector_); + + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + EXPECT_PRED3(RangeIsUnshuffled, vector_, 0, kRangeSize); + EXPECT_PRED3(RangeIsShuffled, vector_, kRangeSize, kVectorSize); +} + +TEST_F(VectorShuffleTest, ShufflesMiddleOfVector) { + int kRangeSize = kVectorSize/3; + ShuffleRange(&random_, kRangeSize, 2*kRangeSize, &vector_); + + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + EXPECT_PRED3(RangeIsUnshuffled, vector_, 0, kRangeSize); + EXPECT_PRED3(RangeIsShuffled, vector_, kRangeSize, 2*kRangeSize); + EXPECT_PRED3(RangeIsUnshuffled, vector_, 2*kRangeSize, kVectorSize); +} + +TEST_F(VectorShuffleTest, ShufflesRepeatably) { + TestingVector vector2; + for (int i = 0; i < kVectorSize; i++) { + vector2.push_back(i); + } + + random_.Reseed(1234); + Shuffle(&random_, &vector_); + random_.Reseed(1234); + Shuffle(&random_, &vector2); + + ASSERT_PRED1(VectorIsNotCorrupt, vector_); + ASSERT_PRED1(VectorIsNotCorrupt, vector2); + + for (int i = 0; i < kVectorSize; i++) { + EXPECT_EQ(vector_[i], vector2[i]) << " where i is " << i; + } +} + +// Tests the size of the AssertHelper class. + +TEST(AssertHelperTest, AssertHelperIsSmall) { + // To avoid breaking clients that use lots of assertions in one + // function, we cannot grow the size of AssertHelper. + EXPECT_LE(sizeof(testing::internal::AssertHelper), sizeof(void*)); +} + +// Tests String::EndsWithCaseInsensitive(). +TEST(StringTest, EndsWithCaseInsensitive) { + EXPECT_TRUE(String::EndsWithCaseInsensitive("foobar", "BAR")); + EXPECT_TRUE(String::EndsWithCaseInsensitive("foobaR", "bar")); + EXPECT_TRUE(String::EndsWithCaseInsensitive("foobar", "")); + EXPECT_TRUE(String::EndsWithCaseInsensitive("", "")); + + EXPECT_FALSE(String::EndsWithCaseInsensitive("Foobar", "foo")); + EXPECT_FALSE(String::EndsWithCaseInsensitive("foobar", "Foo")); + EXPECT_FALSE(String::EndsWithCaseInsensitive("", "foo")); +} + +// C++Builder's preprocessor is buggy; it fails to expand macros that +// appear in macro parameters after wide char literals. 
Provide an alias +// for NULL as a workaround. +static const wchar_t* const kNull = NULL; + +// Tests String::CaseInsensitiveWideCStringEquals +TEST(StringTest, CaseInsensitiveWideCStringEquals) { + EXPECT_TRUE(String::CaseInsensitiveWideCStringEquals(NULL, NULL)); + EXPECT_FALSE(String::CaseInsensitiveWideCStringEquals(kNull, L"")); + EXPECT_FALSE(String::CaseInsensitiveWideCStringEquals(L"", kNull)); + EXPECT_FALSE(String::CaseInsensitiveWideCStringEquals(kNull, L"foobar")); + EXPECT_FALSE(String::CaseInsensitiveWideCStringEquals(L"foobar", kNull)); + EXPECT_TRUE(String::CaseInsensitiveWideCStringEquals(L"foobar", L"foobar")); + EXPECT_TRUE(String::CaseInsensitiveWideCStringEquals(L"foobar", L"FOOBAR")); + EXPECT_TRUE(String::CaseInsensitiveWideCStringEquals(L"FOOBAR", L"foobar")); +} + +#if GTEST_OS_WINDOWS + +// Tests String::ShowWideCString(). +TEST(StringTest, ShowWideCString) { + EXPECT_STREQ("(null)", + String::ShowWideCString(NULL).c_str()); + EXPECT_STREQ("", String::ShowWideCString(L"").c_str()); + EXPECT_STREQ("foo", String::ShowWideCString(L"foo").c_str()); +} + +# if GTEST_OS_WINDOWS_MOBILE +TEST(StringTest, AnsiAndUtf16Null) { + EXPECT_EQ(NULL, String::AnsiToUtf16(NULL)); + EXPECT_EQ(NULL, String::Utf16ToAnsi(NULL)); +} + +TEST(StringTest, AnsiAndUtf16ConvertBasic) { + const char* ansi = String::Utf16ToAnsi(L"str"); + EXPECT_STREQ("str", ansi); + delete [] ansi; + const WCHAR* utf16 = String::AnsiToUtf16("str"); + EXPECT_EQ(0, wcsncmp(L"str", utf16, 3)); + delete [] utf16; +} + +TEST(StringTest, AnsiAndUtf16ConvertPathChars) { + const char* ansi = String::Utf16ToAnsi(L".:\\ \"*?"); + EXPECT_STREQ(".:\\ \"*?", ansi); + delete [] ansi; + const WCHAR* utf16 = String::AnsiToUtf16(".:\\ \"*?"); + EXPECT_EQ(0, wcsncmp(L".:\\ \"*?", utf16, 3)); + delete [] utf16; +} +# endif // GTEST_OS_WINDOWS_MOBILE + +#endif // GTEST_OS_WINDOWS + +// Tests TestProperty construction. +TEST(TestPropertyTest, StringValue) { + TestProperty property("key", "1"); + EXPECT_STREQ("key", property.key()); + EXPECT_STREQ("1", property.value()); +} + +// Tests TestProperty replacing a value. +TEST(TestPropertyTest, ReplaceStringValue) { + TestProperty property("key", "1"); + EXPECT_STREQ("1", property.value()); + property.SetValue("2"); + EXPECT_STREQ("2", property.value()); +} + +// AddFatalFailure() and AddNonfatalFailure() must be stand-alone +// functions (i.e. their definitions cannot be inlined at the call +// sites), or C++Builder won't compile the code. +static void AddFatalFailure() { + FAIL() << "Expected fatal failure."; +} + +static void AddNonfatalFailure() { + ADD_FAILURE() << "Expected non-fatal failure."; +} + +class ScopedFakeTestPartResultReporterTest : public Test { + public: // Must be public and not protected due to a bug in g++ 3.4.2. + enum FailureMode { + FATAL_FAILURE, + NONFATAL_FAILURE + }; + static void AddFailure(FailureMode failure) { + if (failure == FATAL_FAILURE) { + AddFatalFailure(); + } else { + AddNonfatalFailure(); + } + } +}; + +// Tests that ScopedFakeTestPartResultReporter intercepts test +// failures. 
+TEST_F(ScopedFakeTestPartResultReporterTest, InterceptsTestFailures) { + TestPartResultArray results; + { + ScopedFakeTestPartResultReporter reporter( + ScopedFakeTestPartResultReporter::INTERCEPT_ONLY_CURRENT_THREAD, + &results); + AddFailure(NONFATAL_FAILURE); + AddFailure(FATAL_FAILURE); + } + + EXPECT_EQ(2, results.size()); + EXPECT_TRUE(results.GetTestPartResult(0).nonfatally_failed()); + EXPECT_TRUE(results.GetTestPartResult(1).fatally_failed()); +} + +TEST_F(ScopedFakeTestPartResultReporterTest, DeprecatedConstructor) { + TestPartResultArray results; + { + // Tests, that the deprecated constructor still works. + ScopedFakeTestPartResultReporter reporter(&results); + AddFailure(NONFATAL_FAILURE); + } + EXPECT_EQ(1, results.size()); +} + +#if GTEST_IS_THREADSAFE + +class ScopedFakeTestPartResultReporterWithThreadsTest + : public ScopedFakeTestPartResultReporterTest { + protected: + static void AddFailureInOtherThread(FailureMode failure) { + ThreadWithParam thread(&AddFailure, failure, NULL); + thread.Join(); + } +}; + +TEST_F(ScopedFakeTestPartResultReporterWithThreadsTest, + InterceptsTestFailuresInAllThreads) { + TestPartResultArray results; + { + ScopedFakeTestPartResultReporter reporter( + ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, &results); + AddFailure(NONFATAL_FAILURE); + AddFailure(FATAL_FAILURE); + AddFailureInOtherThread(NONFATAL_FAILURE); + AddFailureInOtherThread(FATAL_FAILURE); + } + + EXPECT_EQ(4, results.size()); + EXPECT_TRUE(results.GetTestPartResult(0).nonfatally_failed()); + EXPECT_TRUE(results.GetTestPartResult(1).fatally_failed()); + EXPECT_TRUE(results.GetTestPartResult(2).nonfatally_failed()); + EXPECT_TRUE(results.GetTestPartResult(3).fatally_failed()); +} + +#endif // GTEST_IS_THREADSAFE + +// Tests EXPECT_FATAL_FAILURE{,ON_ALL_THREADS}. Makes sure that they +// work even if the failure is generated in a called function rather than +// the current context. + +typedef ScopedFakeTestPartResultReporterTest ExpectFatalFailureTest; + +TEST_F(ExpectFatalFailureTest, CatchesFatalFaliure) { + EXPECT_FATAL_FAILURE(AddFatalFailure(), "Expected fatal failure."); +} + +#if GTEST_HAS_GLOBAL_STRING +TEST_F(ExpectFatalFailureTest, AcceptsStringObject) { + EXPECT_FATAL_FAILURE(AddFatalFailure(), ::string("Expected fatal failure.")); +} +#endif + +TEST_F(ExpectFatalFailureTest, AcceptsStdStringObject) { + EXPECT_FATAL_FAILURE(AddFatalFailure(), + ::std::string("Expected fatal failure.")); +} + +TEST_F(ExpectFatalFailureTest, CatchesFatalFailureOnAllThreads) { + // We have another test below to verify that the macro catches fatal + // failures generated on another thread. + EXPECT_FATAL_FAILURE_ON_ALL_THREADS(AddFatalFailure(), + "Expected fatal failure."); +} + +#ifdef __BORLANDC__ +// Silences warnings: "Condition is always true" +# pragma option push -w-ccc +#endif + +// Tests that EXPECT_FATAL_FAILURE() can be used in a non-void +// function even when the statement in it contains ASSERT_*. + +int NonVoidFunction() { + EXPECT_FATAL_FAILURE(ASSERT_TRUE(false), ""); + EXPECT_FATAL_FAILURE_ON_ALL_THREADS(FAIL(), ""); + return 0; +} + +TEST_F(ExpectFatalFailureTest, CanBeUsedInNonVoidFunction) { + NonVoidFunction(); +} + +// Tests that EXPECT_FATAL_FAILURE(statement, ...) doesn't abort the +// current function even though 'statement' generates a fatal failure. 
+ +void DoesNotAbortHelper(bool* aborted) { + EXPECT_FATAL_FAILURE(ASSERT_TRUE(false), ""); + EXPECT_FATAL_FAILURE_ON_ALL_THREADS(FAIL(), ""); + + *aborted = false; +} + +#ifdef __BORLANDC__ +// Restores warnings after previous "#pragma option push" suppressed them. +# pragma option pop +#endif + +TEST_F(ExpectFatalFailureTest, DoesNotAbort) { + bool aborted = true; + DoesNotAbortHelper(&aborted); + EXPECT_FALSE(aborted); +} + +// Tests that the EXPECT_FATAL_FAILURE{,_ON_ALL_THREADS} accepts a +// statement that contains a macro which expands to code containing an +// unprotected comma. + +static int global_var = 0; +#define GTEST_USE_UNPROTECTED_COMMA_ global_var++, global_var++ + +TEST_F(ExpectFatalFailureTest, AcceptsMacroThatExpandsToUnprotectedComma) { +#ifndef __BORLANDC__ + // ICE's in C++Builder. + EXPECT_FATAL_FAILURE({ + GTEST_USE_UNPROTECTED_COMMA_; + AddFatalFailure(); + }, ""); +#endif + + EXPECT_FATAL_FAILURE_ON_ALL_THREADS({ + GTEST_USE_UNPROTECTED_COMMA_; + AddFatalFailure(); + }, ""); +} + +// Tests EXPECT_NONFATAL_FAILURE{,ON_ALL_THREADS}. + +typedef ScopedFakeTestPartResultReporterTest ExpectNonfatalFailureTest; + +TEST_F(ExpectNonfatalFailureTest, CatchesNonfatalFailure) { + EXPECT_NONFATAL_FAILURE(AddNonfatalFailure(), + "Expected non-fatal failure."); +} + +#if GTEST_HAS_GLOBAL_STRING +TEST_F(ExpectNonfatalFailureTest, AcceptsStringObject) { + EXPECT_NONFATAL_FAILURE(AddNonfatalFailure(), + ::string("Expected non-fatal failure.")); +} +#endif + +TEST_F(ExpectNonfatalFailureTest, AcceptsStdStringObject) { + EXPECT_NONFATAL_FAILURE(AddNonfatalFailure(), + ::std::string("Expected non-fatal failure.")); +} + +TEST_F(ExpectNonfatalFailureTest, CatchesNonfatalFailureOnAllThreads) { + // We have another test below to verify that the macro catches + // non-fatal failures generated on another thread. + EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(AddNonfatalFailure(), + "Expected non-fatal failure."); +} + +// Tests that the EXPECT_NONFATAL_FAILURE{,_ON_ALL_THREADS} accepts a +// statement that contains a macro which expands to code containing an +// unprotected comma. +TEST_F(ExpectNonfatalFailureTest, AcceptsMacroThatExpandsToUnprotectedComma) { + EXPECT_NONFATAL_FAILURE({ + GTEST_USE_UNPROTECTED_COMMA_; + AddNonfatalFailure(); + }, ""); + + EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS({ + GTEST_USE_UNPROTECTED_COMMA_; + AddNonfatalFailure(); + }, ""); +} + +#if GTEST_IS_THREADSAFE + +typedef ScopedFakeTestPartResultReporterWithThreadsTest + ExpectFailureWithThreadsTest; + +TEST_F(ExpectFailureWithThreadsTest, ExpectFatalFailureOnAllThreads) { + EXPECT_FATAL_FAILURE_ON_ALL_THREADS(AddFailureInOtherThread(FATAL_FAILURE), + "Expected fatal failure."); +} + +TEST_F(ExpectFailureWithThreadsTest, ExpectNonFatalFailureOnAllThreads) { + EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS( + AddFailureInOtherThread(NONFATAL_FAILURE), "Expected non-fatal failure."); +} + +#endif // GTEST_IS_THREADSAFE + +// Tests the TestProperty class. + +TEST(TestPropertyTest, ConstructorWorks) { + const TestProperty property("key", "value"); + EXPECT_STREQ("key", property.key()); + EXPECT_STREQ("value", property.value()); +} + +TEST(TestPropertyTest, SetValue) { + TestProperty property("key", "value_1"); + EXPECT_STREQ("key", property.key()); + property.SetValue("value_2"); + EXPECT_STREQ("key", property.key()); + EXPECT_STREQ("value_2", property.value()); +} + +// Tests the TestResult class + +// The test fixture for testing TestResult. 
+class TestResultTest : public Test { + protected: + typedef std::vector TPRVector; + + // We make use of 2 TestPartResult objects, + TestPartResult * pr1, * pr2; + + // ... and 3 TestResult objects. + TestResult * r0, * r1, * r2; + + virtual void SetUp() { + // pr1 is for success. + pr1 = new TestPartResult(TestPartResult::kSuccess, + "foo/bar.cc", + 10, + "Success!"); + + // pr2 is for fatal failure. + pr2 = new TestPartResult(TestPartResult::kFatalFailure, + "foo/bar.cc", + -1, // This line number means "unknown" + "Failure!"); + + // Creates the TestResult objects. + r0 = new TestResult(); + r1 = new TestResult(); + r2 = new TestResult(); + + // In order to test TestResult, we need to modify its internal + // state, in particular the TestPartResult vector it holds. + // test_part_results() returns a const reference to this vector. + // We cast it to a non-const object s.t. it can be modified (yes, + // this is a hack). + TPRVector* results1 = const_cast( + &TestResultAccessor::test_part_results(*r1)); + TPRVector* results2 = const_cast( + &TestResultAccessor::test_part_results(*r2)); + + // r0 is an empty TestResult. + + // r1 contains a single SUCCESS TestPartResult. + results1->push_back(*pr1); + + // r2 contains a SUCCESS, and a FAILURE. + results2->push_back(*pr1); + results2->push_back(*pr2); + } + + virtual void TearDown() { + delete pr1; + delete pr2; + + delete r0; + delete r1; + delete r2; + } + + // Helper that compares two two TestPartResults. + static void CompareTestPartResult(const TestPartResult& expected, + const TestPartResult& actual) { + EXPECT_EQ(expected.type(), actual.type()); + EXPECT_STREQ(expected.file_name(), actual.file_name()); + EXPECT_EQ(expected.line_number(), actual.line_number()); + EXPECT_STREQ(expected.summary(), actual.summary()); + EXPECT_STREQ(expected.message(), actual.message()); + EXPECT_EQ(expected.passed(), actual.passed()); + EXPECT_EQ(expected.failed(), actual.failed()); + EXPECT_EQ(expected.nonfatally_failed(), actual.nonfatally_failed()); + EXPECT_EQ(expected.fatally_failed(), actual.fatally_failed()); + } +}; + +// Tests TestResult::total_part_count(). +TEST_F(TestResultTest, total_part_count) { + ASSERT_EQ(0, r0->total_part_count()); + ASSERT_EQ(1, r1->total_part_count()); + ASSERT_EQ(2, r2->total_part_count()); +} + +// Tests TestResult::Passed(). +TEST_F(TestResultTest, Passed) { + ASSERT_TRUE(r0->Passed()); + ASSERT_TRUE(r1->Passed()); + ASSERT_FALSE(r2->Passed()); +} + +// Tests TestResult::Failed(). +TEST_F(TestResultTest, Failed) { + ASSERT_FALSE(r0->Failed()); + ASSERT_FALSE(r1->Failed()); + ASSERT_TRUE(r2->Failed()); +} + +// Tests TestResult::GetTestPartResult(). + +typedef TestResultTest TestResultDeathTest; + +TEST_F(TestResultDeathTest, GetTestPartResult) { + CompareTestPartResult(*pr1, r2->GetTestPartResult(0)); + CompareTestPartResult(*pr2, r2->GetTestPartResult(1)); + EXPECT_DEATH_IF_SUPPORTED(r2->GetTestPartResult(2), ""); + EXPECT_DEATH_IF_SUPPORTED(r2->GetTestPartResult(-1), ""); +} + +// Tests TestResult has no properties when none are added. +TEST(TestResultPropertyTest, NoPropertiesFoundWhenNoneAreAdded) { + TestResult test_result; + ASSERT_EQ(0, test_result.test_property_count()); +} + +// Tests TestResult has the expected property when added. 
+TEST(TestResultPropertyTest, OnePropertyFoundWhenAdded) { + TestResult test_result; + TestProperty property("key_1", "1"); + TestResultAccessor::RecordProperty(&test_result, "testcase", property); + ASSERT_EQ(1, test_result.test_property_count()); + const TestProperty& actual_property = test_result.GetTestProperty(0); + EXPECT_STREQ("key_1", actual_property.key()); + EXPECT_STREQ("1", actual_property.value()); +} + +// Tests TestResult has multiple properties when added. +TEST(TestResultPropertyTest, MultiplePropertiesFoundWhenAdded) { + TestResult test_result; + TestProperty property_1("key_1", "1"); + TestProperty property_2("key_2", "2"); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_1); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_2); + ASSERT_EQ(2, test_result.test_property_count()); + const TestProperty& actual_property_1 = test_result.GetTestProperty(0); + EXPECT_STREQ("key_1", actual_property_1.key()); + EXPECT_STREQ("1", actual_property_1.value()); + + const TestProperty& actual_property_2 = test_result.GetTestProperty(1); + EXPECT_STREQ("key_2", actual_property_2.key()); + EXPECT_STREQ("2", actual_property_2.value()); +} + +// Tests TestResult::RecordProperty() overrides values for duplicate keys. +TEST(TestResultPropertyTest, OverridesValuesForDuplicateKeys) { + TestResult test_result; + TestProperty property_1_1("key_1", "1"); + TestProperty property_2_1("key_2", "2"); + TestProperty property_1_2("key_1", "12"); + TestProperty property_2_2("key_2", "22"); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_1_1); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_2_1); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_1_2); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_2_2); + + ASSERT_EQ(2, test_result.test_property_count()); + const TestProperty& actual_property_1 = test_result.GetTestProperty(0); + EXPECT_STREQ("key_1", actual_property_1.key()); + EXPECT_STREQ("12", actual_property_1.value()); + + const TestProperty& actual_property_2 = test_result.GetTestProperty(1); + EXPECT_STREQ("key_2", actual_property_2.key()); + EXPECT_STREQ("22", actual_property_2.value()); +} + +// Tests TestResult::GetTestProperty(). +TEST(TestResultPropertyTest, GetTestProperty) { + TestResult test_result; + TestProperty property_1("key_1", "1"); + TestProperty property_2("key_2", "2"); + TestProperty property_3("key_3", "3"); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_1); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_2); + TestResultAccessor::RecordProperty(&test_result, "testcase", property_3); + + const TestProperty& fetched_property_1 = test_result.GetTestProperty(0); + const TestProperty& fetched_property_2 = test_result.GetTestProperty(1); + const TestProperty& fetched_property_3 = test_result.GetTestProperty(2); + + EXPECT_STREQ("key_1", fetched_property_1.key()); + EXPECT_STREQ("1", fetched_property_1.value()); + + EXPECT_STREQ("key_2", fetched_property_2.key()); + EXPECT_STREQ("2", fetched_property_2.value()); + + EXPECT_STREQ("key_3", fetched_property_3.key()); + EXPECT_STREQ("3", fetched_property_3.value()); + + EXPECT_DEATH_IF_SUPPORTED(test_result.GetTestProperty(3), ""); + EXPECT_DEATH_IF_SUPPORTED(test_result.GetTestProperty(-1), ""); +} + +// Tests the Test class. 
+// +// It's difficult to test every public method of this class (we are +// already stretching the limit of Google Test by using it to test itself!). +// Fortunately, we don't have to do that, as we are already testing +// the functionalities of the Test class extensively by using Google Test +// alone. +// +// Therefore, this section only contains one test. + +// Tests that GTestFlagSaver works on Windows and Mac. + +class GTestFlagSaverTest : public Test { + protected: + // Saves the Google Test flags such that we can restore them later, and + // then sets them to their default values. This will be called + // before the first test in this test case is run. + static void SetUpTestCase() { + saver_ = new GTestFlagSaver; + + GTEST_FLAG(also_run_disabled_tests) = false; + GTEST_FLAG(break_on_failure) = false; + GTEST_FLAG(catch_exceptions) = false; + GTEST_FLAG(death_test_use_fork) = false; + GTEST_FLAG(color) = "auto"; + GTEST_FLAG(filter) = ""; + GTEST_FLAG(list_tests) = false; + GTEST_FLAG(output) = ""; + GTEST_FLAG(print_time) = true; + GTEST_FLAG(random_seed) = 0; + GTEST_FLAG(repeat) = 1; + GTEST_FLAG(shuffle) = false; + GTEST_FLAG(stack_trace_depth) = kMaxStackTraceDepth; + GTEST_FLAG(stream_result_to) = ""; + GTEST_FLAG(throw_on_failure) = false; + } + + // Restores the Google Test flags that the tests have modified. This will + // be called after the last test in this test case is run. + static void TearDownTestCase() { + delete saver_; + saver_ = NULL; + } + + // Verifies that the Google Test flags have their default values, and then + // modifies each of them. + void VerifyAndModifyFlags() { + EXPECT_FALSE(GTEST_FLAG(also_run_disabled_tests)); + EXPECT_FALSE(GTEST_FLAG(break_on_failure)); + EXPECT_FALSE(GTEST_FLAG(catch_exceptions)); + EXPECT_STREQ("auto", GTEST_FLAG(color).c_str()); + EXPECT_FALSE(GTEST_FLAG(death_test_use_fork)); + EXPECT_STREQ("", GTEST_FLAG(filter).c_str()); + EXPECT_FALSE(GTEST_FLAG(list_tests)); + EXPECT_STREQ("", GTEST_FLAG(output).c_str()); + EXPECT_TRUE(GTEST_FLAG(print_time)); + EXPECT_EQ(0, GTEST_FLAG(random_seed)); + EXPECT_EQ(1, GTEST_FLAG(repeat)); + EXPECT_FALSE(GTEST_FLAG(shuffle)); + EXPECT_EQ(kMaxStackTraceDepth, GTEST_FLAG(stack_trace_depth)); + EXPECT_STREQ("", GTEST_FLAG(stream_result_to).c_str()); + EXPECT_FALSE(GTEST_FLAG(throw_on_failure)); + + GTEST_FLAG(also_run_disabled_tests) = true; + GTEST_FLAG(break_on_failure) = true; + GTEST_FLAG(catch_exceptions) = true; + GTEST_FLAG(color) = "no"; + GTEST_FLAG(death_test_use_fork) = true; + GTEST_FLAG(filter) = "abc"; + GTEST_FLAG(list_tests) = true; + GTEST_FLAG(output) = "xml:foo.xml"; + GTEST_FLAG(print_time) = false; + GTEST_FLAG(random_seed) = 1; + GTEST_FLAG(repeat) = 100; + GTEST_FLAG(shuffle) = true; + GTEST_FLAG(stack_trace_depth) = 1; + GTEST_FLAG(stream_result_to) = "localhost:1234"; + GTEST_FLAG(throw_on_failure) = true; + } + + private: + // For saving Google Test flags during this test case. + static GTestFlagSaver* saver_; +}; + +GTestFlagSaver* GTestFlagSaverTest::saver_ = NULL; + +// Google Test doesn't guarantee the order of tests. The following two +// tests are designed to work regardless of their order. + +// Modifies the Google Test flags in the test body. +TEST_F(GTestFlagSaverTest, ModifyGTestFlags) { + VerifyAndModifyFlags(); +} + +// Verifies that the Google Test flags in the body of the previous test were +// restored to their original values. 
+TEST_F(GTestFlagSaverTest, VerifyGTestFlags) { + VerifyAndModifyFlags(); +} + +// Sets an environment variable with the given name to the given +// value. If the value argument is "", unsets the environment +// variable. The caller must ensure that both arguments are not NULL. +static void SetEnv(const char* name, const char* value) { +#if GTEST_OS_WINDOWS_MOBILE + // Environment variables are not supported on Windows CE. + return; +#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9) + // C++Builder's putenv only stores a pointer to its parameter; we have to + // ensure that the string remains valid as long as it might be needed. + // We use an std::map to do so. + static std::map added_env; + + // Because putenv stores a pointer to the string buffer, we can't delete the + // previous string (if present) until after it's replaced. + std::string *prev_env = NULL; + if (added_env.find(name) != added_env.end()) { + prev_env = added_env[name]; + } + added_env[name] = new std::string( + (Message() << name << "=" << value).GetString()); + + // The standard signature of putenv accepts a 'char*' argument. Other + // implementations, like C++Builder's, accept a 'const char*'. + // We cast away the 'const' since that would work for both variants. + putenv(const_cast(added_env[name]->c_str())); + delete prev_env; +#elif GTEST_OS_WINDOWS // If we are on Windows proper. + _putenv((Message() << name << "=" << value).GetString().c_str()); +#else + if (*value == '\0') { + unsetenv(name); + } else { + setenv(name, value, 1); + } +#endif // GTEST_OS_WINDOWS_MOBILE +} + +#if !GTEST_OS_WINDOWS_MOBILE +// Environment variables are not supported on Windows CE. + +using testing::internal::Int32FromGTestEnv; + +// Tests Int32FromGTestEnv(). + +// Tests that Int32FromGTestEnv() returns the default value when the +// environment variable is not set. +TEST(Int32FromGTestEnvTest, ReturnsDefaultWhenVariableIsNotSet) { + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "TEMP", ""); + EXPECT_EQ(10, Int32FromGTestEnv("temp", 10)); +} + +# if !defined(GTEST_GET_INT32_FROM_ENV_) + +// Tests that Int32FromGTestEnv() returns the default value when the +// environment variable overflows as an Int32. +TEST(Int32FromGTestEnvTest, ReturnsDefaultWhenValueOverflows) { + printf("(expecting 2 warnings)\n"); + + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "TEMP", "12345678987654321"); + EXPECT_EQ(20, Int32FromGTestEnv("temp", 20)); + + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "TEMP", "-12345678987654321"); + EXPECT_EQ(30, Int32FromGTestEnv("temp", 30)); +} + +// Tests that Int32FromGTestEnv() returns the default value when the +// environment variable does not represent a valid decimal integer. +TEST(Int32FromGTestEnvTest, ReturnsDefaultWhenValueIsInvalid) { + printf("(expecting 2 warnings)\n"); + + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "TEMP", "A1"); + EXPECT_EQ(40, Int32FromGTestEnv("temp", 40)); + + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "TEMP", "12X"); + EXPECT_EQ(50, Int32FromGTestEnv("temp", 50)); +} + +# endif // !defined(GTEST_GET_INT32_FROM_ENV_) + +// Tests that Int32FromGTestEnv() parses and returns the value of the +// environment variable when it represents a valid decimal integer in +// the range of an Int32. +TEST(Int32FromGTestEnvTest, ParsesAndReturnsValidValue) { + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "TEMP", "123"); + EXPECT_EQ(123, Int32FromGTestEnv("temp", 0)); + + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "TEMP", "-321"); + EXPECT_EQ(-321, Int32FromGTestEnv("temp", 0)); +} +#endif // !GTEST_OS_WINDOWS_MOBILE + +// Tests ParseInt32Flag(). 
+ +// Tests that ParseInt32Flag() returns false and doesn't change the +// output value when the flag has wrong format +TEST(ParseInt32FlagTest, ReturnsFalseForInvalidFlag) { + Int32 value = 123; + EXPECT_FALSE(ParseInt32Flag("--a=100", "b", &value)); + EXPECT_EQ(123, value); + + EXPECT_FALSE(ParseInt32Flag("a=100", "a", &value)); + EXPECT_EQ(123, value); +} + +// Tests that ParseInt32Flag() returns false and doesn't change the +// output value when the flag overflows as an Int32. +TEST(ParseInt32FlagTest, ReturnsDefaultWhenValueOverflows) { + printf("(expecting 2 warnings)\n"); + + Int32 value = 123; + EXPECT_FALSE(ParseInt32Flag("--abc=12345678987654321", "abc", &value)); + EXPECT_EQ(123, value); + + EXPECT_FALSE(ParseInt32Flag("--abc=-12345678987654321", "abc", &value)); + EXPECT_EQ(123, value); +} + +// Tests that ParseInt32Flag() returns false and doesn't change the +// output value when the flag does not represent a valid decimal +// integer. +TEST(ParseInt32FlagTest, ReturnsDefaultWhenValueIsInvalid) { + printf("(expecting 2 warnings)\n"); + + Int32 value = 123; + EXPECT_FALSE(ParseInt32Flag("--abc=A1", "abc", &value)); + EXPECT_EQ(123, value); + + EXPECT_FALSE(ParseInt32Flag("--abc=12X", "abc", &value)); + EXPECT_EQ(123, value); +} + +// Tests that ParseInt32Flag() parses the value of the flag and +// returns true when the flag represents a valid decimal integer in +// the range of an Int32. +TEST(ParseInt32FlagTest, ParsesAndReturnsValidValue) { + Int32 value = 123; + EXPECT_TRUE(ParseInt32Flag("--" GTEST_FLAG_PREFIX_ "abc=456", "abc", &value)); + EXPECT_EQ(456, value); + + EXPECT_TRUE(ParseInt32Flag("--" GTEST_FLAG_PREFIX_ "abc=-789", + "abc", &value)); + EXPECT_EQ(-789, value); +} + +// Tests that Int32FromEnvOrDie() parses the value of the var or +// returns the correct default. +// Environment variables are not supported on Windows CE. +#if !GTEST_OS_WINDOWS_MOBILE +TEST(Int32FromEnvOrDieTest, ParsesAndReturnsValidValue) { + EXPECT_EQ(333, Int32FromEnvOrDie(GTEST_FLAG_PREFIX_UPPER_ "UnsetVar", 333)); + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "UnsetVar", "123"); + EXPECT_EQ(123, Int32FromEnvOrDie(GTEST_FLAG_PREFIX_UPPER_ "UnsetVar", 333)); + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "UnsetVar", "-123"); + EXPECT_EQ(-123, Int32FromEnvOrDie(GTEST_FLAG_PREFIX_UPPER_ "UnsetVar", 333)); +} +#endif // !GTEST_OS_WINDOWS_MOBILE + +// Tests that Int32FromEnvOrDie() aborts with an error message +// if the variable is not an Int32. +TEST(Int32FromEnvOrDieDeathTest, AbortsOnFailure) { + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "VAR", "xxx"); + EXPECT_DEATH_IF_SUPPORTED( + Int32FromEnvOrDie(GTEST_FLAG_PREFIX_UPPER_ "VAR", 123), + ".*"); +} + +// Tests that Int32FromEnvOrDie() aborts with an error message +// if the variable cannot be represnted by an Int32. +TEST(Int32FromEnvOrDieDeathTest, AbortsOnInt32Overflow) { + SetEnv(GTEST_FLAG_PREFIX_UPPER_ "VAR", "1234567891234567891234"); + EXPECT_DEATH_IF_SUPPORTED( + Int32FromEnvOrDie(GTEST_FLAG_PREFIX_UPPER_ "VAR", 123), + ".*"); +} + +// Tests that ShouldRunTestOnShard() selects all tests +// where there is 1 shard. 
+TEST(ShouldRunTestOnShardTest, IsPartitionWhenThereIsOneShard) { + EXPECT_TRUE(ShouldRunTestOnShard(1, 0, 0)); + EXPECT_TRUE(ShouldRunTestOnShard(1, 0, 1)); + EXPECT_TRUE(ShouldRunTestOnShard(1, 0, 2)); + EXPECT_TRUE(ShouldRunTestOnShard(1, 0, 3)); + EXPECT_TRUE(ShouldRunTestOnShard(1, 0, 4)); +} + +class ShouldShardTest : public testing::Test { + protected: + virtual void SetUp() { + index_var_ = GTEST_FLAG_PREFIX_UPPER_ "INDEX"; + total_var_ = GTEST_FLAG_PREFIX_UPPER_ "TOTAL"; + } + + virtual void TearDown() { + SetEnv(index_var_, ""); + SetEnv(total_var_, ""); + } + + const char* index_var_; + const char* total_var_; +}; + +// Tests that sharding is disabled if neither of the environment variables +// are set. +TEST_F(ShouldShardTest, ReturnsFalseWhenNeitherEnvVarIsSet) { + SetEnv(index_var_, ""); + SetEnv(total_var_, ""); + + EXPECT_FALSE(ShouldShard(total_var_, index_var_, false)); + EXPECT_FALSE(ShouldShard(total_var_, index_var_, true)); +} + +// Tests that sharding is not enabled if total_shards == 1. +TEST_F(ShouldShardTest, ReturnsFalseWhenTotalShardIsOne) { + SetEnv(index_var_, "0"); + SetEnv(total_var_, "1"); + EXPECT_FALSE(ShouldShard(total_var_, index_var_, false)); + EXPECT_FALSE(ShouldShard(total_var_, index_var_, true)); +} + +// Tests that sharding is enabled if total_shards > 1 and +// we are not in a death test subprocess. +// Environment variables are not supported on Windows CE. +#if !GTEST_OS_WINDOWS_MOBILE +TEST_F(ShouldShardTest, WorksWhenShardEnvVarsAreValid) { + SetEnv(index_var_, "4"); + SetEnv(total_var_, "22"); + EXPECT_TRUE(ShouldShard(total_var_, index_var_, false)); + EXPECT_FALSE(ShouldShard(total_var_, index_var_, true)); + + SetEnv(index_var_, "8"); + SetEnv(total_var_, "9"); + EXPECT_TRUE(ShouldShard(total_var_, index_var_, false)); + EXPECT_FALSE(ShouldShard(total_var_, index_var_, true)); + + SetEnv(index_var_, "0"); + SetEnv(total_var_, "9"); + EXPECT_TRUE(ShouldShard(total_var_, index_var_, false)); + EXPECT_FALSE(ShouldShard(total_var_, index_var_, true)); +} +#endif // !GTEST_OS_WINDOWS_MOBILE + +// Tests that we exit in error if the sharding values are not valid. + +typedef ShouldShardTest ShouldShardDeathTest; + +TEST_F(ShouldShardDeathTest, AbortsWhenShardingEnvVarsAreInvalid) { + SetEnv(index_var_, "4"); + SetEnv(total_var_, "4"); + EXPECT_DEATH_IF_SUPPORTED(ShouldShard(total_var_, index_var_, false), ".*"); + + SetEnv(index_var_, "4"); + SetEnv(total_var_, "-2"); + EXPECT_DEATH_IF_SUPPORTED(ShouldShard(total_var_, index_var_, false), ".*"); + + SetEnv(index_var_, "5"); + SetEnv(total_var_, ""); + EXPECT_DEATH_IF_SUPPORTED(ShouldShard(total_var_, index_var_, false), ".*"); + + SetEnv(index_var_, ""); + SetEnv(total_var_, "5"); + EXPECT_DEATH_IF_SUPPORTED(ShouldShard(total_var_, index_var_, false), ".*"); +} + +// Tests that ShouldRunTestOnShard is a partition when 5 +// shards are used. +TEST(ShouldRunTestOnShardTest, IsPartitionWhenThereAreFiveShards) { + // Choose an arbitrary number of tests and shards. + const int num_tests = 17; + const int num_shards = 5; + + // Check partitioning: each test should be on exactly 1 shard. 
+ for (int test_id = 0; test_id < num_tests; test_id++) { + int prev_selected_shard_index = -1; + for (int shard_index = 0; shard_index < num_shards; shard_index++) { + if (ShouldRunTestOnShard(num_shards, shard_index, test_id)) { + if (prev_selected_shard_index < 0) { + prev_selected_shard_index = shard_index; + } else { + ADD_FAILURE() << "Shard " << prev_selected_shard_index << " and " + << shard_index << " are both selected to run test " << test_id; + } + } + } + } + + // Check balance: This is not required by the sharding protocol, but is a + // desirable property for performance. + for (int shard_index = 0; shard_index < num_shards; shard_index++) { + int num_tests_on_shard = 0; + for (int test_id = 0; test_id < num_tests; test_id++) { + num_tests_on_shard += + ShouldRunTestOnShard(num_shards, shard_index, test_id); + } + EXPECT_GE(num_tests_on_shard, num_tests / num_shards); + } +} + +// For the same reason we are not explicitly testing everything in the +// Test class, there are no separate tests for the following classes +// (except for some trivial cases): +// +// TestCase, UnitTest, UnitTestResultPrinter. +// +// Similarly, there are no separate tests for the following macros: +// +// TEST, TEST_F, RUN_ALL_TESTS + +TEST(UnitTestTest, CanGetOriginalWorkingDir) { + ASSERT_TRUE(UnitTest::GetInstance()->original_working_dir() != NULL); + EXPECT_STRNE(UnitTest::GetInstance()->original_working_dir(), ""); +} + +TEST(UnitTestTest, ReturnsPlausibleTimestamp) { + EXPECT_LT(0, UnitTest::GetInstance()->start_timestamp()); + EXPECT_LE(UnitTest::GetInstance()->start_timestamp(), GetTimeInMillis()); +} + +// When a property using a reserved key is supplied to this function, it +// tests that a non-fatal failure is added, a fatal failure is not added, +// and that the property is not recorded. +void ExpectNonFatalFailureRecordingPropertyWithReservedKey( + const TestResult& test_result, const char* key) { + EXPECT_NONFATAL_FAILURE(Test::RecordProperty(key, "1"), "Reserved key"); + ASSERT_EQ(0, test_result.test_property_count()) << "Property for key '" << key + << "' recorded unexpectedly."; +} + +void ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTest( + const char* key) { + const TestInfo* test_info = UnitTest::GetInstance()->current_test_info(); + ASSERT_TRUE(test_info != NULL); + ExpectNonFatalFailureRecordingPropertyWithReservedKey(*test_info->result(), + key); +} + +void ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTestCase( + const char* key) { + const TestCase* test_case = UnitTest::GetInstance()->current_test_case(); + ASSERT_TRUE(test_case != NULL); + ExpectNonFatalFailureRecordingPropertyWithReservedKey( + test_case->ad_hoc_test_result(), key); +} + +void ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + const char* key) { + ExpectNonFatalFailureRecordingPropertyWithReservedKey( + UnitTest::GetInstance()->ad_hoc_test_result(), key); +} + +// Tests that property recording functions in UnitTest outside of tests +// functions correcly. Creating a separate instance of UnitTest ensures it +// is in a state similar to the UnitTest's singleton's between tests. 
+class UnitTestRecordPropertyTest : + public testing::internal::UnitTestRecordPropertyTestHelper { + public: + static void SetUpTestCase() { + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTestCase( + "disabled"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTestCase( + "errors"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTestCase( + "failures"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTestCase( + "name"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTestCase( + "tests"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTestCase( + "time"); + + Test::RecordProperty("test_case_key_1", "1"); + const TestCase* test_case = UnitTest::GetInstance()->current_test_case(); + ASSERT_TRUE(test_case != NULL); + + ASSERT_EQ(1, test_case->ad_hoc_test_result().test_property_count()); + EXPECT_STREQ("test_case_key_1", + test_case->ad_hoc_test_result().GetTestProperty(0).key()); + EXPECT_STREQ("1", + test_case->ad_hoc_test_result().GetTestProperty(0).value()); + } +}; + +// Tests TestResult has the expected property when added. +TEST_F(UnitTestRecordPropertyTest, OnePropertyFoundWhenAdded) { + UnitTestRecordProperty("key_1", "1"); + + ASSERT_EQ(1, unit_test_.ad_hoc_test_result().test_property_count()); + + EXPECT_STREQ("key_1", + unit_test_.ad_hoc_test_result().GetTestProperty(0).key()); + EXPECT_STREQ("1", + unit_test_.ad_hoc_test_result().GetTestProperty(0).value()); +} + +// Tests TestResult has multiple properties when added. +TEST_F(UnitTestRecordPropertyTest, MultiplePropertiesFoundWhenAdded) { + UnitTestRecordProperty("key_1", "1"); + UnitTestRecordProperty("key_2", "2"); + + ASSERT_EQ(2, unit_test_.ad_hoc_test_result().test_property_count()); + + EXPECT_STREQ("key_1", + unit_test_.ad_hoc_test_result().GetTestProperty(0).key()); + EXPECT_STREQ("1", unit_test_.ad_hoc_test_result().GetTestProperty(0).value()); + + EXPECT_STREQ("key_2", + unit_test_.ad_hoc_test_result().GetTestProperty(1).key()); + EXPECT_STREQ("2", unit_test_.ad_hoc_test_result().GetTestProperty(1).value()); +} + +// Tests TestResult::RecordProperty() overrides values for duplicate keys. 
+TEST_F(UnitTestRecordPropertyTest, OverridesValuesForDuplicateKeys) { + UnitTestRecordProperty("key_1", "1"); + UnitTestRecordProperty("key_2", "2"); + UnitTestRecordProperty("key_1", "12"); + UnitTestRecordProperty("key_2", "22"); + + ASSERT_EQ(2, unit_test_.ad_hoc_test_result().test_property_count()); + + EXPECT_STREQ("key_1", + unit_test_.ad_hoc_test_result().GetTestProperty(0).key()); + EXPECT_STREQ("12", + unit_test_.ad_hoc_test_result().GetTestProperty(0).value()); + + EXPECT_STREQ("key_2", + unit_test_.ad_hoc_test_result().GetTestProperty(1).key()); + EXPECT_STREQ("22", + unit_test_.ad_hoc_test_result().GetTestProperty(1).value()); +} + +TEST_F(UnitTestRecordPropertyTest, + AddFailureInsideTestsWhenUsingTestCaseReservedKeys) { + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTest( + "name"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTest( + "value_param"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTest( + "type_param"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTest( + "status"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTest( + "time"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTest( + "classname"); +} + +TEST_F(UnitTestRecordPropertyTest, + AddRecordWithReservedKeysGeneratesCorrectPropertyList) { + EXPECT_NONFATAL_FAILURE( + Test::RecordProperty("name", "1"), + "'classname', 'name', 'status', 'time', 'type_param', and 'value_param'" + " are reserved"); +} + +class UnitTestRecordPropertyTestEnvironment : public Environment { + public: + virtual void TearDown() { + ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + "tests"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + "failures"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + "disabled"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + "errors"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + "name"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + "timestamp"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + "time"); + ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestCase( + "random_seed"); + } +}; + +// This will test property recording outside of any test or test case. +static Environment* record_property_env = + AddGlobalTestEnvironment(new UnitTestRecordPropertyTestEnvironment); + +// This group of tests is for predicate assertions (ASSERT_PRED*, etc) +// of various arities. They do not attempt to be exhaustive. Rather, +// view them as smoke tests that can be easily reviewed and verified. +// A more complete set of tests for predicate assertions can be found +// in gtest_pred_impl_unittest.cc. + +// First, some predicates and predicate-formatters needed by the tests. + +// Returns true iff the argument is an even number. +bool IsEven(int n) { + return (n % 2) == 0; +} + +// A functor that returns true iff the argument is an even number. +struct IsEvenFunctor { + bool operator()(int n) { return IsEven(n); } +}; + +// A predicate-formatter function that asserts the argument is an even +// number. 
+AssertionResult AssertIsEven(const char* expr, int n) { + if (IsEven(n)) { + return AssertionSuccess(); + } + + Message msg; + msg << expr << " evaluates to " << n << ", which is not even."; + return AssertionFailure(msg); +} + +// A predicate function that returns AssertionResult for use in +// EXPECT/ASSERT_TRUE/FALSE. +AssertionResult ResultIsEven(int n) { + if (IsEven(n)) + return AssertionSuccess() << n << " is even"; + else + return AssertionFailure() << n << " is odd"; +} + +// A predicate function that returns AssertionResult but gives no +// explanation why it succeeds. Needed for testing that +// EXPECT/ASSERT_FALSE handles such functions correctly. +AssertionResult ResultIsEvenNoExplanation(int n) { + if (IsEven(n)) + return AssertionSuccess(); + else + return AssertionFailure() << n << " is odd"; +} + +// A predicate-formatter functor that asserts the argument is an even +// number. +struct AssertIsEvenFunctor { + AssertionResult operator()(const char* expr, int n) { + return AssertIsEven(expr, n); + } +}; + +// Returns true iff the sum of the arguments is an even number. +bool SumIsEven2(int n1, int n2) { + return IsEven(n1 + n2); +} + +// A functor that returns true iff the sum of the arguments is an even +// number. +struct SumIsEven3Functor { + bool operator()(int n1, int n2, int n3) { + return IsEven(n1 + n2 + n3); + } +}; + +// A predicate-formatter function that asserts the sum of the +// arguments is an even number. +AssertionResult AssertSumIsEven4( + const char* e1, const char* e2, const char* e3, const char* e4, + int n1, int n2, int n3, int n4) { + const int sum = n1 + n2 + n3 + n4; + if (IsEven(sum)) { + return AssertionSuccess(); + } + + Message msg; + msg << e1 << " + " << e2 << " + " << e3 << " + " << e4 + << " (" << n1 << " + " << n2 << " + " << n3 << " + " << n4 + << ") evaluates to " << sum << ", which is not even."; + return AssertionFailure(msg); +} + +// A predicate-formatter functor that asserts the sum of the arguments +// is an even number. +struct AssertSumIsEven5Functor { + AssertionResult operator()( + const char* e1, const char* e2, const char* e3, const char* e4, + const char* e5, int n1, int n2, int n3, int n4, int n5) { + const int sum = n1 + n2 + n3 + n4 + n5; + if (IsEven(sum)) { + return AssertionSuccess(); + } + + Message msg; + msg << e1 << " + " << e2 << " + " << e3 << " + " << e4 << " + " << e5 + << " (" + << n1 << " + " << n2 << " + " << n3 << " + " << n4 << " + " << n5 + << ") evaluates to " << sum << ", which is not even."; + return AssertionFailure(msg); + } +}; + + +// Tests unary predicate assertions. + +// Tests unary predicate assertions that don't use a custom formatter. +TEST(Pred1Test, WithoutFormat) { + // Success cases. + EXPECT_PRED1(IsEvenFunctor(), 2) << "This failure is UNEXPECTED!"; + ASSERT_PRED1(IsEven, 4); + + // Failure cases. + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED1(IsEven, 5) << "This failure is expected."; + }, "This failure is expected."); + EXPECT_FATAL_FAILURE(ASSERT_PRED1(IsEvenFunctor(), 5), + "evaluates to false"); +} + +// Tests unary predicate assertions that use a custom formatter. +TEST(Pred1Test, WithFormat) { + // Success cases. + EXPECT_PRED_FORMAT1(AssertIsEven, 2); + ASSERT_PRED_FORMAT1(AssertIsEvenFunctor(), 4) + << "This failure is UNEXPECTED!"; + + // Failure cases. 
+ const int n = 5; + EXPECT_NONFATAL_FAILURE(EXPECT_PRED_FORMAT1(AssertIsEvenFunctor(), n), + "n evaluates to 5, which is not even."); + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT1(AssertIsEven, 5) << "This failure is expected."; + }, "This failure is expected."); +} + +// Tests that unary predicate assertions evaluates their arguments +// exactly once. +TEST(Pred1Test, SingleEvaluationOnFailure) { + // A success case. + static int n = 0; + EXPECT_PRED1(IsEven, n++); + EXPECT_EQ(1, n) << "The argument is not evaluated exactly once."; + + // A failure case. + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT1(AssertIsEvenFunctor(), n++) + << "This failure is expected."; + }, "This failure is expected."); + EXPECT_EQ(2, n) << "The argument is not evaluated exactly once."; +} + + +// Tests predicate assertions whose arity is >= 2. + +// Tests predicate assertions that don't use a custom formatter. +TEST(PredTest, WithoutFormat) { + // Success cases. + ASSERT_PRED2(SumIsEven2, 2, 4) << "This failure is UNEXPECTED!"; + EXPECT_PRED3(SumIsEven3Functor(), 4, 6, 8); + + // Failure cases. + const int n1 = 1; + const int n2 = 2; + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED2(SumIsEven2, n1, n2) << "This failure is expected."; + }, "This failure is expected."); + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED3(SumIsEven3Functor(), 1, 2, 4); + }, "evaluates to false"); +} + +// Tests predicate assertions that use a custom formatter. +TEST(PredTest, WithFormat) { + // Success cases. + ASSERT_PRED_FORMAT4(AssertSumIsEven4, 4, 6, 8, 10) << + "This failure is UNEXPECTED!"; + EXPECT_PRED_FORMAT5(AssertSumIsEven5Functor(), 2, 4, 6, 8, 10); + + // Failure cases. + const int n1 = 1; + const int n2 = 2; + const int n3 = 4; + const int n4 = 6; + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT4(AssertSumIsEven4, n1, n2, n3, n4); + }, "evaluates to 13, which is not even."); + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT5(AssertSumIsEven5Functor(), 1, 2, 4, 6, 8) + << "This failure is expected."; + }, "This failure is expected."); +} + +// Tests that predicate assertions evaluates their arguments +// exactly once. +TEST(PredTest, SingleEvaluationOnFailure) { + // A success case. + int n1 = 0; + int n2 = 0; + EXPECT_PRED2(SumIsEven2, n1++, n2++); + EXPECT_EQ(1, n1) << "Argument 1 is not evaluated exactly once."; + EXPECT_EQ(1, n2) << "Argument 2 is not evaluated exactly once."; + + // Another success case. + n1 = n2 = 0; + int n3 = 0; + int n4 = 0; + int n5 = 0; + ASSERT_PRED_FORMAT5(AssertSumIsEven5Functor(), + n1++, n2++, n3++, n4++, n5++) + << "This failure is UNEXPECTED!"; + EXPECT_EQ(1, n1) << "Argument 1 is not evaluated exactly once."; + EXPECT_EQ(1, n2) << "Argument 2 is not evaluated exactly once."; + EXPECT_EQ(1, n3) << "Argument 3 is not evaluated exactly once."; + EXPECT_EQ(1, n4) << "Argument 4 is not evaluated exactly once."; + EXPECT_EQ(1, n5) << "Argument 5 is not evaluated exactly once."; + + // A failure case. + n1 = n2 = n3 = 0; + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED3(SumIsEven3Functor(), ++n1, n2++, n3++) + << "This failure is expected."; + }, "This failure is expected."); + EXPECT_EQ(1, n1) << "Argument 1 is not evaluated exactly once."; + EXPECT_EQ(1, n2) << "Argument 2 is not evaluated exactly once."; + EXPECT_EQ(1, n3) << "Argument 3 is not evaluated exactly once."; + + // Another failure case. 
+  n1 = n2 = n3 = n4 = 0;
+  EXPECT_NONFATAL_FAILURE({  // NOLINT
+    EXPECT_PRED_FORMAT4(AssertSumIsEven4, ++n1, n2++, n3++, n4++);
+  }, "evaluates to 1, which is not even.");
+  EXPECT_EQ(1, n1) << "Argument 1 is not evaluated exactly once.";
+  EXPECT_EQ(1, n2) << "Argument 2 is not evaluated exactly once.";
+  EXPECT_EQ(1, n3) << "Argument 3 is not evaluated exactly once.";
+  EXPECT_EQ(1, n4) << "Argument 4 is not evaluated exactly once.";
+}
+
+
+// Some helper functions for testing using overloaded/template
+// functions with ASSERT_PREDn and EXPECT_PREDn.
+
+bool IsPositive(double x) {
+  return x > 0;
+}
+
+template <typename T>
+bool IsNegative(T x) {
+  return x < 0;
+}
+
+template <typename T1, typename T2>
+bool GreaterThan(T1 x1, T2 x2) {
+  return x1 > x2;
+}
+
+// Tests that overloaded functions can be used in *_PRED* as long as
+// their types are explicitly specified.
+TEST(PredicateAssertionTest, AcceptsOverloadedFunction) {
+  // C++Builder requires C-style casts rather than static_cast.
+  EXPECT_PRED1((bool (*)(int))(IsPositive), 5);  // NOLINT
+  ASSERT_PRED1((bool (*)(double))(IsPositive), 6.0);  // NOLINT
+}
+
+// Tests that template functions can be used in *_PRED* as long as
+// their types are explicitly specified.
+TEST(PredicateAssertionTest, AcceptsTemplateFunction) {
+  EXPECT_PRED1(IsNegative<int>, -5);
+  // Makes sure that we can handle templates with more than one
+  // parameter.
+  ASSERT_PRED2((GreaterThan<int, int>), 5, 0);
+}
+
+
+// Some helper functions for testing using overloaded/template
+// functions with ASSERT_PRED_FORMATn and EXPECT_PRED_FORMATn.
+
+AssertionResult IsPositiveFormat(const char* /* expr */, int n) {
+  return n > 0 ? AssertionSuccess() :
+      AssertionFailure(Message() << "Failure");
+}
+
+AssertionResult IsPositiveFormat(const char* /* expr */, double x) {
+  return x > 0 ? AssertionSuccess() :
+      AssertionFailure(Message() << "Failure");
+}
+
+template <typename T>
+AssertionResult IsNegativeFormat(const char* /* expr */, T x) {
+  return x < 0 ? AssertionSuccess() :
+      AssertionFailure(Message() << "Failure");
+}
+
+template <typename T1, typename T2>
+AssertionResult EqualsFormat(const char* /* expr1 */, const char* /* expr2 */,
+                             const T1& x1, const T2& x2) {
+  return x1 == x2 ? AssertionSuccess() :
+      AssertionFailure(Message() << "Failure");
+}
+
+// Tests that overloaded functions can be used in *_PRED_FORMAT*
+// without explicitly specifying their types.
+TEST(PredicateFormatAssertionTest, AcceptsOverloadedFunction) {
+  EXPECT_PRED_FORMAT1(IsPositiveFormat, 5);
+  ASSERT_PRED_FORMAT1(IsPositiveFormat, 6.0);
+}
+
+// Tests that template functions can be used in *_PRED_FORMAT* without
+// explicitly specifying their types.
+TEST(PredicateFormatAssertionTest, AcceptsTemplateFunction) {
+  EXPECT_PRED_FORMAT1(IsNegativeFormat, -5);
+  ASSERT_PRED_FORMAT2(EqualsFormat, 3, 3);
+}
+
+
+// Tests string assertions.
+
+// Tests ASSERT_STREQ with non-NULL arguments.
+TEST(StringAssertionTest, ASSERT_STREQ) {
+  const char * const p1 = "good";
+  ASSERT_STREQ(p1, p1);
+
+  // Let p2 have the same content as p1, but be at a different address.
+  const char p2[] = "good";
+  ASSERT_STREQ(p1, p2);
+
+  EXPECT_FATAL_FAILURE(ASSERT_STREQ("bad", "good"),
+                       "Expected: \"bad\"");
+}
+
+// Tests ASSERT_STREQ with NULL arguments.
+TEST(StringAssertionTest, ASSERT_STREQ_Null) {
+  ASSERT_STREQ(static_cast<const char*>(NULL), NULL);
+  EXPECT_FATAL_FAILURE(ASSERT_STREQ(NULL, "non-null"),
+                       "non-null");
+}
+
+// Tests ASSERT_STREQ with NULL arguments.
+TEST(StringAssertionTest, ASSERT_STREQ_Null2) { + EXPECT_FATAL_FAILURE(ASSERT_STREQ("non-null", NULL), + "non-null"); +} + +// Tests ASSERT_STRNE. +TEST(StringAssertionTest, ASSERT_STRNE) { + ASSERT_STRNE("hi", "Hi"); + ASSERT_STRNE("Hi", NULL); + ASSERT_STRNE(NULL, "Hi"); + ASSERT_STRNE("", NULL); + ASSERT_STRNE(NULL, ""); + ASSERT_STRNE("", "Hi"); + ASSERT_STRNE("Hi", ""); + EXPECT_FATAL_FAILURE(ASSERT_STRNE("Hi", "Hi"), + "\"Hi\" vs \"Hi\""); +} + +// Tests ASSERT_STRCASEEQ. +TEST(StringAssertionTest, ASSERT_STRCASEEQ) { + ASSERT_STRCASEEQ("hi", "Hi"); + ASSERT_STRCASEEQ(static_cast(NULL), NULL); + + ASSERT_STRCASEEQ("", ""); + EXPECT_FATAL_FAILURE(ASSERT_STRCASEEQ("Hi", "hi2"), + "Ignoring case"); +} + +// Tests ASSERT_STRCASENE. +TEST(StringAssertionTest, ASSERT_STRCASENE) { + ASSERT_STRCASENE("hi1", "Hi2"); + ASSERT_STRCASENE("Hi", NULL); + ASSERT_STRCASENE(NULL, "Hi"); + ASSERT_STRCASENE("", NULL); + ASSERT_STRCASENE(NULL, ""); + ASSERT_STRCASENE("", "Hi"); + ASSERT_STRCASENE("Hi", ""); + EXPECT_FATAL_FAILURE(ASSERT_STRCASENE("Hi", "hi"), + "(ignoring case)"); +} + +// Tests *_STREQ on wide strings. +TEST(StringAssertionTest, STREQ_Wide) { + // NULL strings. + ASSERT_STREQ(static_cast(NULL), NULL); + + // Empty strings. + ASSERT_STREQ(L"", L""); + + // Non-null vs NULL. + EXPECT_NONFATAL_FAILURE(EXPECT_STREQ(L"non-null", NULL), + "non-null"); + + // Equal strings. + EXPECT_STREQ(L"Hi", L"Hi"); + + // Unequal strings. + EXPECT_NONFATAL_FAILURE(EXPECT_STREQ(L"abc", L"Abc"), + "Abc"); + + // Strings containing wide characters. + EXPECT_NONFATAL_FAILURE(EXPECT_STREQ(L"abc\x8119", L"abc\x8120"), + "abc"); + + // The streaming variation. + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_STREQ(L"abc\x8119", L"abc\x8121") << "Expected failure"; + }, "Expected failure"); +} + +// Tests *_STRNE on wide strings. +TEST(StringAssertionTest, STRNE_Wide) { + // NULL strings. + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_STRNE(static_cast(NULL), NULL); + }, ""); + + // Empty strings. + EXPECT_NONFATAL_FAILURE(EXPECT_STRNE(L"", L""), + "L\"\""); + + // Non-null vs NULL. + ASSERT_STRNE(L"non-null", NULL); + + // Equal strings. + EXPECT_NONFATAL_FAILURE(EXPECT_STRNE(L"Hi", L"Hi"), + "L\"Hi\""); + + // Unequal strings. + EXPECT_STRNE(L"abc", L"Abc"); + + // Strings containing wide characters. + EXPECT_NONFATAL_FAILURE(EXPECT_STRNE(L"abc\x8119", L"abc\x8119"), + "abc"); + + // The streaming variation. + ASSERT_STRNE(L"abc\x8119", L"abc\x8120") << "This shouldn't happen"; +} + +// Tests for ::testing::IsSubstring(). + +// Tests that IsSubstring() returns the correct result when the input +// argument type is const char*. +TEST(IsSubstringTest, ReturnsCorrectResultForCString) { + EXPECT_FALSE(IsSubstring("", "", NULL, "a")); + EXPECT_FALSE(IsSubstring("", "", "b", NULL)); + EXPECT_FALSE(IsSubstring("", "", "needle", "haystack")); + + EXPECT_TRUE(IsSubstring("", "", static_cast(NULL), NULL)); + EXPECT_TRUE(IsSubstring("", "", "needle", "two needles")); +} + +// Tests that IsSubstring() returns the correct result when the input +// argument type is const wchar_t*. 
+TEST(IsSubstringTest, ReturnsCorrectResultForWideCString) { + EXPECT_FALSE(IsSubstring("", "", kNull, L"a")); + EXPECT_FALSE(IsSubstring("", "", L"b", kNull)); + EXPECT_FALSE(IsSubstring("", "", L"needle", L"haystack")); + + EXPECT_TRUE(IsSubstring("", "", static_cast(NULL), NULL)); + EXPECT_TRUE(IsSubstring("", "", L"needle", L"two needles")); +} + +// Tests that IsSubstring() generates the correct message when the input +// argument type is const char*. +TEST(IsSubstringTest, GeneratesCorrectMessageForCString) { + EXPECT_STREQ("Value of: needle_expr\n" + " Actual: \"needle\"\n" + "Expected: a substring of haystack_expr\n" + "Which is: \"haystack\"", + IsSubstring("needle_expr", "haystack_expr", + "needle", "haystack").failure_message()); +} + +// Tests that IsSubstring returns the correct result when the input +// argument type is ::std::string. +TEST(IsSubstringTest, ReturnsCorrectResultsForStdString) { + EXPECT_TRUE(IsSubstring("", "", std::string("hello"), "ahellob")); + EXPECT_FALSE(IsSubstring("", "", "hello", std::string("world"))); +} + +#if GTEST_HAS_STD_WSTRING +// Tests that IsSubstring returns the correct result when the input +// argument type is ::std::wstring. +TEST(IsSubstringTest, ReturnsCorrectResultForStdWstring) { + EXPECT_TRUE(IsSubstring("", "", ::std::wstring(L"needle"), L"two needles")); + EXPECT_FALSE(IsSubstring("", "", L"needle", ::std::wstring(L"haystack"))); +} + +// Tests that IsSubstring() generates the correct message when the input +// argument type is ::std::wstring. +TEST(IsSubstringTest, GeneratesCorrectMessageForWstring) { + EXPECT_STREQ("Value of: needle_expr\n" + " Actual: L\"needle\"\n" + "Expected: a substring of haystack_expr\n" + "Which is: L\"haystack\"", + IsSubstring( + "needle_expr", "haystack_expr", + ::std::wstring(L"needle"), L"haystack").failure_message()); +} + +#endif // GTEST_HAS_STD_WSTRING + +// Tests for ::testing::IsNotSubstring(). + +// Tests that IsNotSubstring() returns the correct result when the input +// argument type is const char*. +TEST(IsNotSubstringTest, ReturnsCorrectResultForCString) { + EXPECT_TRUE(IsNotSubstring("", "", "needle", "haystack")); + EXPECT_FALSE(IsNotSubstring("", "", "needle", "two needles")); +} + +// Tests that IsNotSubstring() returns the correct result when the input +// argument type is const wchar_t*. +TEST(IsNotSubstringTest, ReturnsCorrectResultForWideCString) { + EXPECT_TRUE(IsNotSubstring("", "", L"needle", L"haystack")); + EXPECT_FALSE(IsNotSubstring("", "", L"needle", L"two needles")); +} + +// Tests that IsNotSubstring() generates the correct message when the input +// argument type is const wchar_t*. +TEST(IsNotSubstringTest, GeneratesCorrectMessageForWideCString) { + EXPECT_STREQ("Value of: needle_expr\n" + " Actual: L\"needle\"\n" + "Expected: not a substring of haystack_expr\n" + "Which is: L\"two needles\"", + IsNotSubstring( + "needle_expr", "haystack_expr", + L"needle", L"two needles").failure_message()); +} + +// Tests that IsNotSubstring returns the correct result when the input +// argument type is ::std::string. +TEST(IsNotSubstringTest, ReturnsCorrectResultsForStdString) { + EXPECT_FALSE(IsNotSubstring("", "", std::string("hello"), "ahellob")); + EXPECT_TRUE(IsNotSubstring("", "", "hello", std::string("world"))); +} + +// Tests that IsNotSubstring() generates the correct message when the input +// argument type is ::std::string. 
+TEST(IsNotSubstringTest, GeneratesCorrectMessageForStdString) {
+  EXPECT_STREQ("Value of: needle_expr\n"
+               "  Actual: \"needle\"\n"
+               "Expected: not a substring of haystack_expr\n"
+               "Which is: \"two needles\"",
+               IsNotSubstring(
+                   "needle_expr", "haystack_expr",
+                   ::std::string("needle"), "two needles").failure_message());
+}
+
+#if GTEST_HAS_STD_WSTRING
+
+// Tests that IsNotSubstring returns the correct result when the input
+// argument type is ::std::wstring.
+TEST(IsNotSubstringTest, ReturnsCorrectResultForStdWstring) {
+  EXPECT_FALSE(
+      IsNotSubstring("", "", ::std::wstring(L"needle"), L"two needles"));
+  EXPECT_TRUE(IsNotSubstring("", "", L"needle", ::std::wstring(L"haystack")));
+}
+
+#endif  // GTEST_HAS_STD_WSTRING
+
+// Tests floating-point assertions.
+
+template <typename RawType>
+class FloatingPointTest : public Test {
+ protected:
+  // Pre-calculated numbers to be used by the tests.
+  struct TestValues {
+    RawType close_to_positive_zero;
+    RawType close_to_negative_zero;
+    RawType further_from_negative_zero;
+
+    RawType close_to_one;
+    RawType further_from_one;
+
+    RawType infinity;
+    RawType close_to_infinity;
+    RawType further_from_infinity;
+
+    RawType nan1;
+    RawType nan2;
+  };
+
+  typedef typename testing::internal::FloatingPoint<RawType> Floating;
+  typedef typename Floating::Bits Bits;
+
+  virtual void SetUp() {
+    const size_t max_ulps = Floating::kMaxUlps;
+
+    // The bits that represent 0.0.
+    const Bits zero_bits = Floating(0).bits();
+
+    // Makes some numbers close to 0.0.
+    values_.close_to_positive_zero = Floating::ReinterpretBits(
+        zero_bits + max_ulps/2);
+    values_.close_to_negative_zero = -Floating::ReinterpretBits(
+        zero_bits + max_ulps - max_ulps/2);
+    values_.further_from_negative_zero = -Floating::ReinterpretBits(
+        zero_bits + max_ulps + 1 - max_ulps/2);
+
+    // The bits that represent 1.0.
+    const Bits one_bits = Floating(1).bits();
+
+    // Makes some numbers close to 1.0.
+    values_.close_to_one = Floating::ReinterpretBits(one_bits + max_ulps);
+    values_.further_from_one = Floating::ReinterpretBits(
+        one_bits + max_ulps + 1);
+
+    // +infinity.
+    values_.infinity = Floating::Infinity();
+
+    // The bits that represent +infinity.
+    const Bits infinity_bits = Floating(values_.infinity).bits();
+
+    // Makes some numbers close to infinity.
+    values_.close_to_infinity = Floating::ReinterpretBits(
+        infinity_bits - max_ulps);
+    values_.further_from_infinity = Floating::ReinterpretBits(
+        infinity_bits - max_ulps - 1);
+
+    // Makes some NAN's. Sets the most significant bit of the fraction so that
+    // our NaN's are quiet; trying to process a signaling NaN would raise an
+    // exception if our environment enables floating point exceptions.
+    values_.nan1 = Floating::ReinterpretBits(Floating::kExponentBitMask
+        | (static_cast<Bits>(1) << (Floating::kFractionBitCount - 1)) | 1);
+    values_.nan2 = Floating::ReinterpretBits(Floating::kExponentBitMask
+        | (static_cast<Bits>(1) << (Floating::kFractionBitCount - 1)) | 200);
+  }
+
+  void TestSize() {
+    EXPECT_EQ(sizeof(RawType), sizeof(Bits));
+  }
+
+  static TestValues values_;
+};
+
+template <typename RawType>
+typename FloatingPointTest<RawType>::TestValues
+    FloatingPointTest<RawType>::values_;
+
+// Instantiates FloatingPointTest for testing *_FLOAT_EQ.
+typedef FloatingPointTest<float> FloatTest;
+
+// Tests that the size of Float::Bits matches the size of float.
+TEST_F(FloatTest, Size) {
+  TestSize();
+}
+
+// Tests comparing with +0 and -0.
+TEST_F(FloatTest, Zeros) { + EXPECT_FLOAT_EQ(0.0, -0.0); + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(-0.0, 1.0), + "1.0"); + EXPECT_FATAL_FAILURE(ASSERT_FLOAT_EQ(0.0, 1.5), + "1.5"); +} + +// Tests comparing numbers close to 0. +// +// This ensures that *_FLOAT_EQ handles the sign correctly and no +// overflow occurs when comparing numbers whose absolute value is very +// small. +TEST_F(FloatTest, AlmostZeros) { + // In C++Builder, names within local classes (such as used by + // EXPECT_FATAL_FAILURE) cannot be resolved against static members of the + // scoping class. Use a static local alias as a workaround. + // We use the assignment syntax since some compilers, like Sun Studio, + // don't allow initializing references using construction syntax + // (parentheses). + static const FloatTest::TestValues& v = this->values_; + + EXPECT_FLOAT_EQ(0.0, v.close_to_positive_zero); + EXPECT_FLOAT_EQ(-0.0, v.close_to_negative_zero); + EXPECT_FLOAT_EQ(v.close_to_positive_zero, v.close_to_negative_zero); + + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_FLOAT_EQ(v.close_to_positive_zero, + v.further_from_negative_zero); + }, "v.further_from_negative_zero"); +} + +// Tests comparing numbers close to each other. +TEST_F(FloatTest, SmallDiff) { + EXPECT_FLOAT_EQ(1.0, values_.close_to_one); + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(1.0, values_.further_from_one), + "values_.further_from_one"); +} + +// Tests comparing numbers far apart. +TEST_F(FloatTest, LargeDiff) { + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(2.5, 3.0), + "3.0"); +} + +// Tests comparing with infinity. +// +// This ensures that no overflow occurs when comparing numbers whose +// absolute value is very large. +TEST_F(FloatTest, Infinity) { + EXPECT_FLOAT_EQ(values_.infinity, values_.close_to_infinity); + EXPECT_FLOAT_EQ(-values_.infinity, -values_.close_to_infinity); +#if !GTEST_OS_SYMBIAN + // Nokia's STLport crashes if we try to output infinity or NaN. + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(values_.infinity, -values_.infinity), + "-values_.infinity"); + + // This is interesting as the representations of infinity and nan1 + // are only 1 DLP apart. + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(values_.infinity, values_.nan1), + "values_.nan1"); +#endif // !GTEST_OS_SYMBIAN +} + +// Tests that comparing with NAN always returns false. +TEST_F(FloatTest, NaN) { +#if !GTEST_OS_SYMBIAN +// Nokia's STLport crashes if we try to output infinity or NaN. + + // In C++Builder, names within local classes (such as used by + // EXPECT_FATAL_FAILURE) cannot be resolved against static members of the + // scoping class. Use a static local alias as a workaround. + // We use the assignment syntax since some compilers, like Sun Studio, + // don't allow initializing references using construction syntax + // (parentheses). + static const FloatTest::TestValues& v = this->values_; + + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(v.nan1, v.nan1), + "v.nan1"); + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(v.nan1, v.nan2), + "v.nan2"); + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(1.0, v.nan1), + "v.nan1"); + + EXPECT_FATAL_FAILURE(ASSERT_FLOAT_EQ(v.nan1, v.infinity), + "v.infinity"); +#endif // !GTEST_OS_SYMBIAN +} + +// Tests that *_FLOAT_EQ are reflexive. +TEST_F(FloatTest, Reflexive) { + EXPECT_FLOAT_EQ(0.0, 0.0); + EXPECT_FLOAT_EQ(1.0, 1.0); + ASSERT_FLOAT_EQ(values_.infinity, values_.infinity); +} + +// Tests that *_FLOAT_EQ are commutative. +TEST_F(FloatTest, Commutative) { + // We already tested EXPECT_FLOAT_EQ(1.0, values_.close_to_one). 
+ EXPECT_FLOAT_EQ(values_.close_to_one, 1.0); + + // We already tested EXPECT_FLOAT_EQ(1.0, values_.further_from_one). + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(values_.further_from_one, 1.0), + "1.0"); +} + +// Tests EXPECT_NEAR. +TEST_F(FloatTest, EXPECT_NEAR) { + EXPECT_NEAR(-1.0f, -1.1f, 0.2f); + EXPECT_NEAR(2.0f, 3.0f, 1.0f); + EXPECT_NONFATAL_FAILURE(EXPECT_NEAR(1.0f,1.5f, 0.25f), // NOLINT + "The difference between 1.0f and 1.5f is 0.5, " + "which exceeds 0.25f"); + // To work around a bug in gcc 2.95.0, there is intentionally no + // space after the first comma in the previous line. +} + +// Tests ASSERT_NEAR. +TEST_F(FloatTest, ASSERT_NEAR) { + ASSERT_NEAR(-1.0f, -1.1f, 0.2f); + ASSERT_NEAR(2.0f, 3.0f, 1.0f); + EXPECT_FATAL_FAILURE(ASSERT_NEAR(1.0f,1.5f, 0.25f), // NOLINT + "The difference between 1.0f and 1.5f is 0.5, " + "which exceeds 0.25f"); + // To work around a bug in gcc 2.95.0, there is intentionally no + // space after the first comma in the previous line. +} + +// Tests the cases where FloatLE() should succeed. +TEST_F(FloatTest, FloatLESucceeds) { + EXPECT_PRED_FORMAT2(FloatLE, 1.0f, 2.0f); // When val1 < val2, + ASSERT_PRED_FORMAT2(FloatLE, 1.0f, 1.0f); // val1 == val2, + + // or when val1 is greater than, but almost equals to, val2. + EXPECT_PRED_FORMAT2(FloatLE, values_.close_to_positive_zero, 0.0f); +} + +// Tests the cases where FloatLE() should fail. +TEST_F(FloatTest, FloatLEFails) { + // When val1 is greater than val2 by a large margin, + EXPECT_NONFATAL_FAILURE(EXPECT_PRED_FORMAT2(FloatLE, 2.0f, 1.0f), + "(2.0f) <= (1.0f)"); + + // or by a small yet non-negligible margin, + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(FloatLE, values_.further_from_one, 1.0f); + }, "(values_.further_from_one) <= (1.0f)"); + +#if !GTEST_OS_SYMBIAN && !defined(__BORLANDC__) + // Nokia's STLport crashes if we try to output infinity or NaN. + // C++Builder gives bad results for ordered comparisons involving NaNs + // due to compiler bugs. + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(FloatLE, values_.nan1, values_.infinity); + }, "(values_.nan1) <= (values_.infinity)"); + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(FloatLE, -values_.infinity, values_.nan1); + }, "(-values_.infinity) <= (values_.nan1)"); + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT2(FloatLE, values_.nan1, values_.nan1); + }, "(values_.nan1) <= (values_.nan1)"); +#endif // !GTEST_OS_SYMBIAN && !defined(__BORLANDC__) +} + +// Instantiates FloatingPointTest for testing *_DOUBLE_EQ. +typedef FloatingPointTest DoubleTest; + +// Tests that the size of Double::Bits matches the size of double. +TEST_F(DoubleTest, Size) { + TestSize(); +} + +// Tests comparing with +0 and -0. +TEST_F(DoubleTest, Zeros) { + EXPECT_DOUBLE_EQ(0.0, -0.0); + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(-0.0, 1.0), + "1.0"); + EXPECT_FATAL_FAILURE(ASSERT_DOUBLE_EQ(0.0, 1.0), + "1.0"); +} + +// Tests comparing numbers close to 0. +// +// This ensures that *_DOUBLE_EQ handles the sign correctly and no +// overflow occurs when comparing numbers whose absolute value is very +// small. +TEST_F(DoubleTest, AlmostZeros) { + // In C++Builder, names within local classes (such as used by + // EXPECT_FATAL_FAILURE) cannot be resolved against static members of the + // scoping class. Use a static local alias as a workaround. + // We use the assignment syntax since some compilers, like Sun Studio, + // don't allow initializing references using construction syntax + // (parentheses). 
+ static const DoubleTest::TestValues& v = this->values_; + + EXPECT_DOUBLE_EQ(0.0, v.close_to_positive_zero); + EXPECT_DOUBLE_EQ(-0.0, v.close_to_negative_zero); + EXPECT_DOUBLE_EQ(v.close_to_positive_zero, v.close_to_negative_zero); + + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_DOUBLE_EQ(v.close_to_positive_zero, + v.further_from_negative_zero); + }, "v.further_from_negative_zero"); +} + +// Tests comparing numbers close to each other. +TEST_F(DoubleTest, SmallDiff) { + EXPECT_DOUBLE_EQ(1.0, values_.close_to_one); + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(1.0, values_.further_from_one), + "values_.further_from_one"); +} + +// Tests comparing numbers far apart. +TEST_F(DoubleTest, LargeDiff) { + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(2.0, 3.0), + "3.0"); +} + +// Tests comparing with infinity. +// +// This ensures that no overflow occurs when comparing numbers whose +// absolute value is very large. +TEST_F(DoubleTest, Infinity) { + EXPECT_DOUBLE_EQ(values_.infinity, values_.close_to_infinity); + EXPECT_DOUBLE_EQ(-values_.infinity, -values_.close_to_infinity); +#if !GTEST_OS_SYMBIAN + // Nokia's STLport crashes if we try to output infinity or NaN. + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(values_.infinity, -values_.infinity), + "-values_.infinity"); + + // This is interesting as the representations of infinity_ and nan1_ + // are only 1 DLP apart. + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(values_.infinity, values_.nan1), + "values_.nan1"); +#endif // !GTEST_OS_SYMBIAN +} + +// Tests that comparing with NAN always returns false. +TEST_F(DoubleTest, NaN) { +#if !GTEST_OS_SYMBIAN + // In C++Builder, names within local classes (such as used by + // EXPECT_FATAL_FAILURE) cannot be resolved against static members of the + // scoping class. Use a static local alias as a workaround. + // We use the assignment syntax since some compilers, like Sun Studio, + // don't allow initializing references using construction syntax + // (parentheses). + static const DoubleTest::TestValues& v = this->values_; + + // Nokia's STLport crashes if we try to output infinity or NaN. + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(v.nan1, v.nan1), + "v.nan1"); + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(v.nan1, v.nan2), "v.nan2"); + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(1.0, v.nan1), "v.nan1"); + EXPECT_FATAL_FAILURE(ASSERT_DOUBLE_EQ(v.nan1, v.infinity), + "v.infinity"); +#endif // !GTEST_OS_SYMBIAN +} + +// Tests that *_DOUBLE_EQ are reflexive. +TEST_F(DoubleTest, Reflexive) { + EXPECT_DOUBLE_EQ(0.0, 0.0); + EXPECT_DOUBLE_EQ(1.0, 1.0); +#if !GTEST_OS_SYMBIAN + // Nokia's STLport crashes if we try to output infinity or NaN. + ASSERT_DOUBLE_EQ(values_.infinity, values_.infinity); +#endif // !GTEST_OS_SYMBIAN +} + +// Tests that *_DOUBLE_EQ are commutative. +TEST_F(DoubleTest, Commutative) { + // We already tested EXPECT_DOUBLE_EQ(1.0, values_.close_to_one). + EXPECT_DOUBLE_EQ(values_.close_to_one, 1.0); + + // We already tested EXPECT_DOUBLE_EQ(1.0, values_.further_from_one). + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(values_.further_from_one, 1.0), + "1.0"); +} + +// Tests EXPECT_NEAR. +TEST_F(DoubleTest, EXPECT_NEAR) { + EXPECT_NEAR(-1.0, -1.1, 0.2); + EXPECT_NEAR(2.0, 3.0, 1.0); + EXPECT_NONFATAL_FAILURE(EXPECT_NEAR(1.0, 1.5, 0.25), // NOLINT + "The difference between 1.0 and 1.5 is 0.5, " + "which exceeds 0.25"); + // To work around a bug in gcc 2.95.0, there is intentionally no + // space after the first comma in the previous statement. +} + +// Tests ASSERT_NEAR. 
+TEST_F(DoubleTest, ASSERT_NEAR) { + ASSERT_NEAR(-1.0, -1.1, 0.2); + ASSERT_NEAR(2.0, 3.0, 1.0); + EXPECT_FATAL_FAILURE(ASSERT_NEAR(1.0, 1.5, 0.25), // NOLINT + "The difference between 1.0 and 1.5 is 0.5, " + "which exceeds 0.25"); + // To work around a bug in gcc 2.95.0, there is intentionally no + // space after the first comma in the previous statement. +} + +// Tests the cases where DoubleLE() should succeed. +TEST_F(DoubleTest, DoubleLESucceeds) { + EXPECT_PRED_FORMAT2(DoubleLE, 1.0, 2.0); // When val1 < val2, + ASSERT_PRED_FORMAT2(DoubleLE, 1.0, 1.0); // val1 == val2, + + // or when val1 is greater than, but almost equals to, val2. + EXPECT_PRED_FORMAT2(DoubleLE, values_.close_to_positive_zero, 0.0); +} + +// Tests the cases where DoubleLE() should fail. +TEST_F(DoubleTest, DoubleLEFails) { + // When val1 is greater than val2 by a large margin, + EXPECT_NONFATAL_FAILURE(EXPECT_PRED_FORMAT2(DoubleLE, 2.0, 1.0), + "(2.0) <= (1.0)"); + + // or by a small yet non-negligible margin, + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(DoubleLE, values_.further_from_one, 1.0); + }, "(values_.further_from_one) <= (1.0)"); + +#if !GTEST_OS_SYMBIAN && !defined(__BORLANDC__) + // Nokia's STLport crashes if we try to output infinity or NaN. + // C++Builder gives bad results for ordered comparisons involving NaNs + // due to compiler bugs. + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(DoubleLE, values_.nan1, values_.infinity); + }, "(values_.nan1) <= (values_.infinity)"); + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_PRED_FORMAT2(DoubleLE, -values_.infinity, values_.nan1); + }, " (-values_.infinity) <= (values_.nan1)"); + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_PRED_FORMAT2(DoubleLE, values_.nan1, values_.nan1); + }, "(values_.nan1) <= (values_.nan1)"); +#endif // !GTEST_OS_SYMBIAN && !defined(__BORLANDC__) +} + + +// Verifies that a test or test case whose name starts with DISABLED_ is +// not run. + +// A test whose name starts with DISABLED_. +// Should not run. +TEST(DisabledTest, DISABLED_TestShouldNotRun) { + FAIL() << "Unexpected failure: Disabled test should not be run."; +} + +// A test whose name does not start with DISABLED_. +// Should run. +TEST(DisabledTest, NotDISABLED_TestShouldRun) { + EXPECT_EQ(1, 1); +} + +// A test case whose name starts with DISABLED_. +// Should not run. +TEST(DISABLED_TestCase, TestShouldNotRun) { + FAIL() << "Unexpected failure: Test in disabled test case should not be run."; +} + +// A test case and test whose names start with DISABLED_. +// Should not run. +TEST(DISABLED_TestCase, DISABLED_TestShouldNotRun) { + FAIL() << "Unexpected failure: Test in disabled test case should not be run."; +} + +// Check that when all tests in a test case are disabled, SetupTestCase() and +// TearDownTestCase() are not called. +class DisabledTestsTest : public Test { + protected: + static void SetUpTestCase() { + FAIL() << "Unexpected failure: All tests disabled in test case. " + "SetupTestCase() should not be called."; + } + + static void TearDownTestCase() { + FAIL() << "Unexpected failure: All tests disabled in test case. " + "TearDownTestCase() should not be called."; + } +}; + +TEST_F(DisabledTestsTest, DISABLED_TestShouldNotRun_1) { + FAIL() << "Unexpected failure: Disabled test should not be run."; +} + +TEST_F(DisabledTestsTest, DISABLED_TestShouldNotRun_2) { + FAIL() << "Unexpected failure: Disabled test should not be run."; +} + +// Tests that disabled typed tests aren't run. 
+
+#if GTEST_HAS_TYPED_TEST
+
+template <typename T>
+class TypedTest : public Test {
+};
+
+typedef testing::Types<int, double> NumericTypes;
+TYPED_TEST_CASE(TypedTest, NumericTypes);
+
+TYPED_TEST(TypedTest, DISABLED_ShouldNotRun) {
+  FAIL() << "Unexpected failure: Disabled typed test should not run.";
+}
+
+template <typename T>
+class DISABLED_TypedTest : public Test {
+};
+
+TYPED_TEST_CASE(DISABLED_TypedTest, NumericTypes);
+
+TYPED_TEST(DISABLED_TypedTest, ShouldNotRun) {
+  FAIL() << "Unexpected failure: Disabled typed test should not run.";
+}
+
+#endif  // GTEST_HAS_TYPED_TEST
+
+// Tests that disabled type-parameterized tests aren't run.
+
+#if GTEST_HAS_TYPED_TEST_P
+
+template <typename T>
+class TypedTestP : public Test {
+};
+
+TYPED_TEST_CASE_P(TypedTestP);
+
+TYPED_TEST_P(TypedTestP, DISABLED_ShouldNotRun) {
+  FAIL() << "Unexpected failure: "
+         << "Disabled type-parameterized test should not run.";
+}
+
+REGISTER_TYPED_TEST_CASE_P(TypedTestP, DISABLED_ShouldNotRun);
+
+INSTANTIATE_TYPED_TEST_CASE_P(My, TypedTestP, NumericTypes);
+
+template <typename T>
+class DISABLED_TypedTestP : public Test {
+};
+
+TYPED_TEST_CASE_P(DISABLED_TypedTestP);
+
+TYPED_TEST_P(DISABLED_TypedTestP, ShouldNotRun) {
+  FAIL() << "Unexpected failure: "
+         << "Disabled type-parameterized test should not run.";
+}
+
+REGISTER_TYPED_TEST_CASE_P(DISABLED_TypedTestP, ShouldNotRun);
+
+INSTANTIATE_TYPED_TEST_CASE_P(My, DISABLED_TypedTestP, NumericTypes);
+
+#endif  // GTEST_HAS_TYPED_TEST_P
+
+// Tests that assertion macros evaluate their arguments exactly once.
+
+class SingleEvaluationTest : public Test {
+ public:  // Must be public and not protected due to a bug in g++ 3.4.2.
+  // This helper function is needed by the FailedASSERT_STREQ test
+  // below. It's public to work around C++Builder's bug with scoping local
+  // classes.
+  static void CompareAndIncrementCharPtrs() {
+    ASSERT_STREQ(p1_++, p2_++);
+  }
+
+  // This helper function is needed by the FailedASSERT_NE test below. It's
+  // public to work around C++Builder's bug with scoping local classes.
+  static void CompareAndIncrementInts() {
+    ASSERT_NE(a_++, b_++);
+  }
+
+ protected:
+  SingleEvaluationTest() {
+    p1_ = s1_;
+    p2_ = s2_;
+    a_ = 0;
+    b_ = 0;
+  }
+
+  static const char* const s1_;
+  static const char* const s2_;
+  static const char* p1_;
+  static const char* p2_;
+
+  static int a_;
+  static int b_;
+};
+
+const char* const SingleEvaluationTest::s1_ = "01234";
+const char* const SingleEvaluationTest::s2_ = "abcde";
+const char* SingleEvaluationTest::p1_;
+const char* SingleEvaluationTest::p2_;
+int SingleEvaluationTest::a_;
+int SingleEvaluationTest::b_;
+
+// Tests that when ASSERT_STREQ fails, it evaluates its arguments
+// exactly once.
+TEST_F(SingleEvaluationTest, FailedASSERT_STREQ) {
+  EXPECT_FATAL_FAILURE(SingleEvaluationTest::CompareAndIncrementCharPtrs(),
+                       "p2_++");
+  EXPECT_EQ(s1_ + 1, p1_);
+  EXPECT_EQ(s2_ + 1, p2_);
+}
+
+// Tests that string assertion arguments are evaluated exactly once.
+TEST_F(SingleEvaluationTest, ASSERT_STR) {
+  // successful EXPECT_STRNE
+  EXPECT_STRNE(p1_++, p2_++);
+  EXPECT_EQ(s1_ + 1, p1_);
+  EXPECT_EQ(s2_ + 1, p2_);
+
+  // failed EXPECT_STRCASEEQ
+  EXPECT_NONFATAL_FAILURE(EXPECT_STRCASEEQ(p1_++, p2_++),
+                          "Ignoring case");
+  EXPECT_EQ(s1_ + 2, p1_);
+  EXPECT_EQ(s2_ + 2, p2_);
+}
+
+// Tests that when ASSERT_NE fails, it evaluates its arguments exactly
+// once.
+TEST_F(SingleEvaluationTest, FailedASSERT_NE) { + EXPECT_FATAL_FAILURE(SingleEvaluationTest::CompareAndIncrementInts(), + "(a_++) != (b_++)"); + EXPECT_EQ(1, a_); + EXPECT_EQ(1, b_); +} + +// Tests that assertion arguments are evaluated exactly once. +TEST_F(SingleEvaluationTest, OtherCases) { + // successful EXPECT_TRUE + EXPECT_TRUE(0 == a_++); // NOLINT + EXPECT_EQ(1, a_); + + // failed EXPECT_TRUE + EXPECT_NONFATAL_FAILURE(EXPECT_TRUE(-1 == a_++), "-1 == a_++"); + EXPECT_EQ(2, a_); + + // successful EXPECT_GT + EXPECT_GT(a_++, b_++); + EXPECT_EQ(3, a_); + EXPECT_EQ(1, b_); + + // failed EXPECT_LT + EXPECT_NONFATAL_FAILURE(EXPECT_LT(a_++, b_++), "(a_++) < (b_++)"); + EXPECT_EQ(4, a_); + EXPECT_EQ(2, b_); + + // successful ASSERT_TRUE + ASSERT_TRUE(0 < a_++); // NOLINT + EXPECT_EQ(5, a_); + + // successful ASSERT_GT + ASSERT_GT(a_++, b_++); + EXPECT_EQ(6, a_); + EXPECT_EQ(3, b_); +} + +#if GTEST_HAS_EXCEPTIONS + +void ThrowAnInteger() { + throw 1; +} + +// Tests that assertion arguments are evaluated exactly once. +TEST_F(SingleEvaluationTest, ExceptionTests) { + // successful EXPECT_THROW + EXPECT_THROW({ // NOLINT + a_++; + ThrowAnInteger(); + }, int); + EXPECT_EQ(1, a_); + + // failed EXPECT_THROW, throws different + EXPECT_NONFATAL_FAILURE(EXPECT_THROW({ // NOLINT + a_++; + ThrowAnInteger(); + }, bool), "throws a different type"); + EXPECT_EQ(2, a_); + + // failed EXPECT_THROW, throws nothing + EXPECT_NONFATAL_FAILURE(EXPECT_THROW(a_++, bool), "throws nothing"); + EXPECT_EQ(3, a_); + + // successful EXPECT_NO_THROW + EXPECT_NO_THROW(a_++); + EXPECT_EQ(4, a_); + + // failed EXPECT_NO_THROW + EXPECT_NONFATAL_FAILURE(EXPECT_NO_THROW({ // NOLINT + a_++; + ThrowAnInteger(); + }), "it throws"); + EXPECT_EQ(5, a_); + + // successful EXPECT_ANY_THROW + EXPECT_ANY_THROW({ // NOLINT + a_++; + ThrowAnInteger(); + }); + EXPECT_EQ(6, a_); + + // failed EXPECT_ANY_THROW + EXPECT_NONFATAL_FAILURE(EXPECT_ANY_THROW(a_++), "it doesn't"); + EXPECT_EQ(7, a_); +} + +#endif // GTEST_HAS_EXCEPTIONS + +// Tests {ASSERT|EXPECT}_NO_FATAL_FAILURE. 
+class NoFatalFailureTest : public Test {
+ protected:
+  void Succeeds() {}
+  void FailsNonFatal() {
+    ADD_FAILURE() << "some non-fatal failure";
+  }
+  void Fails() {
+    FAIL() << "some fatal failure";
+  }
+
+  void DoAssertNoFatalFailureOnFails() {
+    ASSERT_NO_FATAL_FAILURE(Fails());
+    ADD_FAILURE() << "should not reach here.";
+  }
+
+  void DoExpectNoFatalFailureOnFails() {
+    EXPECT_NO_FATAL_FAILURE(Fails());
+    ADD_FAILURE() << "other failure";
+  }
+};
+
+TEST_F(NoFatalFailureTest, NoFailure) {
+  EXPECT_NO_FATAL_FAILURE(Succeeds());
+  ASSERT_NO_FATAL_FAILURE(Succeeds());
+}
+
+TEST_F(NoFatalFailureTest, NonFatalIsNoFailure) {
+  EXPECT_NONFATAL_FAILURE(
+      EXPECT_NO_FATAL_FAILURE(FailsNonFatal()),
+      "some non-fatal failure");
+  EXPECT_NONFATAL_FAILURE(
+      ASSERT_NO_FATAL_FAILURE(FailsNonFatal()),
+      "some non-fatal failure");
+}
+
+TEST_F(NoFatalFailureTest, AssertNoFatalFailureOnFatalFailure) {
+  TestPartResultArray gtest_failures;
+  {
+    ScopedFakeTestPartResultReporter gtest_reporter(&gtest_failures);
+    DoAssertNoFatalFailureOnFails();
+  }
+  ASSERT_EQ(2, gtest_failures.size());
+  EXPECT_EQ(TestPartResult::kFatalFailure,
+            gtest_failures.GetTestPartResult(0).type());
+  EXPECT_EQ(TestPartResult::kFatalFailure,
+            gtest_failures.GetTestPartResult(1).type());
+  EXPECT_PRED_FORMAT2(testing::IsSubstring, "some fatal failure",
+                      gtest_failures.GetTestPartResult(0).message());
+  EXPECT_PRED_FORMAT2(testing::IsSubstring, "it does",
+                      gtest_failures.GetTestPartResult(1).message());
+}
+
+TEST_F(NoFatalFailureTest, ExpectNoFatalFailureOnFatalFailure) {
+  TestPartResultArray gtest_failures;
+  {
+    ScopedFakeTestPartResultReporter gtest_reporter(&gtest_failures);
+    DoExpectNoFatalFailureOnFails();
+  }
+  ASSERT_EQ(3, gtest_failures.size());
+  EXPECT_EQ(TestPartResult::kFatalFailure,
+            gtest_failures.GetTestPartResult(0).type());
+  EXPECT_EQ(TestPartResult::kNonFatalFailure,
+            gtest_failures.GetTestPartResult(1).type());
+  EXPECT_EQ(TestPartResult::kNonFatalFailure,
+            gtest_failures.GetTestPartResult(2).type());
+  EXPECT_PRED_FORMAT2(testing::IsSubstring, "some fatal failure",
+                      gtest_failures.GetTestPartResult(0).message());
+  EXPECT_PRED_FORMAT2(testing::IsSubstring, "it does",
+                      gtest_failures.GetTestPartResult(1).message());
+  EXPECT_PRED_FORMAT2(testing::IsSubstring, "other failure",
+                      gtest_failures.GetTestPartResult(2).message());
+}
+
+TEST_F(NoFatalFailureTest, MessageIsStreamable) {
+  TestPartResultArray gtest_failures;
+  {
+    ScopedFakeTestPartResultReporter gtest_reporter(&gtest_failures);
+    EXPECT_NO_FATAL_FAILURE(FAIL() << "foo") << "my message";
+  }
+  ASSERT_EQ(2, gtest_failures.size());
+  EXPECT_EQ(TestPartResult::kNonFatalFailure,
+            gtest_failures.GetTestPartResult(0).type());
+  EXPECT_EQ(TestPartResult::kNonFatalFailure,
+            gtest_failures.GetTestPartResult(1).type());
+  EXPECT_PRED_FORMAT2(testing::IsSubstring, "foo",
+                      gtest_failures.GetTestPartResult(0).message());
+  EXPECT_PRED_FORMAT2(testing::IsSubstring, "my message",
+                      gtest_failures.GetTestPartResult(1).message());
+}
+
+// Tests non-string assertions.
+
+std::string EditsToString(const std::vector<EditType>& edits) {
+  std::string out;
+  for (size_t i = 0; i < edits.size(); ++i) {
+    static const char kEdits[] = " +-/";
+    out.append(1, kEdits[edits[i]]);
+  }
+  return out;
+}
+
+std::vector<size_t> CharsToIndices(const std::string& str) {
+  std::vector<size_t> out;
+  for (size_t i = 0; i < str.size(); ++i) {
+    out.push_back(str[i]);
+  }
+  return out;
+}
+
+std::vector<std::string> CharsToLines(const std::string& str) {
+  std::vector<std::string> out;
+  for (size_t i = 0; i < str.size(); ++i) {
+    out.push_back(str.substr(i, 1));
+  }
+  return out;
+}
+
+TEST(EditDistance, TestCases) {
+  struct Case {
+    int line;
+    const char* left;
+    const char* right;
+    const char* expected_edits;
+    const char* expected_diff;
+  };
+  static const Case kCases[] = {
+      // No change.
+      {__LINE__, "A", "A", " ", ""},
+      {__LINE__, "ABCDE", "ABCDE", "     ", ""},
+      // Simple adds.
+      {__LINE__, "X", "XA", " +", "@@ +1,2 @@\n X\n+A\n"},
+      {__LINE__, "X", "XABCD", " ++++", "@@ +1,5 @@\n X\n+A\n+B\n+C\n+D\n"},
+      // Simple removes.
+      {__LINE__, "XA", "X", " -", "@@ -1,2 @@\n X\n-A\n"},
+      {__LINE__, "XABCD", "X", " ----", "@@ -1,5 @@\n X\n-A\n-B\n-C\n-D\n"},
+      // Simple replaces.
+      {__LINE__, "A", "a", "/", "@@ -1,1 +1,1 @@\n-A\n+a\n"},
+      {__LINE__, "ABCD", "abcd", "////",
+       "@@ -1,4 +1,4 @@\n-A\n-B\n-C\n-D\n+a\n+b\n+c\n+d\n"},
+      // Path finding.
+      {__LINE__, "ABCDEFGH", "ABXEGH1", "  -/ -  +",
+       "@@ -1,8 +1,7 @@\n A\n B\n-C\n-D\n+X\n E\n-F\n G\n H\n+1\n"},
+      {__LINE__, "AAAABCCCC", "ABABCDCDC", "- /   + / ",
+       "@@ -1,9 +1,9 @@\n-A\n A\n-A\n+B\n A\n B\n C\n+D\n C\n-C\n+D\n C\n"},
+      {__LINE__, "ABCDE", "BCDCD", "-   +/",
+       "@@ -1,5 +1,5 @@\n-A\n B\n C\n D\n-E\n+C\n+D\n"},
+      {__LINE__, "ABCDEFGHIJKL", "BCDCDEFGJKLJK", "- ++     --   ++",
+       "@@ -1,4 +1,5 @@\n-A\n B\n+C\n+D\n C\n D\n"
+       "@@ -6,7 +7,7 @@\n F\n G\n-H\n-I\n J\n K\n L\n+J\n+K\n"},
+      {}};
+  for (const Case* c = kCases; c->left; ++c) {
+    EXPECT_TRUE(c->expected_edits ==
+                EditsToString(CalculateOptimalEdits(CharsToIndices(c->left),
+                                                    CharsToIndices(c->right))))
+        << "Left <" << c->left << "> Right <" << c->right << "> Edits <"
+        << EditsToString(CalculateOptimalEdits(
+               CharsToIndices(c->left), CharsToIndices(c->right))) << ">";
+    EXPECT_TRUE(c->expected_diff == CreateUnifiedDiff(CharsToLines(c->left),
+                                                      CharsToLines(c->right)))
+        << "Left <" << c->left << "> Right <" << c->right << "> Diff <"
+        << CreateUnifiedDiff(CharsToLines(c->left), CharsToLines(c->right))
+        << ">";
+  }
+}
+
+// Tests EqFailure(), used for implementing *EQ* assertions.
+TEST(AssertionTest, EqFailure) { + const std::string foo_val("5"), bar_val("6"); + const std::string msg1( + EqFailure("foo", "bar", foo_val, bar_val, false) + .failure_message()); + EXPECT_STREQ( + " Expected: foo\n" + " Which is: 5\n" + "To be equal to: bar\n" + " Which is: 6", + msg1.c_str()); + + const std::string msg2( + EqFailure("foo", "6", foo_val, bar_val, false) + .failure_message()); + EXPECT_STREQ( + " Expected: foo\n" + " Which is: 5\n" + "To be equal to: 6", + msg2.c_str()); + + const std::string msg3( + EqFailure("5", "bar", foo_val, bar_val, false) + .failure_message()); + EXPECT_STREQ( + " Expected: 5\n" + "To be equal to: bar\n" + " Which is: 6", + msg3.c_str()); + + const std::string msg4( + EqFailure("5", "6", foo_val, bar_val, false).failure_message()); + EXPECT_STREQ( + " Expected: 5\n" + "To be equal to: 6", + msg4.c_str()); + + const std::string msg5( + EqFailure("foo", "bar", + std::string("\"x\""), std::string("\"y\""), + true).failure_message()); + EXPECT_STREQ( + " Expected: foo\n" + " Which is: \"x\"\n" + "To be equal to: bar\n" + " Which is: \"y\"\n" + "Ignoring case", + msg5.c_str()); +} + +TEST(AssertionTest, EqFailureWithDiff) { + const std::string left( + "1\\n2XXX\\n3\\n5\\n6\\n7\\n8\\n9\\n10\\n11\\n12XXX\\n13\\n14\\n15"); + const std::string right( + "1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n11\\n12\\n13\\n14"); + const std::string msg1( + EqFailure("left", "right", left, right, false).failure_message()); + EXPECT_STREQ( + " Expected: left\n" + " Which is: " + "1\\n2XXX\\n3\\n5\\n6\\n7\\n8\\n9\\n10\\n11\\n12XXX\\n13\\n14\\n15\n" + "To be equal to: right\n" + " Which is: 1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n11\\n12\\n13\\n14\n" + "With diff:\n@@ -1,5 +1,6 @@\n 1\n-2XXX\n+2\n 3\n+4\n 5\n 6\n" + "@@ -7,8 +8,6 @@\n 8\n 9\n-10\n 11\n-12XXX\n+12\n 13\n 14\n-15\n", + msg1.c_str()); +} + +// Tests AppendUserMessage(), used for implementing the *EQ* macros. +TEST(AssertionTest, AppendUserMessage) { + const std::string foo("foo"); + + Message msg; + EXPECT_STREQ("foo", + AppendUserMessage(foo, msg).c_str()); + + msg << "bar"; + EXPECT_STREQ("foo\nbar", + AppendUserMessage(foo, msg).c_str()); +} + +#ifdef __BORLANDC__ +// Silences warnings: "Condition is always true", "Unreachable code" +# pragma option push -w-ccc -w-rch +#endif + +// Tests ASSERT_TRUE. +TEST(AssertionTest, ASSERT_TRUE) { + ASSERT_TRUE(2 > 1); // NOLINT + EXPECT_FATAL_FAILURE(ASSERT_TRUE(2 < 1), + "2 < 1"); +} + +// Tests ASSERT_TRUE(predicate) for predicates returning AssertionResult. +TEST(AssertionTest, AssertTrueWithAssertionResult) { + ASSERT_TRUE(ResultIsEven(2)); +#ifndef __BORLANDC__ + // ICE's in C++Builder. + EXPECT_FATAL_FAILURE(ASSERT_TRUE(ResultIsEven(3)), + "Value of: ResultIsEven(3)\n" + " Actual: false (3 is odd)\n" + "Expected: true"); +#endif + ASSERT_TRUE(ResultIsEvenNoExplanation(2)); + EXPECT_FATAL_FAILURE(ASSERT_TRUE(ResultIsEvenNoExplanation(3)), + "Value of: ResultIsEvenNoExplanation(3)\n" + " Actual: false (3 is odd)\n" + "Expected: true"); +} + +// Tests ASSERT_FALSE. +TEST(AssertionTest, ASSERT_FALSE) { + ASSERT_FALSE(2 < 1); // NOLINT + EXPECT_FATAL_FAILURE(ASSERT_FALSE(2 > 1), + "Value of: 2 > 1\n" + " Actual: true\n" + "Expected: false"); +} + +// Tests ASSERT_FALSE(predicate) for predicates returning AssertionResult. +TEST(AssertionTest, AssertFalseWithAssertionResult) { + ASSERT_FALSE(ResultIsEven(3)); +#ifndef __BORLANDC__ + // ICE's in C++Builder. 
+ EXPECT_FATAL_FAILURE(ASSERT_FALSE(ResultIsEven(2)), + "Value of: ResultIsEven(2)\n" + " Actual: true (2 is even)\n" + "Expected: false"); +#endif + ASSERT_FALSE(ResultIsEvenNoExplanation(3)); + EXPECT_FATAL_FAILURE(ASSERT_FALSE(ResultIsEvenNoExplanation(2)), + "Value of: ResultIsEvenNoExplanation(2)\n" + " Actual: true\n" + "Expected: false"); +} + +#ifdef __BORLANDC__ +// Restores warnings after previous "#pragma option push" supressed them +# pragma option pop +#endif + +// Tests using ASSERT_EQ on double values. The purpose is to make +// sure that the specialization we did for integer and anonymous enums +// isn't used for double arguments. +TEST(ExpectTest, ASSERT_EQ_Double) { + // A success. + ASSERT_EQ(5.6, 5.6); + + // A failure. + EXPECT_FATAL_FAILURE(ASSERT_EQ(5.1, 5.2), + "5.1"); +} + +// Tests ASSERT_EQ. +TEST(AssertionTest, ASSERT_EQ) { + ASSERT_EQ(5, 2 + 3); + EXPECT_FATAL_FAILURE(ASSERT_EQ(5, 2*3), + " Expected: 5\n" + "To be equal to: 2*3\n" + " Which is: 6"); +} + +// Tests ASSERT_EQ(NULL, pointer). +#if GTEST_CAN_COMPARE_NULL +TEST(AssertionTest, ASSERT_EQ_NULL) { + // A success. + const char* p = NULL; + // Some older GCC versions may issue a spurious waring in this or the next + // assertion statement. This warning should not be suppressed with + // static_cast since the test verifies the ability to use bare NULL as the + // expected parameter to the macro. + ASSERT_EQ(NULL, p); + + // A failure. + static int n = 0; + EXPECT_FATAL_FAILURE(ASSERT_EQ(NULL, &n), + "To be equal to: &n\n"); +} +#endif // GTEST_CAN_COMPARE_NULL + +// Tests ASSERT_EQ(0, non_pointer). Since the literal 0 can be +// treated as a null pointer by the compiler, we need to make sure +// that ASSERT_EQ(0, non_pointer) isn't interpreted by Google Test as +// ASSERT_EQ(static_cast(NULL), non_pointer). +TEST(ExpectTest, ASSERT_EQ_0) { + int n = 0; + + // A success. + ASSERT_EQ(0, n); + + // A failure. + EXPECT_FATAL_FAILURE(ASSERT_EQ(0, 5.6), + "Expected: 0"); +} + +// Tests ASSERT_NE. +TEST(AssertionTest, ASSERT_NE) { + ASSERT_NE(6, 7); + EXPECT_FATAL_FAILURE(ASSERT_NE('a', 'a'), + "Expected: ('a') != ('a'), " + "actual: 'a' (97, 0x61) vs 'a' (97, 0x61)"); +} + +// Tests ASSERT_LE. +TEST(AssertionTest, ASSERT_LE) { + ASSERT_LE(2, 3); + ASSERT_LE(2, 2); + EXPECT_FATAL_FAILURE(ASSERT_LE(2, 0), + "Expected: (2) <= (0), actual: 2 vs 0"); +} + +// Tests ASSERT_LT. +TEST(AssertionTest, ASSERT_LT) { + ASSERT_LT(2, 3); + EXPECT_FATAL_FAILURE(ASSERT_LT(2, 2), + "Expected: (2) < (2), actual: 2 vs 2"); +} + +// Tests ASSERT_GE. +TEST(AssertionTest, ASSERT_GE) { + ASSERT_GE(2, 1); + ASSERT_GE(2, 2); + EXPECT_FATAL_FAILURE(ASSERT_GE(2, 3), + "Expected: (2) >= (3), actual: 2 vs 3"); +} + +// Tests ASSERT_GT. +TEST(AssertionTest, ASSERT_GT) { + ASSERT_GT(2, 1); + EXPECT_FATAL_FAILURE(ASSERT_GT(2, 2), + "Expected: (2) > (2), actual: 2 vs 2"); +} + +#if GTEST_HAS_EXCEPTIONS + +void ThrowNothing() {} + +// Tests ASSERT_THROW. +TEST(AssertionTest, ASSERT_THROW) { + ASSERT_THROW(ThrowAnInteger(), int); + +# ifndef __BORLANDC__ + + // ICE's in C++Builder 2007 and 2009. + EXPECT_FATAL_FAILURE( + ASSERT_THROW(ThrowAnInteger(), bool), + "Expected: ThrowAnInteger() throws an exception of type bool.\n" + " Actual: it throws a different type."); +# endif + + EXPECT_FATAL_FAILURE( + ASSERT_THROW(ThrowNothing(), bool), + "Expected: ThrowNothing() throws an exception of type bool.\n" + " Actual: it throws nothing."); +} + +// Tests ASSERT_NO_THROW. 
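The ResultIsEven tests above rely on a helper returning testing::AssertionResult so that the failure text can explain itself. A minimal sketch of that pattern, with made-up names, looks like this:

#include "gtest/gtest.h"

testing::AssertionResult IsDivisibleBy(int value, int divisor) {
  if (value % divisor == 0)
    return testing::AssertionSuccess();
  return testing::AssertionFailure()
         << value << " is not divisible by " << divisor;
}

TEST(PredicateDemo, Divisibility) {
  EXPECT_TRUE(IsDivisibleBy(10, 5));   // Succeeds silently.
  EXPECT_FALSE(IsDivisibleBy(10, 3));  // The result carries its own explanation.
}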
+TEST(AssertionTest, ASSERT_NO_THROW) { + ASSERT_NO_THROW(ThrowNothing()); + EXPECT_FATAL_FAILURE(ASSERT_NO_THROW(ThrowAnInteger()), + "Expected: ThrowAnInteger() doesn't throw an exception." + "\n Actual: it throws."); +} + +// Tests ASSERT_ANY_THROW. +TEST(AssertionTest, ASSERT_ANY_THROW) { + ASSERT_ANY_THROW(ThrowAnInteger()); + EXPECT_FATAL_FAILURE( + ASSERT_ANY_THROW(ThrowNothing()), + "Expected: ThrowNothing() throws an exception.\n" + " Actual: it doesn't."); +} + +#endif // GTEST_HAS_EXCEPTIONS + +// Makes sure we deal with the precedence of <<. This test should +// compile. +TEST(AssertionTest, AssertPrecedence) { + ASSERT_EQ(1 < 2, true); + bool false_value = false; + ASSERT_EQ(true && false_value, false); +} + +// A subroutine used by the following test. +void TestEq1(int x) { + ASSERT_EQ(1, x); +} + +// Tests calling a test subroutine that's not part of a fixture. +TEST(AssertionTest, NonFixtureSubroutine) { + EXPECT_FATAL_FAILURE(TestEq1(2), + "To be equal to: x"); +} + +// An uncopyable class. +class Uncopyable { + public: + explicit Uncopyable(int a_value) : value_(a_value) {} + + int value() const { return value_; } + bool operator==(const Uncopyable& rhs) const { + return value() == rhs.value(); + } + private: + // This constructor deliberately has no implementation, as we don't + // want this class to be copyable. + Uncopyable(const Uncopyable&); // NOLINT + + int value_; +}; + +::std::ostream& operator<<(::std::ostream& os, const Uncopyable& value) { + return os << value.value(); +} + + +bool IsPositiveUncopyable(const Uncopyable& x) { + return x.value() > 0; +} + +// A subroutine used by the following test. +void TestAssertNonPositive() { + Uncopyable y(-1); + ASSERT_PRED1(IsPositiveUncopyable, y); +} +// A subroutine used by the following test. +void TestAssertEqualsUncopyable() { + Uncopyable x(5); + Uncopyable y(-1); + ASSERT_EQ(x, y); +} + +// Tests that uncopyable objects can be used in assertions. +TEST(AssertionTest, AssertWorksWithUncopyableObject) { + Uncopyable x(5); + ASSERT_PRED1(IsPositiveUncopyable, x); + ASSERT_EQ(x, x); + EXPECT_FATAL_FAILURE(TestAssertNonPositive(), + "IsPositiveUncopyable(y) evaluates to false, where\ny evaluates to -1"); + EXPECT_FATAL_FAILURE(TestAssertEqualsUncopyable(), + "Expected: x\n Which is: 5\nTo be equal to: y\n Which is: -1"); +} + +// Tests that uncopyable objects can be used in expects. +TEST(AssertionTest, ExpectWorksWithUncopyableObject) { + Uncopyable x(5); + EXPECT_PRED1(IsPositiveUncopyable, x); + Uncopyable y(-1); + EXPECT_NONFATAL_FAILURE(EXPECT_PRED1(IsPositiveUncopyable, y), + "IsPositiveUncopyable(y) evaluates to false, where\ny evaluates to -1"); + EXPECT_EQ(x, x); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(x, y), + "Expected: x\n Which is: 5\nTo be equal to: y\n Which is: -1"); +} + +enum NamedEnum { + kE1 = 0, + kE2 = 1 +}; + +TEST(AssertionTest, NamedEnum) { + EXPECT_EQ(kE1, kE1); + EXPECT_LT(kE1, kE2); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(kE1, kE2), "Which is: 0"); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(kE1, kE2), "Which is: 1"); +} + +// The version of gcc used in XCode 2.2 has a bug and doesn't allow +// anonymous enums in assertions. Therefore the following test is not +// done on Mac. +// Sun Studio and HP aCC also reject this code. +#if !GTEST_OS_MAC && !defined(__SUNPRO_CC) && !defined(__HP_aCC) + +// Tests using assertions with anonymous enums. 
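As a companion to the ASSERT_PRED1 usage above, here is a small sketch of the *_PRED* macros with an ordinary bool-returning function; the function and test names are illustrative.

#include "gtest/gtest.h"

// Returns true if and only if m and n have no common divisor except 1.
bool MutuallyPrime(int m, int n) {
  while (n != 0) {
    const int r = m % n;
    m = n;
    n = r;
  }
  return m == 1;  // gcd(m, n) == 1
}

TEST(PredDemo, MutuallyPrime) {
  EXPECT_PRED2(MutuallyPrime, 3, 8);
  ASSERT_PRED2(MutuallyPrime, 7, 10);
}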
+enum { + kCaseA = -1, + +# if GTEST_OS_LINUX + + // We want to test the case where the size of the anonymous enum is + // larger than sizeof(int), to make sure our implementation of the + // assertions doesn't truncate the enums. However, MSVC + // (incorrectly) doesn't allow an enum value to exceed the range of + // an int, so this has to be conditionally compiled. + // + // On Linux, kCaseB and kCaseA have the same value when truncated to + // int size. We want to test whether this will confuse the + // assertions. + kCaseB = testing::internal::kMaxBiggestInt, + +# else + + kCaseB = INT_MAX, + +# endif // GTEST_OS_LINUX + + kCaseC = 42 +}; + +TEST(AssertionTest, AnonymousEnum) { +# if GTEST_OS_LINUX + + EXPECT_EQ(static_cast(kCaseA), static_cast(kCaseB)); + +# endif // GTEST_OS_LINUX + + EXPECT_EQ(kCaseA, kCaseA); + EXPECT_NE(kCaseA, kCaseB); + EXPECT_LT(kCaseA, kCaseB); + EXPECT_LE(kCaseA, kCaseB); + EXPECT_GT(kCaseB, kCaseA); + EXPECT_GE(kCaseA, kCaseA); + EXPECT_NONFATAL_FAILURE(EXPECT_GE(kCaseA, kCaseB), + "(kCaseA) >= (kCaseB)"); + EXPECT_NONFATAL_FAILURE(EXPECT_GE(kCaseA, kCaseC), + "-1 vs 42"); + + ASSERT_EQ(kCaseA, kCaseA); + ASSERT_NE(kCaseA, kCaseB); + ASSERT_LT(kCaseA, kCaseB); + ASSERT_LE(kCaseA, kCaseB); + ASSERT_GT(kCaseB, kCaseA); + ASSERT_GE(kCaseA, kCaseA); + +# ifndef __BORLANDC__ + + // ICE's in C++Builder. + EXPECT_FATAL_FAILURE(ASSERT_EQ(kCaseA, kCaseB), + "To be equal to: kCaseB"); + EXPECT_FATAL_FAILURE(ASSERT_EQ(kCaseA, kCaseC), + "Which is: 42"); +# endif + + EXPECT_FATAL_FAILURE(ASSERT_EQ(kCaseA, kCaseC), + "Which is: -1"); +} + +#endif // !GTEST_OS_MAC && !defined(__SUNPRO_CC) + +#if GTEST_OS_WINDOWS + +static HRESULT UnexpectedHRESULTFailure() { + return E_UNEXPECTED; +} + +static HRESULT OkHRESULTSuccess() { + return S_OK; +} + +static HRESULT FalseHRESULTSuccess() { + return S_FALSE; +} + +// HRESULT assertion tests test both zero and non-zero +// success codes as well as failure message for each. +// +// Windows CE doesn't support message texts. +TEST(HRESULTAssertionTest, EXPECT_HRESULT_SUCCEEDED) { + EXPECT_HRESULT_SUCCEEDED(S_OK); + EXPECT_HRESULT_SUCCEEDED(S_FALSE); + + EXPECT_NONFATAL_FAILURE(EXPECT_HRESULT_SUCCEEDED(UnexpectedHRESULTFailure()), + "Expected: (UnexpectedHRESULTFailure()) succeeds.\n" + " Actual: 0x8000FFFF"); +} + +TEST(HRESULTAssertionTest, ASSERT_HRESULT_SUCCEEDED) { + ASSERT_HRESULT_SUCCEEDED(S_OK); + ASSERT_HRESULT_SUCCEEDED(S_FALSE); + + EXPECT_FATAL_FAILURE(ASSERT_HRESULT_SUCCEEDED(UnexpectedHRESULTFailure()), + "Expected: (UnexpectedHRESULTFailure()) succeeds.\n" + " Actual: 0x8000FFFF"); +} + +TEST(HRESULTAssertionTest, EXPECT_HRESULT_FAILED) { + EXPECT_HRESULT_FAILED(E_UNEXPECTED); + + EXPECT_NONFATAL_FAILURE(EXPECT_HRESULT_FAILED(OkHRESULTSuccess()), + "Expected: (OkHRESULTSuccess()) fails.\n" + " Actual: 0x0"); + EXPECT_NONFATAL_FAILURE(EXPECT_HRESULT_FAILED(FalseHRESULTSuccess()), + "Expected: (FalseHRESULTSuccess()) fails.\n" + " Actual: 0x1"); +} + +TEST(HRESULTAssertionTest, ASSERT_HRESULT_FAILED) { + ASSERT_HRESULT_FAILED(E_UNEXPECTED); + +# ifndef __BORLANDC__ + + // ICE's in C++Builder 2007 and 2009. + EXPECT_FATAL_FAILURE(ASSERT_HRESULT_FAILED(OkHRESULTSuccess()), + "Expected: (OkHRESULTSuccess()) fails.\n" + " Actual: 0x0"); +# endif + + EXPECT_FATAL_FAILURE(ASSERT_HRESULT_FAILED(FalseHRESULTSuccess()), + "Expected: (FalseHRESULTSuccess()) fails.\n" + " Actual: 0x1"); +} + +// Tests that streaming to the HRESULT macros works. 
+TEST(HRESULTAssertionTest, Streaming) { + EXPECT_HRESULT_SUCCEEDED(S_OK) << "unexpected failure"; + ASSERT_HRESULT_SUCCEEDED(S_OK) << "unexpected failure"; + EXPECT_HRESULT_FAILED(E_UNEXPECTED) << "unexpected failure"; + ASSERT_HRESULT_FAILED(E_UNEXPECTED) << "unexpected failure"; + + EXPECT_NONFATAL_FAILURE( + EXPECT_HRESULT_SUCCEEDED(E_UNEXPECTED) << "expected failure", + "expected failure"); + +# ifndef __BORLANDC__ + + // ICE's in C++Builder 2007 and 2009. + EXPECT_FATAL_FAILURE( + ASSERT_HRESULT_SUCCEEDED(E_UNEXPECTED) << "expected failure", + "expected failure"); +# endif + + EXPECT_NONFATAL_FAILURE( + EXPECT_HRESULT_FAILED(S_OK) << "expected failure", + "expected failure"); + + EXPECT_FATAL_FAILURE( + ASSERT_HRESULT_FAILED(S_OK) << "expected failure", + "expected failure"); +} + +#endif // GTEST_OS_WINDOWS + +#ifdef __BORLANDC__ +// Silences warnings: "Condition is always true", "Unreachable code" +# pragma option push -w-ccc -w-rch +#endif + +// Tests that the assertion macros behave like single statements. +TEST(AssertionSyntaxTest, BasicAssertionsBehavesLikeSingleStatement) { + if (AlwaysFalse()) + ASSERT_TRUE(false) << "This should never be executed; " + "It's a compilation test only."; + + if (AlwaysTrue()) + EXPECT_FALSE(false); + else + ; // NOLINT + + if (AlwaysFalse()) + ASSERT_LT(1, 3); + + if (AlwaysFalse()) + ; // NOLINT + else + EXPECT_GT(3, 2) << ""; +} + +#if GTEST_HAS_EXCEPTIONS +// Tests that the compiler will not complain about unreachable code in the +// EXPECT_THROW/EXPECT_ANY_THROW/EXPECT_NO_THROW macros. +TEST(ExpectThrowTest, DoesNotGenerateUnreachableCodeWarning) { + int n = 0; + + EXPECT_THROW(throw 1, int); + EXPECT_NONFATAL_FAILURE(EXPECT_THROW(n++, int), ""); + EXPECT_NONFATAL_FAILURE(EXPECT_THROW(throw 1, const char*), ""); + EXPECT_NO_THROW(n++); + EXPECT_NONFATAL_FAILURE(EXPECT_NO_THROW(throw 1), ""); + EXPECT_ANY_THROW(throw 1); + EXPECT_NONFATAL_FAILURE(EXPECT_ANY_THROW(n++), ""); +} + +TEST(AssertionSyntaxTest, ExceptionAssertionsBehavesLikeSingleStatement) { + if (AlwaysFalse()) + EXPECT_THROW(ThrowNothing(), bool); + + if (AlwaysTrue()) + EXPECT_THROW(ThrowAnInteger(), int); + else + ; // NOLINT + + if (AlwaysFalse()) + EXPECT_NO_THROW(ThrowAnInteger()); + + if (AlwaysTrue()) + EXPECT_NO_THROW(ThrowNothing()); + else + ; // NOLINT + + if (AlwaysFalse()) + EXPECT_ANY_THROW(ThrowNothing()); + + if (AlwaysTrue()) + EXPECT_ANY_THROW(ThrowAnInteger()); + else + ; // NOLINT +} +#endif // GTEST_HAS_EXCEPTIONS + +TEST(AssertionSyntaxTest, NoFatalFailureAssertionsBehavesLikeSingleStatement) { + if (AlwaysFalse()) + EXPECT_NO_FATAL_FAILURE(FAIL()) << "This should never be executed. " + << "It's a compilation test only."; + else + ; // NOLINT + + if (AlwaysFalse()) + ASSERT_NO_FATAL_FAILURE(FAIL()) << ""; + else + ; // NOLINT + + if (AlwaysTrue()) + EXPECT_NO_FATAL_FAILURE(SUCCEED()); + else + ; // NOLINT + + if (AlwaysFalse()) + ; // NOLINT + else + ASSERT_NO_FATAL_FAILURE(SUCCEED()); +} + +// Tests that the assertion macros work well with switch statements. +TEST(AssertionSyntaxTest, WorksWithSwitch) { + switch (0) { + case 1: + break; + default: + ASSERT_TRUE(true); + } + + switch (0) + case 0: + EXPECT_FALSE(false) << "EXPECT_FALSE failed in switch case"; + + // Binary assertions are implemented using a different code path + // than the Boolean assertions. Hence we test them separately. 
+ switch (0) { + case 1: + default: + ASSERT_EQ(1, 1) << "ASSERT_EQ failed in default switch handler"; + } + + switch (0) + case 0: + EXPECT_NE(1, 2); +} + +#if GTEST_HAS_EXCEPTIONS + +void ThrowAString() { + throw "std::string"; +} + +// Test that the exception assertion macros compile and work with const +// type qualifier. +TEST(AssertionSyntaxTest, WorksWithConst) { + ASSERT_THROW(ThrowAString(), const char*); + + EXPECT_THROW(ThrowAString(), const char*); +} + +#endif // GTEST_HAS_EXCEPTIONS + +} // namespace + +namespace testing { + +// Tests that Google Test tracks SUCCEED*. +TEST(SuccessfulAssertionTest, SUCCEED) { + SUCCEED(); + SUCCEED() << "OK"; + EXPECT_EQ(2, GetUnitTestImpl()->current_test_result()->total_part_count()); +} + +// Tests that Google Test doesn't track successful EXPECT_*. +TEST(SuccessfulAssertionTest, EXPECT) { + EXPECT_TRUE(true); + EXPECT_EQ(0, GetUnitTestImpl()->current_test_result()->total_part_count()); +} + +// Tests that Google Test doesn't track successful EXPECT_STR*. +TEST(SuccessfulAssertionTest, EXPECT_STR) { + EXPECT_STREQ("", ""); + EXPECT_EQ(0, GetUnitTestImpl()->current_test_result()->total_part_count()); +} + +// Tests that Google Test doesn't track successful ASSERT_*. +TEST(SuccessfulAssertionTest, ASSERT) { + ASSERT_TRUE(true); + EXPECT_EQ(0, GetUnitTestImpl()->current_test_result()->total_part_count()); +} + +// Tests that Google Test doesn't track successful ASSERT_STR*. +TEST(SuccessfulAssertionTest, ASSERT_STR) { + ASSERT_STREQ("", ""); + EXPECT_EQ(0, GetUnitTestImpl()->current_test_result()->total_part_count()); +} + +} // namespace testing + +namespace { + +// Tests the message streaming variation of assertions. + +TEST(AssertionWithMessageTest, EXPECT) { + EXPECT_EQ(1, 1) << "This should succeed."; + EXPECT_NONFATAL_FAILURE(EXPECT_NE(1, 1) << "Expected failure #1.", + "Expected failure #1"); + EXPECT_LE(1, 2) << "This should succeed."; + EXPECT_NONFATAL_FAILURE(EXPECT_LT(1, 0) << "Expected failure #2.", + "Expected failure #2."); + EXPECT_GE(1, 0) << "This should succeed."; + EXPECT_NONFATAL_FAILURE(EXPECT_GT(1, 2) << "Expected failure #3.", + "Expected failure #3."); + + EXPECT_STREQ("1", "1") << "This should succeed."; + EXPECT_NONFATAL_FAILURE(EXPECT_STRNE("1", "1") << "Expected failure #4.", + "Expected failure #4."); + EXPECT_STRCASEEQ("a", "A") << "This should succeed."; + EXPECT_NONFATAL_FAILURE(EXPECT_STRCASENE("a", "A") << "Expected failure #5.", + "Expected failure #5."); + + EXPECT_FLOAT_EQ(1, 1) << "This should succeed."; + EXPECT_NONFATAL_FAILURE(EXPECT_DOUBLE_EQ(1, 1.2) << "Expected failure #6.", + "Expected failure #6."); + EXPECT_NEAR(1, 1.1, 0.2) << "This should succeed."; +} + +TEST(AssertionWithMessageTest, ASSERT) { + ASSERT_EQ(1, 1) << "This should succeed."; + ASSERT_NE(1, 2) << "This should succeed."; + ASSERT_LE(1, 2) << "This should succeed."; + ASSERT_LT(1, 2) << "This should succeed."; + ASSERT_GE(1, 0) << "This should succeed."; + EXPECT_FATAL_FAILURE(ASSERT_GT(1, 2) << "Expected failure.", + "Expected failure."); +} + +TEST(AssertionWithMessageTest, ASSERT_STR) { + ASSERT_STREQ("1", "1") << "This should succeed."; + ASSERT_STRNE("1", "2") << "This should succeed."; + ASSERT_STRCASEEQ("a", "A") << "This should succeed."; + EXPECT_FATAL_FAILURE(ASSERT_STRCASENE("a", "A") << "Expected failure.", + "Expected failure."); +} + +TEST(AssertionWithMessageTest, ASSERT_FLOATING) { + ASSERT_FLOAT_EQ(1, 1) << "This should succeed."; + ASSERT_DOUBLE_EQ(1, 1) << "This should succeed."; + 
EXPECT_FATAL_FAILURE(ASSERT_NEAR(1,1.2, 0.1) << "Expect failure.", // NOLINT + "Expect failure."); + // To work around a bug in gcc 2.95.0, there is intentionally no + // space after the first comma in the previous statement. +} + +// Tests using ASSERT_FALSE with a streamed message. +TEST(AssertionWithMessageTest, ASSERT_FALSE) { + ASSERT_FALSE(false) << "This shouldn't fail."; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_FALSE(true) << "Expected failure: " << 2 << " > " << 1 + << " evaluates to " << true; + }, "Expected failure"); +} + +// Tests using FAIL with a streamed message. +TEST(AssertionWithMessageTest, FAIL) { + EXPECT_FATAL_FAILURE(FAIL() << 0, + "0"); +} + +// Tests using SUCCEED with a streamed message. +TEST(AssertionWithMessageTest, SUCCEED) { + SUCCEED() << "Success == " << 1; +} + +// Tests using ASSERT_TRUE with a streamed message. +TEST(AssertionWithMessageTest, ASSERT_TRUE) { + ASSERT_TRUE(true) << "This should succeed."; + ASSERT_TRUE(true) << true; + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_TRUE(false) << static_cast(NULL) + << static_cast(NULL); + }, "(null)(null)"); +} + +#if GTEST_OS_WINDOWS +// Tests using wide strings in assertion messages. +TEST(AssertionWithMessageTest, WideStringMessage) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_TRUE(false) << L"This failure is expected.\x8119"; + }, "This failure is expected."); + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_EQ(1, 2) << "This failure is " + << L"expected too.\x8120"; + }, "This failure is expected too."); +} +#endif // GTEST_OS_WINDOWS + +// Tests EXPECT_TRUE. +TEST(ExpectTest, EXPECT_TRUE) { + EXPECT_TRUE(true) << "Intentional success"; + EXPECT_NONFATAL_FAILURE(EXPECT_TRUE(false) << "Intentional failure #1.", + "Intentional failure #1."); + EXPECT_NONFATAL_FAILURE(EXPECT_TRUE(false) << "Intentional failure #2.", + "Intentional failure #2."); + EXPECT_TRUE(2 > 1); // NOLINT + EXPECT_NONFATAL_FAILURE(EXPECT_TRUE(2 < 1), + "Value of: 2 < 1\n" + " Actual: false\n" + "Expected: true"); + EXPECT_NONFATAL_FAILURE(EXPECT_TRUE(2 > 3), + "2 > 3"); +} + +// Tests EXPECT_TRUE(predicate) for predicates returning AssertionResult. +TEST(ExpectTest, ExpectTrueWithAssertionResult) { + EXPECT_TRUE(ResultIsEven(2)); + EXPECT_NONFATAL_FAILURE(EXPECT_TRUE(ResultIsEven(3)), + "Value of: ResultIsEven(3)\n" + " Actual: false (3 is odd)\n" + "Expected: true"); + EXPECT_TRUE(ResultIsEvenNoExplanation(2)); + EXPECT_NONFATAL_FAILURE(EXPECT_TRUE(ResultIsEvenNoExplanation(3)), + "Value of: ResultIsEvenNoExplanation(3)\n" + " Actual: false (3 is odd)\n" + "Expected: true"); +} + +// Tests EXPECT_FALSE with a streamed message. +TEST(ExpectTest, EXPECT_FALSE) { + EXPECT_FALSE(2 < 1); // NOLINT + EXPECT_FALSE(false) << "Intentional success"; + EXPECT_NONFATAL_FAILURE(EXPECT_FALSE(true) << "Intentional failure #1.", + "Intentional failure #1."); + EXPECT_NONFATAL_FAILURE(EXPECT_FALSE(true) << "Intentional failure #2.", + "Intentional failure #2."); + EXPECT_NONFATAL_FAILURE(EXPECT_FALSE(2 > 1), + "Value of: 2 > 1\n" + " Actual: true\n" + "Expected: false"); + EXPECT_NONFATAL_FAILURE(EXPECT_FALSE(2 < 3), + "2 < 3"); +} + +// Tests EXPECT_FALSE(predicate) for predicates returning AssertionResult. 
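A brief sketch of the message-streaming idiom these tests pin down, using arbitrary values: anything with an operator<< can be appended to an assertion to give the failure extra context.

#include <string>
#include "gtest/gtest.h"

TEST(MessageStreamDemo, StreamedContext) {
  const std::string config = "cache_size=64";  // Illustrative value.
  const int attempt = 3;
  EXPECT_TRUE(attempt < 5) << "config: " << config
                           << ", attempt #" << attempt;
}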
+TEST(ExpectTest, ExpectFalseWithAssertionResult) { + EXPECT_FALSE(ResultIsEven(3)); + EXPECT_NONFATAL_FAILURE(EXPECT_FALSE(ResultIsEven(2)), + "Value of: ResultIsEven(2)\n" + " Actual: true (2 is even)\n" + "Expected: false"); + EXPECT_FALSE(ResultIsEvenNoExplanation(3)); + EXPECT_NONFATAL_FAILURE(EXPECT_FALSE(ResultIsEvenNoExplanation(2)), + "Value of: ResultIsEvenNoExplanation(2)\n" + " Actual: true\n" + "Expected: false"); +} + +#ifdef __BORLANDC__ +// Restores warnings after previous "#pragma option push" supressed them +# pragma option pop +#endif + +// Tests EXPECT_EQ. +TEST(ExpectTest, EXPECT_EQ) { + EXPECT_EQ(5, 2 + 3); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(5, 2*3), + " Expected: 5\n" + "To be equal to: 2*3\n" + " Which is: 6"); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(5, 2 - 3), + "2 - 3"); +} + +// Tests using EXPECT_EQ on double values. The purpose is to make +// sure that the specialization we did for integer and anonymous enums +// isn't used for double arguments. +TEST(ExpectTest, EXPECT_EQ_Double) { + // A success. + EXPECT_EQ(5.6, 5.6); + + // A failure. + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(5.1, 5.2), + "5.1"); +} + +#if GTEST_CAN_COMPARE_NULL +// Tests EXPECT_EQ(NULL, pointer). +TEST(ExpectTest, EXPECT_EQ_NULL) { + // A success. + const char* p = NULL; + // Some older GCC versions may issue a spurious warning in this or the next + // assertion statement. This warning should not be suppressed with + // static_cast since the test verifies the ability to use bare NULL as the + // expected parameter to the macro. + EXPECT_EQ(NULL, p); + + // A failure. + int n = 0; + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(NULL, &n), + "To be equal to: &n\n"); +} +#endif // GTEST_CAN_COMPARE_NULL + +// Tests EXPECT_EQ(0, non_pointer). Since the literal 0 can be +// treated as a null pointer by the compiler, we need to make sure +// that EXPECT_EQ(0, non_pointer) isn't interpreted by Google Test as +// EXPECT_EQ(static_cast(NULL), non_pointer). +TEST(ExpectTest, EXPECT_EQ_0) { + int n = 0; + + // A success. + EXPECT_EQ(0, n); + + // A failure. + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(0, 5.6), + "Expected: 0"); +} + +// Tests EXPECT_NE. +TEST(ExpectTest, EXPECT_NE) { + EXPECT_NE(6, 7); + + EXPECT_NONFATAL_FAILURE(EXPECT_NE('a', 'a'), + "Expected: ('a') != ('a'), " + "actual: 'a' (97, 0x61) vs 'a' (97, 0x61)"); + EXPECT_NONFATAL_FAILURE(EXPECT_NE(2, 2), + "2"); + char* const p0 = NULL; + EXPECT_NONFATAL_FAILURE(EXPECT_NE(p0, p0), + "p0"); + // Only way to get the Nokia compiler to compile the cast + // is to have a separate void* variable first. Putting + // the two casts on the same line doesn't work, neither does + // a direct C-style to char*. + void* pv1 = (void*)0x1234; // NOLINT + char* const p1 = reinterpret_cast(pv1); + EXPECT_NONFATAL_FAILURE(EXPECT_NE(p1, p1), + "p1"); +} + +// Tests EXPECT_LE. +TEST(ExpectTest, EXPECT_LE) { + EXPECT_LE(2, 3); + EXPECT_LE(2, 2); + EXPECT_NONFATAL_FAILURE(EXPECT_LE(2, 0), + "Expected: (2) <= (0), actual: 2 vs 0"); + EXPECT_NONFATAL_FAILURE(EXPECT_LE(1.1, 0.9), + "(1.1) <= (0.9)"); +} + +// Tests EXPECT_LT. +TEST(ExpectTest, EXPECT_LT) { + EXPECT_LT(2, 3); + EXPECT_NONFATAL_FAILURE(EXPECT_LT(2, 2), + "Expected: (2) < (2), actual: 2 vs 2"); + EXPECT_NONFATAL_FAILURE(EXPECT_LT(2, 1), + "(2) < (1)"); +} + +// Tests EXPECT_GE. +TEST(ExpectTest, EXPECT_GE) { + EXPECT_GE(2, 1); + EXPECT_GE(2, 2); + EXPECT_NONFATAL_FAILURE(EXPECT_GE(2, 3), + "Expected: (2) >= (3), actual: 2 vs 3"); + EXPECT_NONFATAL_FAILURE(EXPECT_GE(0.9, 1.1), + "(0.9) >= (1.1)"); +} + +// Tests EXPECT_GT. 
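Following the 0-versus-null-pointer discussion above, a minimal usage sketch, guarded the same way these tests are:

#include <cstddef>
#include "gtest/gtest.h"

TEST(NullLiteralDemo, ZeroAndNullCompareAsExpected) {
  int counter = 0;
  EXPECT_EQ(0, counter);   // 0 is compared as an integer value.
#if GTEST_CAN_COMPARE_NULL
  const char* name = NULL;
  EXPECT_EQ(NULL, name);   // NULL is compared as a pointer.
#endif
}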
+TEST(ExpectTest, EXPECT_GT) { + EXPECT_GT(2, 1); + EXPECT_NONFATAL_FAILURE(EXPECT_GT(2, 2), + "Expected: (2) > (2), actual: 2 vs 2"); + EXPECT_NONFATAL_FAILURE(EXPECT_GT(2, 3), + "(2) > (3)"); +} + +#if GTEST_HAS_EXCEPTIONS + +// Tests EXPECT_THROW. +TEST(ExpectTest, EXPECT_THROW) { + EXPECT_THROW(ThrowAnInteger(), int); + EXPECT_NONFATAL_FAILURE(EXPECT_THROW(ThrowAnInteger(), bool), + "Expected: ThrowAnInteger() throws an exception of " + "type bool.\n Actual: it throws a different type."); + EXPECT_NONFATAL_FAILURE( + EXPECT_THROW(ThrowNothing(), bool), + "Expected: ThrowNothing() throws an exception of type bool.\n" + " Actual: it throws nothing."); +} + +// Tests EXPECT_NO_THROW. +TEST(ExpectTest, EXPECT_NO_THROW) { + EXPECT_NO_THROW(ThrowNothing()); + EXPECT_NONFATAL_FAILURE(EXPECT_NO_THROW(ThrowAnInteger()), + "Expected: ThrowAnInteger() doesn't throw an " + "exception.\n Actual: it throws."); +} + +// Tests EXPECT_ANY_THROW. +TEST(ExpectTest, EXPECT_ANY_THROW) { + EXPECT_ANY_THROW(ThrowAnInteger()); + EXPECT_NONFATAL_FAILURE( + EXPECT_ANY_THROW(ThrowNothing()), + "Expected: ThrowNothing() throws an exception.\n" + " Actual: it doesn't."); +} + +#endif // GTEST_HAS_EXCEPTIONS + +// Make sure we deal with the precedence of <<. +TEST(ExpectTest, ExpectPrecedence) { + EXPECT_EQ(1 < 2, true); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(true, true && false), + "To be equal to: true && false"); +} + + +// Tests the StreamableToString() function. + +// Tests using StreamableToString() on a scalar. +TEST(StreamableToStringTest, Scalar) { + EXPECT_STREQ("5", StreamableToString(5).c_str()); +} + +// Tests using StreamableToString() on a non-char pointer. +TEST(StreamableToStringTest, Pointer) { + int n = 0; + int* p = &n; + EXPECT_STRNE("(null)", StreamableToString(p).c_str()); +} + +// Tests using StreamableToString() on a NULL non-char pointer. +TEST(StreamableToStringTest, NullPointer) { + int* p = NULL; + EXPECT_STREQ("(null)", StreamableToString(p).c_str()); +} + +// Tests using StreamableToString() on a C string. +TEST(StreamableToStringTest, CString) { + EXPECT_STREQ("Foo", StreamableToString("Foo").c_str()); +} + +// Tests using StreamableToString() on a NULL C string. +TEST(StreamableToStringTest, NullCString) { + char* p = NULL; + EXPECT_STREQ("(null)", StreamableToString(p).c_str()); +} + +// Tests using streamable values as assertion messages. + +// Tests using std::string as an assertion message. +TEST(StreamableTest, string) { + static const std::string str( + "This failure message is a std::string, and is expected."); + EXPECT_FATAL_FAILURE(FAIL() << str, + str.c_str()); +} + +// Tests that we can output strings containing embedded NULs. +// Limited to Linux because we can only do this with std::string's. +TEST(StreamableTest, stringWithEmbeddedNUL) { + static const char char_array_with_nul[] = + "Here's a NUL\0 and some more string"; + static const std::string string_with_nul(char_array_with_nul, + sizeof(char_array_with_nul) + - 1); // drops the trailing NUL + EXPECT_FATAL_FAILURE(FAIL() << string_with_nul, + "Here's a NUL\\0 and some more string"); +} + +// Tests that we can output a NUL char. +TEST(StreamableTest, NULChar) { + EXPECT_FATAL_FAILURE({ // NOLINT + FAIL() << "A NUL" << '\0' << " and some more string"; + }, "A NUL\\0 and some more string"); +} + +// Tests using int as an assertion message. +TEST(StreamableTest, int) { + EXPECT_FATAL_FAILURE(FAIL() << 900913, + "900913"); +} + +// Tests using NULL char pointer as an assertion message. 
+// +// In MSVC, streaming a NULL char * causes access violation. Google Test +// implemented a workaround (substituting "(null)" for NULL). This +// tests whether the workaround works. +TEST(StreamableTest, NullCharPtr) { + EXPECT_FATAL_FAILURE(FAIL() << static_cast(NULL), + "(null)"); +} + +// Tests that basic IO manipulators (endl, ends, and flush) can be +// streamed to testing::Message. +TEST(StreamableTest, BasicIoManip) { + EXPECT_FATAL_FAILURE({ // NOLINT + FAIL() << "Line 1." << std::endl + << "A NUL char " << std::ends << std::flush << " in line 2."; + }, "Line 1.\nA NUL char \\0 in line 2."); +} + +// Tests the macros that haven't been covered so far. + +void AddFailureHelper(bool* aborted) { + *aborted = true; + ADD_FAILURE() << "Intentional failure."; + *aborted = false; +} + +// Tests ADD_FAILURE. +TEST(MacroTest, ADD_FAILURE) { + bool aborted = true; + EXPECT_NONFATAL_FAILURE(AddFailureHelper(&aborted), + "Intentional failure."); + EXPECT_FALSE(aborted); +} + +// Tests ADD_FAILURE_AT. +TEST(MacroTest, ADD_FAILURE_AT) { + // Verifies that ADD_FAILURE_AT does generate a nonfatal failure and + // the failure message contains the user-streamed part. + EXPECT_NONFATAL_FAILURE(ADD_FAILURE_AT("foo.cc", 42) << "Wrong!", "Wrong!"); + + // Verifies that the user-streamed part is optional. + EXPECT_NONFATAL_FAILURE(ADD_FAILURE_AT("foo.cc", 42), "Failed"); + + // Unfortunately, we cannot verify that the failure message contains + // the right file path and line number the same way, as + // EXPECT_NONFATAL_FAILURE() doesn't get to see the file path and + // line number. Instead, we do that in gtest_output_test_.cc. +} + +// Tests FAIL. +TEST(MacroTest, FAIL) { + EXPECT_FATAL_FAILURE(FAIL(), + "Failed"); + EXPECT_FATAL_FAILURE(FAIL() << "Intentional failure.", + "Intentional failure."); +} + +// Tests SUCCEED +TEST(MacroTest, SUCCEED) { + SUCCEED(); + SUCCEED() << "Explicit success."; +} + +// Tests for EXPECT_EQ() and ASSERT_EQ(). +// +// These tests fail *intentionally*, s.t. the failure messages can be +// generated and tested. +// +// We have different tests for different argument types. + +// Tests using bool values in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, Bool) { + EXPECT_EQ(true, true); + EXPECT_FATAL_FAILURE({ + bool false_value = false; + ASSERT_EQ(false_value, true); + }, "To be equal to: true"); +} + +// Tests using int values in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, Int) { + ASSERT_EQ(32, 32); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(32, 33), + "33"); +} + +// Tests using time_t values in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, Time_T) { + EXPECT_EQ(static_cast(0), + static_cast(0)); + EXPECT_FATAL_FAILURE(ASSERT_EQ(static_cast(0), + static_cast(1234)), + "1234"); +} + +// Tests using char values in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, Char) { + ASSERT_EQ('z', 'z'); + const char ch = 'b'; + EXPECT_NONFATAL_FAILURE(EXPECT_EQ('\0', ch), + "ch"); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ('a', ch), + "ch"); +} + +// Tests using wchar_t values in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, WideChar) { + EXPECT_EQ(L'b', L'b'); + + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(L'\0', L'x'), + " Expected: L'\0'\n" + " Which is: L'\0' (0, 0x0)\n" + "To be equal to: L'x'\n" + " Which is: L'x' (120, 0x78)"); + + static wchar_t wchar; + wchar = L'b'; + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(L'a', wchar), + "wchar"); + wchar = 0x8119; + EXPECT_FATAL_FAILURE(ASSERT_EQ(static_cast(0x8120), wchar), + "To be equal to: wchar"); +} + +// Tests using ::std::string values in {EXPECT|ASSERT}_EQ. 
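A small sketch of ADD_FAILURE_AT from the user's side, attributing a failure to an external location such as a line in a data file; the path, helper, and test names are made up.

#include "gtest/gtest.h"
#include "gtest/gtest-spi.h"

void CheckRecord(bool ok, int record_line) {
  if (!ok) {
    ADD_FAILURE_AT("testdata/records.txt", record_line) << "malformed record";
  }
}

TEST(AddFailureAtDemo, ReportsExternalLocation) {
  CheckRecord(true, 1);  // No failure recorded.
  EXPECT_NONFATAL_FAILURE(CheckRecord(false, 7), "malformed record");
}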
+TEST(EqAssertionTest, StdString) { + // Compares a const char* to an std::string that has identical + // content. + ASSERT_EQ("Test", ::std::string("Test")); + + // Compares two identical std::strings. + static const ::std::string str1("A * in the middle"); + static const ::std::string str2(str1); + EXPECT_EQ(str1, str2); + + // Compares a const char* to an std::string that has different + // content + EXPECT_NONFATAL_FAILURE(EXPECT_EQ("Test", ::std::string("test")), + "\"test\""); + + // Compares an std::string to a char* that has different content. + char* const p1 = const_cast("foo"); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(::std::string("bar"), p1), + "p1"); + + // Compares two std::strings that have different contents, one of + // which having a NUL character in the middle. This should fail. + static ::std::string str3(str1); + str3.at(2) = '\0'; + EXPECT_FATAL_FAILURE(ASSERT_EQ(str1, str3), + "To be equal to: str3\n" + " Which is: \"A \\0 in the middle\""); +} + +#if GTEST_HAS_STD_WSTRING + +// Tests using ::std::wstring values in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, StdWideString) { + // Compares two identical std::wstrings. + const ::std::wstring wstr1(L"A * in the middle"); + const ::std::wstring wstr2(wstr1); + ASSERT_EQ(wstr1, wstr2); + + // Compares an std::wstring to a const wchar_t* that has identical + // content. + const wchar_t kTestX8119[] = { 'T', 'e', 's', 't', 0x8119, '\0' }; + EXPECT_EQ(::std::wstring(kTestX8119), kTestX8119); + + // Compares an std::wstring to a const wchar_t* that has different + // content. + const wchar_t kTestX8120[] = { 'T', 'e', 's', 't', 0x8120, '\0' }; + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_EQ(::std::wstring(kTestX8119), kTestX8120); + }, "kTestX8120"); + + // Compares two std::wstrings that have different contents, one of + // which having a NUL character in the middle. + ::std::wstring wstr3(wstr1); + wstr3.at(2) = L'\0'; + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(wstr1, wstr3), + "wstr3"); + + // Compares a wchar_t* to an std::wstring that has different + // content. + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_EQ(const_cast(L"foo"), ::std::wstring(L"bar")); + }, ""); +} + +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_STRING +// Tests using ::string values in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, GlobalString) { + // Compares a const char* to a ::string that has identical content. + EXPECT_EQ("Test", ::string("Test")); + + // Compares two identical ::strings. + const ::string str1("A * in the middle"); + const ::string str2(str1); + ASSERT_EQ(str1, str2); + + // Compares a ::string to a const char* that has different content. + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(::string("Test"), "test"), + "test"); + + // Compares two ::strings that have different contents, one of which + // having a NUL character in the middle. + ::string str3(str1); + str3.at(2) = '\0'; + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(str1, str3), + "str3"); + + // Compares a ::string to a char* that has different content. + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_EQ(::string("bar"), const_cast("foo")); + }, ""); +} + +#endif // GTEST_HAS_GLOBAL_STRING + +#if GTEST_HAS_GLOBAL_WSTRING + +// Tests using ::wstring values in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, GlobalWideString) { + // Compares two identical ::wstrings. + static const ::wstring wstr1(L"A * in the middle"); + static const ::wstring wstr2(wstr1); + EXPECT_EQ(wstr1, wstr2); + + // Compares a const wchar_t* to a ::wstring that has identical content. 
+ const wchar_t kTestX8119[] = { 'T', 'e', 's', 't', 0x8119, '\0' }; + ASSERT_EQ(kTestX8119, ::wstring(kTestX8119)); + + // Compares a const wchar_t* to a ::wstring that has different + // content. + const wchar_t kTestX8120[] = { 'T', 'e', 's', 't', 0x8120, '\0' }; + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_EQ(kTestX8120, ::wstring(kTestX8119)); + }, "Test\\x8119"); + + // Compares a wchar_t* to a ::wstring that has different content. + wchar_t* const p1 = const_cast(L"foo"); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(p1, ::wstring(L"bar")), + "bar"); + + // Compares two ::wstrings that have different contents, one of which + // having a NUL character in the middle. + static ::wstring wstr3; + wstr3 = wstr1; + wstr3.at(2) = L'\0'; + EXPECT_FATAL_FAILURE(ASSERT_EQ(wstr1, wstr3), + "wstr3"); +} + +#endif // GTEST_HAS_GLOBAL_WSTRING + +// Tests using char pointers in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, CharPointer) { + char* const p0 = NULL; + // Only way to get the Nokia compiler to compile the cast + // is to have a separate void* variable first. Putting + // the two casts on the same line doesn't work, neither does + // a direct C-style to char*. + void* pv1 = (void*)0x1234; // NOLINT + void* pv2 = (void*)0xABC0; // NOLINT + char* const p1 = reinterpret_cast(pv1); + char* const p2 = reinterpret_cast(pv2); + ASSERT_EQ(p1, p1); + + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(p0, p2), + "To be equal to: p2"); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(p1, p2), + "p2"); + EXPECT_FATAL_FAILURE(ASSERT_EQ(reinterpret_cast(0x1234), + reinterpret_cast(0xABC0)), + "ABC0"); +} + +// Tests using wchar_t pointers in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, WideCharPointer) { + wchar_t* const p0 = NULL; + // Only way to get the Nokia compiler to compile the cast + // is to have a separate void* variable first. Putting + // the two casts on the same line doesn't work, neither does + // a direct C-style to char*. + void* pv1 = (void*)0x1234; // NOLINT + void* pv2 = (void*)0xABC0; // NOLINT + wchar_t* const p1 = reinterpret_cast(pv1); + wchar_t* const p2 = reinterpret_cast(pv2); + EXPECT_EQ(p0, p0); + + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(p0, p2), + "To be equal to: p2"); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(p1, p2), + "p2"); + void* pv3 = (void*)0x1234; // NOLINT + void* pv4 = (void*)0xABC0; // NOLINT + const wchar_t* p3 = reinterpret_cast(pv3); + const wchar_t* p4 = reinterpret_cast(pv4); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(p3, p4), + "p4"); +} + +// Tests using other types of pointers in {EXPECT|ASSERT}_EQ. +TEST(EqAssertionTest, OtherPointer) { + ASSERT_EQ(static_cast(NULL), + static_cast(NULL)); + EXPECT_FATAL_FAILURE(ASSERT_EQ(static_cast(NULL), + reinterpret_cast(0x1234)), + "0x1234"); +} + +// A class that supports binary comparison operators but not streaming. +class UnprintableChar { + public: + explicit UnprintableChar(char ch) : char_(ch) {} + + bool operator==(const UnprintableChar& rhs) const { + return char_ == rhs.char_; + } + bool operator!=(const UnprintableChar& rhs) const { + return char_ != rhs.char_; + } + bool operator<(const UnprintableChar& rhs) const { + return char_ < rhs.char_; + } + bool operator<=(const UnprintableChar& rhs) const { + return char_ <= rhs.char_; + } + bool operator>(const UnprintableChar& rhs) const { + return char_ > rhs.char_; + } + bool operator>=(const UnprintableChar& rhs) const { + return char_ >= rhs.char_; + } + + private: + char char_; +}; + +// Tests that ASSERT_EQ() and friends don't require the arguments to +// be printable. 
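Before the unprintable-argument test that follows, a sketch of the opposite case: giving a type an operator<< lets assertions print it symbolically rather than as a raw byte dump. The Celsius type is illustrative only.

#include <ostream>
#include "gtest/gtest.h"

struct Celsius {
  double degrees;
};

bool operator==(const Celsius& lhs, const Celsius& rhs) {
  return lhs.degrees == rhs.degrees;
}

std::ostream& operator<<(std::ostream& os, const Celsius& c) {
  return os << c.degrees << " C";
}

TEST(PrintableDemo, UserOperatorIsUsedInMessages) {
  const Celsius a = {21.5};
  const Celsius b = {21.5};
  // On failure, the values would print via operator<< ("21.5 C"),
  // not as the "1-byte object <...>" dump used for unprintable types.
  EXPECT_EQ(a, b);
}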
+TEST(ComparisonAssertionTest, AcceptsUnprintableArgs) { + const UnprintableChar x('x'), y('y'); + ASSERT_EQ(x, x); + EXPECT_NE(x, y); + ASSERT_LT(x, y); + EXPECT_LE(x, y); + ASSERT_GT(y, x); + EXPECT_GE(x, x); + + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(x, y), "1-byte object <78>"); + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(x, y), "1-byte object <79>"); + EXPECT_NONFATAL_FAILURE(EXPECT_LT(y, y), "1-byte object <79>"); + EXPECT_NONFATAL_FAILURE(EXPECT_GT(x, y), "1-byte object <78>"); + EXPECT_NONFATAL_FAILURE(EXPECT_GT(x, y), "1-byte object <79>"); + + // Code tested by EXPECT_FATAL_FAILURE cannot reference local + // variables, so we have to write UnprintableChar('x') instead of x. +#ifndef __BORLANDC__ + // ICE's in C++Builder. + EXPECT_FATAL_FAILURE(ASSERT_NE(UnprintableChar('x'), UnprintableChar('x')), + "1-byte object <78>"); + EXPECT_FATAL_FAILURE(ASSERT_LE(UnprintableChar('y'), UnprintableChar('x')), + "1-byte object <78>"); +#endif + EXPECT_FATAL_FAILURE(ASSERT_LE(UnprintableChar('y'), UnprintableChar('x')), + "1-byte object <79>"); + EXPECT_FATAL_FAILURE(ASSERT_GE(UnprintableChar('x'), UnprintableChar('y')), + "1-byte object <78>"); + EXPECT_FATAL_FAILURE(ASSERT_GE(UnprintableChar('x'), UnprintableChar('y')), + "1-byte object <79>"); +} + +// Tests the FRIEND_TEST macro. + +// This class has a private member we want to test. We will test it +// both in a TEST and in a TEST_F. +class Foo { + public: + Foo() {} + + private: + int Bar() const { return 1; } + + // Declares the friend tests that can access the private member + // Bar(). + FRIEND_TEST(FRIEND_TEST_Test, TEST); + FRIEND_TEST(FRIEND_TEST_Test2, TEST_F); +}; + +// Tests that the FRIEND_TEST declaration allows a TEST to access a +// class's private members. This should compile. +TEST(FRIEND_TEST_Test, TEST) { + ASSERT_EQ(1, Foo().Bar()); +} + +// The fixture needed to test using FRIEND_TEST with TEST_F. +class FRIEND_TEST_Test2 : public Test { + protected: + Foo foo; +}; + +// Tests that the FRIEND_TEST declaration allows a TEST_F to access a +// class's private members. This should compile. +TEST_F(FRIEND_TEST_Test2, TEST_F) { + ASSERT_EQ(1, foo.Bar()); +} + +// Tests the life cycle of Test objects. + +// The test fixture for testing the life cycle of Test objects. +// +// This class counts the number of live test objects that uses this +// fixture. +class TestLifeCycleTest : public Test { + protected: + // Constructor. Increments the number of test objects that uses + // this fixture. + TestLifeCycleTest() { count_++; } + + // Destructor. Decrements the number of test objects that uses this + // fixture. + ~TestLifeCycleTest() { count_--; } + + // Returns the number of live test objects that uses this fixture. + int count() const { return count_; } + + private: + static int count_; +}; + +int TestLifeCycleTest::count_ = 0; + +// Tests the life cycle of test objects. +TEST_F(TestLifeCycleTest, Test1) { + // There should be only one test object in this test case that's + // currently alive. + ASSERT_EQ(1, count()); +} + +// Tests the life cycle of test objects. +TEST_F(TestLifeCycleTest, Test2) { + // After Test1 is done and Test2 is started, there should still be + // only one live test object, as the object for Test1 should've been + // deleted. + ASSERT_EQ(1, count()); +} + +} // namespace + +// Tests that the copy constructor works when it is NOT optimized away by +// the compiler. 
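A sketch of the FRIEND_TEST pattern exercised above, seen from the class author's side; the Counter class and test names are hypothetical.

#include "gtest/gtest.h"
#include "gtest/gtest_prod.h"

class Counter {
 public:
  Counter() : value_(0) {}
  void Increment() { ++value_; }

 private:
  int value_;  // Not otherwise observable from outside the class.

  // Grants the named test access to the private members above.
  FRIEND_TEST(CounterInternals, StartsAtZero);
};

TEST(CounterInternals, StartsAtZero) {
  Counter c;
  EXPECT_EQ(0, c.value_);  // Allowed because of the FRIEND_TEST declaration.
}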
+TEST(AssertionResultTest, CopyConstructorWorksWhenNotOptimied) { + // Checks that the copy constructor doesn't try to dereference NULL pointers + // in the source object. + AssertionResult r1 = AssertionSuccess(); + AssertionResult r2 = r1; + // The following line is added to prevent the compiler from optimizing + // away the constructor call. + r1 << "abc"; + + AssertionResult r3 = r1; + EXPECT_EQ(static_cast(r3), static_cast(r1)); + EXPECT_STREQ("abc", r1.message()); +} + +// Tests that AssertionSuccess and AssertionFailure construct +// AssertionResult objects as expected. +TEST(AssertionResultTest, ConstructionWorks) { + AssertionResult r1 = AssertionSuccess(); + EXPECT_TRUE(r1); + EXPECT_STREQ("", r1.message()); + + AssertionResult r2 = AssertionSuccess() << "abc"; + EXPECT_TRUE(r2); + EXPECT_STREQ("abc", r2.message()); + + AssertionResult r3 = AssertionFailure(); + EXPECT_FALSE(r3); + EXPECT_STREQ("", r3.message()); + + AssertionResult r4 = AssertionFailure() << "def"; + EXPECT_FALSE(r4); + EXPECT_STREQ("def", r4.message()); + + AssertionResult r5 = AssertionFailure(Message() << "ghi"); + EXPECT_FALSE(r5); + EXPECT_STREQ("ghi", r5.message()); +} + +// Tests that the negation flips the predicate result but keeps the message. +TEST(AssertionResultTest, NegationWorks) { + AssertionResult r1 = AssertionSuccess() << "abc"; + EXPECT_FALSE(!r1); + EXPECT_STREQ("abc", (!r1).message()); + + AssertionResult r2 = AssertionFailure() << "def"; + EXPECT_TRUE(!r2); + EXPECT_STREQ("def", (!r2).message()); +} + +TEST(AssertionResultTest, StreamingWorks) { + AssertionResult r = AssertionSuccess(); + r << "abc" << 'd' << 0 << true; + EXPECT_STREQ("abcd0true", r.message()); +} + +TEST(AssertionResultTest, CanStreamOstreamManipulators) { + AssertionResult r = AssertionSuccess(); + r << "Data" << std::endl << std::flush << std::ends << "Will be visible"; + EXPECT_STREQ("Data\n\\0Will be visible", r.message()); +} + +// The next test uses explicit conversion operators -- a C++11 feature. +#if GTEST_LANG_CXX11 + +TEST(AssertionResultTest, ConstructibleFromContextuallyConvertibleToBool) { + struct ExplicitlyConvertibleToBool { + explicit operator bool() const { return value; } + bool value; + }; + ExplicitlyConvertibleToBool v1 = {false}; + ExplicitlyConvertibleToBool v2 = {true}; + EXPECT_FALSE(v1); + EXPECT_TRUE(v2); +} + +#endif // GTEST_LANG_CXX11 + +struct ConvertibleToAssertionResult { + operator AssertionResult() const { return AssertionResult(true); } +}; + +TEST(AssertionResultTest, ConstructibleFromImplicitlyConvertible) { + ConvertibleToAssertionResult obj; + EXPECT_TRUE(obj); +} + +// Tests streaming a user type whose definition and operator << are +// both in the global namespace. +class Base { + public: + explicit Base(int an_x) : x_(an_x) {} + int x() const { return x_; } + private: + int x_; +}; +std::ostream& operator<<(std::ostream& os, + const Base& val) { + return os << val.x(); +} +std::ostream& operator<<(std::ostream& os, + const Base* pointer) { + return os << "(" << pointer->x() << ")"; +} + +TEST(MessageTest, CanStreamUserTypeInGlobalNameSpace) { + Message msg; + Base a(1); + + msg << a << &a; // Uses ::operator<<. + EXPECT_STREQ("1(1)", msg.GetString().c_str()); +} + +// Tests streaming a user type whose definition and operator<< are +// both in an unnamed namespace. 
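Building on the AssertionSuccess()/AssertionFailure() factories tested above, a sketch of a predicate-formatter for use with EXPECT_PRED_FORMAT2; the function and argument names are made up.

#include "gtest/gtest.h"

testing::AssertionResult AssertNearRatio(const char* a_expr, const char* b_expr,
                                         double a, double b) {
  const double ratio = (b != 0.0) ? a / b : 0.0;
  if (ratio > 0.9 && ratio < 1.1)
    return testing::AssertionSuccess();
  return testing::AssertionFailure()
         << a_expr << " and " << b_expr << " differ by more than 10%"
         << " (" << a << " vs " << b << ")";
}

TEST(PredicateFormatterDemo, RatioWithinTolerance) {
  // The expression texts "100.0" and "105.0" are passed to the formatter
  // along with the evaluated values.
  EXPECT_PRED_FORMAT2(AssertNearRatio, 100.0, 105.0);
}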
+namespace { +class MyTypeInUnnamedNameSpace : public Base { + public: + explicit MyTypeInUnnamedNameSpace(int an_x): Base(an_x) {} +}; +std::ostream& operator<<(std::ostream& os, + const MyTypeInUnnamedNameSpace& val) { + return os << val.x(); +} +std::ostream& operator<<(std::ostream& os, + const MyTypeInUnnamedNameSpace* pointer) { + return os << "(" << pointer->x() << ")"; +} +} // namespace + +TEST(MessageTest, CanStreamUserTypeInUnnamedNameSpace) { + Message msg; + MyTypeInUnnamedNameSpace a(1); + + msg << a << &a; // Uses ::operator<<. + EXPECT_STREQ("1(1)", msg.GetString().c_str()); +} + +// Tests streaming a user type whose definition and operator<< are +// both in a user namespace. +namespace namespace1 { +class MyTypeInNameSpace1 : public Base { + public: + explicit MyTypeInNameSpace1(int an_x): Base(an_x) {} +}; +std::ostream& operator<<(std::ostream& os, + const MyTypeInNameSpace1& val) { + return os << val.x(); +} +std::ostream& operator<<(std::ostream& os, + const MyTypeInNameSpace1* pointer) { + return os << "(" << pointer->x() << ")"; +} +} // namespace namespace1 + +TEST(MessageTest, CanStreamUserTypeInUserNameSpace) { + Message msg; + namespace1::MyTypeInNameSpace1 a(1); + + msg << a << &a; // Uses namespace1::operator<<. + EXPECT_STREQ("1(1)", msg.GetString().c_str()); +} + +// Tests streaming a user type whose definition is in a user namespace +// but whose operator<< is in the global namespace. +namespace namespace2 { +class MyTypeInNameSpace2 : public ::Base { + public: + explicit MyTypeInNameSpace2(int an_x): Base(an_x) {} +}; +} // namespace namespace2 +std::ostream& operator<<(std::ostream& os, + const namespace2::MyTypeInNameSpace2& val) { + return os << val.x(); +} +std::ostream& operator<<(std::ostream& os, + const namespace2::MyTypeInNameSpace2* pointer) { + return os << "(" << pointer->x() << ")"; +} + +TEST(MessageTest, CanStreamUserTypeInUserNameSpaceWithStreamOperatorInGlobal) { + Message msg; + namespace2::MyTypeInNameSpace2 a(1); + + msg << a << &a; // Uses ::operator<<. + EXPECT_STREQ("1(1)", msg.GetString().c_str()); +} + +// Tests streaming NULL pointers to testing::Message. +TEST(MessageTest, NullPointers) { + Message msg; + char* const p1 = NULL; + unsigned char* const p2 = NULL; + int* p3 = NULL; + double* p4 = NULL; + bool* p5 = NULL; + Message* p6 = NULL; + + msg << p1 << p2 << p3 << p4 << p5 << p6; + ASSERT_STREQ("(null)(null)(null)(null)(null)(null)", + msg.GetString().c_str()); +} + +// Tests streaming wide strings to testing::Message. +TEST(MessageTest, WideStrings) { + // Streams a NULL of type const wchar_t*. + const wchar_t* const_wstr = NULL; + EXPECT_STREQ("(null)", + (Message() << const_wstr).GetString().c_str()); + + // Streams a NULL of type wchar_t*. + wchar_t* wstr = NULL; + EXPECT_STREQ("(null)", + (Message() << wstr).GetString().c_str()); + + // Streams a non-NULL of type const wchar_t*. + const_wstr = L"abc\x8119"; + EXPECT_STREQ("abc\xe8\x84\x99", + (Message() << const_wstr).GetString().c_str()); + + // Streams a non-NULL of type wchar_t*. + wstr = const_cast(const_wstr); + EXPECT_STREQ("abc\xe8\x84\x99", + (Message() << wstr).GetString().c_str()); +} + + +// This line tests that we can define tests in the testing namespace. +namespace testing { + +// Tests the TestInfo class. 
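A minimal sketch of testing::Message as exercised above: values accumulate through operator<< and are read back with GetString(). The test name is illustrative.

#include "gtest/gtest.h"

TEST(MessageDemo, AccumulatesStreamedValues) {
  testing::Message msg;
  msg << "answer=" << 42 << ';';
  EXPECT_STREQ("answer=42;", msg.GetString().c_str());
}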
+ +class TestInfoTest : public Test { + protected: + static const TestInfo* GetTestInfo(const char* test_name) { + const TestCase* const test_case = GetUnitTestImpl()-> + GetTestCase("TestInfoTest", "", NULL, NULL); + + for (int i = 0; i < test_case->total_test_count(); ++i) { + const TestInfo* const test_info = test_case->GetTestInfo(i); + if (strcmp(test_name, test_info->name()) == 0) + return test_info; + } + return NULL; + } + + static const TestResult* GetTestResult( + const TestInfo* test_info) { + return test_info->result(); + } +}; + +// Tests TestInfo::test_case_name() and TestInfo::name(). +TEST_F(TestInfoTest, Names) { + const TestInfo* const test_info = GetTestInfo("Names"); + + ASSERT_STREQ("TestInfoTest", test_info->test_case_name()); + ASSERT_STREQ("Names", test_info->name()); +} + +// Tests TestInfo::result(). +TEST_F(TestInfoTest, result) { + const TestInfo* const test_info = GetTestInfo("result"); + + // Initially, there is no TestPartResult for this test. + ASSERT_EQ(0, GetTestResult(test_info)->total_part_count()); + + // After the previous assertion, there is still none. + ASSERT_EQ(0, GetTestResult(test_info)->total_part_count()); +} + +#define VERIFY_CODE_LOCATION \ + const int expected_line = __LINE__ - 1; \ + const TestInfo* const test_info = GetUnitTestImpl()->current_test_info(); \ + ASSERT_TRUE(test_info); \ + EXPECT_STREQ(__FILE__, test_info->file()); \ + EXPECT_EQ(expected_line, test_info->line()) + +TEST(CodeLocationForTEST, Verify) { + VERIFY_CODE_LOCATION; +} + +class CodeLocationForTESTF : public Test { +}; + +TEST_F(CodeLocationForTESTF, Verify) { + VERIFY_CODE_LOCATION; +} + +class CodeLocationForTESTP : public TestWithParam { +}; + +TEST_P(CodeLocationForTESTP, Verify) { + VERIFY_CODE_LOCATION; +} + +INSTANTIATE_TEST_CASE_P(, CodeLocationForTESTP, Values(0)); + +template +class CodeLocationForTYPEDTEST : public Test { +}; + +TYPED_TEST_CASE(CodeLocationForTYPEDTEST, int); + +TYPED_TEST(CodeLocationForTYPEDTEST, Verify) { + VERIFY_CODE_LOCATION; +} + +template +class CodeLocationForTYPEDTESTP : public Test { +}; + +TYPED_TEST_CASE_P(CodeLocationForTYPEDTESTP); + +TYPED_TEST_P(CodeLocationForTYPEDTESTP, Verify) { + VERIFY_CODE_LOCATION; +} + +REGISTER_TYPED_TEST_CASE_P(CodeLocationForTYPEDTESTP, Verify); + +INSTANTIATE_TYPED_TEST_CASE_P(My, CodeLocationForTYPEDTESTP, int); + +#undef VERIFY_CODE_LOCATION + +// Tests setting up and tearing down a test case. + +class SetUpTestCaseTest : public Test { + protected: + // This will be called once before the first test in this test case + // is run. + static void SetUpTestCase() { + printf("Setting up the test case . . .\n"); + + // Initializes some shared resource. In this simple example, we + // just create a C string. More complex stuff can be done if + // desired. + shared_resource_ = "123"; + + // Increments the number of test cases that have been set up. + counter_++; + + // SetUpTestCase() should be called only once. + EXPECT_EQ(1, counter_); + } + + // This will be called once after the last test in this test case is + // run. + static void TearDownTestCase() { + printf("Tearing down the test case . . .\n"); + + // Decrements the number of test cases that have been set up. + counter_--; + + // TearDownTestCase() should be called only once. + EXPECT_EQ(0, counter_); + + // Cleans up the shared resource. + shared_resource_ = NULL; + } + + // This will be called before each test in this test case. + virtual void SetUp() { + // SetUpTestCase() should be called only once, so counter_ should + // always be 1. 
+ EXPECT_EQ(1, counter_); + } + + // Number of test cases that have been set up. + static int counter_; + + // Some resource to be shared by all tests in this test case. + static const char* shared_resource_; +}; + +int SetUpTestCaseTest::counter_ = 0; +const char* SetUpTestCaseTest::shared_resource_ = NULL; + +// A test that uses the shared resource. +TEST_F(SetUpTestCaseTest, Test1) { + EXPECT_STRNE(NULL, shared_resource_); +} + +// Another test that uses the shared resource. +TEST_F(SetUpTestCaseTest, Test2) { + EXPECT_STREQ("123", shared_resource_); +} + +// The InitGoogleTestTest test case tests testing::InitGoogleTest(). + +// The Flags struct stores a copy of all Google Test flags. +struct Flags { + // Constructs a Flags struct where each flag has its default value. + Flags() : also_run_disabled_tests(false), + break_on_failure(false), + catch_exceptions(false), + death_test_use_fork(false), + filter(""), + list_tests(false), + output(""), + print_time(true), + random_seed(0), + repeat(1), + shuffle(false), + stack_trace_depth(kMaxStackTraceDepth), + stream_result_to(""), + throw_on_failure(false) {} + + // Factory methods. + + // Creates a Flags struct where the gtest_also_run_disabled_tests flag has + // the given value. + static Flags AlsoRunDisabledTests(bool also_run_disabled_tests) { + Flags flags; + flags.also_run_disabled_tests = also_run_disabled_tests; + return flags; + } + + // Creates a Flags struct where the gtest_break_on_failure flag has + // the given value. + static Flags BreakOnFailure(bool break_on_failure) { + Flags flags; + flags.break_on_failure = break_on_failure; + return flags; + } + + // Creates a Flags struct where the gtest_catch_exceptions flag has + // the given value. + static Flags CatchExceptions(bool catch_exceptions) { + Flags flags; + flags.catch_exceptions = catch_exceptions; + return flags; + } + + // Creates a Flags struct where the gtest_death_test_use_fork flag has + // the given value. + static Flags DeathTestUseFork(bool death_test_use_fork) { + Flags flags; + flags.death_test_use_fork = death_test_use_fork; + return flags; + } + + // Creates a Flags struct where the gtest_filter flag has the given + // value. + static Flags Filter(const char* filter) { + Flags flags; + flags.filter = filter; + return flags; + } + + // Creates a Flags struct where the gtest_list_tests flag has the + // given value. + static Flags ListTests(bool list_tests) { + Flags flags; + flags.list_tests = list_tests; + return flags; + } + + // Creates a Flags struct where the gtest_output flag has the given + // value. + static Flags Output(const char* output) { + Flags flags; + flags.output = output; + return flags; + } + + // Creates a Flags struct where the gtest_print_time flag has the given + // value. + static Flags PrintTime(bool print_time) { + Flags flags; + flags.print_time = print_time; + return flags; + } + + // Creates a Flags struct where the gtest_random_seed flag has + // the given value. + static Flags RandomSeed(Int32 random_seed) { + Flags flags; + flags.random_seed = random_seed; + return flags; + } + + // Creates a Flags struct where the gtest_repeat flag has the given + // value. + static Flags Repeat(Int32 repeat) { + Flags flags; + flags.repeat = repeat; + return flags; + } + + // Creates a Flags struct where the gtest_shuffle flag has + // the given value. 
+ static Flags Shuffle(bool shuffle) { + Flags flags; + flags.shuffle = shuffle; + return flags; + } + + // Creates a Flags struct where the GTEST_FLAG(stack_trace_depth) flag has + // the given value. + static Flags StackTraceDepth(Int32 stack_trace_depth) { + Flags flags; + flags.stack_trace_depth = stack_trace_depth; + return flags; + } + + // Creates a Flags struct where the GTEST_FLAG(stream_result_to) flag has + // the given value. + static Flags StreamResultTo(const char* stream_result_to) { + Flags flags; + flags.stream_result_to = stream_result_to; + return flags; + } + + // Creates a Flags struct where the gtest_throw_on_failure flag has + // the given value. + static Flags ThrowOnFailure(bool throw_on_failure) { + Flags flags; + flags.throw_on_failure = throw_on_failure; + return flags; + } + + // These fields store the flag values. + bool also_run_disabled_tests; + bool break_on_failure; + bool catch_exceptions; + bool death_test_use_fork; + const char* filter; + bool list_tests; + const char* output; + bool print_time; + Int32 random_seed; + Int32 repeat; + bool shuffle; + Int32 stack_trace_depth; + const char* stream_result_to; + bool throw_on_failure; +}; + +// Fixture for testing InitGoogleTest(). +class InitGoogleTestTest : public Test { + protected: + // Clears the flags before each test. + virtual void SetUp() { + GTEST_FLAG(also_run_disabled_tests) = false; + GTEST_FLAG(break_on_failure) = false; + GTEST_FLAG(catch_exceptions) = false; + GTEST_FLAG(death_test_use_fork) = false; + GTEST_FLAG(filter) = ""; + GTEST_FLAG(list_tests) = false; + GTEST_FLAG(output) = ""; + GTEST_FLAG(print_time) = true; + GTEST_FLAG(random_seed) = 0; + GTEST_FLAG(repeat) = 1; + GTEST_FLAG(shuffle) = false; + GTEST_FLAG(stack_trace_depth) = kMaxStackTraceDepth; + GTEST_FLAG(stream_result_to) = ""; + GTEST_FLAG(throw_on_failure) = false; + } + + // Asserts that two narrow or wide string arrays are equal. + template + static void AssertStringArrayEq(size_t size1, CharType** array1, + size_t size2, CharType** array2) { + ASSERT_EQ(size1, size2) << " Array sizes different."; + + for (size_t i = 0; i != size1; i++) { + ASSERT_STREQ(array1[i], array2[i]) << " where i == " << i; + } + } + + // Verifies that the flag values match the expected values. + static void CheckFlags(const Flags& expected) { + EXPECT_EQ(expected.also_run_disabled_tests, + GTEST_FLAG(also_run_disabled_tests)); + EXPECT_EQ(expected.break_on_failure, GTEST_FLAG(break_on_failure)); + EXPECT_EQ(expected.catch_exceptions, GTEST_FLAG(catch_exceptions)); + EXPECT_EQ(expected.death_test_use_fork, GTEST_FLAG(death_test_use_fork)); + EXPECT_STREQ(expected.filter, GTEST_FLAG(filter).c_str()); + EXPECT_EQ(expected.list_tests, GTEST_FLAG(list_tests)); + EXPECT_STREQ(expected.output, GTEST_FLAG(output).c_str()); + EXPECT_EQ(expected.print_time, GTEST_FLAG(print_time)); + EXPECT_EQ(expected.random_seed, GTEST_FLAG(random_seed)); + EXPECT_EQ(expected.repeat, GTEST_FLAG(repeat)); + EXPECT_EQ(expected.shuffle, GTEST_FLAG(shuffle)); + EXPECT_EQ(expected.stack_trace_depth, GTEST_FLAG(stack_trace_depth)); + EXPECT_STREQ(expected.stream_result_to, + GTEST_FLAG(stream_result_to).c_str()); + EXPECT_EQ(expected.throw_on_failure, GTEST_FLAG(throw_on_failure)); + } + + // Parses a command line (specified by argc1 and argv1), then + // verifies that the flag values are expected and that the + // recognized flags are removed from the command line. 
+  template <typename CharType>
+  static void TestParsingFlags(int argc1, const CharType** argv1,
+                               int argc2, const CharType** argv2,
+                               const Flags& expected, bool should_print_help) {
+    const bool saved_help_flag = ::testing::internal::g_help_flag;
+    ::testing::internal::g_help_flag = false;
+
+#if GTEST_HAS_STREAM_REDIRECTION
+    CaptureStdout();
+#endif
+
+    // Parses the command line.
+    internal::ParseGoogleTestFlagsOnly(&argc1, const_cast<CharType**>(argv1));
+
+#if GTEST_HAS_STREAM_REDIRECTION
+    const std::string captured_stdout = GetCapturedStdout();
+#endif
+
+    // Verifies the flag values.
+    CheckFlags(expected);
+
+    // Verifies that the recognized flags are removed from the command
+    // line.
+    AssertStringArrayEq(argc1 + 1, argv1, argc2 + 1, argv2);
+
+    // ParseGoogleTestFlagsOnly should neither set g_help_flag nor print the
+    // help message for the flags it recognizes.
+    EXPECT_EQ(should_print_help, ::testing::internal::g_help_flag);
+
+#if GTEST_HAS_STREAM_REDIRECTION
+    const char* const expected_help_fragment =
+        "This program contains tests written using";
+    if (should_print_help) {
+      EXPECT_PRED_FORMAT2(IsSubstring, expected_help_fragment, captured_stdout);
+    } else {
+      EXPECT_PRED_FORMAT2(IsNotSubstring,
+                          expected_help_fragment, captured_stdout);
+    }
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+
+    ::testing::internal::g_help_flag = saved_help_flag;
+  }
+
+  // This macro wraps TestParsingFlags s.t. the user doesn't need
+  // to specify the array sizes.
+
+#define GTEST_TEST_PARSING_FLAGS_(argv1, argv2, expected, should_print_help) \
+  TestParsingFlags(sizeof(argv1)/sizeof(*argv1) - 1, argv1, \
+                   sizeof(argv2)/sizeof(*argv2) - 1, argv2, \
+                   expected, should_print_help)
+};
+
+// Tests parsing an empty command line.
+TEST_F(InitGoogleTestTest, Empty) {
+  const char* argv[] = {
+    NULL
+  };
+
+  const char* argv2[] = {
+    NULL
+  };
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags(), false);
+}
+
+// Tests parsing a command line that has no flag.
+TEST_F(InitGoogleTestTest, NoFlag) {
+  const char* argv[] = {
+    "foo.exe",
+    NULL
+  };
+
+  const char* argv2[] = {
+    "foo.exe",
+    NULL
+  };
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags(), false);
+}
+
+// Tests parsing a bad --gtest_filter flag.
+TEST_F(InitGoogleTestTest, FilterBad) {
+  const char* argv[] = {
+    "foo.exe",
+    "--gtest_filter",
+    NULL
+  };
+
+  const char* argv2[] = {
+    "foo.exe",
+    "--gtest_filter",
+    NULL
+  };
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Filter(""), true);
+}
+
+// Tests parsing an empty --gtest_filter flag.
+TEST_F(InitGoogleTestTest, FilterEmpty) {
+  const char* argv[] = {
+    "foo.exe",
+    "--gtest_filter=",
+    NULL
+  };
+
+  const char* argv2[] = {
+    "foo.exe",
+    NULL
+  };
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Filter(""), false);
+}
+
+// Tests parsing a non-empty --gtest_filter flag.
+TEST_F(InitGoogleTestTest, FilterNonEmpty) {
+  const char* argv[] = {
+    "foo.exe",
+    "--gtest_filter=abc",
+    NULL
+  };
+
+  const char* argv2[] = {
+    "foo.exe",
+    NULL
+  };
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Filter("abc"), false);
+}
+
+// Tests parsing --gtest_break_on_failure.
+TEST_F(InitGoogleTestTest, BreakOnFailureWithoutValue) {
+  const char* argv[] = {
+    "foo.exe",
+    "--gtest_break_on_failure",
+    NULL
+};
+
+  const char* argv2[] = {
+    "foo.exe",
+    NULL
+  };
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::BreakOnFailure(true), false);
+}
+
+// Tests parsing --gtest_break_on_failure=0.
+TEST_F(InitGoogleTestTest, BreakOnFailureFalse_0) { + const char* argv[] = { + "foo.exe", + "--gtest_break_on_failure=0", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::BreakOnFailure(false), false); +} + +// Tests parsing --gtest_break_on_failure=f. +TEST_F(InitGoogleTestTest, BreakOnFailureFalse_f) { + const char* argv[] = { + "foo.exe", + "--gtest_break_on_failure=f", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::BreakOnFailure(false), false); +} + +// Tests parsing --gtest_break_on_failure=F. +TEST_F(InitGoogleTestTest, BreakOnFailureFalse_F) { + const char* argv[] = { + "foo.exe", + "--gtest_break_on_failure=F", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::BreakOnFailure(false), false); +} + +// Tests parsing a --gtest_break_on_failure flag that has a "true" +// definition. +TEST_F(InitGoogleTestTest, BreakOnFailureTrue) { + const char* argv[] = { + "foo.exe", + "--gtest_break_on_failure=1", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::BreakOnFailure(true), false); +} + +// Tests parsing --gtest_catch_exceptions. +TEST_F(InitGoogleTestTest, CatchExceptions) { + const char* argv[] = { + "foo.exe", + "--gtest_catch_exceptions", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::CatchExceptions(true), false); +} + +// Tests parsing --gtest_death_test_use_fork. +TEST_F(InitGoogleTestTest, DeathTestUseFork) { + const char* argv[] = { + "foo.exe", + "--gtest_death_test_use_fork", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::DeathTestUseFork(true), false); +} + +// Tests having the same flag twice with different values. The +// expected behavior is that the one coming last takes precedence. +TEST_F(InitGoogleTestTest, DuplicatedFlags) { + const char* argv[] = { + "foo.exe", + "--gtest_filter=a", + "--gtest_filter=b", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Filter("b"), false); +} + +// Tests having an unrecognized flag on the command line. +TEST_F(InitGoogleTestTest, UnrecognizedFlag) { + const char* argv[] = { + "foo.exe", + "--gtest_break_on_failure", + "bar", // Unrecognized by Google Test. 
+ "--gtest_filter=b", + NULL + }; + + const char* argv2[] = { + "foo.exe", + "bar", + NULL + }; + + Flags flags; + flags.break_on_failure = true; + flags.filter = "b"; + GTEST_TEST_PARSING_FLAGS_(argv, argv2, flags, false); +} + +// Tests having a --gtest_list_tests flag +TEST_F(InitGoogleTestTest, ListTestsFlag) { + const char* argv[] = { + "foo.exe", + "--gtest_list_tests", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::ListTests(true), false); +} + +// Tests having a --gtest_list_tests flag with a "true" value +TEST_F(InitGoogleTestTest, ListTestsTrue) { + const char* argv[] = { + "foo.exe", + "--gtest_list_tests=1", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::ListTests(true), false); +} + +// Tests having a --gtest_list_tests flag with a "false" value +TEST_F(InitGoogleTestTest, ListTestsFalse) { + const char* argv[] = { + "foo.exe", + "--gtest_list_tests=0", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::ListTests(false), false); +} + +// Tests parsing --gtest_list_tests=f. +TEST_F(InitGoogleTestTest, ListTestsFalse_f) { + const char* argv[] = { + "foo.exe", + "--gtest_list_tests=f", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::ListTests(false), false); +} + +// Tests parsing --gtest_list_tests=F. +TEST_F(InitGoogleTestTest, ListTestsFalse_F) { + const char* argv[] = { + "foo.exe", + "--gtest_list_tests=F", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::ListTests(false), false); +} + +// Tests parsing --gtest_output (invalid). 
+TEST_F(InitGoogleTestTest, OutputEmpty) { + const char* argv[] = { + "foo.exe", + "--gtest_output", + NULL + }; + + const char* argv2[] = { + "foo.exe", + "--gtest_output", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags(), true); +} + +// Tests parsing --gtest_output=xml +TEST_F(InitGoogleTestTest, OutputXml) { + const char* argv[] = { + "foo.exe", + "--gtest_output=xml", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Output("xml"), false); +} + +// Tests parsing --gtest_output=xml:file +TEST_F(InitGoogleTestTest, OutputXmlFile) { + const char* argv[] = { + "foo.exe", + "--gtest_output=xml:file", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Output("xml:file"), false); +} + +// Tests parsing --gtest_output=xml:directory/path/ +TEST_F(InitGoogleTestTest, OutputXmlDirectory) { + const char* argv[] = { + "foo.exe", + "--gtest_output=xml:directory/path/", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, + Flags::Output("xml:directory/path/"), false); +} + +// Tests having a --gtest_print_time flag +TEST_F(InitGoogleTestTest, PrintTimeFlag) { + const char* argv[] = { + "foo.exe", + "--gtest_print_time", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::PrintTime(true), false); +} + +// Tests having a --gtest_print_time flag with a "true" value +TEST_F(InitGoogleTestTest, PrintTimeTrue) { + const char* argv[] = { + "foo.exe", + "--gtest_print_time=1", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::PrintTime(true), false); +} + +// Tests having a --gtest_print_time flag with a "false" value +TEST_F(InitGoogleTestTest, PrintTimeFalse) { + const char* argv[] = { + "foo.exe", + "--gtest_print_time=0", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::PrintTime(false), false); +} + +// Tests parsing --gtest_print_time=f. +TEST_F(InitGoogleTestTest, PrintTimeFalse_f) { + const char* argv[] = { + "foo.exe", + "--gtest_print_time=f", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::PrintTime(false), false); +} + +// Tests parsing --gtest_print_time=F. 
+TEST_F(InitGoogleTestTest, PrintTimeFalse_F) { + const char* argv[] = { + "foo.exe", + "--gtest_print_time=F", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::PrintTime(false), false); +} + +// Tests parsing --gtest_random_seed=number +TEST_F(InitGoogleTestTest, RandomSeed) { + const char* argv[] = { + "foo.exe", + "--gtest_random_seed=1000", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::RandomSeed(1000), false); +} + +// Tests parsing --gtest_repeat=number +TEST_F(InitGoogleTestTest, Repeat) { + const char* argv[] = { + "foo.exe", + "--gtest_repeat=1000", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Repeat(1000), false); +} + +// Tests having a --gtest_also_run_disabled_tests flag +TEST_F(InitGoogleTestTest, AlsoRunDisabledTestsFlag) { + const char* argv[] = { + "foo.exe", + "--gtest_also_run_disabled_tests", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, + Flags::AlsoRunDisabledTests(true), false); +} + +// Tests having a --gtest_also_run_disabled_tests flag with a "true" value +TEST_F(InitGoogleTestTest, AlsoRunDisabledTestsTrue) { + const char* argv[] = { + "foo.exe", + "--gtest_also_run_disabled_tests=1", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, + Flags::AlsoRunDisabledTests(true), false); +} + +// Tests having a --gtest_also_run_disabled_tests flag with a "false" value +TEST_F(InitGoogleTestTest, AlsoRunDisabledTestsFalse) { + const char* argv[] = { + "foo.exe", + "--gtest_also_run_disabled_tests=0", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, + Flags::AlsoRunDisabledTests(false), false); +} + +// Tests parsing --gtest_shuffle. +TEST_F(InitGoogleTestTest, ShuffleWithoutValue) { + const char* argv[] = { + "foo.exe", + "--gtest_shuffle", + NULL +}; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Shuffle(true), false); +} + +// Tests parsing --gtest_shuffle=0. +TEST_F(InitGoogleTestTest, ShuffleFalse_0) { + const char* argv[] = { + "foo.exe", + "--gtest_shuffle=0", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Shuffle(false), false); +} + +// Tests parsing a --gtest_shuffle flag that has a "true" +// definition. +TEST_F(InitGoogleTestTest, ShuffleTrue) { + const char* argv[] = { + "foo.exe", + "--gtest_shuffle=1", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Shuffle(true), false); +} + +// Tests parsing --gtest_stack_trace_depth=number. +TEST_F(InitGoogleTestTest, StackTraceDepth) { + const char* argv[] = { + "foo.exe", + "--gtest_stack_trace_depth=5", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::StackTraceDepth(5), false); +} + +TEST_F(InitGoogleTestTest, StreamResultTo) { + const char* argv[] = { + "foo.exe", + "--gtest_stream_result_to=localhost:1234", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_( + argv, argv2, Flags::StreamResultTo("localhost:1234"), false); +} + +// Tests parsing --gtest_throw_on_failure. 
+TEST_F(InitGoogleTestTest, ThrowOnFailureWithoutValue) { + const char* argv[] = { + "foo.exe", + "--gtest_throw_on_failure", + NULL +}; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::ThrowOnFailure(true), false); +} + +// Tests parsing --gtest_throw_on_failure=0. +TEST_F(InitGoogleTestTest, ThrowOnFailureFalse_0) { + const char* argv[] = { + "foo.exe", + "--gtest_throw_on_failure=0", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::ThrowOnFailure(false), false); +} + +// Tests parsing a --gtest_throw_on_failure flag that has a "true" +// definition. +TEST_F(InitGoogleTestTest, ThrowOnFailureTrue) { + const char* argv[] = { + "foo.exe", + "--gtest_throw_on_failure=1", + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::ThrowOnFailure(true), false); +} + +#if GTEST_OS_WINDOWS +// Tests parsing wide strings. +TEST_F(InitGoogleTestTest, WideStrings) { + const wchar_t* argv[] = { + L"foo.exe", + L"--gtest_filter=Foo*", + L"--gtest_list_tests=1", + L"--gtest_break_on_failure", + L"--non_gtest_flag", + NULL + }; + + const wchar_t* argv2[] = { + L"foo.exe", + L"--non_gtest_flag", + NULL + }; + + Flags expected_flags; + expected_flags.break_on_failure = true; + expected_flags.filter = "Foo*"; + expected_flags.list_tests = true; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, expected_flags, false); +} +# endif // GTEST_OS_WINDOWS + +#if GTEST_USE_OWN_FLAGFILE_FLAG_ +class FlagfileTest : public InitGoogleTestTest { + public: + virtual void SetUp() { + InitGoogleTestTest::SetUp(); + + testdata_path_.Set(internal::FilePath( + internal::TempDir() + internal::GetCurrentExecutableName().string() + + "_flagfile_test")); + testing::internal::posix::RmDir(testdata_path_.c_str()); + EXPECT_TRUE(testdata_path_.CreateFolder()); + } + + virtual void TearDown() { + testing::internal::posix::RmDir(testdata_path_.c_str()); + InitGoogleTestTest::TearDown(); + } + + internal::FilePath CreateFlagfile(const char* contents) { + internal::FilePath file_path(internal::FilePath::GenerateUniqueFileName( + testdata_path_, internal::FilePath("unique"), "txt")); + FILE* f = testing::internal::posix::FOpen(file_path.c_str(), "w"); + fprintf(f, "%s", contents); + fclose(f); + return file_path; + } + + private: + internal::FilePath testdata_path_; +}; + +// Tests an empty flagfile. +TEST_F(FlagfileTest, Empty) { + internal::FilePath flagfile_path(CreateFlagfile("")); + std::string flagfile_flag = + std::string("--" GTEST_FLAG_PREFIX_ "flagfile=") + flagfile_path.c_str(); + + const char* argv[] = { + "foo.exe", + flagfile_flag.c_str(), + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags(), false); +} + +// Tests passing a non-empty --gtest_filter flag via --gtest_flagfile. +TEST_F(FlagfileTest, FilterNonEmpty) { + internal::FilePath flagfile_path(CreateFlagfile( + "--" GTEST_FLAG_PREFIX_ "filter=abc")); + std::string flagfile_flag = + std::string("--" GTEST_FLAG_PREFIX_ "flagfile=") + flagfile_path.c_str(); + + const char* argv[] = { + "foo.exe", + flagfile_flag.c_str(), + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Filter("abc"), false); +} + +// Tests passing several flags via --gtest_flagfile. 
+TEST_F(FlagfileTest, SeveralFlags) { + internal::FilePath flagfile_path(CreateFlagfile( + "--" GTEST_FLAG_PREFIX_ "filter=abc\n" + "--" GTEST_FLAG_PREFIX_ "break_on_failure\n" + "--" GTEST_FLAG_PREFIX_ "list_tests")); + std::string flagfile_flag = + std::string("--" GTEST_FLAG_PREFIX_ "flagfile=") + flagfile_path.c_str(); + + const char* argv[] = { + "foo.exe", + flagfile_flag.c_str(), + NULL + }; + + const char* argv2[] = { + "foo.exe", + NULL + }; + + Flags expected_flags; + expected_flags.break_on_failure = true; + expected_flags.filter = "abc"; + expected_flags.list_tests = true; + + GTEST_TEST_PARSING_FLAGS_(argv, argv2, expected_flags, false); +} +#endif // GTEST_USE_OWN_FLAGFILE_FLAG_ + +// Tests current_test_info() in UnitTest. +class CurrentTestInfoTest : public Test { + protected: + // Tests that current_test_info() returns NULL before the first test in + // the test case is run. + static void SetUpTestCase() { + // There should be no tests running at this point. + const TestInfo* test_info = + UnitTest::GetInstance()->current_test_info(); + EXPECT_TRUE(test_info == NULL) + << "There should be no tests running at this point."; + } + + // Tests that current_test_info() returns NULL after the last test in + // the test case has run. + static void TearDownTestCase() { + const TestInfo* test_info = + UnitTest::GetInstance()->current_test_info(); + EXPECT_TRUE(test_info == NULL) + << "There should be no tests running at this point."; + } +}; + +// Tests that current_test_info() returns TestInfo for currently running +// test by checking the expected test name against the actual one. +TEST_F(CurrentTestInfoTest, WorksForFirstTestInATestCase) { + const TestInfo* test_info = + UnitTest::GetInstance()->current_test_info(); + ASSERT_TRUE(NULL != test_info) + << "There is a test running so we should have a valid TestInfo."; + EXPECT_STREQ("CurrentTestInfoTest", test_info->test_case_name()) + << "Expected the name of the currently running test case."; + EXPECT_STREQ("WorksForFirstTestInATestCase", test_info->name()) + << "Expected the name of the currently running test."; +} + +// Tests that current_test_info() returns TestInfo for currently running +// test by checking the expected test name against the actual one. We +// use this test to see that the TestInfo object actually changed from +// the previous invocation. +TEST_F(CurrentTestInfoTest, WorksForSecondTestInATestCase) { + const TestInfo* test_info = + UnitTest::GetInstance()->current_test_info(); + ASSERT_TRUE(NULL != test_info) + << "There is a test running so we should have a valid TestInfo."; + EXPECT_STREQ("CurrentTestInfoTest", test_info->test_case_name()) + << "Expected the name of the currently running test case."; + EXPECT_STREQ("WorksForSecondTestInATestCase", test_info->name()) + << "Expected the name of the currently running test."; +} + +} // namespace testing + +// These two lines test that we can define tests in a namespace that +// has the name "testing" and is nested in another namespace. +namespace my_namespace { +namespace testing { + +// Makes sure that TEST knows to use ::testing::Test instead of +// ::my_namespace::testing::Test. +class Test {}; + +// Makes sure that an assertion knows to use ::testing::Message instead of +// ::my_namespace::testing::Message. +class Message {}; + +// Makes sure that an assertion knows to use +// ::testing::AssertionResult instead of +// ::my_namespace::testing::AssertionResult. +class AssertionResult {}; + +// Tests that an assertion that should succeed works as expected. 
+TEST(NestedTestingNamespaceTest, Success) { + EXPECT_EQ(1, 1) << "This shouldn't fail."; +} + +// Tests that an assertion that should fail works as expected. +TEST(NestedTestingNamespaceTest, Failure) { + EXPECT_FATAL_FAILURE(FAIL() << "This failure is expected.", + "This failure is expected."); +} + +} // namespace testing +} // namespace my_namespace + +// Tests that one can call superclass SetUp and TearDown methods-- +// that is, that they are not private. +// No tests are based on this fixture; the test "passes" if it compiles +// successfully. +class ProtectedFixtureMethodsTest : public Test { + protected: + virtual void SetUp() { + Test::SetUp(); + } + virtual void TearDown() { + Test::TearDown(); + } +}; + +// StreamingAssertionsTest tests the streaming versions of a representative +// sample of assertions. +TEST(StreamingAssertionsTest, Unconditional) { + SUCCEED() << "expected success"; + EXPECT_NONFATAL_FAILURE(ADD_FAILURE() << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(FAIL() << "expected failure", + "expected failure"); +} + +#ifdef __BORLANDC__ +// Silences warnings: "Condition is always true", "Unreachable code" +# pragma option push -w-ccc -w-rch +#endif + +TEST(StreamingAssertionsTest, Truth) { + EXPECT_TRUE(true) << "unexpected failure"; + ASSERT_TRUE(true) << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_TRUE(false) << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_TRUE(false) << "expected failure", + "expected failure"); +} + +TEST(StreamingAssertionsTest, Truth2) { + EXPECT_FALSE(false) << "unexpected failure"; + ASSERT_FALSE(false) << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_FALSE(true) << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_FALSE(true) << "expected failure", + "expected failure"); +} + +#ifdef __BORLANDC__ +// Restores warnings after previous "#pragma option push" supressed them +# pragma option pop +#endif + +TEST(StreamingAssertionsTest, IntegerEquals) { + EXPECT_EQ(1, 1) << "unexpected failure"; + ASSERT_EQ(1, 1) << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_EQ(1, 2) << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_EQ(1, 2) << "expected failure", + "expected failure"); +} + +TEST(StreamingAssertionsTest, IntegerLessThan) { + EXPECT_LT(1, 2) << "unexpected failure"; + ASSERT_LT(1, 2) << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_LT(2, 1) << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_LT(2, 1) << "expected failure", + "expected failure"); +} + +TEST(StreamingAssertionsTest, StringsEqual) { + EXPECT_STREQ("foo", "foo") << "unexpected failure"; + ASSERT_STREQ("foo", "foo") << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_STREQ("foo", "bar") << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_STREQ("foo", "bar") << "expected failure", + "expected failure"); +} + +TEST(StreamingAssertionsTest, StringsNotEqual) { + EXPECT_STRNE("foo", "bar") << "unexpected failure"; + ASSERT_STRNE("foo", "bar") << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_STRNE("foo", "foo") << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_STRNE("foo", "foo") << "expected failure", + "expected failure"); +} + +TEST(StreamingAssertionsTest, StringsEqualIgnoringCase) { + EXPECT_STRCASEEQ("foo", "FOO") << "unexpected failure"; + ASSERT_STRCASEEQ("foo", "FOO") << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_STRCASEEQ("foo", "bar") << 
"expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_STRCASEEQ("foo", "bar") << "expected failure", + "expected failure"); +} + +TEST(StreamingAssertionsTest, StringNotEqualIgnoringCase) { + EXPECT_STRCASENE("foo", "bar") << "unexpected failure"; + ASSERT_STRCASENE("foo", "bar") << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_STRCASENE("foo", "FOO") << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_STRCASENE("bar", "BAR") << "expected failure", + "expected failure"); +} + +TEST(StreamingAssertionsTest, FloatingPointEquals) { + EXPECT_FLOAT_EQ(1.0, 1.0) << "unexpected failure"; + ASSERT_FLOAT_EQ(1.0, 1.0) << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_FLOAT_EQ(0.0, 1.0) << "expected failure", + "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_FLOAT_EQ(0.0, 1.0) << "expected failure", + "expected failure"); +} + +#if GTEST_HAS_EXCEPTIONS + +TEST(StreamingAssertionsTest, Throw) { + EXPECT_THROW(ThrowAnInteger(), int) << "unexpected failure"; + ASSERT_THROW(ThrowAnInteger(), int) << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_THROW(ThrowAnInteger(), bool) << + "expected failure", "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_THROW(ThrowAnInteger(), bool) << + "expected failure", "expected failure"); +} + +TEST(StreamingAssertionsTest, NoThrow) { + EXPECT_NO_THROW(ThrowNothing()) << "unexpected failure"; + ASSERT_NO_THROW(ThrowNothing()) << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_NO_THROW(ThrowAnInteger()) << + "expected failure", "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_NO_THROW(ThrowAnInteger()) << + "expected failure", "expected failure"); +} + +TEST(StreamingAssertionsTest, AnyThrow) { + EXPECT_ANY_THROW(ThrowAnInteger()) << "unexpected failure"; + ASSERT_ANY_THROW(ThrowAnInteger()) << "unexpected failure"; + EXPECT_NONFATAL_FAILURE(EXPECT_ANY_THROW(ThrowNothing()) << + "expected failure", "expected failure"); + EXPECT_FATAL_FAILURE(ASSERT_ANY_THROW(ThrowNothing()) << + "expected failure", "expected failure"); +} + +#endif // GTEST_HAS_EXCEPTIONS + +// Tests that Google Test correctly decides whether to use colors in the output. + +TEST(ColoredOutputTest, UsesColorsWhenGTestColorFlagIsYes) { + GTEST_FLAG(color) = "yes"; + + SetEnv("TERM", "xterm"); // TERM supports colors. + EXPECT_TRUE(ShouldUseColor(true)); // Stdout is a TTY. + EXPECT_TRUE(ShouldUseColor(false)); // Stdout is not a TTY. + + SetEnv("TERM", "dumb"); // TERM doesn't support colors. + EXPECT_TRUE(ShouldUseColor(true)); // Stdout is a TTY. + EXPECT_TRUE(ShouldUseColor(false)); // Stdout is not a TTY. +} + +TEST(ColoredOutputTest, UsesColorsWhenGTestColorFlagIsAliasOfYes) { + SetEnv("TERM", "dumb"); // TERM doesn't support colors. + + GTEST_FLAG(color) = "True"; + EXPECT_TRUE(ShouldUseColor(false)); // Stdout is not a TTY. + + GTEST_FLAG(color) = "t"; + EXPECT_TRUE(ShouldUseColor(false)); // Stdout is not a TTY. + + GTEST_FLAG(color) = "1"; + EXPECT_TRUE(ShouldUseColor(false)); // Stdout is not a TTY. +} + +TEST(ColoredOutputTest, UsesNoColorWhenGTestColorFlagIsNo) { + GTEST_FLAG(color) = "no"; + + SetEnv("TERM", "xterm"); // TERM supports colors. + EXPECT_FALSE(ShouldUseColor(true)); // Stdout is a TTY. + EXPECT_FALSE(ShouldUseColor(false)); // Stdout is not a TTY. + + SetEnv("TERM", "dumb"); // TERM doesn't support colors. + EXPECT_FALSE(ShouldUseColor(true)); // Stdout is a TTY. + EXPECT_FALSE(ShouldUseColor(false)); // Stdout is not a TTY. 
+}
+
+TEST(ColoredOutputTest, UsesNoColorWhenGTestColorFlagIsInvalid) {
+  SetEnv("TERM", "xterm");  // TERM supports colors.
+
+  GTEST_FLAG(color) = "F";
+  EXPECT_FALSE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  GTEST_FLAG(color) = "0";
+  EXPECT_FALSE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  GTEST_FLAG(color) = "unknown";
+  EXPECT_FALSE(ShouldUseColor(true));  // Stdout is a TTY.
+}
+
+TEST(ColoredOutputTest, UsesColorsWhenStdoutIsTty) {
+  GTEST_FLAG(color) = "auto";
+
+  SetEnv("TERM", "xterm");  // TERM supports colors.
+  EXPECT_FALSE(ShouldUseColor(false));  // Stdout is not a TTY.
+  EXPECT_TRUE(ShouldUseColor(true));    // Stdout is a TTY.
+}
+
+TEST(ColoredOutputTest, UsesColorsWhenTermSupportsColors) {
+  GTEST_FLAG(color) = "auto";
+
+#if GTEST_OS_WINDOWS
+  // On Windows, we ignore the TERM variable as it's usually not set.
+
+  SetEnv("TERM", "dumb");
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "");
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "xterm");
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+#else
+  // On non-Windows platforms, we rely on TERM to determine if the
+  // terminal supports colors.
+
+  SetEnv("TERM", "dumb");  // TERM doesn't support colors.
+  EXPECT_FALSE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "emacs");  // TERM doesn't support colors.
+  EXPECT_FALSE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "vt100");  // TERM doesn't support colors.
+  EXPECT_FALSE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "xterm-mono");  // TERM doesn't support colors.
+  EXPECT_FALSE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "xterm");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "xterm-color");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "xterm-256color");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "screen");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "screen-256color");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "tmux");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "tmux-256color");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "rxvt-unicode");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "rxvt-unicode-256color");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "linux");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+
+  SetEnv("TERM", "cygwin");  // TERM supports colors.
+  EXPECT_TRUE(ShouldUseColor(true));  // Stdout is a TTY.
+#endif  // GTEST_OS_WINDOWS
+}
+
+// Verifies that StaticAssertTypeEq works in a namespace scope.
+
+static bool dummy1 GTEST_ATTRIBUTE_UNUSED_ = StaticAssertTypeEq<bool, bool>();
+static bool dummy2 GTEST_ATTRIBUTE_UNUSED_ =
+    StaticAssertTypeEq<const int, const int>();
+
+// Verifies that StaticAssertTypeEq works in a class.
+
+template <typename T>
+class StaticAssertTypeEqTestHelper {
+ public:
+  StaticAssertTypeEqTestHelper() { StaticAssertTypeEq<bool, T>(); }
+};
+
+TEST(StaticAssertTypeEqTest, WorksInClass) {
+  StaticAssertTypeEqTestHelper<bool>();
+}
+
+// Verifies that StaticAssertTypeEq works inside a function.
+ +typedef int IntAlias; + +TEST(StaticAssertTypeEqTest, CompilesForEqualTypes) { + StaticAssertTypeEq(); + StaticAssertTypeEq(); +} + +TEST(GetCurrentOsStackTraceExceptTopTest, ReturnsTheStackTrace) { + testing::UnitTest* const unit_test = testing::UnitTest::GetInstance(); + + // We don't have a stack walker in Google Test yet. + EXPECT_STREQ("", GetCurrentOsStackTraceExceptTop(unit_test, 0).c_str()); + EXPECT_STREQ("", GetCurrentOsStackTraceExceptTop(unit_test, 1).c_str()); +} + +TEST(HasNonfatalFailureTest, ReturnsFalseWhenThereIsNoFailure) { + EXPECT_FALSE(HasNonfatalFailure()); +} + +static void FailFatally() { FAIL(); } + +TEST(HasNonfatalFailureTest, ReturnsFalseWhenThereIsOnlyFatalFailure) { + FailFatally(); + const bool has_nonfatal_failure = HasNonfatalFailure(); + ClearCurrentTestPartResults(); + EXPECT_FALSE(has_nonfatal_failure); +} + +TEST(HasNonfatalFailureTest, ReturnsTrueWhenThereIsNonfatalFailure) { + ADD_FAILURE(); + const bool has_nonfatal_failure = HasNonfatalFailure(); + ClearCurrentTestPartResults(); + EXPECT_TRUE(has_nonfatal_failure); +} + +TEST(HasNonfatalFailureTest, ReturnsTrueWhenThereAreFatalAndNonfatalFailures) { + FailFatally(); + ADD_FAILURE(); + const bool has_nonfatal_failure = HasNonfatalFailure(); + ClearCurrentTestPartResults(); + EXPECT_TRUE(has_nonfatal_failure); +} + +// A wrapper for calling HasNonfatalFailure outside of a test body. +static bool HasNonfatalFailureHelper() { + return testing::Test::HasNonfatalFailure(); +} + +TEST(HasNonfatalFailureTest, WorksOutsideOfTestBody) { + EXPECT_FALSE(HasNonfatalFailureHelper()); +} + +TEST(HasNonfatalFailureTest, WorksOutsideOfTestBody2) { + ADD_FAILURE(); + const bool has_nonfatal_failure = HasNonfatalFailureHelper(); + ClearCurrentTestPartResults(); + EXPECT_TRUE(has_nonfatal_failure); +} + +TEST(HasFailureTest, ReturnsFalseWhenThereIsNoFailure) { + EXPECT_FALSE(HasFailure()); +} + +TEST(HasFailureTest, ReturnsTrueWhenThereIsFatalFailure) { + FailFatally(); + const bool has_failure = HasFailure(); + ClearCurrentTestPartResults(); + EXPECT_TRUE(has_failure); +} + +TEST(HasFailureTest, ReturnsTrueWhenThereIsNonfatalFailure) { + ADD_FAILURE(); + const bool has_failure = HasFailure(); + ClearCurrentTestPartResults(); + EXPECT_TRUE(has_failure); +} + +TEST(HasFailureTest, ReturnsTrueWhenThereAreFatalAndNonfatalFailures) { + FailFatally(); + ADD_FAILURE(); + const bool has_failure = HasFailure(); + ClearCurrentTestPartResults(); + EXPECT_TRUE(has_failure); +} + +// A wrapper for calling HasFailure outside of a test body. +static bool HasFailureHelper() { return testing::Test::HasFailure(); } + +TEST(HasFailureTest, WorksOutsideOfTestBody) { + EXPECT_FALSE(HasFailureHelper()); +} + +TEST(HasFailureTest, WorksOutsideOfTestBody2) { + ADD_FAILURE(); + const bool has_failure = HasFailureHelper(); + ClearCurrentTestPartResults(); + EXPECT_TRUE(has_failure); +} + +class TestListener : public EmptyTestEventListener { + public: + TestListener() : on_start_counter_(NULL), is_destroyed_(NULL) {} + TestListener(int* on_start_counter, bool* is_destroyed) + : on_start_counter_(on_start_counter), + is_destroyed_(is_destroyed) {} + + virtual ~TestListener() { + if (is_destroyed_) + *is_destroyed_ = true; + } + + protected: + virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) { + if (on_start_counter_ != NULL) + (*on_start_counter_)++; + } + + private: + int* on_start_counter_; + bool* is_destroyed_; +}; + +// Tests the constructor. 
+TEST(TestEventListenersTest, ConstructionWorks) { + TestEventListeners listeners; + + EXPECT_TRUE(TestEventListenersAccessor::GetRepeater(&listeners) != NULL); + EXPECT_TRUE(listeners.default_result_printer() == NULL); + EXPECT_TRUE(listeners.default_xml_generator() == NULL); +} + +// Tests that the TestEventListeners destructor deletes all the listeners it +// owns. +TEST(TestEventListenersTest, DestructionWorks) { + bool default_result_printer_is_destroyed = false; + bool default_xml_printer_is_destroyed = false; + bool extra_listener_is_destroyed = false; + TestListener* default_result_printer = new TestListener( + NULL, &default_result_printer_is_destroyed); + TestListener* default_xml_printer = new TestListener( + NULL, &default_xml_printer_is_destroyed); + TestListener* extra_listener = new TestListener( + NULL, &extra_listener_is_destroyed); + + { + TestEventListeners listeners; + TestEventListenersAccessor::SetDefaultResultPrinter(&listeners, + default_result_printer); + TestEventListenersAccessor::SetDefaultXmlGenerator(&listeners, + default_xml_printer); + listeners.Append(extra_listener); + } + EXPECT_TRUE(default_result_printer_is_destroyed); + EXPECT_TRUE(default_xml_printer_is_destroyed); + EXPECT_TRUE(extra_listener_is_destroyed); +} + +// Tests that a listener Append'ed to a TestEventListeners list starts +// receiving events. +TEST(TestEventListenersTest, Append) { + int on_start_counter = 0; + bool is_destroyed = false; + TestListener* listener = new TestListener(&on_start_counter, &is_destroyed); + { + TestEventListeners listeners; + listeners.Append(listener); + TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart( + *UnitTest::GetInstance()); + EXPECT_EQ(1, on_start_counter); + } + EXPECT_TRUE(is_destroyed); +} + +// Tests that listeners receive events in the order they were appended to +// the list, except for *End requests, which must be received in the reverse +// order. +class SequenceTestingListener : public EmptyTestEventListener { + public: + SequenceTestingListener(std::vector* vector, const char* id) + : vector_(vector), id_(id) {} + + protected: + virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) { + vector_->push_back(GetEventDescription("OnTestProgramStart")); + } + + virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) { + vector_->push_back(GetEventDescription("OnTestProgramEnd")); + } + + virtual void OnTestIterationStart(const UnitTest& /*unit_test*/, + int /*iteration*/) { + vector_->push_back(GetEventDescription("OnTestIterationStart")); + } + + virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/, + int /*iteration*/) { + vector_->push_back(GetEventDescription("OnTestIterationEnd")); + } + + private: + std::string GetEventDescription(const char* method) { + Message message; + message << id_ << "." 
              << method;
+    return message.GetString();
+  }
+
+  std::vector<std::string>* vector_;
+  const char* const id_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(SequenceTestingListener);
+};
+
+TEST(EventListenerTest, AppendKeepsOrder) {
+  std::vector<std::string> vec;
+  TestEventListeners listeners;
+  listeners.Append(new SequenceTestingListener(&vec, "1st"));
+  listeners.Append(new SequenceTestingListener(&vec, "2nd"));
+  listeners.Append(new SequenceTestingListener(&vec, "3rd"));
+
+  TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart(
+      *UnitTest::GetInstance());
+  ASSERT_EQ(3U, vec.size());
+  EXPECT_STREQ("1st.OnTestProgramStart", vec[0].c_str());
+  EXPECT_STREQ("2nd.OnTestProgramStart", vec[1].c_str());
+  EXPECT_STREQ("3rd.OnTestProgramStart", vec[2].c_str());
+
+  vec.clear();
+  TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramEnd(
+      *UnitTest::GetInstance());
+  ASSERT_EQ(3U, vec.size());
+  EXPECT_STREQ("3rd.OnTestProgramEnd", vec[0].c_str());
+  EXPECT_STREQ("2nd.OnTestProgramEnd", vec[1].c_str());
+  EXPECT_STREQ("1st.OnTestProgramEnd", vec[2].c_str());
+
+  vec.clear();
+  TestEventListenersAccessor::GetRepeater(&listeners)->OnTestIterationStart(
+      *UnitTest::GetInstance(), 0);
+  ASSERT_EQ(3U, vec.size());
+  EXPECT_STREQ("1st.OnTestIterationStart", vec[0].c_str());
+  EXPECT_STREQ("2nd.OnTestIterationStart", vec[1].c_str());
+  EXPECT_STREQ("3rd.OnTestIterationStart", vec[2].c_str());
+
+  vec.clear();
+  TestEventListenersAccessor::GetRepeater(&listeners)->OnTestIterationEnd(
+      *UnitTest::GetInstance(), 0);
+  ASSERT_EQ(3U, vec.size());
+  EXPECT_STREQ("3rd.OnTestIterationEnd", vec[0].c_str());
+  EXPECT_STREQ("2nd.OnTestIterationEnd", vec[1].c_str());
+  EXPECT_STREQ("1st.OnTestIterationEnd", vec[2].c_str());
+}
+
+// Tests that a listener removed from a TestEventListeners list stops receiving
+// events and is not deleted when the list is destroyed.
+TEST(TestEventListenersTest, Release) {
+  int on_start_counter = 0;
+  bool is_destroyed = false;
+  // Although Append passes the ownership of this object to the list,
+  // the following calls release it, and we need to delete it before the
+  // test ends.
+  TestListener* listener = new TestListener(&on_start_counter, &is_destroyed);
+  {
+    TestEventListeners listeners;
+    listeners.Append(listener);
+    EXPECT_EQ(listener, listeners.Release(listener));
+    TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart(
+        *UnitTest::GetInstance());
+    EXPECT_TRUE(listeners.Release(listener) == NULL);
+  }
+  EXPECT_EQ(0, on_start_counter);
+  EXPECT_FALSE(is_destroyed);
+  delete listener;
+}
+
+// Tests that no events are forwarded when event forwarding is disabled.
+TEST(EventListenerTest, SuppressEventForwarding) {
+  int on_start_counter = 0;
+  TestListener* listener = new TestListener(&on_start_counter, NULL);
+
+  TestEventListeners listeners;
+  listeners.Append(listener);
+  ASSERT_TRUE(TestEventListenersAccessor::EventForwardingEnabled(listeners));
+  TestEventListenersAccessor::SuppressEventForwarding(&listeners);
+  ASSERT_FALSE(TestEventListenersAccessor::EventForwardingEnabled(listeners));
+  TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart(
+      *UnitTest::GetInstance());
+  EXPECT_EQ(0, on_start_counter);
+}
+
+// Tests that events generated by Google Test are not forwarded in
+// death test subprocesses.
+TEST(EventListenerDeathTest, EventsNotForwardedInDeathTestSubprecesses) { + EXPECT_DEATH_IF_SUPPORTED({ + GTEST_CHECK_(TestEventListenersAccessor::EventForwardingEnabled( + *GetUnitTestImpl()->listeners())) << "expected failure";}, + "expected failure"); +} + +// Tests that a listener installed via SetDefaultResultPrinter() starts +// receiving events and is returned via default_result_printer() and that +// the previous default_result_printer is removed from the list and deleted. +TEST(EventListenerTest, default_result_printer) { + int on_start_counter = 0; + bool is_destroyed = false; + TestListener* listener = new TestListener(&on_start_counter, &is_destroyed); + + TestEventListeners listeners; + TestEventListenersAccessor::SetDefaultResultPrinter(&listeners, listener); + + EXPECT_EQ(listener, listeners.default_result_printer()); + + TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart( + *UnitTest::GetInstance()); + + EXPECT_EQ(1, on_start_counter); + + // Replacing default_result_printer with something else should remove it + // from the list and destroy it. + TestEventListenersAccessor::SetDefaultResultPrinter(&listeners, NULL); + + EXPECT_TRUE(listeners.default_result_printer() == NULL); + EXPECT_TRUE(is_destroyed); + + // After broadcasting an event the counter is still the same, indicating + // the listener is not in the list anymore. + TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart( + *UnitTest::GetInstance()); + EXPECT_EQ(1, on_start_counter); +} + +// Tests that the default_result_printer listener stops receiving events +// when removed via Release and that is not owned by the list anymore. +TEST(EventListenerTest, RemovingDefaultResultPrinterWorks) { + int on_start_counter = 0; + bool is_destroyed = false; + // Although Append passes the ownership of this object to the list, + // the following calls release it, and we need to delete it before the + // test ends. + TestListener* listener = new TestListener(&on_start_counter, &is_destroyed); + { + TestEventListeners listeners; + TestEventListenersAccessor::SetDefaultResultPrinter(&listeners, listener); + + EXPECT_EQ(listener, listeners.Release(listener)); + EXPECT_TRUE(listeners.default_result_printer() == NULL); + EXPECT_FALSE(is_destroyed); + + // Broadcasting events now should not affect default_result_printer. + TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart( + *UnitTest::GetInstance()); + EXPECT_EQ(0, on_start_counter); + } + // Destroying the list should not affect the listener now, too. + EXPECT_FALSE(is_destroyed); + delete listener; +} + +// Tests that a listener installed via SetDefaultXmlGenerator() starts +// receiving events and is returned via default_xml_generator() and that +// the previous default_xml_generator is removed from the list and deleted. +TEST(EventListenerTest, default_xml_generator) { + int on_start_counter = 0; + bool is_destroyed = false; + TestListener* listener = new TestListener(&on_start_counter, &is_destroyed); + + TestEventListeners listeners; + TestEventListenersAccessor::SetDefaultXmlGenerator(&listeners, listener); + + EXPECT_EQ(listener, listeners.default_xml_generator()); + + TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart( + *UnitTest::GetInstance()); + + EXPECT_EQ(1, on_start_counter); + + // Replacing default_xml_generator with something else should remove it + // from the list and destroy it. 
+ TestEventListenersAccessor::SetDefaultXmlGenerator(&listeners, NULL); + + EXPECT_TRUE(listeners.default_xml_generator() == NULL); + EXPECT_TRUE(is_destroyed); + + // After broadcasting an event the counter is still the same, indicating + // the listener is not in the list anymore. + TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart( + *UnitTest::GetInstance()); + EXPECT_EQ(1, on_start_counter); +} + +// Tests that the default_xml_generator listener stops receiving events +// when removed via Release and that is not owned by the list anymore. +TEST(EventListenerTest, RemovingDefaultXmlGeneratorWorks) { + int on_start_counter = 0; + bool is_destroyed = false; + // Although Append passes the ownership of this object to the list, + // the following calls release it, and we need to delete it before the + // test ends. + TestListener* listener = new TestListener(&on_start_counter, &is_destroyed); + { + TestEventListeners listeners; + TestEventListenersAccessor::SetDefaultXmlGenerator(&listeners, listener); + + EXPECT_EQ(listener, listeners.Release(listener)); + EXPECT_TRUE(listeners.default_xml_generator() == NULL); + EXPECT_FALSE(is_destroyed); + + // Broadcasting events now should not affect default_xml_generator. + TestEventListenersAccessor::GetRepeater(&listeners)->OnTestProgramStart( + *UnitTest::GetInstance()); + EXPECT_EQ(0, on_start_counter); + } + // Destroying the list should not affect the listener now, too. + EXPECT_FALSE(is_destroyed); + delete listener; +} + +// Sanity tests to ensure that the alternative, verbose spellings of +// some of the macros work. We don't test them thoroughly as that +// would be quite involved. Since their implementations are +// straightforward, and they are rarely used, we'll just rely on the +// users to tell us when they are broken. +GTEST_TEST(AlternativeNameTest, Works) { // GTEST_TEST is the same as TEST. + GTEST_SUCCEED() << "OK"; // GTEST_SUCCEED is the same as SUCCEED. + + // GTEST_FAIL is the same as FAIL. + EXPECT_FATAL_FAILURE(GTEST_FAIL() << "An expected failure", + "An expected failure"); + + // GTEST_ASSERT_XY is the same as ASSERT_XY. + + GTEST_ASSERT_EQ(0, 0); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_EQ(0, 1) << "An expected failure", + "An expected failure"); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_EQ(1, 0) << "An expected failure", + "An expected failure"); + + GTEST_ASSERT_NE(0, 1); + GTEST_ASSERT_NE(1, 0); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_NE(0, 0) << "An expected failure", + "An expected failure"); + + GTEST_ASSERT_LE(0, 0); + GTEST_ASSERT_LE(0, 1); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_LE(1, 0) << "An expected failure", + "An expected failure"); + + GTEST_ASSERT_LT(0, 1); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_LT(0, 0) << "An expected failure", + "An expected failure"); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_LT(1, 0) << "An expected failure", + "An expected failure"); + + GTEST_ASSERT_GE(0, 0); + GTEST_ASSERT_GE(1, 0); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_GE(0, 1) << "An expected failure", + "An expected failure"); + + GTEST_ASSERT_GT(1, 0); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_GT(0, 1) << "An expected failure", + "An expected failure"); + EXPECT_FATAL_FAILURE(GTEST_ASSERT_GT(1, 1) << "An expected failure", + "An expected failure"); +} + +// Tests for internal utilities necessary for implementation of the universal +// printing. +// TODO(vladl@google.com): Find a better home for them. 
+ +class ConversionHelperBase {}; +class ConversionHelperDerived : public ConversionHelperBase {}; + +// Tests that IsAProtocolMessage::value is a compile-time constant. +TEST(IsAProtocolMessageTest, ValueIsCompileTimeConstant) { + GTEST_COMPILE_ASSERT_(IsAProtocolMessage::value, + const_true); + GTEST_COMPILE_ASSERT_(!IsAProtocolMessage::value, const_false); +} + +// Tests that IsAProtocolMessage::value is true when T is +// proto2::Message or a sub-class of it. +TEST(IsAProtocolMessageTest, ValueIsTrueWhenTypeIsAProtocolMessage) { + EXPECT_TRUE(IsAProtocolMessage< ::proto2::Message>::value); + EXPECT_TRUE(IsAProtocolMessage::value); +} + +// Tests that IsAProtocolMessage::value is false when T is neither +// ProtocolMessage nor a sub-class of it. +TEST(IsAProtocolMessageTest, ValueIsFalseWhenTypeIsNotAProtocolMessage) { + EXPECT_FALSE(IsAProtocolMessage::value); + EXPECT_FALSE(IsAProtocolMessage::value); +} + +// Tests that CompileAssertTypesEqual compiles when the type arguments are +// equal. +TEST(CompileAssertTypesEqual, CompilesWhenTypesAreEqual) { + CompileAssertTypesEqual(); + CompileAssertTypesEqual(); +} + +// Tests that RemoveReference does not affect non-reference types. +TEST(RemoveReferenceTest, DoesNotAffectNonReferenceType) { + CompileAssertTypesEqual::type>(); + CompileAssertTypesEqual::type>(); +} + +// Tests that RemoveReference removes reference from reference types. +TEST(RemoveReferenceTest, RemovesReference) { + CompileAssertTypesEqual::type>(); + CompileAssertTypesEqual::type>(); +} + +// Tests GTEST_REMOVE_REFERENCE_. + +template +void TestGTestRemoveReference() { + CompileAssertTypesEqual(); +} + +TEST(RemoveReferenceTest, MacroVersion) { + TestGTestRemoveReference(); + TestGTestRemoveReference(); +} + + +// Tests that RemoveConst does not affect non-const types. +TEST(RemoveConstTest, DoesNotAffectNonConstType) { + CompileAssertTypesEqual::type>(); + CompileAssertTypesEqual::type>(); +} + +// Tests that RemoveConst removes const from const types. +TEST(RemoveConstTest, RemovesConst) { + CompileAssertTypesEqual::type>(); + CompileAssertTypesEqual::type>(); + CompileAssertTypesEqual::type>(); +} + +// Tests GTEST_REMOVE_CONST_. + +template +void TestGTestRemoveConst() { + CompileAssertTypesEqual(); +} + +TEST(RemoveConstTest, MacroVersion) { + TestGTestRemoveConst(); + TestGTestRemoveConst(); + TestGTestRemoveConst(); +} + +// Tests GTEST_REMOVE_REFERENCE_AND_CONST_. + +template +void TestGTestRemoveReferenceAndConst() { + CompileAssertTypesEqual(); +} + +TEST(RemoveReferenceToConstTest, Works) { + TestGTestRemoveReferenceAndConst(); + TestGTestRemoveReferenceAndConst(); + TestGTestRemoveReferenceAndConst(); + TestGTestRemoveReferenceAndConst(); + TestGTestRemoveReferenceAndConst(); +} + +// Tests that AddReference does not affect reference types. +TEST(AddReferenceTest, DoesNotAffectReferenceType) { + CompileAssertTypesEqual::type>(); + CompileAssertTypesEqual::type>(); +} + +// Tests that AddReference adds reference to non-reference types. +TEST(AddReferenceTest, AddsReference) { + CompileAssertTypesEqual::type>(); + CompileAssertTypesEqual::type>(); +} + +// Tests GTEST_ADD_REFERENCE_. + +template +void TestGTestAddReference() { + CompileAssertTypesEqual(); +} + +TEST(AddReferenceTest, MacroVersion) { + TestGTestAddReference(); + TestGTestAddReference(); +} + +// Tests GTEST_REFERENCE_TO_CONST_. 
+ +template +void TestGTestReferenceToConst() { + CompileAssertTypesEqual(); +} + +TEST(GTestReferenceToConstTest, Works) { + TestGTestReferenceToConst(); + TestGTestReferenceToConst(); + TestGTestReferenceToConst(); + TestGTestReferenceToConst(); +} + +// Tests that ImplicitlyConvertible::value is a compile-time constant. +TEST(ImplicitlyConvertibleTest, ValueIsCompileTimeConstant) { + GTEST_COMPILE_ASSERT_((ImplicitlyConvertible::value), const_true); + GTEST_COMPILE_ASSERT_((!ImplicitlyConvertible::value), + const_false); +} + +// Tests that ImplicitlyConvertible::value is true when T1 can +// be implicitly converted to T2. +TEST(ImplicitlyConvertibleTest, ValueIsTrueWhenConvertible) { + EXPECT_TRUE((ImplicitlyConvertible::value)); + EXPECT_TRUE((ImplicitlyConvertible::value)); + EXPECT_TRUE((ImplicitlyConvertible::value)); + EXPECT_TRUE((ImplicitlyConvertible::value)); + EXPECT_TRUE((ImplicitlyConvertible::value)); + EXPECT_TRUE((ImplicitlyConvertible::value)); +} + +// Tests that ImplicitlyConvertible::value is false when T1 +// cannot be implicitly converted to T2. +TEST(ImplicitlyConvertibleTest, ValueIsFalseWhenNotConvertible) { + EXPECT_FALSE((ImplicitlyConvertible::value)); + EXPECT_FALSE((ImplicitlyConvertible::value)); + EXPECT_FALSE((ImplicitlyConvertible::value)); + EXPECT_FALSE((ImplicitlyConvertible::value)); +} + +// Tests IsContainerTest. + +class NonContainer {}; + +TEST(IsContainerTestTest, WorksForNonContainer) { + EXPECT_EQ(sizeof(IsNotContainer), sizeof(IsContainerTest(0))); + EXPECT_EQ(sizeof(IsNotContainer), sizeof(IsContainerTest(0))); + EXPECT_EQ(sizeof(IsNotContainer), sizeof(IsContainerTest(0))); +} + +TEST(IsContainerTestTest, WorksForContainer) { + EXPECT_EQ(sizeof(IsContainer), + sizeof(IsContainerTest >(0))); + EXPECT_EQ(sizeof(IsContainer), + sizeof(IsContainerTest >(0))); +} + +// Tests ArrayEq(). + +TEST(ArrayEqTest, WorksForDegeneratedArrays) { + EXPECT_TRUE(ArrayEq(5, 5L)); + EXPECT_FALSE(ArrayEq('a', 0)); +} + +TEST(ArrayEqTest, WorksForOneDimensionalArrays) { + // Note that a and b are distinct but compatible types. + const int a[] = { 0, 1 }; + long b[] = { 0, 1 }; + EXPECT_TRUE(ArrayEq(a, b)); + EXPECT_TRUE(ArrayEq(a, 2, b)); + + b[0] = 2; + EXPECT_FALSE(ArrayEq(a, b)); + EXPECT_FALSE(ArrayEq(a, 1, b)); +} + +TEST(ArrayEqTest, WorksForTwoDimensionalArrays) { + const char a[][3] = { "hi", "lo" }; + const char b[][3] = { "hi", "lo" }; + const char c[][3] = { "hi", "li" }; + + EXPECT_TRUE(ArrayEq(a, b)); + EXPECT_TRUE(ArrayEq(a, 2, b)); + + EXPECT_FALSE(ArrayEq(a, c)); + EXPECT_FALSE(ArrayEq(a, 2, c)); +} + +// Tests ArrayAwareFind(). + +TEST(ArrayAwareFindTest, WorksForOneDimensionalArray) { + const char a[] = "hello"; + EXPECT_EQ(a + 4, ArrayAwareFind(a, a + 5, 'o')); + EXPECT_EQ(a + 5, ArrayAwareFind(a, a + 5, 'x')); +} + +TEST(ArrayAwareFindTest, WorksForTwoDimensionalArray) { + int a[][2] = { { 0, 1 }, { 2, 3 }, { 4, 5 } }; + const int b[2] = { 2, 3 }; + EXPECT_EQ(a + 1, ArrayAwareFind(a, a + 3, b)); + + const int c[2] = { 6, 7 }; + EXPECT_EQ(a + 3, ArrayAwareFind(a, a + 3, c)); +} + +// Tests CopyArray(). + +TEST(CopyArrayTest, WorksForDegeneratedArrays) { + int n = 0; + CopyArray('a', &n); + EXPECT_EQ('a', n); +} + +TEST(CopyArrayTest, WorksForOneDimensionalArrays) { + const char a[3] = "hi"; + int b[3]; +#ifndef __BORLANDC__ // C++Builder cannot compile some array size deductions. 
+ CopyArray(a, &b); + EXPECT_TRUE(ArrayEq(a, b)); +#endif + + int c[3]; + CopyArray(a, 3, c); + EXPECT_TRUE(ArrayEq(a, c)); +} + +TEST(CopyArrayTest, WorksForTwoDimensionalArrays) { + const int a[2][3] = { { 0, 1, 2 }, { 3, 4, 5 } }; + int b[2][3]; +#ifndef __BORLANDC__ // C++Builder cannot compile some array size deductions. + CopyArray(a, &b); + EXPECT_TRUE(ArrayEq(a, b)); +#endif + + int c[2][3]; + CopyArray(a, 2, c); + EXPECT_TRUE(ArrayEq(a, c)); +} + +// Tests NativeArray. + +TEST(NativeArrayTest, ConstructorFromArrayWorks) { + const int a[3] = { 0, 1, 2 }; + NativeArray na(a, 3, RelationToSourceReference()); + EXPECT_EQ(3U, na.size()); + EXPECT_EQ(a, na.begin()); +} + +TEST(NativeArrayTest, CreatesAndDeletesCopyOfArrayWhenAskedTo) { + typedef int Array[2]; + Array* a = new Array[1]; + (*a)[0] = 0; + (*a)[1] = 1; + NativeArray na(*a, 2, RelationToSourceCopy()); + EXPECT_NE(*a, na.begin()); + delete[] a; + EXPECT_EQ(0, na.begin()[0]); + EXPECT_EQ(1, na.begin()[1]); + + // We rely on the heap checker to verify that na deletes the copy of + // array. +} + +TEST(NativeArrayTest, TypeMembersAreCorrect) { + StaticAssertTypeEq::value_type>(); + StaticAssertTypeEq::value_type>(); + + StaticAssertTypeEq::const_iterator>(); + StaticAssertTypeEq::const_iterator>(); +} + +TEST(NativeArrayTest, MethodsWork) { + const int a[3] = { 0, 1, 2 }; + NativeArray na(a, 3, RelationToSourceCopy()); + ASSERT_EQ(3U, na.size()); + EXPECT_EQ(3, na.end() - na.begin()); + + NativeArray::const_iterator it = na.begin(); + EXPECT_EQ(0, *it); + ++it; + EXPECT_EQ(1, *it); + it++; + EXPECT_EQ(2, *it); + ++it; + EXPECT_EQ(na.end(), it); + + EXPECT_TRUE(na == na); + + NativeArray na2(a, 3, RelationToSourceReference()); + EXPECT_TRUE(na == na2); + + const int b1[3] = { 0, 1, 1 }; + const int b2[4] = { 0, 1, 2, 3 }; + EXPECT_FALSE(na == NativeArray(b1, 3, RelationToSourceReference())); + EXPECT_FALSE(na == NativeArray(b2, 4, RelationToSourceCopy())); +} + +TEST(NativeArrayTest, WorksForTwoDimensionalArray) { + const char a[2][3] = { "hi", "lo" }; + NativeArray na(a, 2, RelationToSourceReference()); + ASSERT_EQ(2U, na.size()); + EXPECT_EQ(a, na.begin()); +} + +// Tests SkipPrefix(). + +TEST(SkipPrefixTest, SkipsWhenPrefixMatches) { + const char* const str = "hello"; + + const char* p = str; + EXPECT_TRUE(SkipPrefix("", &p)); + EXPECT_EQ(str, p); + + p = str; + EXPECT_TRUE(SkipPrefix("hell", &p)); + EXPECT_EQ(str + 4, p); +} + +TEST(SkipPrefixTest, DoesNotSkipWhenPrefixDoesNotMatch) { + const char* const str = "world"; + + const char* p = str; + EXPECT_FALSE(SkipPrefix("W", &p)); + EXPECT_EQ(str, p); + + p = str; + EXPECT_FALSE(SkipPrefix("world!", &p)); + EXPECT_EQ(str, p); +} + +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: keith.ray@gmail.com (Keith Ray) +// +// gtest_xml_outfile1_test_ writes some xml via TestProperty used by +// gtest_xml_outfiles_test.py + +#include "gtest/gtest.h" + +class PropertyOne : public testing::Test { + protected: + virtual void SetUp() { + RecordProperty("SetUpProp", 1); + } + virtual void TearDown() { + RecordProperty("TearDownProp", 1); + } +}; + +TEST_F(PropertyOne, TestSomeProperties) { + RecordProperty("TestSomeProperty", 1); +} +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: keith.ray@gmail.com (Keith Ray) +// +// gtest_xml_outfile2_test_ writes some xml via TestProperty used by +// gtest_xml_outfiles_test.py + +#include "gtest/gtest.h" + +class PropertyTwo : public testing::Test { + protected: + virtual void SetUp() { + RecordProperty("SetUpProp", 2); + } + virtual void TearDown() { + RecordProperty("TearDownProp", 2); + } +}; + +TEST_F(PropertyTwo, TestSomeProperties) { + RecordProperty("TestSomeProperty", 2); +} +// Copyright 2006, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// This is part of the unit test for include/gtest/gtest_prod.h. + +#include "production.h" + +PrivateCode::PrivateCode() : x_(0) {} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Tests for death tests. 
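[Editor's aside, not part of the patch above or below.] The vendored file whose header comment ends just above ("Tests for death tests") exercises googletest's death-test macros. For orientation only, a minimal stand-alone death test looks roughly like the sketch below; the function CrashOnNull and the test names are hypothetical, while EXPECT_DEATH, the death_test_style flag, and InitGoogleTest are real googletest API.

#include <cstdio>
#include <cstdlib>
#include "gtest/gtest.h"

// Hypothetical function under test: prints a message and aborts on null input.
static void CrashOnNull(const int* p) {
  if (p == nullptr) {
    std::fprintf(stderr, "null pointer\n");
    std::abort();  // Abnormal termination; the macro also matches stderr.
  }
}

TEST(DeathTestSketch, DiesOnNull) {
  // The second argument is a regular expression matched against the
  // child process's stderr output.
  EXPECT_DEATH(CrashOnNull(nullptr), "null pointer");
}

int main(int argc, char** argv) {
  // "threadsafe" re-executes the test binary for each death test, which is
  // safer than a plain fork() in multi-threaded programs ("fast" is the
  // default on most platforms).
  testing::GTEST_FLAG(death_test_style) = "threadsafe";
  testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

[End of editor's aside; the patch resumes below.]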
+ +#include "gtest/gtest-death-test.h" +#include "gtest/gtest.h" +#include "gtest/internal/gtest-filepath.h" + +using testing::internal::AlwaysFalse; +using testing::internal::AlwaysTrue; + +#if GTEST_HAS_DEATH_TEST + +# if GTEST_OS_WINDOWS +# include // For chdir(). +# else +# include +# include // For waitpid. +# endif // GTEST_OS_WINDOWS + +# include +# include +# include + +# if GTEST_OS_LINUX +# include +# endif // GTEST_OS_LINUX + +# include "gtest/gtest-spi.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +# define GTEST_IMPLEMENTATION_ 1 +# include "src/gtest-internal-inl.h" +# undef GTEST_IMPLEMENTATION_ + +namespace posix = ::testing::internal::posix; + +using testing::Message; +using testing::internal::DeathTest; +using testing::internal::DeathTestFactory; +using testing::internal::FilePath; +using testing::internal::GetLastErrnoDescription; +using testing::internal::GetUnitTestImpl; +using testing::internal::InDeathTestChild; +using testing::internal::ParseNaturalNumber; + +namespace testing { +namespace internal { + +// A helper class whose objects replace the death test factory for a +// single UnitTest object during their lifetimes. +class ReplaceDeathTestFactory { + public: + explicit ReplaceDeathTestFactory(DeathTestFactory* new_factory) + : unit_test_impl_(GetUnitTestImpl()) { + old_factory_ = unit_test_impl_->death_test_factory_.release(); + unit_test_impl_->death_test_factory_.reset(new_factory); + } + + ~ReplaceDeathTestFactory() { + unit_test_impl_->death_test_factory_.release(); + unit_test_impl_->death_test_factory_.reset(old_factory_); + } + private: + // Prevents copying ReplaceDeathTestFactory objects. + ReplaceDeathTestFactory(const ReplaceDeathTestFactory&); + void operator=(const ReplaceDeathTestFactory&); + + UnitTestImpl* unit_test_impl_; + DeathTestFactory* old_factory_; +}; + +} // namespace internal +} // namespace testing + +void DieWithMessage(const ::std::string& message) { + fprintf(stderr, "%s", message.c_str()); + fflush(stderr); // Make sure the text is printed before the process exits. + + // We call _exit() instead of exit(), as the former is a direct + // system call and thus safer in the presence of threads. exit() + // will invoke user-defined exit-hooks, which may do dangerous + // things that conflict with death tests. + // + // Some compilers can recognize that _exit() never returns and issue the + // 'unreachable code' warning for code following this function, unless + // fooled by a fake condition. + if (AlwaysTrue()) + _exit(1); +} + +void DieInside(const ::std::string& function) { + DieWithMessage("death inside " + function + "()."); +} + +// Tests that death tests work. + +class TestForDeathTest : public testing::Test { + protected: + TestForDeathTest() : original_dir_(FilePath::GetCurrentDir()) {} + + virtual ~TestForDeathTest() { + posix::ChDir(original_dir_.c_str()); + } + + // A static member function that's expected to die. + static void StaticMemberFunction() { DieInside("StaticMemberFunction"); } + + // A method of the test fixture that may die. + void MemberFunction() { + if (should_die_) + DieInside("MemberFunction"); + } + + // True iff MemberFunction() should die. + bool should_die_; + const FilePath original_dir_; +}; + +// A class with a member function that may die. 
+class MayDie { + public: + explicit MayDie(bool should_die) : should_die_(should_die) {} + + // A member function that may die. + void MemberFunction() const { + if (should_die_) + DieInside("MayDie::MemberFunction"); + } + + private: + // True iff MemberFunction() should die. + bool should_die_; +}; + +// A global function that's expected to die. +void GlobalFunction() { DieInside("GlobalFunction"); } + +// A non-void function that's expected to die. +int NonVoidFunction() { + DieInside("NonVoidFunction"); + return 1; +} + +// A unary function that may die. +void DieIf(bool should_die) { + if (should_die) + DieInside("DieIf"); +} + +// A binary function that may die. +bool DieIfLessThan(int x, int y) { + if (x < y) { + DieInside("DieIfLessThan"); + } + return true; +} + +// Tests that ASSERT_DEATH can be used outside a TEST, TEST_F, or test fixture. +void DeathTestSubroutine() { + EXPECT_DEATH(GlobalFunction(), "death.*GlobalFunction"); + ASSERT_DEATH(GlobalFunction(), "death.*GlobalFunction"); +} + +// Death in dbg, not opt. +int DieInDebugElse12(int* sideeffect) { + if (sideeffect) *sideeffect = 12; + +# ifndef NDEBUG + + DieInside("DieInDebugElse12"); + +# endif // NDEBUG + + return 12; +} + +# if GTEST_OS_WINDOWS + +// Tests the ExitedWithCode predicate. +TEST(ExitStatusPredicateTest, ExitedWithCode) { + // On Windows, the process's exit code is the same as its exit status, + // so the predicate just compares the its input with its parameter. + EXPECT_TRUE(testing::ExitedWithCode(0)(0)); + EXPECT_TRUE(testing::ExitedWithCode(1)(1)); + EXPECT_TRUE(testing::ExitedWithCode(42)(42)); + EXPECT_FALSE(testing::ExitedWithCode(0)(1)); + EXPECT_FALSE(testing::ExitedWithCode(1)(0)); +} + +# else + +// Returns the exit status of a process that calls _exit(2) with a +// given exit code. This is a helper function for the +// ExitStatusPredicateTest test suite. +static int NormalExitStatus(int exit_code) { + pid_t child_pid = fork(); + if (child_pid == 0) { + _exit(exit_code); + } + int status; + waitpid(child_pid, &status, 0); + return status; +} + +// Returns the exit status of a process that raises a given signal. +// If the signal does not cause the process to die, then it returns +// instead the exit status of a process that exits normally with exit +// code 1. This is a helper function for the ExitStatusPredicateTest +// test suite. +static int KilledExitStatus(int signum) { + pid_t child_pid = fork(); + if (child_pid == 0) { + raise(signum); + _exit(1); + } + int status; + waitpid(child_pid, &status, 0); + return status; +} + +// Tests the ExitedWithCode predicate. +TEST(ExitStatusPredicateTest, ExitedWithCode) { + const int status0 = NormalExitStatus(0); + const int status1 = NormalExitStatus(1); + const int status42 = NormalExitStatus(42); + const testing::ExitedWithCode pred0(0); + const testing::ExitedWithCode pred1(1); + const testing::ExitedWithCode pred42(42); + EXPECT_PRED1(pred0, status0); + EXPECT_PRED1(pred1, status1); + EXPECT_PRED1(pred42, status42); + EXPECT_FALSE(pred0(status1)); + EXPECT_FALSE(pred42(status0)); + EXPECT_FALSE(pred1(status42)); +} + +// Tests the KilledBySignal predicate. 
+TEST(ExitStatusPredicateTest, KilledBySignal) { + const int status_segv = KilledExitStatus(SIGSEGV); + const int status_kill = KilledExitStatus(SIGKILL); + const testing::KilledBySignal pred_segv(SIGSEGV); + const testing::KilledBySignal pred_kill(SIGKILL); + EXPECT_PRED1(pred_segv, status_segv); + EXPECT_PRED1(pred_kill, status_kill); + EXPECT_FALSE(pred_segv(status_kill)); + EXPECT_FALSE(pred_kill(status_segv)); +} + +# endif // GTEST_OS_WINDOWS + +// Tests that the death test macros expand to code which may or may not +// be followed by operator<<, and that in either case the complete text +// comprises only a single C++ statement. +TEST_F(TestForDeathTest, SingleStatement) { + if (AlwaysFalse()) + // This would fail if executed; this is a compilation test only + ASSERT_DEATH(return, ""); + + if (AlwaysTrue()) + EXPECT_DEATH(_exit(1), ""); + else + // This empty "else" branch is meant to ensure that EXPECT_DEATH + // doesn't expand into an "if" statement without an "else" + ; + + if (AlwaysFalse()) + ASSERT_DEATH(return, "") << "did not die"; + + if (AlwaysFalse()) + ; + else + EXPECT_DEATH(_exit(1), "") << 1 << 2 << 3; +} + +void DieWithEmbeddedNul() { + fprintf(stderr, "Hello%cmy null world.\n", '\0'); + fflush(stderr); + _exit(1); +} + +# if GTEST_USES_PCRE +// Tests that EXPECT_DEATH and ASSERT_DEATH work when the error +// message has a NUL character in it. +TEST_F(TestForDeathTest, EmbeddedNulInMessage) { + // TODO(wan@google.com): doesn't support matching strings + // with embedded NUL characters - find a way to workaround it. + EXPECT_DEATH(DieWithEmbeddedNul(), "my null world"); + ASSERT_DEATH(DieWithEmbeddedNul(), "my null world"); +} +# endif // GTEST_USES_PCRE + +// Tests that death test macros expand to code which interacts well with switch +// statements. +TEST_F(TestForDeathTest, SwitchStatement) { + // Microsoft compiler usually complains about switch statements without + // case labels. We suppress that warning for this test. + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4065) + + switch (0) + default: + ASSERT_DEATH(_exit(1), "") << "exit in default switch handler"; + + switch (0) + case 0: + EXPECT_DEATH(_exit(1), "") << "exit in switch case"; + + GTEST_DISABLE_MSC_WARNINGS_POP_() +} + +// Tests that a static member function can be used in a "fast" style +// death test. +TEST_F(TestForDeathTest, StaticMemberFunctionFastStyle) { + testing::GTEST_FLAG(death_test_style) = "fast"; + ASSERT_DEATH(StaticMemberFunction(), "death.*StaticMember"); +} + +// Tests that a method of the test fixture can be used in a "fast" +// style death test. +TEST_F(TestForDeathTest, MemberFunctionFastStyle) { + testing::GTEST_FLAG(death_test_style) = "fast"; + should_die_ = true; + EXPECT_DEATH(MemberFunction(), "inside.*MemberFunction"); +} + +void ChangeToRootDir() { posix::ChDir(GTEST_PATH_SEP_); } + +// Tests that death tests work even if the current directory has been +// changed. +TEST_F(TestForDeathTest, FastDeathTestInChangedDir) { + testing::GTEST_FLAG(death_test_style) = "fast"; + + ChangeToRootDir(); + EXPECT_EXIT(_exit(1), testing::ExitedWithCode(1), ""); + + ChangeToRootDir(); + ASSERT_DEATH(_exit(1), ""); +} + +# if GTEST_OS_LINUX +void SigprofAction(int, siginfo_t*, void*) { /* no op */ } + +// Sets SIGPROF action and ITIMER_PROF timer (interval: 1ms). 
+void SetSigprofActionAndTimer() { + struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 1; + timer.it_value = timer.it_interval; + ASSERT_EQ(0, setitimer(ITIMER_PROF, &timer, NULL)); + struct sigaction signal_action; + memset(&signal_action, 0, sizeof(signal_action)); + sigemptyset(&signal_action.sa_mask); + signal_action.sa_sigaction = SigprofAction; + signal_action.sa_flags = SA_RESTART | SA_SIGINFO; + ASSERT_EQ(0, sigaction(SIGPROF, &signal_action, NULL)); +} + +// Disables ITIMER_PROF timer and ignores SIGPROF signal. +void DisableSigprofActionAndTimer(struct sigaction* old_signal_action) { + struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 0; + timer.it_value = timer.it_interval; + ASSERT_EQ(0, setitimer(ITIMER_PROF, &timer, NULL)); + struct sigaction signal_action; + memset(&signal_action, 0, sizeof(signal_action)); + sigemptyset(&signal_action.sa_mask); + signal_action.sa_handler = SIG_IGN; + ASSERT_EQ(0, sigaction(SIGPROF, &signal_action, old_signal_action)); +} + +// Tests that death tests work when SIGPROF handler and timer are set. +TEST_F(TestForDeathTest, FastSigprofActionSet) { + testing::GTEST_FLAG(death_test_style) = "fast"; + SetSigprofActionAndTimer(); + EXPECT_DEATH(_exit(1), ""); + struct sigaction old_signal_action; + DisableSigprofActionAndTimer(&old_signal_action); + EXPECT_TRUE(old_signal_action.sa_sigaction == SigprofAction); +} + +TEST_F(TestForDeathTest, ThreadSafeSigprofActionSet) { + testing::GTEST_FLAG(death_test_style) = "threadsafe"; + SetSigprofActionAndTimer(); + EXPECT_DEATH(_exit(1), ""); + struct sigaction old_signal_action; + DisableSigprofActionAndTimer(&old_signal_action); + EXPECT_TRUE(old_signal_action.sa_sigaction == SigprofAction); +} +# endif // GTEST_OS_LINUX + +// Repeats a representative sample of death tests in the "threadsafe" style: + +TEST_F(TestForDeathTest, StaticMemberFunctionThreadsafeStyle) { + testing::GTEST_FLAG(death_test_style) = "threadsafe"; + ASSERT_DEATH(StaticMemberFunction(), "death.*StaticMember"); +} + +TEST_F(TestForDeathTest, MemberFunctionThreadsafeStyle) { + testing::GTEST_FLAG(death_test_style) = "threadsafe"; + should_die_ = true; + EXPECT_DEATH(MemberFunction(), "inside.*MemberFunction"); +} + +TEST_F(TestForDeathTest, ThreadsafeDeathTestInLoop) { + testing::GTEST_FLAG(death_test_style) = "threadsafe"; + + for (int i = 0; i < 3; ++i) + EXPECT_EXIT(_exit(i), testing::ExitedWithCode(i), "") << ": i = " << i; +} + +TEST_F(TestForDeathTest, ThreadsafeDeathTestInChangedDir) { + testing::GTEST_FLAG(death_test_style) = "threadsafe"; + + ChangeToRootDir(); + EXPECT_EXIT(_exit(1), testing::ExitedWithCode(1), ""); + + ChangeToRootDir(); + ASSERT_DEATH(_exit(1), ""); +} + +TEST_F(TestForDeathTest, MixedStyles) { + testing::GTEST_FLAG(death_test_style) = "threadsafe"; + EXPECT_DEATH(_exit(1), ""); + testing::GTEST_FLAG(death_test_style) = "fast"; + EXPECT_DEATH(_exit(1), ""); +} + +# if GTEST_HAS_CLONE && GTEST_HAS_PTHREAD + +namespace { + +bool pthread_flag; + +void SetPthreadFlag() { + pthread_flag = true; +} + +} // namespace + +TEST_F(TestForDeathTest, DoesNotExecuteAtforkHooks) { + if (!testing::GTEST_FLAG(death_test_use_fork)) { + testing::GTEST_FLAG(death_test_style) = "threadsafe"; + pthread_flag = false; + ASSERT_EQ(0, pthread_atfork(&SetPthreadFlag, NULL, NULL)); + ASSERT_DEATH(_exit(1), ""); + ASSERT_FALSE(pthread_flag); + } +} + +# endif // GTEST_HAS_CLONE && GTEST_HAS_PTHREAD + +// Tests that a method of another class can be used in a death 
test. +TEST_F(TestForDeathTest, MethodOfAnotherClass) { + const MayDie x(true); + ASSERT_DEATH(x.MemberFunction(), "MayDie\\:\\:MemberFunction"); +} + +// Tests that a global function can be used in a death test. +TEST_F(TestForDeathTest, GlobalFunction) { + EXPECT_DEATH(GlobalFunction(), "GlobalFunction"); +} + +// Tests that any value convertible to an RE works as a second +// argument to EXPECT_DEATH. +TEST_F(TestForDeathTest, AcceptsAnythingConvertibleToRE) { + static const char regex_c_str[] = "GlobalFunction"; + EXPECT_DEATH(GlobalFunction(), regex_c_str); + + const testing::internal::RE regex(regex_c_str); + EXPECT_DEATH(GlobalFunction(), regex); + +# if GTEST_HAS_GLOBAL_STRING + + const string regex_str(regex_c_str); + EXPECT_DEATH(GlobalFunction(), regex_str); + +# endif // GTEST_HAS_GLOBAL_STRING + +# if !GTEST_USES_PCRE + + const ::std::string regex_std_str(regex_c_str); + EXPECT_DEATH(GlobalFunction(), regex_std_str); + +# endif // !GTEST_USES_PCRE +} + +// Tests that a non-void function can be used in a death test. +TEST_F(TestForDeathTest, NonVoidFunction) { + ASSERT_DEATH(NonVoidFunction(), "NonVoidFunction"); +} + +// Tests that functions that take parameter(s) can be used in a death test. +TEST_F(TestForDeathTest, FunctionWithParameter) { + EXPECT_DEATH(DieIf(true), "DieIf\\(\\)"); + EXPECT_DEATH(DieIfLessThan(2, 3), "DieIfLessThan"); +} + +// Tests that ASSERT_DEATH can be used outside a TEST, TEST_F, or test fixture. +TEST_F(TestForDeathTest, OutsideFixture) { + DeathTestSubroutine(); +} + +// Tests that death tests can be done inside a loop. +TEST_F(TestForDeathTest, InsideLoop) { + for (int i = 0; i < 5; i++) { + EXPECT_DEATH(DieIfLessThan(-1, i), "DieIfLessThan") << "where i == " << i; + } +} + +// Tests that a compound statement can be used in a death test. +TEST_F(TestForDeathTest, CompoundStatement) { + EXPECT_DEATH({ // NOLINT + const int x = 2; + const int y = x + 1; + DieIfLessThan(x, y); + }, + "DieIfLessThan"); +} + +// Tests that code that doesn't die causes a death test to fail. +TEST_F(TestForDeathTest, DoesNotDie) { + EXPECT_NONFATAL_FAILURE(EXPECT_DEATH(DieIf(false), "DieIf"), + "failed to die"); +} + +// Tests that a death test fails when the error message isn't expected. +TEST_F(TestForDeathTest, ErrorMessageMismatch) { + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_DEATH(DieIf(true), "DieIfLessThan") << "End of death test message."; + }, "died but not with expected error"); +} + +// On exit, *aborted will be true iff the EXPECT_DEATH() statement +// aborted the function. +void ExpectDeathTestHelper(bool* aborted) { + *aborted = true; + EXPECT_DEATH(DieIf(false), "DieIf"); // This assertion should fail. + *aborted = false; +} + +// Tests that EXPECT_DEATH doesn't abort the test on failure. +TEST_F(TestForDeathTest, EXPECT_DEATH) { + bool aborted = true; + EXPECT_NONFATAL_FAILURE(ExpectDeathTestHelper(&aborted), + "failed to die"); + EXPECT_FALSE(aborted); +} + +// Tests that ASSERT_DEATH does abort the test on failure. +TEST_F(TestForDeathTest, ASSERT_DEATH) { + static bool aborted; + EXPECT_FATAL_FAILURE({ // NOLINT + aborted = true; + ASSERT_DEATH(DieIf(false), "DieIf"); // This assertion should fail. + aborted = false; + }, "failed to die"); + EXPECT_TRUE(aborted); +} + +// Tests that EXPECT_DEATH evaluates the arguments exactly once. 
+TEST_F(TestForDeathTest, SingleEvaluation) { + int x = 3; + EXPECT_DEATH(DieIf((++x) == 4), "DieIf"); + + const char* regex = "DieIf"; + const char* regex_save = regex; + EXPECT_DEATH(DieIfLessThan(3, 4), regex++); + EXPECT_EQ(regex_save + 1, regex); +} + +// Tests that run-away death tests are reported as failures. +TEST_F(TestForDeathTest, RunawayIsFailure) { + EXPECT_NONFATAL_FAILURE(EXPECT_DEATH(static_cast(0), "Foo"), + "failed to die."); +} + +// Tests that death tests report executing 'return' in the statement as +// failure. +TEST_F(TestForDeathTest, ReturnIsFailure) { + EXPECT_FATAL_FAILURE(ASSERT_DEATH(return, "Bar"), + "illegal return in test statement."); +} + +// Tests that EXPECT_DEBUG_DEATH works as expected, that is, you can stream a +// message to it, and in debug mode it: +// 1. Asserts on death. +// 2. Has no side effect. +// +// And in opt mode, it: +// 1. Has side effects but does not assert. +TEST_F(TestForDeathTest, TestExpectDebugDeath) { + int sideeffect = 0; + + EXPECT_DEBUG_DEATH(DieInDebugElse12(&sideeffect), "death.*DieInDebugElse12") + << "Must accept a streamed message"; + +# ifdef NDEBUG + + // Checks that the assignment occurs in opt mode (sideeffect). + EXPECT_EQ(12, sideeffect); + +# else + + // Checks that the assignment does not occur in dbg mode (no sideeffect). + EXPECT_EQ(0, sideeffect); + +# endif +} + +// Tests that ASSERT_DEBUG_DEATH works as expected, that is, you can stream a +// message to it, and in debug mode it: +// 1. Asserts on death. +// 2. Has no side effect. +// +// And in opt mode, it: +// 1. Has side effects but does not assert. +TEST_F(TestForDeathTest, TestAssertDebugDeath) { + int sideeffect = 0; + + ASSERT_DEBUG_DEATH(DieInDebugElse12(&sideeffect), "death.*DieInDebugElse12") + << "Must accept a streamed message"; + +# ifdef NDEBUG + + // Checks that the assignment occurs in opt mode (sideeffect). + EXPECT_EQ(12, sideeffect); + +# else + + // Checks that the assignment does not occur in dbg mode (no sideeffect). + EXPECT_EQ(0, sideeffect); + +# endif +} + +# ifndef NDEBUG + +void ExpectDebugDeathHelper(bool* aborted) { + *aborted = true; + EXPECT_DEBUG_DEATH(return, "") << "This is expected to fail."; + *aborted = false; +} + +# if GTEST_OS_WINDOWS +TEST(PopUpDeathTest, DoesNotShowPopUpOnAbort) { + printf("This test should be considered failing if it shows " + "any pop-up dialogs.\n"); + fflush(stdout); + + EXPECT_DEATH({ + testing::GTEST_FLAG(catch_exceptions) = false; + abort(); + }, ""); +} +# endif // GTEST_OS_WINDOWS + +// Tests that EXPECT_DEBUG_DEATH in debug mode does not abort +// the function. +TEST_F(TestForDeathTest, ExpectDebugDeathDoesNotAbort) { + bool aborted = true; + EXPECT_NONFATAL_FAILURE(ExpectDebugDeathHelper(&aborted), ""); + EXPECT_FALSE(aborted); +} + +void AssertDebugDeathHelper(bool* aborted) { + *aborted = true; + GTEST_LOG_(INFO) << "Before ASSERT_DEBUG_DEATH"; + ASSERT_DEBUG_DEATH(GTEST_LOG_(INFO) << "In ASSERT_DEBUG_DEATH"; return, "") + << "This is expected to fail."; + GTEST_LOG_(INFO) << "After ASSERT_DEBUG_DEATH"; + *aborted = false; +} + +// Tests that ASSERT_DEBUG_DEATH in debug mode aborts the function on +// failure. 
+TEST_F(TestForDeathTest, AssertDebugDeathAborts) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts2) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts3) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts4) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts5) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts6) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts7) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts8) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts9) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +TEST_F(TestForDeathTest, AssertDebugDeathAborts10) { + static bool aborted; + aborted = false; + EXPECT_FATAL_FAILURE(AssertDebugDeathHelper(&aborted), ""); + EXPECT_TRUE(aborted); +} + +# endif // _NDEBUG + +// Tests the *_EXIT family of macros, using a variety of predicates. +static void TestExitMacros() { + EXPECT_EXIT(_exit(1), testing::ExitedWithCode(1), ""); + ASSERT_EXIT(_exit(42), testing::ExitedWithCode(42), ""); + +# if GTEST_OS_WINDOWS + + // Of all signals effects on the process exit code, only those of SIGABRT + // are documented on Windows. + // See http://msdn.microsoft.com/en-us/library/dwwzkt4c(VS.71).aspx. 
+ EXPECT_EXIT(raise(SIGABRT), testing::ExitedWithCode(3), "") << "b_ar"; + +# else + + EXPECT_EXIT(raise(SIGKILL), testing::KilledBySignal(SIGKILL), "") << "foo"; + ASSERT_EXIT(raise(SIGUSR2), testing::KilledBySignal(SIGUSR2), "") << "bar"; + + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_EXIT(_exit(0), testing::KilledBySignal(SIGSEGV), "") + << "This failure is expected, too."; + }, "This failure is expected, too."); + +# endif // GTEST_OS_WINDOWS + + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_EXIT(raise(SIGSEGV), testing::ExitedWithCode(0), "") + << "This failure is expected."; + }, "This failure is expected."); +} + +TEST_F(TestForDeathTest, ExitMacros) { + TestExitMacros(); +} + +TEST_F(TestForDeathTest, ExitMacrosUsingFork) { + testing::GTEST_FLAG(death_test_use_fork) = true; + TestExitMacros(); +} + +TEST_F(TestForDeathTest, InvalidStyle) { + testing::GTEST_FLAG(death_test_style) = "rococo"; + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_DEATH(_exit(0), "") << "This failure is expected."; + }, "This failure is expected."); +} + +TEST_F(TestForDeathTest, DeathTestFailedOutput) { + testing::GTEST_FLAG(death_test_style) = "fast"; + EXPECT_NONFATAL_FAILURE( + EXPECT_DEATH(DieWithMessage("death\n"), + "expected message"), + "Actual msg:\n" + "[ DEATH ] death\n"); +} + +TEST_F(TestForDeathTest, DeathTestUnexpectedReturnOutput) { + testing::GTEST_FLAG(death_test_style) = "fast"; + EXPECT_NONFATAL_FAILURE( + EXPECT_DEATH({ + fprintf(stderr, "returning\n"); + fflush(stderr); + return; + }, ""), + " Result: illegal return in test statement.\n" + " Error msg:\n" + "[ DEATH ] returning\n"); +} + +TEST_F(TestForDeathTest, DeathTestBadExitCodeOutput) { + testing::GTEST_FLAG(death_test_style) = "fast"; + EXPECT_NONFATAL_FAILURE( + EXPECT_EXIT(DieWithMessage("exiting with rc 1\n"), + testing::ExitedWithCode(3), + "expected message"), + " Result: died but not with expected exit code:\n" + " Exited with exit status 1\n" + "Actual msg:\n" + "[ DEATH ] exiting with rc 1\n"); +} + +TEST_F(TestForDeathTest, DeathTestMultiLineMatchFail) { + testing::GTEST_FLAG(death_test_style) = "fast"; + EXPECT_NONFATAL_FAILURE( + EXPECT_DEATH(DieWithMessage("line 1\nline 2\nline 3\n"), + "line 1\nxyz\nline 3\n"), + "Actual msg:\n" + "[ DEATH ] line 1\n" + "[ DEATH ] line 2\n" + "[ DEATH ] line 3\n"); +} + +TEST_F(TestForDeathTest, DeathTestMultiLineMatchPass) { + testing::GTEST_FLAG(death_test_style) = "fast"; + EXPECT_DEATH(DieWithMessage("line 1\nline 2\nline 3\n"), + "line 1\nline 2\nline 3\n"); +} + +// A DeathTestFactory that returns MockDeathTests. +class MockDeathTestFactory : public DeathTestFactory { + public: + MockDeathTestFactory(); + virtual bool Create(const char* statement, + const ::testing::internal::RE* regex, + const char* file, int line, DeathTest** test); + + // Sets the parameters for subsequent calls to Create. + void SetParameters(bool create, DeathTest::TestRole role, + int status, bool passed); + + // Accessors. + int AssumeRoleCalls() const { return assume_role_calls_; } + int WaitCalls() const { return wait_calls_; } + size_t PassedCalls() const { return passed_args_.size(); } + bool PassedArgument(int n) const { return passed_args_[n]; } + size_t AbortCalls() const { return abort_args_.size(); } + DeathTest::AbortReason AbortArgument(int n) const { + return abort_args_[n]; + } + bool TestDeleted() const { return test_deleted_; } + + private: + friend class MockDeathTest; + // If true, Create will return a MockDeathTest; otherwise it returns + // NULL. 
+ bool create_; + // The value a MockDeathTest will return from its AssumeRole method. + DeathTest::TestRole role_; + // The value a MockDeathTest will return from its Wait method. + int status_; + // The value a MockDeathTest will return from its Passed method. + bool passed_; + + // Number of times AssumeRole was called. + int assume_role_calls_; + // Number of times Wait was called. + int wait_calls_; + // The arguments to the calls to Passed since the last call to + // SetParameters. + std::vector passed_args_; + // The arguments to the calls to Abort since the last call to + // SetParameters. + std::vector abort_args_; + // True if the last MockDeathTest returned by Create has been + // deleted. + bool test_deleted_; +}; + + +// A DeathTest implementation useful in testing. It returns values set +// at its creation from its various inherited DeathTest methods, and +// reports calls to those methods to its parent MockDeathTestFactory +// object. +class MockDeathTest : public DeathTest { + public: + MockDeathTest(MockDeathTestFactory *parent, + TestRole role, int status, bool passed) : + parent_(parent), role_(role), status_(status), passed_(passed) { + } + virtual ~MockDeathTest() { + parent_->test_deleted_ = true; + } + virtual TestRole AssumeRole() { + ++parent_->assume_role_calls_; + return role_; + } + virtual int Wait() { + ++parent_->wait_calls_; + return status_; + } + virtual bool Passed(bool exit_status_ok) { + parent_->passed_args_.push_back(exit_status_ok); + return passed_; + } + virtual void Abort(AbortReason reason) { + parent_->abort_args_.push_back(reason); + } + + private: + MockDeathTestFactory* const parent_; + const TestRole role_; + const int status_; + const bool passed_; +}; + + +// MockDeathTestFactory constructor. +MockDeathTestFactory::MockDeathTestFactory() + : create_(true), + role_(DeathTest::OVERSEE_TEST), + status_(0), + passed_(true), + assume_role_calls_(0), + wait_calls_(0), + passed_args_(), + abort_args_() { +} + + +// Sets the parameters for subsequent calls to Create. +void MockDeathTestFactory::SetParameters(bool create, + DeathTest::TestRole role, + int status, bool passed) { + create_ = create; + role_ = role; + status_ = status; + passed_ = passed; + + assume_role_calls_ = 0; + wait_calls_ = 0; + passed_args_.clear(); + abort_args_.clear(); +} + + +// Sets test to NULL (if create_ is false) or to the address of a new +// MockDeathTest object with parameters taken from the last call +// to SetParameters (if create_ is true). Always returns true. +bool MockDeathTestFactory::Create(const char* /*statement*/, + const ::testing::internal::RE* /*regex*/, + const char* /*file*/, + int /*line*/, + DeathTest** test) { + test_deleted_ = false; + if (create_) { + *test = new MockDeathTest(this, role_, status_, passed_); + } else { + *test = NULL; + } + return true; +} + +// A test fixture for testing the logic of the GTEST_DEATH_TEST_ macro. +// It installs a MockDeathTestFactory that is used for the duration +// of the test case. +class MacroLogicDeathTest : public testing::Test { + protected: + static testing::internal::ReplaceDeathTestFactory* replacer_; + static MockDeathTestFactory* factory_; + + static void SetUpTestCase() { + factory_ = new MockDeathTestFactory; + replacer_ = new testing::internal::ReplaceDeathTestFactory(factory_); + } + + static void TearDownTestCase() { + delete replacer_; + replacer_ = NULL; + delete factory_; + factory_ = NULL; + } + + // Runs a death test that breaks the rules by returning. 
Such a death + // test cannot be run directly from a test routine that uses a + // MockDeathTest, or the remainder of the routine will not be executed. + static void RunReturningDeathTest(bool* flag) { + ASSERT_DEATH({ // NOLINT + *flag = true; + return; + }, ""); + } +}; + +testing::internal::ReplaceDeathTestFactory* MacroLogicDeathTest::replacer_ + = NULL; +MockDeathTestFactory* MacroLogicDeathTest::factory_ = NULL; + + +// Test that nothing happens when the factory doesn't return a DeathTest: +TEST_F(MacroLogicDeathTest, NothingHappens) { + bool flag = false; + factory_->SetParameters(false, DeathTest::OVERSEE_TEST, 0, true); + EXPECT_DEATH(flag = true, ""); + EXPECT_FALSE(flag); + EXPECT_EQ(0, factory_->AssumeRoleCalls()); + EXPECT_EQ(0, factory_->WaitCalls()); + EXPECT_EQ(0U, factory_->PassedCalls()); + EXPECT_EQ(0U, factory_->AbortCalls()); + EXPECT_FALSE(factory_->TestDeleted()); +} + +// Test that the parent process doesn't run the death test code, +// and that the Passed method returns false when the (simulated) +// child process exits with status 0: +TEST_F(MacroLogicDeathTest, ChildExitsSuccessfully) { + bool flag = false; + factory_->SetParameters(true, DeathTest::OVERSEE_TEST, 0, true); + EXPECT_DEATH(flag = true, ""); + EXPECT_FALSE(flag); + EXPECT_EQ(1, factory_->AssumeRoleCalls()); + EXPECT_EQ(1, factory_->WaitCalls()); + ASSERT_EQ(1U, factory_->PassedCalls()); + EXPECT_FALSE(factory_->PassedArgument(0)); + EXPECT_EQ(0U, factory_->AbortCalls()); + EXPECT_TRUE(factory_->TestDeleted()); +} + +// Tests that the Passed method was given the argument "true" when +// the (simulated) child process exits with status 1: +TEST_F(MacroLogicDeathTest, ChildExitsUnsuccessfully) { + bool flag = false; + factory_->SetParameters(true, DeathTest::OVERSEE_TEST, 1, true); + EXPECT_DEATH(flag = true, ""); + EXPECT_FALSE(flag); + EXPECT_EQ(1, factory_->AssumeRoleCalls()); + EXPECT_EQ(1, factory_->WaitCalls()); + ASSERT_EQ(1U, factory_->PassedCalls()); + EXPECT_TRUE(factory_->PassedArgument(0)); + EXPECT_EQ(0U, factory_->AbortCalls()); + EXPECT_TRUE(factory_->TestDeleted()); +} + +// Tests that the (simulated) child process executes the death test +// code, and is aborted with the correct AbortReason if it +// executes a return statement. +TEST_F(MacroLogicDeathTest, ChildPerformsReturn) { + bool flag = false; + factory_->SetParameters(true, DeathTest::EXECUTE_TEST, 0, true); + RunReturningDeathTest(&flag); + EXPECT_TRUE(flag); + EXPECT_EQ(1, factory_->AssumeRoleCalls()); + EXPECT_EQ(0, factory_->WaitCalls()); + EXPECT_EQ(0U, factory_->PassedCalls()); + EXPECT_EQ(1U, factory_->AbortCalls()); + EXPECT_EQ(DeathTest::TEST_ENCOUNTERED_RETURN_STATEMENT, + factory_->AbortArgument(0)); + EXPECT_TRUE(factory_->TestDeleted()); +} + +// Tests that the (simulated) child process is aborted with the +// correct AbortReason if it does not die. +TEST_F(MacroLogicDeathTest, ChildDoesNotDie) { + bool flag = false; + factory_->SetParameters(true, DeathTest::EXECUTE_TEST, 0, true); + EXPECT_DEATH(flag = true, ""); + EXPECT_TRUE(flag); + EXPECT_EQ(1, factory_->AssumeRoleCalls()); + EXPECT_EQ(0, factory_->WaitCalls()); + EXPECT_EQ(0U, factory_->PassedCalls()); + // This time there are two calls to Abort: one since the test didn't + // die, and another from the ReturnSentinel when it's destroyed. The + // sentinel normally isn't destroyed if a test doesn't die, since + // _exit(2) is called in that case by ForkingDeathTest, but not by + // our MockDeathTest. 
+ ASSERT_EQ(2U, factory_->AbortCalls()); + EXPECT_EQ(DeathTest::TEST_DID_NOT_DIE, + factory_->AbortArgument(0)); + EXPECT_EQ(DeathTest::TEST_ENCOUNTERED_RETURN_STATEMENT, + factory_->AbortArgument(1)); + EXPECT_TRUE(factory_->TestDeleted()); +} + +// Tests that a successful death test does not register a successful +// test part. +TEST(SuccessRegistrationDeathTest, NoSuccessPart) { + EXPECT_DEATH(_exit(1), ""); + EXPECT_EQ(0, GetUnitTestImpl()->current_test_result()->total_part_count()); +} + +TEST(StreamingAssertionsDeathTest, DeathTest) { + EXPECT_DEATH(_exit(1), "") << "unexpected failure"; + ASSERT_DEATH(_exit(1), "") << "unexpected failure"; + EXPECT_NONFATAL_FAILURE({ // NOLINT + EXPECT_DEATH(_exit(0), "") << "expected failure"; + }, "expected failure"); + EXPECT_FATAL_FAILURE({ // NOLINT + ASSERT_DEATH(_exit(0), "") << "expected failure"; + }, "expected failure"); +} + +// Tests that GetLastErrnoDescription returns an empty string when the +// last error is 0 and non-empty string when it is non-zero. +TEST(GetLastErrnoDescription, GetLastErrnoDescriptionWorks) { + errno = ENOENT; + EXPECT_STRNE("", GetLastErrnoDescription().c_str()); + errno = 0; + EXPECT_STREQ("", GetLastErrnoDescription().c_str()); +} + +# if GTEST_OS_WINDOWS +TEST(AutoHandleTest, AutoHandleWorks) { + HANDLE handle = ::CreateEvent(NULL, FALSE, FALSE, NULL); + ASSERT_NE(INVALID_HANDLE_VALUE, handle); + + // Tests that the AutoHandle is correctly initialized with a handle. + testing::internal::AutoHandle auto_handle(handle); + EXPECT_EQ(handle, auto_handle.Get()); + + // Tests that Reset assigns INVALID_HANDLE_VALUE. + // Note that this cannot verify whether the original handle is closed. + auto_handle.Reset(); + EXPECT_EQ(INVALID_HANDLE_VALUE, auto_handle.Get()); + + // Tests that Reset assigns the new handle. + // Note that this cannot verify whether the original handle is closed. + handle = ::CreateEvent(NULL, FALSE, FALSE, NULL); + ASSERT_NE(INVALID_HANDLE_VALUE, handle); + auto_handle.Reset(handle); + EXPECT_EQ(handle, auto_handle.Get()); + + // Tests that AutoHandle contains INVALID_HANDLE_VALUE by default. + testing::internal::AutoHandle auto_handle2; + EXPECT_EQ(INVALID_HANDLE_VALUE, auto_handle2.Get()); +} +# endif // GTEST_OS_WINDOWS + +# if GTEST_OS_WINDOWS +typedef unsigned __int64 BiggestParsable; +typedef signed __int64 BiggestSignedParsable; +# else +typedef unsigned long long BiggestParsable; +typedef signed long long BiggestSignedParsable; +# endif // GTEST_OS_WINDOWS + +// We cannot use std::numeric_limits::max() as it clashes with the +// max() macro defined by . +const BiggestParsable kBiggestParsableMax = ULLONG_MAX; +const BiggestSignedParsable kBiggestSignedParsableMax = LLONG_MAX; + +TEST(ParseNaturalNumberTest, RejectsInvalidFormat) { + BiggestParsable result = 0; + + // Rejects non-numbers. + EXPECT_FALSE(ParseNaturalNumber("non-number string", &result)); + + // Rejects numbers with whitespace prefix. + EXPECT_FALSE(ParseNaturalNumber(" 123", &result)); + + // Rejects negative numbers. + EXPECT_FALSE(ParseNaturalNumber("-123", &result)); + + // Rejects numbers starting with a plus sign. 
+ EXPECT_FALSE(ParseNaturalNumber("+123", &result)); + errno = 0; +} + +TEST(ParseNaturalNumberTest, RejectsOverflownNumbers) { + BiggestParsable result = 0; + + EXPECT_FALSE(ParseNaturalNumber("99999999999999999999999", &result)); + + signed char char_result = 0; + EXPECT_FALSE(ParseNaturalNumber("200", &char_result)); + errno = 0; +} + +TEST(ParseNaturalNumberTest, AcceptsValidNumbers) { + BiggestParsable result = 0; + + result = 0; + ASSERT_TRUE(ParseNaturalNumber("123", &result)); + EXPECT_EQ(123U, result); + + // Check 0 as an edge case. + result = 1; + ASSERT_TRUE(ParseNaturalNumber("0", &result)); + EXPECT_EQ(0U, result); + + result = 1; + ASSERT_TRUE(ParseNaturalNumber("00000", &result)); + EXPECT_EQ(0U, result); +} + +TEST(ParseNaturalNumberTest, AcceptsTypeLimits) { + Message msg; + msg << kBiggestParsableMax; + + BiggestParsable result = 0; + EXPECT_TRUE(ParseNaturalNumber(msg.GetString(), &result)); + EXPECT_EQ(kBiggestParsableMax, result); + + Message msg2; + msg2 << kBiggestSignedParsableMax; + + BiggestSignedParsable signed_result = 0; + EXPECT_TRUE(ParseNaturalNumber(msg2.GetString(), &signed_result)); + EXPECT_EQ(kBiggestSignedParsableMax, signed_result); + + Message msg3; + msg3 << INT_MAX; + + int int_result = 0; + EXPECT_TRUE(ParseNaturalNumber(msg3.GetString(), &int_result)); + EXPECT_EQ(INT_MAX, int_result); + + Message msg4; + msg4 << UINT_MAX; + + unsigned int uint_result = 0; + EXPECT_TRUE(ParseNaturalNumber(msg4.GetString(), &uint_result)); + EXPECT_EQ(UINT_MAX, uint_result); +} + +TEST(ParseNaturalNumberTest, WorksForShorterIntegers) { + short short_result = 0; + ASSERT_TRUE(ParseNaturalNumber("123", &short_result)); + EXPECT_EQ(123, short_result); + + signed char char_result = 0; + ASSERT_TRUE(ParseNaturalNumber("123", &char_result)); + EXPECT_EQ(123, char_result); +} + +# if GTEST_OS_WINDOWS +TEST(EnvironmentTest, HandleFitsIntoSizeT) { + // TODO(vladl@google.com): Remove this test after this condition is verified + // in a static assertion in gtest-death-test.cc in the function + // GetStatusFileDescriptor. + ASSERT_TRUE(sizeof(HANDLE) <= sizeof(size_t)); +} +# endif // GTEST_OS_WINDOWS + +// Tests that EXPECT_DEATH_IF_SUPPORTED/ASSERT_DEATH_IF_SUPPORTED trigger +// failures when death tests are available on the system. +TEST(ConditionalDeathMacrosDeathTest, ExpectsDeathWhenDeathTestsAvailable) { + EXPECT_DEATH_IF_SUPPORTED(DieInside("CondDeathTestExpectMacro"), + "death inside CondDeathTestExpectMacro"); + ASSERT_DEATH_IF_SUPPORTED(DieInside("CondDeathTestAssertMacro"), + "death inside CondDeathTestAssertMacro"); + + // Empty statement will not crash, which must trigger a failure. + EXPECT_NONFATAL_FAILURE(EXPECT_DEATH_IF_SUPPORTED(;, ""), ""); + EXPECT_FATAL_FAILURE(ASSERT_DEATH_IF_SUPPORTED(;, ""), ""); +} + +TEST(InDeathTestChildDeathTest, ReportsDeathTestCorrectlyInFastStyle) { + testing::GTEST_FLAG(death_test_style) = "fast"; + EXPECT_FALSE(InDeathTestChild()); + EXPECT_DEATH({ + fprintf(stderr, InDeathTestChild() ? "Inside" : "Outside"); + fflush(stderr); + _exit(1); + }, "Inside"); +} + +TEST(InDeathTestChildDeathTest, ReportsDeathTestCorrectlyInThreadSafeStyle) { + testing::GTEST_FLAG(death_test_style) = "threadsafe"; + EXPECT_FALSE(InDeathTestChild()); + EXPECT_DEATH({ + fprintf(stderr, InDeathTestChild() ? 
"Inside" : "Outside"); + fflush(stderr); + _exit(1); + }, "Inside"); +} + +#else // !GTEST_HAS_DEATH_TEST follows + +using testing::internal::CaptureStderr; +using testing::internal::GetCapturedStderr; + +// Tests that EXPECT_DEATH_IF_SUPPORTED/ASSERT_DEATH_IF_SUPPORTED are still +// defined but do not trigger failures when death tests are not available on +// the system. +TEST(ConditionalDeathMacrosTest, WarnsWhenDeathTestsNotAvailable) { + // Empty statement will not crash, but that should not trigger a failure + // when death tests are not supported. + CaptureStderr(); + EXPECT_DEATH_IF_SUPPORTED(;, ""); + std::string output = GetCapturedStderr(); + ASSERT_TRUE(NULL != strstr(output.c_str(), + "Death tests are not supported on this platform")); + ASSERT_TRUE(NULL != strstr(output.c_str(), ";")); + + // The streamed message should not be printed as there is no test failure. + CaptureStderr(); + EXPECT_DEATH_IF_SUPPORTED(;, "") << "streamed message"; + output = GetCapturedStderr(); + ASSERT_TRUE(NULL == strstr(output.c_str(), "streamed message")); + + CaptureStderr(); + ASSERT_DEATH_IF_SUPPORTED(;, ""); // NOLINT + output = GetCapturedStderr(); + ASSERT_TRUE(NULL != strstr(output.c_str(), + "Death tests are not supported on this platform")); + ASSERT_TRUE(NULL != strstr(output.c_str(), ";")); + + CaptureStderr(); + ASSERT_DEATH_IF_SUPPORTED(;, "") << "streamed message"; // NOLINT + output = GetCapturedStderr(); + ASSERT_TRUE(NULL == strstr(output.c_str(), "streamed message")); +} + +void FuncWithAssert(int* n) { + ASSERT_DEATH_IF_SUPPORTED(return;, ""); + (*n)++; +} + +// Tests that ASSERT_DEATH_IF_SUPPORTED does not return from the current +// function (as ASSERT_DEATH does) if death tests are not supported. +TEST(ConditionalDeathMacrosTest, AssertDeatDoesNotReturnhIfUnsupported) { + int n = 0; + FuncWithAssert(&n); + EXPECT_EQ(1, n); +} + +#endif // !GTEST_HAS_DEATH_TEST + +// Tests that the death test macros expand to code which may or may not +// be followed by operator<<, and that in either case the complete text +// comprises only a single C++ statement. +// +// The syntax should work whether death tests are available or not. +TEST(ConditionalDeathMacrosSyntaxDeathTest, SingleStatement) { + if (AlwaysFalse()) + // This would fail if executed; this is a compilation test only + ASSERT_DEATH_IF_SUPPORTED(return, ""); + + if (AlwaysTrue()) + EXPECT_DEATH_IF_SUPPORTED(_exit(1), ""); + else + // This empty "else" branch is meant to ensure that EXPECT_DEATH + // doesn't expand into an "if" statement without an "else" + ; // NOLINT + + if (AlwaysFalse()) + ASSERT_DEATH_IF_SUPPORTED(return, "") << "did not die"; + + if (AlwaysFalse()) + ; // NOLINT + else + EXPECT_DEATH_IF_SUPPORTED(_exit(1), "") << 1 << 2 << 3; +} + +// Tests that conditional death test macros expand to code which interacts +// well with switch statements. +TEST(ConditionalDeathMacrosSyntaxDeathTest, SwitchStatement) { + // Microsoft compiler usually complains about switch statements without + // case labels. We suppress that warning for this test. + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4065) + + switch (0) + default: + ASSERT_DEATH_IF_SUPPORTED(_exit(1), "") + << "exit in default switch handler"; + + switch (0) + case 0: + EXPECT_DEATH_IF_SUPPORTED(_exit(1), "") << "exit in switch case"; + + GTEST_DISABLE_MSC_WARNINGS_POP_() +} + +// Tests that a test case whose name ends with "DeathTest" works fine +// on Windows. +TEST(NotADeathTest, Test) { + SUCCEED(); +} +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: keith.ray@gmail.com (Keith Ray) +// +// Google Test filepath utilities +// +// This file tests classes and functions used internally by +// Google Test. They are subject to change without notice. +// +// This file is #included from gtest_unittest.cc, to avoid changing +// build or make-files for some existing Google Test clients. Do not +// #include this file anywhere else! + +#include "gtest/internal/gtest-filepath.h" +#include "gtest/gtest.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +#if GTEST_OS_WINDOWS_MOBILE +# include // NOLINT +#elif GTEST_OS_WINDOWS +# include // NOLINT +#endif // GTEST_OS_WINDOWS_MOBILE + +namespace testing { +namespace internal { +namespace { + +#if GTEST_OS_WINDOWS_MOBILE +// TODO(wan@google.com): Move these to the POSIX adapter section in +// gtest-port.h. + +// Windows CE doesn't have the remove C function. +int remove(const char* path) { + LPCWSTR wpath = String::AnsiToUtf16(path); + int ret = DeleteFile(wpath) ? 0 : -1; + delete [] wpath; + return ret; +} +// Windows CE doesn't have the _rmdir C function. +int _rmdir(const char* path) { + FilePath filepath(path); + LPCWSTR wpath = String::AnsiToUtf16( + filepath.RemoveTrailingPathSeparator().c_str()); + int ret = RemoveDirectory(wpath) ? 0 : -1; + delete [] wpath; + return ret; +} + +#else + +TEST(GetCurrentDirTest, ReturnsCurrentDir) { + const FilePath original_dir = FilePath::GetCurrentDir(); + EXPECT_FALSE(original_dir.IsEmpty()); + + posix::ChDir(GTEST_PATH_SEP_); + const FilePath cwd = FilePath::GetCurrentDir(); + posix::ChDir(original_dir.c_str()); + +# if GTEST_OS_WINDOWS + + // Skips the ":". 
+ const char* const cwd_without_drive = strchr(cwd.c_str(), ':'); + ASSERT_TRUE(cwd_without_drive != NULL); + EXPECT_STREQ(GTEST_PATH_SEP_, cwd_without_drive + 1); + +# else + + EXPECT_EQ(GTEST_PATH_SEP_, cwd.string()); + +# endif +} + +#endif // GTEST_OS_WINDOWS_MOBILE + +TEST(IsEmptyTest, ReturnsTrueForEmptyPath) { + EXPECT_TRUE(FilePath("").IsEmpty()); +} + +TEST(IsEmptyTest, ReturnsFalseForNonEmptyPath) { + EXPECT_FALSE(FilePath("a").IsEmpty()); + EXPECT_FALSE(FilePath(".").IsEmpty()); + EXPECT_FALSE(FilePath("a/b").IsEmpty()); + EXPECT_FALSE(FilePath("a\\b\\").IsEmpty()); +} + +// RemoveDirectoryName "" -> "" +TEST(RemoveDirectoryNameTest, WhenEmptyName) { + EXPECT_EQ("", FilePath("").RemoveDirectoryName().string()); +} + +// RemoveDirectoryName "afile" -> "afile" +TEST(RemoveDirectoryNameTest, ButNoDirectory) { + EXPECT_EQ("afile", + FilePath("afile").RemoveDirectoryName().string()); +} + +// RemoveDirectoryName "/afile" -> "afile" +TEST(RemoveDirectoryNameTest, RootFileShouldGiveFileName) { + EXPECT_EQ("afile", + FilePath(GTEST_PATH_SEP_ "afile").RemoveDirectoryName().string()); +} + +// RemoveDirectoryName "adir/" -> "" +TEST(RemoveDirectoryNameTest, WhereThereIsNoFileName) { + EXPECT_EQ("", + FilePath("adir" GTEST_PATH_SEP_).RemoveDirectoryName().string()); +} + +// RemoveDirectoryName "adir/afile" -> "afile" +TEST(RemoveDirectoryNameTest, ShouldGiveFileName) { + EXPECT_EQ("afile", + FilePath("adir" GTEST_PATH_SEP_ "afile").RemoveDirectoryName().string()); +} + +// RemoveDirectoryName "adir/subdir/afile" -> "afile" +TEST(RemoveDirectoryNameTest, ShouldAlsoGiveFileName) { + EXPECT_EQ("afile", + FilePath("adir" GTEST_PATH_SEP_ "subdir" GTEST_PATH_SEP_ "afile") + .RemoveDirectoryName().string()); +} + +#if GTEST_HAS_ALT_PATH_SEP_ + +// Tests that RemoveDirectoryName() works with the alternate separator +// on Windows. + +// RemoveDirectoryName("/afile") -> "afile" +TEST(RemoveDirectoryNameTest, RootFileShouldGiveFileNameForAlternateSeparator) { + EXPECT_EQ("afile", FilePath("/afile").RemoveDirectoryName().string()); +} + +// RemoveDirectoryName("adir/") -> "" +TEST(RemoveDirectoryNameTest, WhereThereIsNoFileNameForAlternateSeparator) { + EXPECT_EQ("", FilePath("adir/").RemoveDirectoryName().string()); +} + +// RemoveDirectoryName("adir/afile") -> "afile" +TEST(RemoveDirectoryNameTest, ShouldGiveFileNameForAlternateSeparator) { + EXPECT_EQ("afile", FilePath("adir/afile").RemoveDirectoryName().string()); +} + +// RemoveDirectoryName("adir/subdir/afile") -> "afile" +TEST(RemoveDirectoryNameTest, ShouldAlsoGiveFileNameForAlternateSeparator) { + EXPECT_EQ("afile", + FilePath("adir/subdir/afile").RemoveDirectoryName().string()); +} + +#endif + +// RemoveFileName "" -> "./" +TEST(RemoveFileNameTest, EmptyName) { +#if GTEST_OS_WINDOWS_MOBILE + // On Windows CE, we use the root as the current directory. + EXPECT_EQ(GTEST_PATH_SEP_, FilePath("").RemoveFileName().string()); +#else + EXPECT_EQ("." 
GTEST_PATH_SEP_, FilePath("").RemoveFileName().string()); +#endif +} + +// RemoveFileName "adir/" -> "adir/" +TEST(RemoveFileNameTest, ButNoFile) { + EXPECT_EQ("adir" GTEST_PATH_SEP_, + FilePath("adir" GTEST_PATH_SEP_).RemoveFileName().string()); +} + +// RemoveFileName "adir/afile" -> "adir/" +TEST(RemoveFileNameTest, GivesDirName) { + EXPECT_EQ("adir" GTEST_PATH_SEP_, + FilePath("adir" GTEST_PATH_SEP_ "afile").RemoveFileName().string()); +} + +// RemoveFileName "adir/subdir/afile" -> "adir/subdir/" +TEST(RemoveFileNameTest, GivesDirAndSubDirName) { + EXPECT_EQ("adir" GTEST_PATH_SEP_ "subdir" GTEST_PATH_SEP_, + FilePath("adir" GTEST_PATH_SEP_ "subdir" GTEST_PATH_SEP_ "afile") + .RemoveFileName().string()); +} + +// RemoveFileName "/afile" -> "/" +TEST(RemoveFileNameTest, GivesRootDir) { + EXPECT_EQ(GTEST_PATH_SEP_, + FilePath(GTEST_PATH_SEP_ "afile").RemoveFileName().string()); +} + +#if GTEST_HAS_ALT_PATH_SEP_ + +// Tests that RemoveFileName() works with the alternate separator on +// Windows. + +// RemoveFileName("adir/") -> "adir/" +TEST(RemoveFileNameTest, ButNoFileForAlternateSeparator) { + EXPECT_EQ("adir" GTEST_PATH_SEP_, + FilePath("adir/").RemoveFileName().string()); +} + +// RemoveFileName("adir/afile") -> "adir/" +TEST(RemoveFileNameTest, GivesDirNameForAlternateSeparator) { + EXPECT_EQ("adir" GTEST_PATH_SEP_, + FilePath("adir/afile").RemoveFileName().string()); +} + +// RemoveFileName("adir/subdir/afile") -> "adir/subdir/" +TEST(RemoveFileNameTest, GivesDirAndSubDirNameForAlternateSeparator) { + EXPECT_EQ("adir" GTEST_PATH_SEP_ "subdir" GTEST_PATH_SEP_, + FilePath("adir/subdir/afile").RemoveFileName().string()); +} + +// RemoveFileName("/afile") -> "\" +TEST(RemoveFileNameTest, GivesRootDirForAlternateSeparator) { + EXPECT_EQ(GTEST_PATH_SEP_, FilePath("/afile").RemoveFileName().string()); +} + +#endif + +TEST(MakeFileNameTest, GenerateWhenNumberIsZero) { + FilePath actual = FilePath::MakeFileName(FilePath("foo"), FilePath("bar"), + 0, "xml"); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar.xml", actual.string()); +} + +TEST(MakeFileNameTest, GenerateFileNameNumberGtZero) { + FilePath actual = FilePath::MakeFileName(FilePath("foo"), FilePath("bar"), + 12, "xml"); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar_12.xml", actual.string()); +} + +TEST(MakeFileNameTest, GenerateFileNameWithSlashNumberIsZero) { + FilePath actual = FilePath::MakeFileName(FilePath("foo" GTEST_PATH_SEP_), + FilePath("bar"), 0, "xml"); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar.xml", actual.string()); +} + +TEST(MakeFileNameTest, GenerateFileNameWithSlashNumberGtZero) { + FilePath actual = FilePath::MakeFileName(FilePath("foo" GTEST_PATH_SEP_), + FilePath("bar"), 12, "xml"); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar_12.xml", actual.string()); +} + +TEST(MakeFileNameTest, GenerateWhenNumberIsZeroAndDirIsEmpty) { + FilePath actual = FilePath::MakeFileName(FilePath(""), FilePath("bar"), + 0, "xml"); + EXPECT_EQ("bar.xml", actual.string()); +} + +TEST(MakeFileNameTest, GenerateWhenNumberIsNotZeroAndDirIsEmpty) { + FilePath actual = FilePath::MakeFileName(FilePath(""), FilePath("bar"), + 14, "xml"); + EXPECT_EQ("bar_14.xml", actual.string()); +} + +TEST(ConcatPathsTest, WorksWhenDirDoesNotEndWithPathSep) { + FilePath actual = FilePath::ConcatPaths(FilePath("foo"), + FilePath("bar.xml")); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar.xml", actual.string()); +} + +TEST(ConcatPathsTest, WorksWhenPath1EndsWithPathSep) { + FilePath actual = FilePath::ConcatPaths(FilePath("foo" GTEST_PATH_SEP_), + FilePath("bar.xml")); + EXPECT_EQ("foo" 
GTEST_PATH_SEP_ "bar.xml", actual.string()); +} + +TEST(ConcatPathsTest, Path1BeingEmpty) { + FilePath actual = FilePath::ConcatPaths(FilePath(""), + FilePath("bar.xml")); + EXPECT_EQ("bar.xml", actual.string()); +} + +TEST(ConcatPathsTest, Path2BeingEmpty) { + FilePath actual = FilePath::ConcatPaths(FilePath("foo"), FilePath("")); + EXPECT_EQ("foo" GTEST_PATH_SEP_, actual.string()); +} + +TEST(ConcatPathsTest, BothPathBeingEmpty) { + FilePath actual = FilePath::ConcatPaths(FilePath(""), + FilePath("")); + EXPECT_EQ("", actual.string()); +} + +TEST(ConcatPathsTest, Path1ContainsPathSep) { + FilePath actual = FilePath::ConcatPaths(FilePath("foo" GTEST_PATH_SEP_ "bar"), + FilePath("foobar.xml")); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar" GTEST_PATH_SEP_ "foobar.xml", + actual.string()); +} + +TEST(ConcatPathsTest, Path2ContainsPathSep) { + FilePath actual = FilePath::ConcatPaths( + FilePath("foo" GTEST_PATH_SEP_), + FilePath("bar" GTEST_PATH_SEP_ "bar.xml")); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar" GTEST_PATH_SEP_ "bar.xml", + actual.string()); +} + +TEST(ConcatPathsTest, Path2EndsWithPathSep) { + FilePath actual = FilePath::ConcatPaths(FilePath("foo"), + FilePath("bar" GTEST_PATH_SEP_)); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar" GTEST_PATH_SEP_, actual.string()); +} + +// RemoveTrailingPathSeparator "" -> "" +TEST(RemoveTrailingPathSeparatorTest, EmptyString) { + EXPECT_EQ("", FilePath("").RemoveTrailingPathSeparator().string()); +} + +// RemoveTrailingPathSeparator "foo" -> "foo" +TEST(RemoveTrailingPathSeparatorTest, FileNoSlashString) { + EXPECT_EQ("foo", FilePath("foo").RemoveTrailingPathSeparator().string()); +} + +// RemoveTrailingPathSeparator "foo/" -> "foo" +TEST(RemoveTrailingPathSeparatorTest, ShouldRemoveTrailingSeparator) { + EXPECT_EQ("foo", + FilePath("foo" GTEST_PATH_SEP_).RemoveTrailingPathSeparator().string()); +#if GTEST_HAS_ALT_PATH_SEP_ + EXPECT_EQ("foo", FilePath("foo/").RemoveTrailingPathSeparator().string()); +#endif +} + +// RemoveTrailingPathSeparator "foo/bar/" -> "foo/bar/" +TEST(RemoveTrailingPathSeparatorTest, ShouldRemoveLastSeparator) { + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar", + FilePath("foo" GTEST_PATH_SEP_ "bar" GTEST_PATH_SEP_) + .RemoveTrailingPathSeparator().string()); +} + +// RemoveTrailingPathSeparator "foo/bar" -> "foo/bar" +TEST(RemoveTrailingPathSeparatorTest, ShouldReturnUnmodified) { + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar", + FilePath("foo" GTEST_PATH_SEP_ "bar") + .RemoveTrailingPathSeparator().string()); +} + +TEST(DirectoryTest, RootDirectoryExists) { +#if GTEST_OS_WINDOWS // We are on Windows. + char current_drive[_MAX_PATH]; // NOLINT + current_drive[0] = static_cast(_getdrive() + 'A' - 1); + current_drive[1] = ':'; + current_drive[2] = '\\'; + current_drive[3] = '\0'; + EXPECT_TRUE(FilePath(current_drive).DirectoryExists()); +#else + EXPECT_TRUE(FilePath("/").DirectoryExists()); +#endif // GTEST_OS_WINDOWS +} + +#if GTEST_OS_WINDOWS +TEST(DirectoryTest, RootOfWrongDriveDoesNotExists) { + const int saved_drive_ = _getdrive(); + // Find a drive that doesn't exist. Start with 'Z' to avoid common ones. + for (char drive = 'Z'; drive >= 'A'; drive--) + if (_chdrive(drive - 'A' + 1) == -1) { + char non_drive[_MAX_PATH]; // NOLINT + non_drive[0] = drive; + non_drive[1] = ':'; + non_drive[2] = '\\'; + non_drive[3] = '\0'; + EXPECT_FALSE(FilePath(non_drive).DirectoryExists()); + break; + } + _chdrive(saved_drive_); +} +#endif // GTEST_OS_WINDOWS + +#if !GTEST_OS_WINDOWS_MOBILE +// Windows CE _does_ consider an empty directory to exist. 
+TEST(DirectoryTest, EmptyPathDirectoryDoesNotExist) { + EXPECT_FALSE(FilePath("").DirectoryExists()); +} +#endif // !GTEST_OS_WINDOWS_MOBILE + +TEST(DirectoryTest, CurrentDirectoryExists) { +#if GTEST_OS_WINDOWS // We are on Windows. +# ifndef _WIN32_CE // Windows CE doesn't have a current directory. + + EXPECT_TRUE(FilePath(".").DirectoryExists()); + EXPECT_TRUE(FilePath(".\\").DirectoryExists()); + +# endif // _WIN32_CE +#else + EXPECT_TRUE(FilePath(".").DirectoryExists()); + EXPECT_TRUE(FilePath("./").DirectoryExists()); +#endif // GTEST_OS_WINDOWS +} + +// "foo/bar" == foo//bar" == "foo///bar" +TEST(NormalizeTest, MultipleConsecutiveSepaparatorsInMidstring) { + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar", + FilePath("foo" GTEST_PATH_SEP_ "bar").string()); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar", + FilePath("foo" GTEST_PATH_SEP_ GTEST_PATH_SEP_ "bar").string()); + EXPECT_EQ("foo" GTEST_PATH_SEP_ "bar", + FilePath("foo" GTEST_PATH_SEP_ GTEST_PATH_SEP_ + GTEST_PATH_SEP_ "bar").string()); +} + +// "/bar" == //bar" == "///bar" +TEST(NormalizeTest, MultipleConsecutiveSepaparatorsAtStringStart) { + EXPECT_EQ(GTEST_PATH_SEP_ "bar", + FilePath(GTEST_PATH_SEP_ "bar").string()); + EXPECT_EQ(GTEST_PATH_SEP_ "bar", + FilePath(GTEST_PATH_SEP_ GTEST_PATH_SEP_ "bar").string()); + EXPECT_EQ(GTEST_PATH_SEP_ "bar", + FilePath(GTEST_PATH_SEP_ GTEST_PATH_SEP_ GTEST_PATH_SEP_ "bar").string()); +} + +// "foo/" == foo//" == "foo///" +TEST(NormalizeTest, MultipleConsecutiveSepaparatorsAtStringEnd) { + EXPECT_EQ("foo" GTEST_PATH_SEP_, + FilePath("foo" GTEST_PATH_SEP_).string()); + EXPECT_EQ("foo" GTEST_PATH_SEP_, + FilePath("foo" GTEST_PATH_SEP_ GTEST_PATH_SEP_).string()); + EXPECT_EQ("foo" GTEST_PATH_SEP_, + FilePath("foo" GTEST_PATH_SEP_ GTEST_PATH_SEP_ GTEST_PATH_SEP_).string()); +} + +#if GTEST_HAS_ALT_PATH_SEP_ + +// Tests that separators at the end of the string are normalized +// regardless of their combination (e.g. "foo\" =="foo/\" == +// "foo\\/"). +TEST(NormalizeTest, MixAlternateSeparatorAtStringEnd) { + EXPECT_EQ("foo" GTEST_PATH_SEP_, + FilePath("foo/").string()); + EXPECT_EQ("foo" GTEST_PATH_SEP_, + FilePath("foo" GTEST_PATH_SEP_ "/").string()); + EXPECT_EQ("foo" GTEST_PATH_SEP_, + FilePath("foo//" GTEST_PATH_SEP_).string()); +} + +#endif + +TEST(AssignmentOperatorTest, DefaultAssignedToNonDefault) { + FilePath default_path; + FilePath non_default_path("path"); + non_default_path = default_path; + EXPECT_EQ("", non_default_path.string()); + EXPECT_EQ("", default_path.string()); // RHS var is unchanged. +} + +TEST(AssignmentOperatorTest, NonDefaultAssignedToDefault) { + FilePath non_default_path("path"); + FilePath default_path; + default_path = non_default_path; + EXPECT_EQ("path", default_path.string()); + EXPECT_EQ("path", non_default_path.string()); // RHS var is unchanged. 
+} + +TEST(AssignmentOperatorTest, ConstAssignedToNonConst) { + const FilePath const_default_path("const_path"); + FilePath non_default_path("path"); + non_default_path = const_default_path; + EXPECT_EQ("const_path", non_default_path.string()); +} + +class DirectoryCreationTest : public Test { + protected: + virtual void SetUp() { + testdata_path_.Set(FilePath( + TempDir() + GetCurrentExecutableName().string() + + "_directory_creation" GTEST_PATH_SEP_ "test" GTEST_PATH_SEP_)); + testdata_file_.Set(testdata_path_.RemoveTrailingPathSeparator()); + + unique_file0_.Set(FilePath::MakeFileName(testdata_path_, FilePath("unique"), + 0, "txt")); + unique_file1_.Set(FilePath::MakeFileName(testdata_path_, FilePath("unique"), + 1, "txt")); + + remove(testdata_file_.c_str()); + remove(unique_file0_.c_str()); + remove(unique_file1_.c_str()); + posix::RmDir(testdata_path_.c_str()); + } + + virtual void TearDown() { + remove(testdata_file_.c_str()); + remove(unique_file0_.c_str()); + remove(unique_file1_.c_str()); + posix::RmDir(testdata_path_.c_str()); + } + + void CreateTextFile(const char* filename) { + FILE* f = posix::FOpen(filename, "w"); + fprintf(f, "text\n"); + fclose(f); + } + + // Strings representing a directory and a file, with identical paths + // except for the trailing separator character that distinquishes + // a directory named 'test' from a file named 'test'. Example names: + FilePath testdata_path_; // "/tmp/directory_creation/test/" + FilePath testdata_file_; // "/tmp/directory_creation/test" + FilePath unique_file0_; // "/tmp/directory_creation/test/unique.txt" + FilePath unique_file1_; // "/tmp/directory_creation/test/unique_1.txt" +}; + +TEST_F(DirectoryCreationTest, CreateDirectoriesRecursively) { + EXPECT_FALSE(testdata_path_.DirectoryExists()) << testdata_path_.string(); + EXPECT_TRUE(testdata_path_.CreateDirectoriesRecursively()); + EXPECT_TRUE(testdata_path_.DirectoryExists()); +} + +TEST_F(DirectoryCreationTest, CreateDirectoriesForAlreadyExistingPath) { + EXPECT_FALSE(testdata_path_.DirectoryExists()) << testdata_path_.string(); + EXPECT_TRUE(testdata_path_.CreateDirectoriesRecursively()); + // Call 'create' again... should still succeed. + EXPECT_TRUE(testdata_path_.CreateDirectoriesRecursively()); +} + +TEST_F(DirectoryCreationTest, CreateDirectoriesAndUniqueFilename) { + FilePath file_path(FilePath::GenerateUniqueFileName(testdata_path_, + FilePath("unique"), "txt")); + EXPECT_EQ(unique_file0_.string(), file_path.string()); + EXPECT_FALSE(file_path.FileOrDirectoryExists()); // file not there + + testdata_path_.CreateDirectoriesRecursively(); + EXPECT_FALSE(file_path.FileOrDirectoryExists()); // file still not there + CreateTextFile(file_path.c_str()); + EXPECT_TRUE(file_path.FileOrDirectoryExists()); + + FilePath file_path2(FilePath::GenerateUniqueFileName(testdata_path_, + FilePath("unique"), "txt")); + EXPECT_EQ(unique_file1_.string(), file_path2.string()); + EXPECT_FALSE(file_path2.FileOrDirectoryExists()); // file not there + CreateTextFile(file_path2.c_str()); + EXPECT_TRUE(file_path2.FileOrDirectoryExists()); +} + +TEST_F(DirectoryCreationTest, CreateDirectoriesFail) { + // force a failure by putting a file where we will try to create a directory. 
+ CreateTextFile(testdata_file_.c_str()); + EXPECT_TRUE(testdata_file_.FileOrDirectoryExists()); + EXPECT_FALSE(testdata_file_.DirectoryExists()); + EXPECT_FALSE(testdata_file_.CreateDirectoriesRecursively()); +} + +TEST(NoDirectoryCreationTest, CreateNoDirectoriesForDefaultXmlFile) { + const FilePath test_detail_xml("test_detail.xml"); + EXPECT_FALSE(test_detail_xml.CreateDirectoriesRecursively()); +} + +TEST(FilePathTest, DefaultConstructor) { + FilePath fp; + EXPECT_EQ("", fp.string()); +} + +TEST(FilePathTest, CharAndCopyConstructors) { + const FilePath fp("spicy"); + EXPECT_EQ("spicy", fp.string()); + + const FilePath fp_copy(fp); + EXPECT_EQ("spicy", fp_copy.string()); +} + +TEST(FilePathTest, StringConstructor) { + const FilePath fp(std::string("cider")); + EXPECT_EQ("cider", fp.string()); +} + +TEST(FilePathTest, Set) { + const FilePath apple("apple"); + FilePath mac("mac"); + mac.Set(apple); // Implement Set() since overloading operator= is forbidden. + EXPECT_EQ("apple", mac.string()); + EXPECT_EQ("apple", apple.string()); +} + +TEST(FilePathTest, ToString) { + const FilePath file("drink"); + EXPECT_EQ("drink", file.string()); +} + +TEST(FilePathTest, RemoveExtension) { + EXPECT_EQ("app", FilePath("app.cc").RemoveExtension("cc").string()); + EXPECT_EQ("app", FilePath("app.exe").RemoveExtension("exe").string()); + EXPECT_EQ("APP", FilePath("APP.EXE").RemoveExtension("exe").string()); +} + +TEST(FilePathTest, RemoveExtensionWhenThereIsNoExtension) { + EXPECT_EQ("app", FilePath("app").RemoveExtension("exe").string()); +} + +TEST(FilePathTest, IsDirectory) { + EXPECT_FALSE(FilePath("cola").IsDirectory()); + EXPECT_TRUE(FilePath("koala" GTEST_PATH_SEP_).IsDirectory()); +#if GTEST_HAS_ALT_PATH_SEP_ + EXPECT_TRUE(FilePath("koala/").IsDirectory()); +#endif +} + +TEST(FilePathTest, IsAbsolutePath) { + EXPECT_FALSE(FilePath("is" GTEST_PATH_SEP_ "relative").IsAbsolutePath()); + EXPECT_FALSE(FilePath("").IsAbsolutePath()); +#if GTEST_OS_WINDOWS + EXPECT_TRUE(FilePath("c:\\" GTEST_PATH_SEP_ "is_not" + GTEST_PATH_SEP_ "relative").IsAbsolutePath()); + EXPECT_FALSE(FilePath("c:foo" GTEST_PATH_SEP_ "bar").IsAbsolutePath()); + EXPECT_TRUE(FilePath("c:/" GTEST_PATH_SEP_ "is_not" + GTEST_PATH_SEP_ "relative").IsAbsolutePath()); +#else + EXPECT_TRUE(FilePath(GTEST_PATH_SEP_ "is_not" GTEST_PATH_SEP_ "relative") + .IsAbsolutePath()); +#endif // GTEST_OS_WINDOWS +} + +TEST(FilePathTest, IsRootDirectory) { +#if GTEST_OS_WINDOWS + EXPECT_TRUE(FilePath("a:\\").IsRootDirectory()); + EXPECT_TRUE(FilePath("Z:/").IsRootDirectory()); + EXPECT_TRUE(FilePath("e://").IsRootDirectory()); + EXPECT_FALSE(FilePath("").IsRootDirectory()); + EXPECT_FALSE(FilePath("b:").IsRootDirectory()); + EXPECT_FALSE(FilePath("b:a").IsRootDirectory()); + EXPECT_FALSE(FilePath("8:/").IsRootDirectory()); + EXPECT_FALSE(FilePath("c|/").IsRootDirectory()); +#else + EXPECT_TRUE(FilePath("/").IsRootDirectory()); + EXPECT_TRUE(FilePath("//").IsRootDirectory()); + EXPECT_FALSE(FilePath("").IsRootDirectory()); + EXPECT_FALSE(FilePath("\\").IsRootDirectory()); + EXPECT_FALSE(FilePath("/x").IsRootDirectory()); +#endif +} + +} // namespace +} // namespace internal +} // namespace testing +// Copyright 2009 Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) +// +// The Google C++ Testing Framework (Google Test) +// +// This file verifies Google Test event listeners receive events at the +// right times. + +#include "gtest/gtest.h" +#include + +using ::testing::AddGlobalTestEnvironment; +using ::testing::Environment; +using ::testing::InitGoogleTest; +using ::testing::Test; +using ::testing::TestCase; +using ::testing::TestEventListener; +using ::testing::TestInfo; +using ::testing::TestPartResult; +using ::testing::UnitTest; + +// Used by tests to register their events. 
+std::vector* g_events = NULL; + +namespace testing { +namespace internal { + +class EventRecordingListener : public TestEventListener { + public: + explicit EventRecordingListener(const char* name) : name_(name) {} + + protected: + virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) { + g_events->push_back(GetFullMethodName("OnTestProgramStart")); + } + + virtual void OnTestIterationStart(const UnitTest& /*unit_test*/, + int iteration) { + Message message; + message << GetFullMethodName("OnTestIterationStart") + << "(" << iteration << ")"; + g_events->push_back(message.GetString()); + } + + virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) { + g_events->push_back(GetFullMethodName("OnEnvironmentsSetUpStart")); + } + + virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) { + g_events->push_back(GetFullMethodName("OnEnvironmentsSetUpEnd")); + } + + virtual void OnTestCaseStart(const TestCase& /*test_case*/) { + g_events->push_back(GetFullMethodName("OnTestCaseStart")); + } + + virtual void OnTestStart(const TestInfo& /*test_info*/) { + g_events->push_back(GetFullMethodName("OnTestStart")); + } + + virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) { + g_events->push_back(GetFullMethodName("OnTestPartResult")); + } + + virtual void OnTestEnd(const TestInfo& /*test_info*/) { + g_events->push_back(GetFullMethodName("OnTestEnd")); + } + + virtual void OnTestCaseEnd(const TestCase& /*test_case*/) { + g_events->push_back(GetFullMethodName("OnTestCaseEnd")); + } + + virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) { + g_events->push_back(GetFullMethodName("OnEnvironmentsTearDownStart")); + } + + virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) { + g_events->push_back(GetFullMethodName("OnEnvironmentsTearDownEnd")); + } + + virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/, + int iteration) { + Message message; + message << GetFullMethodName("OnTestIterationEnd") + << "(" << iteration << ")"; + g_events->push_back(message.GetString()); + } + + virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) { + g_events->push_back(GetFullMethodName("OnTestProgramEnd")); + } + + private: + std::string GetFullMethodName(const char* name) { + return name_ + "." + name; + } + + std::string name_; +}; + +class EnvironmentInvocationCatcher : public Environment { + protected: + virtual void SetUp() { + g_events->push_back("Environment::SetUp"); + } + + virtual void TearDown() { + g_events->push_back("Environment::TearDown"); + } +}; + +class ListenerTest : public Test { + protected: + static void SetUpTestCase() { + g_events->push_back("ListenerTest::SetUpTestCase"); + } + + static void TearDownTestCase() { + g_events->push_back("ListenerTest::TearDownTestCase"); + } + + virtual void SetUp() { + g_events->push_back("ListenerTest::SetUp"); + } + + virtual void TearDown() { + g_events->push_back("ListenerTest::TearDown"); + } +}; + +TEST_F(ListenerTest, DoesFoo) { + // Test execution order within a test case is not guaranteed so we are not + // recording the test name. + g_events->push_back("ListenerTest::* Test Body"); + SUCCEED(); // Triggers OnTestPartResult. +} + +TEST_F(ListenerTest, DoesBar) { + g_events->push_back("ListenerTest::* Test Body"); + SUCCEED(); // Triggers OnTestPartResult. 
+} + +} // namespace internal + +} // namespace testing + +using ::testing::internal::EnvironmentInvocationCatcher; +using ::testing::internal::EventRecordingListener; + +void VerifyResults(const std::vector& data, + const char* const* expected_data, + size_t expected_data_size) { + const size_t actual_size = data.size(); + // If the following assertion fails, a new entry will be appended to + // data. Hence we save data.size() first. + EXPECT_EQ(expected_data_size, actual_size); + + // Compares the common prefix. + const size_t shorter_size = expected_data_size <= actual_size ? + expected_data_size : actual_size; + size_t i = 0; + for (; i < shorter_size; ++i) { + ASSERT_STREQ(expected_data[i], data[i].c_str()) + << "at position " << i; + } + + // Prints extra elements in the actual data. + for (; i < actual_size; ++i) { + printf(" Actual event #%lu: %s\n", + static_cast(i), data[i].c_str()); + } +} + +int main(int argc, char **argv) { + std::vector events; + g_events = &events; + InitGoogleTest(&argc, argv); + + UnitTest::GetInstance()->listeners().Append( + new EventRecordingListener("1st")); + UnitTest::GetInstance()->listeners().Append( + new EventRecordingListener("2nd")); + + AddGlobalTestEnvironment(new EnvironmentInvocationCatcher); + + GTEST_CHECK_(events.size() == 0) + << "AddGlobalTestEnvironment should not generate any events itself."; + + ::testing::GTEST_FLAG(repeat) = 2; + int ret_val = RUN_ALL_TESTS(); + + const char* const expected_events[] = { + "1st.OnTestProgramStart", + "2nd.OnTestProgramStart", + "1st.OnTestIterationStart(0)", + "2nd.OnTestIterationStart(0)", + "1st.OnEnvironmentsSetUpStart", + "2nd.OnEnvironmentsSetUpStart", + "Environment::SetUp", + "2nd.OnEnvironmentsSetUpEnd", + "1st.OnEnvironmentsSetUpEnd", + "1st.OnTestCaseStart", + "2nd.OnTestCaseStart", + "ListenerTest::SetUpTestCase", + "1st.OnTestStart", + "2nd.OnTestStart", + "ListenerTest::SetUp", + "ListenerTest::* Test Body", + "1st.OnTestPartResult", + "2nd.OnTestPartResult", + "ListenerTest::TearDown", + "2nd.OnTestEnd", + "1st.OnTestEnd", + "1st.OnTestStart", + "2nd.OnTestStart", + "ListenerTest::SetUp", + "ListenerTest::* Test Body", + "1st.OnTestPartResult", + "2nd.OnTestPartResult", + "ListenerTest::TearDown", + "2nd.OnTestEnd", + "1st.OnTestEnd", + "ListenerTest::TearDownTestCase", + "2nd.OnTestCaseEnd", + "1st.OnTestCaseEnd", + "1st.OnEnvironmentsTearDownStart", + "2nd.OnEnvironmentsTearDownStart", + "Environment::TearDown", + "2nd.OnEnvironmentsTearDownEnd", + "1st.OnEnvironmentsTearDownEnd", + "2nd.OnTestIterationEnd(0)", + "1st.OnTestIterationEnd(0)", + "1st.OnTestIterationStart(1)", + "2nd.OnTestIterationStart(1)", + "1st.OnEnvironmentsSetUpStart", + "2nd.OnEnvironmentsSetUpStart", + "Environment::SetUp", + "2nd.OnEnvironmentsSetUpEnd", + "1st.OnEnvironmentsSetUpEnd", + "1st.OnTestCaseStart", + "2nd.OnTestCaseStart", + "ListenerTest::SetUpTestCase", + "1st.OnTestStart", + "2nd.OnTestStart", + "ListenerTest::SetUp", + "ListenerTest::* Test Body", + "1st.OnTestPartResult", + "2nd.OnTestPartResult", + "ListenerTest::TearDown", + "2nd.OnTestEnd", + "1st.OnTestEnd", + "1st.OnTestStart", + "2nd.OnTestStart", + "ListenerTest::SetUp", + "ListenerTest::* Test Body", + "1st.OnTestPartResult", + "2nd.OnTestPartResult", + "ListenerTest::TearDown", + "2nd.OnTestEnd", + "1st.OnTestEnd", + "ListenerTest::TearDownTestCase", + "2nd.OnTestCaseEnd", + "1st.OnTestCaseEnd", + "1st.OnEnvironmentsTearDownStart", + "2nd.OnEnvironmentsTearDownStart", + "Environment::TearDown", + "2nd.OnEnvironmentsTearDownEnd", + 
"1st.OnEnvironmentsTearDownEnd", + "2nd.OnTestIterationEnd(1)", + "1st.OnTestIterationEnd(1)", + "2nd.OnTestProgramEnd", + "1st.OnTestProgramEnd" + }; + VerifyResults(events, + expected_events, + sizeof(expected_events)/sizeof(expected_events[0])); + + // We need to check manually for ad hoc test failures that happen after + // RUN_ALL_TESTS finishes. + if (UnitTest::GetInstance()->Failed()) + ret_val = 1; + + return ret_val; +} +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: keith.ray@gmail.com (Keith Ray) +// +// Google Test UnitTestOptions tests +// +// This file tests classes and functions used internally by +// Google Test. They are subject to change without notice. +// +// This file is #included from gtest.cc, to avoid changing build or +// make-files on Windows and other platforms. Do not #include this file +// anywhere else! + +#include "gtest/gtest.h" + +#if GTEST_OS_WINDOWS_MOBILE +# include +#elif GTEST_OS_WINDOWS +# include +#endif // GTEST_OS_WINDOWS_MOBILE + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +namespace testing { +namespace internal { +namespace { + +// Turns the given relative path into an absolute path. +FilePath GetAbsolutePathOf(const FilePath& relative_path) { + return FilePath::ConcatPaths(FilePath::GetCurrentDir(), relative_path); +} + +// Testing UnitTestOptions::GetOutputFormat/GetOutputFile. 
+ +TEST(XmlOutputTest, GetOutputFormatDefault) { + GTEST_FLAG(output) = ""; + EXPECT_STREQ("", UnitTestOptions::GetOutputFormat().c_str()); +} + +TEST(XmlOutputTest, GetOutputFormat) { + GTEST_FLAG(output) = "xml:filename"; + EXPECT_STREQ("xml", UnitTestOptions::GetOutputFormat().c_str()); +} + +TEST(XmlOutputTest, GetOutputFileDefault) { + GTEST_FLAG(output) = ""; + EXPECT_EQ(GetAbsolutePathOf(FilePath("test_detail.xml")).string(), + UnitTestOptions::GetAbsolutePathToOutputFile()); +} + +TEST(XmlOutputTest, GetOutputFileSingleFile) { + GTEST_FLAG(output) = "xml:filename.abc"; + EXPECT_EQ(GetAbsolutePathOf(FilePath("filename.abc")).string(), + UnitTestOptions::GetAbsolutePathToOutputFile()); +} + +TEST(XmlOutputTest, GetOutputFileFromDirectoryPath) { + GTEST_FLAG(output) = "xml:path" GTEST_PATH_SEP_; + const std::string expected_output_file = + GetAbsolutePathOf( + FilePath(std::string("path") + GTEST_PATH_SEP_ + + GetCurrentExecutableName().string() + ".xml")).string(); + const std::string& output_file = + UnitTestOptions::GetAbsolutePathToOutputFile(); +#if GTEST_OS_WINDOWS + EXPECT_STRCASEEQ(expected_output_file.c_str(), output_file.c_str()); +#else + EXPECT_EQ(expected_output_file, output_file.c_str()); +#endif +} + +TEST(OutputFileHelpersTest, GetCurrentExecutableName) { + const std::string exe_str = GetCurrentExecutableName().string(); +#if GTEST_OS_WINDOWS + const bool success = + _strcmpi("gtest-options_test", exe_str.c_str()) == 0 || + _strcmpi("gtest-options-ex_test", exe_str.c_str()) == 0 || + _strcmpi("gtest_all_test", exe_str.c_str()) == 0 || + _strcmpi("gtest_dll_test", exe_str.c_str()) == 0; +#else + // TODO(wan@google.com): remove the hard-coded "lt-" prefix when + // Chandler Carruth's libtool replacement is ready. + const bool success = + exe_str == "gtest-options_test" || + exe_str == "gtest_all_test" || + exe_str == "lt-gtest_all_test" || + exe_str == "gtest_dll_test"; +#endif // GTEST_OS_WINDOWS + if (!success) + FAIL() << "GetCurrentExecutableName() returns " << exe_str; +} + +class XmlOutputChangeDirTest : public Test { + protected: + virtual void SetUp() { + original_working_dir_ = FilePath::GetCurrentDir(); + posix::ChDir(".."); + // This will make the test fail if run from the root directory. 
+ EXPECT_NE(original_working_dir_.string(), + FilePath::GetCurrentDir().string()); + } + + virtual void TearDown() { + posix::ChDir(original_working_dir_.string().c_str()); + } + + FilePath original_working_dir_; +}; + +TEST_F(XmlOutputChangeDirTest, PreserveOriginalWorkingDirWithDefault) { + GTEST_FLAG(output) = ""; + EXPECT_EQ(FilePath::ConcatPaths(original_working_dir_, + FilePath("test_detail.xml")).string(), + UnitTestOptions::GetAbsolutePathToOutputFile()); +} + +TEST_F(XmlOutputChangeDirTest, PreserveOriginalWorkingDirWithDefaultXML) { + GTEST_FLAG(output) = "xml"; + EXPECT_EQ(FilePath::ConcatPaths(original_working_dir_, + FilePath("test_detail.xml")).string(), + UnitTestOptions::GetAbsolutePathToOutputFile()); +} + +TEST_F(XmlOutputChangeDirTest, PreserveOriginalWorkingDirWithRelativeFile) { + GTEST_FLAG(output) = "xml:filename.abc"; + EXPECT_EQ(FilePath::ConcatPaths(original_working_dir_, + FilePath("filename.abc")).string(), + UnitTestOptions::GetAbsolutePathToOutputFile()); +} + +TEST_F(XmlOutputChangeDirTest, PreserveOriginalWorkingDirWithRelativePath) { + GTEST_FLAG(output) = "xml:path" GTEST_PATH_SEP_; + const std::string expected_output_file = + FilePath::ConcatPaths( + original_working_dir_, + FilePath(std::string("path") + GTEST_PATH_SEP_ + + GetCurrentExecutableName().string() + ".xml")).string(); + const std::string& output_file = + UnitTestOptions::GetAbsolutePathToOutputFile(); +#if GTEST_OS_WINDOWS + EXPECT_STRCASEEQ(expected_output_file.c_str(), output_file.c_str()); +#else + EXPECT_EQ(expected_output_file, output_file.c_str()); +#endif +} + +TEST_F(XmlOutputChangeDirTest, PreserveOriginalWorkingDirWithAbsoluteFile) { +#if GTEST_OS_WINDOWS + GTEST_FLAG(output) = "xml:c:\\tmp\\filename.abc"; + EXPECT_EQ(FilePath("c:\\tmp\\filename.abc").string(), + UnitTestOptions::GetAbsolutePathToOutputFile()); +#else + GTEST_FLAG(output) ="xml:/tmp/filename.abc"; + EXPECT_EQ(FilePath("/tmp/filename.abc").string(), + UnitTestOptions::GetAbsolutePathToOutputFile()); +#endif +} + +TEST_F(XmlOutputChangeDirTest, PreserveOriginalWorkingDirWithAbsolutePath) { +#if GTEST_OS_WINDOWS + const std::string path = "c:\\tmp\\"; +#else + const std::string path = "/tmp/"; +#endif + + GTEST_FLAG(output) = "xml:" + path; + const std::string expected_output_file = + path + GetCurrentExecutableName().string() + ".xml"; + const std::string& output_file = + UnitTestOptions::GetAbsolutePathToOutputFile(); + +#if GTEST_OS_WINDOWS + EXPECT_STRCASEEQ(expected_output_file.c_str(), output_file.c_str()); +#else + EXPECT_EQ(expected_output_file, output_file.c_str()); +#endif +} + +} // namespace +} // namespace internal +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) +// +// Tests for Google Test itself. This verifies that the basic constructs of +// Google Test work. + +#include "gtest/gtest.h" + +#include "test/gtest-param-test_test.h" + +#if GTEST_HAS_PARAM_TEST + +using ::testing::Values; +using ::testing::internal::ParamGenerator; + +// Tests that generators defined in a different translation unit +// are functional. The test using extern_gen is defined +// in gtest-param-test_test.cc. +ParamGenerator extern_gen = Values(33); + +// Tests that a parameterized test case can be defined in one translation unit +// and instantiated in another. The test is defined in gtest-param-test_test.cc +// and ExternalInstantiationTest fixture class is defined in +// gtest-param-test_test.h. +INSTANTIATE_TEST_CASE_P(MultiplesOf33, + ExternalInstantiationTest, + Values(33, 66)); + +// Tests that a parameterized test case can be instantiated +// in multiple translation units. Another instantiation is defined +// in gtest-param-test_test.cc and InstantiationInMultipleTranslaionUnitsTest +// fixture is defined in gtest-param-test_test.h +INSTANTIATE_TEST_CASE_P(Sequence2, + InstantiationInMultipleTranslaionUnitsTest, + Values(42*3, 42*4, 42*5)); + +#endif // GTEST_HAS_PARAM_TEST +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) +// +// Tests for Google Test itself. This file verifies that the parameter +// generators objects produce correct parameter sequences and that +// Google Test runtime instantiates correct tests from those sequences. + +#include "gtest/gtest.h" + +#if GTEST_HAS_PARAM_TEST + +# include +# include +# include +# include +# include +# include + +// To include gtest-internal-inl.h. +# define GTEST_IMPLEMENTATION_ 1 +# include "src/gtest-internal-inl.h" // for UnitTestOptions +# undef GTEST_IMPLEMENTATION_ + +# include "test/gtest-param-test_test.h" + +using ::std::vector; +using ::std::sort; + +using ::testing::AddGlobalTestEnvironment; +using ::testing::Bool; +using ::testing::Message; +using ::testing::Range; +using ::testing::TestWithParam; +using ::testing::Values; +using ::testing::ValuesIn; + +# if GTEST_HAS_COMBINE +using ::testing::Combine; +using ::testing::get; +using ::testing::make_tuple; +using ::testing::tuple; +# endif // GTEST_HAS_COMBINE + +using ::testing::internal::ParamGenerator; +using ::testing::internal::UnitTestOptions; + +// Prints a value to a string. +// +// TODO(wan@google.com): remove PrintValue() when we move matchers and +// EXPECT_THAT() from Google Mock to Google Test. At that time, we +// can write EXPECT_THAT(x, Eq(y)) to compare two tuples x and y, as +// EXPECT_THAT() and the matchers know how to print tuples. +template +::std::string PrintValue(const T& value) { + ::std::stringstream stream; + stream << value; + return stream.str(); +} + +# if GTEST_HAS_COMBINE + +// These overloads allow printing tuples in our tests. We cannot +// define an operator<< for tuples, as that definition needs to be in +// the std namespace in order to be picked up by Google Test via +// Argument-Dependent Lookup, yet defining anything in the std +// namespace in non-STL code is undefined behavior. + +template +::std::string PrintValue(const tuple& value) { + ::std::stringstream stream; + stream << "(" << get<0>(value) << ", " << get<1>(value) << ")"; + return stream.str(); +} + +template +::std::string PrintValue(const tuple& value) { + ::std::stringstream stream; + stream << "(" << get<0>(value) << ", " << get<1>(value) + << ", "<< get<2>(value) << ")"; + return stream.str(); +} + +template +::std::string PrintValue( + const tuple& value) { + ::std::stringstream stream; + stream << "(" << get<0>(value) << ", " << get<1>(value) + << ", "<< get<2>(value) << ", " << get<3>(value) + << ", "<< get<4>(value) << ", " << get<5>(value) + << ", "<< get<6>(value) << ", " << get<7>(value) + << ", "<< get<8>(value) << ", " << get<9>(value) << ")"; + return stream.str(); +} + +# endif // GTEST_HAS_COMBINE + +// Verifies that a sequence generated by the generator and accessed +// via the iterator object matches the expected one using Google Test +// assertions. 
+template +void VerifyGenerator(const ParamGenerator& generator, + const T (&expected_values)[N]) { + typename ParamGenerator::iterator it = generator.begin(); + for (size_t i = 0; i < N; ++i) { + ASSERT_FALSE(it == generator.end()) + << "At element " << i << " when accessing via an iterator " + << "created with the copy constructor.\n"; + // We cannot use EXPECT_EQ() here as the values may be tuples, + // which don't support <<. + EXPECT_TRUE(expected_values[i] == *it) + << "where i is " << i + << ", expected_values[i] is " << PrintValue(expected_values[i]) + << ", *it is " << PrintValue(*it) + << ", and 'it' is an iterator created with the copy constructor.\n"; + it++; + } + EXPECT_TRUE(it == generator.end()) + << "At the presumed end of sequence when accessing via an iterator " + << "created with the copy constructor.\n"; + + // Test the iterator assignment. The following lines verify that + // the sequence accessed via an iterator initialized via the + // assignment operator (as opposed to a copy constructor) matches + // just the same. + it = generator.begin(); + for (size_t i = 0; i < N; ++i) { + ASSERT_FALSE(it == generator.end()) + << "At element " << i << " when accessing via an iterator " + << "created with the assignment operator.\n"; + EXPECT_TRUE(expected_values[i] == *it) + << "where i is " << i + << ", expected_values[i] is " << PrintValue(expected_values[i]) + << ", *it is " << PrintValue(*it) + << ", and 'it' is an iterator created with the copy constructor.\n"; + it++; + } + EXPECT_TRUE(it == generator.end()) + << "At the presumed end of sequence when accessing via an iterator " + << "created with the assignment operator.\n"; +} + +template +void VerifyGeneratorIsEmpty(const ParamGenerator& generator) { + typename ParamGenerator::iterator it = generator.begin(); + EXPECT_TRUE(it == generator.end()); + + it = generator.begin(); + EXPECT_TRUE(it == generator.end()); +} + +// Generator tests. They test that each of the provided generator functions +// generates an expected sequence of values. The general test pattern +// instantiates a generator using one of the generator functions, +// checks the sequence produced by the generator using its iterator API, +// and then resets the iterator back to the beginning of the sequence +// and checks the sequence again. + +// Tests that iterators produced by generator functions conform to the +// ForwardIterator concept. +TEST(IteratorTest, ParamIteratorConformsToForwardIteratorConcept) { + const ParamGenerator gen = Range(0, 10); + ParamGenerator::iterator it = gen.begin(); + + // Verifies that iterator initialization works as expected. + ParamGenerator::iterator it2 = it; + EXPECT_TRUE(*it == *it2) << "Initialized iterators must point to the " + << "element same as its source points to"; + + // Verifies that iterator assignment works as expected. + it++; + EXPECT_FALSE(*it == *it2); + it2 = it; + EXPECT_TRUE(*it == *it2) << "Assigned iterators must point to the " + << "element same as its source points to"; + + // Verifies that prefix operator++() returns *this. + EXPECT_EQ(&it, &(++it)) << "Result of the prefix operator++ must be " + << "refer to the original object"; + + // Verifies that the result of the postfix operator++ points to the value + // pointed to by the original iterator. + int original_value = *it; // Have to compute it outside of macro call to be + // unaffected by the parameter evaluation order. 
+ EXPECT_EQ(original_value, *(it++)); + + // Verifies that prefix and postfix operator++() advance an iterator + // all the same. + it2 = it; + it++; + ++it2; + EXPECT_TRUE(*it == *it2); +} + +// Tests that Range() generates the expected sequence. +TEST(RangeTest, IntRangeWithDefaultStep) { + const ParamGenerator gen = Range(0, 3); + const int expected_values[] = {0, 1, 2}; + VerifyGenerator(gen, expected_values); +} + +// Edge case. Tests that Range() generates the single element sequence +// as expected when provided with range limits that are equal. +TEST(RangeTest, IntRangeSingleValue) { + const ParamGenerator gen = Range(0, 1); + const int expected_values[] = {0}; + VerifyGenerator(gen, expected_values); +} + +// Edge case. Tests that Range() with generates empty sequence when +// supplied with an empty range. +TEST(RangeTest, IntRangeEmpty) { + const ParamGenerator gen = Range(0, 0); + VerifyGeneratorIsEmpty(gen); +} + +// Tests that Range() with custom step (greater then one) generates +// the expected sequence. +TEST(RangeTest, IntRangeWithCustomStep) { + const ParamGenerator gen = Range(0, 9, 3); + const int expected_values[] = {0, 3, 6}; + VerifyGenerator(gen, expected_values); +} + +// Tests that Range() with custom step (greater then one) generates +// the expected sequence when the last element does not fall on the +// upper range limit. Sequences generated by Range() must not have +// elements beyond the range limits. +TEST(RangeTest, IntRangeWithCustomStepOverUpperBound) { + const ParamGenerator gen = Range(0, 4, 3); + const int expected_values[] = {0, 3}; + VerifyGenerator(gen, expected_values); +} + +// Verifies that Range works with user-defined types that define +// copy constructor, operator=(), operator+(), and operator<(). +class DogAdder { + public: + explicit DogAdder(const char* a_value) : value_(a_value) {} + DogAdder(const DogAdder& other) : value_(other.value_.c_str()) {} + + DogAdder operator=(const DogAdder& other) { + if (this != &other) + value_ = other.value_; + return *this; + } + DogAdder operator+(const DogAdder& other) const { + Message msg; + msg << value_.c_str() << other.value_.c_str(); + return DogAdder(msg.GetString().c_str()); + } + bool operator<(const DogAdder& other) const { + return value_ < other.value_; + } + const std::string& value() const { return value_; } + + private: + std::string value_; +}; + +TEST(RangeTest, WorksWithACustomType) { + const ParamGenerator gen = + Range(DogAdder("cat"), DogAdder("catdogdog"), DogAdder("dog")); + ParamGenerator::iterator it = gen.begin(); + + ASSERT_FALSE(it == gen.end()); + EXPECT_STREQ("cat", it->value().c_str()); + + ASSERT_FALSE(++it == gen.end()); + EXPECT_STREQ("catdog", it->value().c_str()); + + EXPECT_TRUE(++it == gen.end()); +} + +class IntWrapper { + public: + explicit IntWrapper(int a_value) : value_(a_value) {} + IntWrapper(const IntWrapper& other) : value_(other.value_) {} + + IntWrapper operator=(const IntWrapper& other) { + value_ = other.value_; + return *this; + } + // operator+() adds a different type. 
+ IntWrapper operator+(int other) const { return IntWrapper(value_ + other); } + bool operator<(const IntWrapper& other) const { + return value_ < other.value_; + } + int value() const { return value_; } + + private: + int value_; +}; + +TEST(RangeTest, WorksWithACustomTypeWithDifferentIncrementType) { + const ParamGenerator gen = Range(IntWrapper(0), IntWrapper(2)); + ParamGenerator::iterator it = gen.begin(); + + ASSERT_FALSE(it == gen.end()); + EXPECT_EQ(0, it->value()); + + ASSERT_FALSE(++it == gen.end()); + EXPECT_EQ(1, it->value()); + + EXPECT_TRUE(++it == gen.end()); +} + +// Tests that ValuesIn() with an array parameter generates +// the expected sequence. +TEST(ValuesInTest, ValuesInArray) { + int array[] = {3, 5, 8}; + const ParamGenerator gen = ValuesIn(array); + VerifyGenerator(gen, array); +} + +// Tests that ValuesIn() with a const array parameter generates +// the expected sequence. +TEST(ValuesInTest, ValuesInConstArray) { + const int array[] = {3, 5, 8}; + const ParamGenerator gen = ValuesIn(array); + VerifyGenerator(gen, array); +} + +// Edge case. Tests that ValuesIn() with an array parameter containing a +// single element generates the single element sequence. +TEST(ValuesInTest, ValuesInSingleElementArray) { + int array[] = {42}; + const ParamGenerator gen = ValuesIn(array); + VerifyGenerator(gen, array); +} + +// Tests that ValuesIn() generates the expected sequence for an STL +// container (vector). +TEST(ValuesInTest, ValuesInVector) { + typedef ::std::vector ContainerType; + ContainerType values; + values.push_back(3); + values.push_back(5); + values.push_back(8); + const ParamGenerator gen = ValuesIn(values); + + const int expected_values[] = {3, 5, 8}; + VerifyGenerator(gen, expected_values); +} + +// Tests that ValuesIn() generates the expected sequence. +TEST(ValuesInTest, ValuesInIteratorRange) { + typedef ::std::vector ContainerType; + ContainerType values; + values.push_back(3); + values.push_back(5); + values.push_back(8); + const ParamGenerator gen = ValuesIn(values.begin(), values.end()); + + const int expected_values[] = {3, 5, 8}; + VerifyGenerator(gen, expected_values); +} + +// Edge case. Tests that ValuesIn() provided with an iterator range specifying a +// single value generates a single-element sequence. +TEST(ValuesInTest, ValuesInSingleElementIteratorRange) { + typedef ::std::vector ContainerType; + ContainerType values; + values.push_back(42); + const ParamGenerator gen = ValuesIn(values.begin(), values.end()); + + const int expected_values[] = {42}; + VerifyGenerator(gen, expected_values); +} + +// Edge case. Tests that ValuesIn() provided with an empty iterator range +// generates an empty sequence. +TEST(ValuesInTest, ValuesInEmptyIteratorRange) { + typedef ::std::vector ContainerType; + ContainerType values; + const ParamGenerator gen = ValuesIn(values.begin(), values.end()); + + VerifyGeneratorIsEmpty(gen); +} + +// Tests that the Values() generates the expected sequence. +TEST(ValuesTest, ValuesWorks) { + const ParamGenerator gen = Values(3, 5, 8); + + const int expected_values[] = {3, 5, 8}; + VerifyGenerator(gen, expected_values); +} + +// Tests that Values() generates the expected sequences from elements of +// different types convertible to ParamGenerator's parameter type. 
+TEST(ValuesTest, ValuesWorksForValuesOfCompatibleTypes) { + const ParamGenerator gen = Values(3, 5.0f, 8.0); + + const double expected_values[] = {3.0, 5.0, 8.0}; + VerifyGenerator(gen, expected_values); +} + +TEST(ValuesTest, ValuesWorksForMaxLengthList) { + const ParamGenerator gen = Values( + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, + 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, + 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, + 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, + 410, 420, 430, 440, 450, 460, 470, 480, 490, 500); + + const int expected_values[] = { + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, + 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, + 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, + 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, + 410, 420, 430, 440, 450, 460, 470, 480, 490, 500}; + VerifyGenerator(gen, expected_values); +} + +// Edge case test. Tests that single-parameter Values() generates the sequence +// with the single value. +TEST(ValuesTest, ValuesWithSingleParameter) { + const ParamGenerator gen = Values(42); + + const int expected_values[] = {42}; + VerifyGenerator(gen, expected_values); +} + +// Tests that Bool() generates sequence (false, true). +TEST(BoolTest, BoolWorks) { + const ParamGenerator gen = Bool(); + + const bool expected_values[] = {false, true}; + VerifyGenerator(gen, expected_values); +} + +# if GTEST_HAS_COMBINE + +// Tests that Combine() with two parameters generates the expected sequence. +TEST(CombineTest, CombineWithTwoParameters) { + const char* foo = "foo"; + const char* bar = "bar"; + const ParamGenerator > gen = + Combine(Values(foo, bar), Values(3, 4)); + + tuple expected_values[] = { + make_tuple(foo, 3), make_tuple(foo, 4), + make_tuple(bar, 3), make_tuple(bar, 4)}; + VerifyGenerator(gen, expected_values); +} + +// Tests that Combine() with three parameters generates the expected sequence. +TEST(CombineTest, CombineWithThreeParameters) { + const ParamGenerator > gen = Combine(Values(0, 1), + Values(3, 4), + Values(5, 6)); + tuple expected_values[] = { + make_tuple(0, 3, 5), make_tuple(0, 3, 6), + make_tuple(0, 4, 5), make_tuple(0, 4, 6), + make_tuple(1, 3, 5), make_tuple(1, 3, 6), + make_tuple(1, 4, 5), make_tuple(1, 4, 6)}; + VerifyGenerator(gen, expected_values); +} + +// Tests that the Combine() with the first parameter generating a single value +// sequence generates a sequence with the number of elements equal to the +// number of elements in the sequence generated by the second parameter. +TEST(CombineTest, CombineWithFirstParameterSingleValue) { + const ParamGenerator > gen = Combine(Values(42), + Values(0, 1)); + + tuple expected_values[] = {make_tuple(42, 0), make_tuple(42, 1)}; + VerifyGenerator(gen, expected_values); +} + +// Tests that the Combine() with the second parameter generating a single value +// sequence generates a sequence with the number of elements equal to the +// number of elements in the sequence generated by the first parameter. +TEST(CombineTest, CombineWithSecondParameterSingleValue) { + const ParamGenerator > gen = Combine(Values(0, 1), + Values(42)); + + tuple expected_values[] = {make_tuple(0, 42), make_tuple(1, 42)}; + VerifyGenerator(gen, expected_values); +} + +// Tests that when the first parameter produces an empty sequence, +// Combine() produces an empty sequence, too. 
+TEST(CombineTest, CombineWithFirstParameterEmptyRange) { + const ParamGenerator > gen = Combine(Range(0, 0), + Values(0, 1)); + VerifyGeneratorIsEmpty(gen); +} + +// Tests that when the second parameter produces an empty sequence, +// Combine() produces an empty sequence, too. +TEST(CombineTest, CombineWithSecondParameterEmptyRange) { + const ParamGenerator > gen = Combine(Values(0, 1), + Range(1, 1)); + VerifyGeneratorIsEmpty(gen); +} + +// Edge case. Tests that combine works with the maximum number +// of parameters supported by Google Test (currently 10). +TEST(CombineTest, CombineWithMaxNumberOfParameters) { + const char* foo = "foo"; + const char* bar = "bar"; + const ParamGenerator > gen = Combine(Values(foo, bar), + Values(1), Values(2), + Values(3), Values(4), + Values(5), Values(6), + Values(7), Values(8), + Values(9)); + + tuple + expected_values[] = {make_tuple(foo, 1, 2, 3, 4, 5, 6, 7, 8, 9), + make_tuple(bar, 1, 2, 3, 4, 5, 6, 7, 8, 9)}; + VerifyGenerator(gen, expected_values); +} + +# endif // GTEST_HAS_COMBINE + +// Tests that an generator produces correct sequence after being +// assigned from another generator. +TEST(ParamGeneratorTest, AssignmentWorks) { + ParamGenerator gen = Values(1, 2); + const ParamGenerator gen2 = Values(3, 4); + gen = gen2; + + const int expected_values[] = {3, 4}; + VerifyGenerator(gen, expected_values); +} + +// This test verifies that the tests are expanded and run as specified: +// one test per element from the sequence produced by the generator +// specified in INSTANTIATE_TEST_CASE_P. It also verifies that the test's +// fixture constructor, SetUp(), and TearDown() have run and have been +// supplied with the correct parameters. + +// The use of environment object allows detection of the case where no test +// case functionality is run at all. In this case TestCaseTearDown will not +// be able to detect missing tests, naturally. +template +class TestGenerationEnvironment : public ::testing::Environment { + public: + static TestGenerationEnvironment* Instance() { + static TestGenerationEnvironment* instance = new TestGenerationEnvironment; + return instance; + } + + void FixtureConstructorExecuted() { fixture_constructor_count_++; } + void SetUpExecuted() { set_up_count_++; } + void TearDownExecuted() { tear_down_count_++; } + void TestBodyExecuted() { test_body_count_++; } + + virtual void TearDown() { + // If all MultipleTestGenerationTest tests have been de-selected + // by the filter flag, the following checks make no sense. 
+ bool perform_check = false; + + for (int i = 0; i < kExpectedCalls; ++i) { + Message msg; + msg << "TestsExpandedAndRun/" << i; + if (UnitTestOptions::FilterMatchesTest( + "TestExpansionModule/MultipleTestGenerationTest", + msg.GetString().c_str())) { + perform_check = true; + } + } + if (perform_check) { + EXPECT_EQ(kExpectedCalls, fixture_constructor_count_) + << "Fixture constructor of ParamTestGenerationTest test case " + << "has not been run as expected."; + EXPECT_EQ(kExpectedCalls, set_up_count_) + << "Fixture SetUp method of ParamTestGenerationTest test case " + << "has not been run as expected."; + EXPECT_EQ(kExpectedCalls, tear_down_count_) + << "Fixture TearDown method of ParamTestGenerationTest test case " + << "has not been run as expected."; + EXPECT_EQ(kExpectedCalls, test_body_count_) + << "Test in ParamTestGenerationTest test case " + << "has not been run as expected."; + } + } + + private: + TestGenerationEnvironment() : fixture_constructor_count_(0), set_up_count_(0), + tear_down_count_(0), test_body_count_(0) {} + + int fixture_constructor_count_; + int set_up_count_; + int tear_down_count_; + int test_body_count_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestGenerationEnvironment); +}; + +const int test_generation_params[] = {36, 42, 72}; + +class TestGenerationTest : public TestWithParam { + public: + enum { + PARAMETER_COUNT = + sizeof(test_generation_params)/sizeof(test_generation_params[0]) + }; + + typedef TestGenerationEnvironment Environment; + + TestGenerationTest() { + Environment::Instance()->FixtureConstructorExecuted(); + current_parameter_ = GetParam(); + } + virtual void SetUp() { + Environment::Instance()->SetUpExecuted(); + EXPECT_EQ(current_parameter_, GetParam()); + } + virtual void TearDown() { + Environment::Instance()->TearDownExecuted(); + EXPECT_EQ(current_parameter_, GetParam()); + } + + static void SetUpTestCase() { + bool all_tests_in_test_case_selected = true; + + for (int i = 0; i < PARAMETER_COUNT; ++i) { + Message test_name; + test_name << "TestsExpandedAndRun/" << i; + if ( !UnitTestOptions::FilterMatchesTest( + "TestExpansionModule/MultipleTestGenerationTest", + test_name.GetString())) { + all_tests_in_test_case_selected = false; + } + } + EXPECT_TRUE(all_tests_in_test_case_selected) + << "When running the TestGenerationTest test case all of its tests\n" + << "must be selected by the filter flag for the test case to pass.\n" + << "If not all of them are enabled, we can't reliably conclude\n" + << "that the correct number of tests have been generated."; + + collected_parameters_.clear(); + } + + static void TearDownTestCase() { + vector expected_values(test_generation_params, + test_generation_params + PARAMETER_COUNT); + // Test execution order is not guaranteed by Google Test, + // so the order of values in collected_parameters_ can be + // different and we have to sort to compare. 
+ sort(expected_values.begin(), expected_values.end()); + sort(collected_parameters_.begin(), collected_parameters_.end()); + + EXPECT_TRUE(collected_parameters_ == expected_values); + } + + protected: + int current_parameter_; + static vector collected_parameters_; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestGenerationTest); +}; +vector TestGenerationTest::collected_parameters_; + +TEST_P(TestGenerationTest, TestsExpandedAndRun) { + Environment::Instance()->TestBodyExecuted(); + EXPECT_EQ(current_parameter_, GetParam()); + collected_parameters_.push_back(GetParam()); +} +INSTANTIATE_TEST_CASE_P(TestExpansionModule, TestGenerationTest, + ValuesIn(test_generation_params)); + +// This test verifies that the element sequence (third parameter of +// INSTANTIATE_TEST_CASE_P) is evaluated in InitGoogleTest() and neither at +// the call site of INSTANTIATE_TEST_CASE_P nor in RUN_ALL_TESTS(). For +// that, we declare param_value_ to be a static member of +// GeneratorEvaluationTest and initialize it to 0. We set it to 1 in +// main(), just before invocation of InitGoogleTest(). After calling +// InitGoogleTest(), we set the value to 2. If the sequence is evaluated +// before or after InitGoogleTest, INSTANTIATE_TEST_CASE_P will create a +// test with parameter other than 1, and the test body will fail the +// assertion. +class GeneratorEvaluationTest : public TestWithParam { + public: + static int param_value() { return param_value_; } + static void set_param_value(int param_value) { param_value_ = param_value; } + + private: + static int param_value_; +}; +int GeneratorEvaluationTest::param_value_ = 0; + +TEST_P(GeneratorEvaluationTest, GeneratorsEvaluatedInMain) { + EXPECT_EQ(1, GetParam()); +} +INSTANTIATE_TEST_CASE_P(GenEvalModule, + GeneratorEvaluationTest, + Values(GeneratorEvaluationTest::param_value())); + +// Tests that generators defined in a different translation unit are +// functional. Generator extern_gen is defined in gtest-param-test_test2.cc. +extern ParamGenerator extern_gen; +class ExternalGeneratorTest : public TestWithParam {}; +TEST_P(ExternalGeneratorTest, ExternalGenerator) { + // Sequence produced by extern_gen contains only a single value + // which we verify here. + EXPECT_EQ(GetParam(), 33); +} +INSTANTIATE_TEST_CASE_P(ExternalGeneratorModule, + ExternalGeneratorTest, + extern_gen); + +// Tests that a parameterized test case can be defined in one translation +// unit and instantiated in another. This test will be instantiated in +// gtest-param-test_test2.cc. ExternalInstantiationTest fixture class is +// defined in gtest-param-test_test.h. +TEST_P(ExternalInstantiationTest, IsMultipleOf33) { + EXPECT_EQ(0, GetParam() % 33); +} + +// Tests that a parameterized test case can be instantiated with multiple +// generators. +class MultipleInstantiationTest : public TestWithParam {}; +TEST_P(MultipleInstantiationTest, AllowsMultipleInstances) { +} +INSTANTIATE_TEST_CASE_P(Sequence1, MultipleInstantiationTest, Values(1, 2)); +INSTANTIATE_TEST_CASE_P(Sequence2, MultipleInstantiationTest, Range(3, 5)); + +// Tests that a parameterized test case can be instantiated +// in multiple translation units. This test will be instantiated +// here and in gtest-param-test_test2.cc. +// InstantiationInMultipleTranslationUnitsTest fixture class +// is defined in gtest-param-test_test.h. 
+TEST_P(InstantiationInMultipleTranslaionUnitsTest, IsMultipleOf42) {
+  EXPECT_EQ(0, GetParam() % 42);
+}
+INSTANTIATE_TEST_CASE_P(Sequence1,
+                        InstantiationInMultipleTranslaionUnitsTest,
+                        Values(42, 42*2));
+
+// Tests that each iteration of parameterized test runs in a separate test
+// object.
+class SeparateInstanceTest : public TestWithParam<int> {
+ public:
+  SeparateInstanceTest() : count_(0) {}
+
+  static void TearDownTestCase() {
+    EXPECT_GE(global_count_, 2)
+        << "If some (but not all) SeparateInstanceTest tests have been "
+        << "filtered out this test will fail. Make sure that all "
+        << "GeneratorEvaluationTest are selected or de-selected together "
+        << "by the test filter.";
+  }
+
+ protected:
+  int count_;
+  static int global_count_;
+};
+int SeparateInstanceTest::global_count_ = 0;
+
+TEST_P(SeparateInstanceTest, TestsRunInSeparateInstances) {
+  EXPECT_EQ(0, count_++);
+  global_count_++;
+}
+INSTANTIATE_TEST_CASE_P(FourElemSequence, SeparateInstanceTest, Range(1, 4));
+
+// Tests that all instantiations of a test have named appropriately.  Test
+// defined with TEST_P(TestCaseName, TestName) and instantiated with
+// INSTANTIATE_TEST_CASE_P(SequenceName, TestCaseName, generator) must be named
+// SequenceName/TestCaseName.TestName/i, where i is the 0-based index of the
+// sequence element used to instantiate the test.
+class NamingTest : public TestWithParam<int> {};
+
+TEST_P(NamingTest, TestsReportCorrectNamesAndParameters) {
+  const ::testing::TestInfo* const test_info =
+     ::testing::UnitTest::GetInstance()->current_test_info();
+
+  EXPECT_STREQ("ZeroToFiveSequence/NamingTest", test_info->test_case_name());
+
+  Message index_stream;
+  index_stream << "TestsReportCorrectNamesAndParameters/" << GetParam();
+  EXPECT_STREQ(index_stream.GetString().c_str(), test_info->name());
+
+  EXPECT_EQ(::testing::PrintToString(GetParam()), test_info->value_param());
+}
+
+INSTANTIATE_TEST_CASE_P(ZeroToFiveSequence, NamingTest, Range(0, 5));
+
+// Tests that user supplied custom parameter names are working correctly.
+// Runs the test with a builtin helper method which uses PrintToString,
+// as well as a custom function and custom functor to ensure all possible
+// uses work correctly.
+class CustomFunctorNamingTest : public TestWithParam<std::string> {};
+TEST_P(CustomFunctorNamingTest, CustomTestNames) {}
+
+struct CustomParamNameFunctor {
+  std::string operator()(const ::testing::TestParamInfo<std::string>& info) {
+    return info.param;
+  }
+};
+
+INSTANTIATE_TEST_CASE_P(CustomParamNameFunctor,
+                        CustomFunctorNamingTest,
+                        Values(std::string("FunctorName")),
+                        CustomParamNameFunctor());
+
+INSTANTIATE_TEST_CASE_P(AllAllowedCharacters,
+                        CustomFunctorNamingTest,
+                        Values("abcdefghijklmnopqrstuvwxyz",
+                               "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+                               "01234567890_"),
+                        CustomParamNameFunctor());
+
+inline std::string CustomParamNameFunction(
+    const ::testing::TestParamInfo<std::string>& info) {
+  return info.param;
+}
+
+class CustomFunctionNamingTest : public TestWithParam<std::string> {};
+TEST_P(CustomFunctionNamingTest, CustomTestNames) {}
+
+INSTANTIATE_TEST_CASE_P(CustomParamNameFunction,
+                        CustomFunctionNamingTest,
+                        Values(std::string("FunctionName")),
+                        CustomParamNameFunction);
+
+#if GTEST_LANG_CXX11
+
+// Test custom naming with a lambda
+
+class CustomLambdaNamingTest : public TestWithParam<std::string> {};
+TEST_P(CustomLambdaNamingTest, CustomTestNames) {}
+
+INSTANTIATE_TEST_CASE_P(CustomParamNameLambda,
+                        CustomLambdaNamingTest,
+                        Values(std::string("LambdaName")),
+                        [](const ::testing::TestParamInfo<std::string>& info) {
+                          return info.param;
+                        });
+
+#endif  // GTEST_LANG_CXX11
+
+TEST(CustomNamingTest, CheckNameRegistry) {
+  ::testing::UnitTest* unit_test = ::testing::UnitTest::GetInstance();
+  std::set<std::string> test_names;
+  for (int case_num = 0;
+       case_num < unit_test->total_test_case_count();
+       ++case_num) {
+    const ::testing::TestCase* test_case = unit_test->GetTestCase(case_num);
+    for (int test_num = 0;
+         test_num < test_case->total_test_count();
+         ++test_num) {
+      const ::testing::TestInfo* test_info = test_case->GetTestInfo(test_num);
+      test_names.insert(std::string(test_info->name()));
+    }
+  }
+  EXPECT_EQ(1u, test_names.count("CustomTestNames/FunctorName"));
+  EXPECT_EQ(1u, test_names.count("CustomTestNames/FunctionName"));
+#if GTEST_LANG_CXX11
+  EXPECT_EQ(1u, test_names.count("CustomTestNames/LambdaName"));
+#endif  // GTEST_LANG_CXX11
+}
+
+// Test a numeric name to ensure PrintToStringParamName works correctly.
+
+class CustomIntegerNamingTest : public TestWithParam<int> {};
+
+TEST_P(CustomIntegerNamingTest, TestsReportCorrectNames) {
+  const ::testing::TestInfo* const test_info =
+     ::testing::UnitTest::GetInstance()->current_test_info();
+  Message test_name_stream;
+  test_name_stream << "TestsReportCorrectNames/" << GetParam();
+  EXPECT_STREQ(test_name_stream.GetString().c_str(), test_info->name());
+}
+
+INSTANTIATE_TEST_CASE_P(PrintToString,
+                        CustomIntegerNamingTest,
+                        Range(0, 5),
+                        ::testing::PrintToStringParamName());
+
+// Test a custom struct with PrintToString.
+
+struct CustomStruct {
+  explicit CustomStruct(int value) : x(value) {}
+  int x;
+};
+
+std::ostream& operator<<(std::ostream& stream, const CustomStruct& val) {
+  stream << val.x;
+  return stream;
+}
+
+class CustomStructNamingTest : public TestWithParam<CustomStruct> {};
+
+TEST_P(CustomStructNamingTest, TestsReportCorrectNames) {
+  const ::testing::TestInfo* const test_info =
+     ::testing::UnitTest::GetInstance()->current_test_info();
+  Message test_name_stream;
+  test_name_stream << "TestsReportCorrectNames/" << GetParam();
+  EXPECT_STREQ(test_name_stream.GetString().c_str(), test_info->name());
+}
+
+INSTANTIATE_TEST_CASE_P(PrintToString,
+                        CustomStructNamingTest,
+                        Values(CustomStruct(0), CustomStruct(1)),
+                        ::testing::PrintToStringParamName());
+
+// Test that using a stateful parameter naming function works as expected.
+
+struct StatefulNamingFunctor {
+  StatefulNamingFunctor() : sum(0) {}
+  std::string operator()(const ::testing::TestParamInfo<int>& info) {
+    int value = info.param + sum;
+    sum += info.param;
+    return ::testing::PrintToString(value);
+  }
+  int sum;
+};
+
+class StatefulNamingTest : public ::testing::TestWithParam<int> {
+ protected:
+  StatefulNamingTest() : sum_(0) {}
+  int sum_;
+};
+
+TEST_P(StatefulNamingTest, TestsReportCorrectNames) {
+  const ::testing::TestInfo* const test_info =
+     ::testing::UnitTest::GetInstance()->current_test_info();
+  sum_ += GetParam();
+  Message test_name_stream;
+  test_name_stream << "TestsReportCorrectNames/" << sum_;
+  EXPECT_STREQ(test_name_stream.GetString().c_str(), test_info->name());
+}
+
+INSTANTIATE_TEST_CASE_P(StatefulNamingFunctor,
+                        StatefulNamingTest,
+                        Range(0, 5),
+                        StatefulNamingFunctor());
+
+// Class that cannot be streamed into an ostream.  It needs to be copyable
+// (and, in case of MSVC, also assignable) in order to be a test parameter
+// type.  Its default copy constructor and assignment operator do exactly
+// what we need.
+class Unstreamable {
+ public:
+  explicit Unstreamable(int value) : value_(value) {}
+
+ private:
+  int value_;
+};
+
+class CommentTest : public TestWithParam<Unstreamable> {};
+
+TEST_P(CommentTest, TestsCorrectlyReportUnstreamableParams) {
+  const ::testing::TestInfo* const test_info =
+     ::testing::UnitTest::GetInstance()->current_test_info();
+
+  EXPECT_EQ(::testing::PrintToString(GetParam()), test_info->value_param());
+}
+
+INSTANTIATE_TEST_CASE_P(InstantiationWithComments,
+                        CommentTest,
+                        Values(Unstreamable(1)));
+
+// Verify that we can create a hierarchy of test fixtures, where the base
+// class fixture is not parameterized and the derived class is. In this case
+// ParameterizedDerivedTest inherits from NonParameterizedBaseTest.  We
+// perform simple tests on both.
+class NonParameterizedBaseTest : public ::testing::Test { + public: + NonParameterizedBaseTest() : n_(17) { } + protected: + int n_; +}; + +class ParameterizedDerivedTest : public NonParameterizedBaseTest, + public ::testing::WithParamInterface { + protected: + ParameterizedDerivedTest() : count_(0) { } + int count_; + static int global_count_; +}; + +int ParameterizedDerivedTest::global_count_ = 0; + +TEST_F(NonParameterizedBaseTest, FixtureIsInitialized) { + EXPECT_EQ(17, n_); +} + +TEST_P(ParameterizedDerivedTest, SeesSequence) { + EXPECT_EQ(17, n_); + EXPECT_EQ(0, count_++); + EXPECT_EQ(GetParam(), global_count_++); +} + +class ParameterizedDeathTest : public ::testing::TestWithParam { }; + +TEST_F(ParameterizedDeathTest, GetParamDiesFromTestF) { + EXPECT_DEATH_IF_SUPPORTED(GetParam(), + ".* value-parameterized test .*"); +} + +INSTANTIATE_TEST_CASE_P(RangeZeroToFive, ParameterizedDerivedTest, Range(0, 5)); + +#endif // GTEST_HAS_PARAM_TEST + +TEST(CompileTest, CombineIsDefinedOnlyWhenGtestHasParamTestIsDefined) { +#if GTEST_HAS_COMBINE && !GTEST_HAS_PARAM_TEST + FAIL() << "GTEST_HAS_COMBINE is defined while GTEST_HAS_PARAM_TEST is not\n" +#endif +} + +int main(int argc, char **argv) { +#if GTEST_HAS_PARAM_TEST + // Used in TestGenerationTest test case. + AddGlobalTestEnvironment(TestGenerationTest::Environment::Instance()); + // Used in GeneratorEvaluationTest test case. Tests that the updated value + // will be picked up for instantiating tests in GeneratorEvaluationTest. + GeneratorEvaluationTest::set_param_value(1); +#endif // GTEST_HAS_PARAM_TEST + + ::testing::InitGoogleTest(&argc, argv); + +#if GTEST_HAS_PARAM_TEST + // Used in GeneratorEvaluationTest test case. Tests that value updated + // here will NOT be used for instantiating tests in + // GeneratorEvaluationTest. + GeneratorEvaluationTest::set_param_value(2); +#endif // GTEST_HAS_PARAM_TEST + + return RUN_ALL_TESTS(); +} +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: vladl@google.com (Vlad Losev), wan@google.com (Zhanyong Wan) +// +// This file tests the internal cross-platform support utilities. + +#include "gtest/internal/gtest-port.h" + +#include + +#if GTEST_OS_MAC +# include +#endif // GTEST_OS_MAC + +#include +#include // For std::pair and std::make_pair. +#include + +#include "gtest/gtest.h" +#include "gtest/gtest-spi.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +using std::make_pair; +using std::pair; + +namespace testing { +namespace internal { + +TEST(IsXDigitTest, WorksForNarrowAscii) { + EXPECT_TRUE(IsXDigit('0')); + EXPECT_TRUE(IsXDigit('9')); + EXPECT_TRUE(IsXDigit('A')); + EXPECT_TRUE(IsXDigit('F')); + EXPECT_TRUE(IsXDigit('a')); + EXPECT_TRUE(IsXDigit('f')); + + EXPECT_FALSE(IsXDigit('-')); + EXPECT_FALSE(IsXDigit('g')); + EXPECT_FALSE(IsXDigit('G')); +} + +TEST(IsXDigitTest, ReturnsFalseForNarrowNonAscii) { + EXPECT_FALSE(IsXDigit('\x80')); + EXPECT_FALSE(IsXDigit(static_cast('0' | '\x80'))); +} + +TEST(IsXDigitTest, WorksForWideAscii) { + EXPECT_TRUE(IsXDigit(L'0')); + EXPECT_TRUE(IsXDigit(L'9')); + EXPECT_TRUE(IsXDigit(L'A')); + EXPECT_TRUE(IsXDigit(L'F')); + EXPECT_TRUE(IsXDigit(L'a')); + EXPECT_TRUE(IsXDigit(L'f')); + + EXPECT_FALSE(IsXDigit(L'-')); + EXPECT_FALSE(IsXDigit(L'g')); + EXPECT_FALSE(IsXDigit(L'G')); +} + +TEST(IsXDigitTest, ReturnsFalseForWideNonAscii) { + EXPECT_FALSE(IsXDigit(static_cast(0x80))); + EXPECT_FALSE(IsXDigit(static_cast(L'0' | 0x80))); + EXPECT_FALSE(IsXDigit(static_cast(L'0' | 0x100))); +} + +class Base { + public: + // Copy constructor and assignment operator do exactly what we need, so we + // use them. 
+ Base() : member_(0) {} + explicit Base(int n) : member_(n) {} + virtual ~Base() {} + int member() { return member_; } + + private: + int member_; +}; + +class Derived : public Base { + public: + explicit Derived(int n) : Base(n) {} +}; + +TEST(ImplicitCastTest, ConvertsPointers) { + Derived derived(0); + EXPECT_TRUE(&derived == ::testing::internal::ImplicitCast_(&derived)); +} + +TEST(ImplicitCastTest, CanUseInheritance) { + Derived derived(1); + Base base = ::testing::internal::ImplicitCast_(derived); + EXPECT_EQ(derived.member(), base.member()); +} + +class Castable { + public: + explicit Castable(bool* converted) : converted_(converted) {} + operator Base() { + *converted_ = true; + return Base(); + } + + private: + bool* converted_; +}; + +TEST(ImplicitCastTest, CanUseNonConstCastOperator) { + bool converted = false; + Castable castable(&converted); + Base base = ::testing::internal::ImplicitCast_(castable); + EXPECT_TRUE(converted); +} + +class ConstCastable { + public: + explicit ConstCastable(bool* converted) : converted_(converted) {} + operator Base() const { + *converted_ = true; + return Base(); + } + + private: + bool* converted_; +}; + +TEST(ImplicitCastTest, CanUseConstCastOperatorOnConstValues) { + bool converted = false; + const ConstCastable const_castable(&converted); + Base base = ::testing::internal::ImplicitCast_(const_castable); + EXPECT_TRUE(converted); +} + +class ConstAndNonConstCastable { + public: + ConstAndNonConstCastable(bool* converted, bool* const_converted) + : converted_(converted), const_converted_(const_converted) {} + operator Base() { + *converted_ = true; + return Base(); + } + operator Base() const { + *const_converted_ = true; + return Base(); + } + + private: + bool* converted_; + bool* const_converted_; +}; + +TEST(ImplicitCastTest, CanSelectBetweenConstAndNonConstCasrAppropriately) { + bool converted = false; + bool const_converted = false; + ConstAndNonConstCastable castable(&converted, &const_converted); + Base base = ::testing::internal::ImplicitCast_(castable); + EXPECT_TRUE(converted); + EXPECT_FALSE(const_converted); + + converted = false; + const_converted = false; + const ConstAndNonConstCastable const_castable(&converted, &const_converted); + base = ::testing::internal::ImplicitCast_(const_castable); + EXPECT_FALSE(converted); + EXPECT_TRUE(const_converted); +} + +class To { + public: + To(bool* converted) { *converted = true; } // NOLINT +}; + +TEST(ImplicitCastTest, CanUseImplicitConstructor) { + bool converted = false; + To to = ::testing::internal::ImplicitCast_(&converted); + (void)to; + EXPECT_TRUE(converted); +} + +TEST(IteratorTraitsTest, WorksForSTLContainerIterators) { + StaticAssertTypeEq::const_iterator>::value_type>(); + StaticAssertTypeEq::iterator>::value_type>(); +} + +TEST(IteratorTraitsTest, WorksForPointerToNonConst) { + StaticAssertTypeEq::value_type>(); + StaticAssertTypeEq::value_type>(); +} + +TEST(IteratorTraitsTest, WorksForPointerToConst) { + StaticAssertTypeEq::value_type>(); + StaticAssertTypeEq::value_type>(); +} + +// Tests that the element_type typedef is available in scoped_ptr and refers +// to the parameter type. +TEST(ScopedPtrTest, DefinesElementType) { + StaticAssertTypeEq::element_type>(); +} + +// TODO(vladl@google.com): Implement THE REST of scoped_ptr tests. 
+ +TEST(GtestCheckSyntaxTest, BehavesLikeASingleStatement) { + if (AlwaysFalse()) + GTEST_CHECK_(false) << "This should never be executed; " + "It's a compilation test only."; + + if (AlwaysTrue()) + GTEST_CHECK_(true); + else + ; // NOLINT + + if (AlwaysFalse()) + ; // NOLINT + else + GTEST_CHECK_(true) << ""; +} + +TEST(GtestCheckSyntaxTest, WorksWithSwitch) { + switch (0) { + case 1: + break; + default: + GTEST_CHECK_(true); + } + + switch (0) + case 0: + GTEST_CHECK_(true) << "Check failed in switch case"; +} + +// Verifies behavior of FormatFileLocation. +TEST(FormatFileLocationTest, FormatsFileLocation) { + EXPECT_PRED_FORMAT2(IsSubstring, "foo.cc", FormatFileLocation("foo.cc", 42)); + EXPECT_PRED_FORMAT2(IsSubstring, "42", FormatFileLocation("foo.cc", 42)); +} + +TEST(FormatFileLocationTest, FormatsUnknownFile) { + EXPECT_PRED_FORMAT2( + IsSubstring, "unknown file", FormatFileLocation(NULL, 42)); + EXPECT_PRED_FORMAT2(IsSubstring, "42", FormatFileLocation(NULL, 42)); +} + +TEST(FormatFileLocationTest, FormatsUknownLine) { + EXPECT_EQ("foo.cc:", FormatFileLocation("foo.cc", -1)); +} + +TEST(FormatFileLocationTest, FormatsUknownFileAndLine) { + EXPECT_EQ("unknown file:", FormatFileLocation(NULL, -1)); +} + +// Verifies behavior of FormatCompilerIndependentFileLocation. +TEST(FormatCompilerIndependentFileLocationTest, FormatsFileLocation) { + EXPECT_EQ("foo.cc:42", FormatCompilerIndependentFileLocation("foo.cc", 42)); +} + +TEST(FormatCompilerIndependentFileLocationTest, FormatsUknownFile) { + EXPECT_EQ("unknown file:42", + FormatCompilerIndependentFileLocation(NULL, 42)); +} + +TEST(FormatCompilerIndependentFileLocationTest, FormatsUknownLine) { + EXPECT_EQ("foo.cc", FormatCompilerIndependentFileLocation("foo.cc", -1)); +} + +TEST(FormatCompilerIndependentFileLocationTest, FormatsUknownFileAndLine) { + EXPECT_EQ("unknown file", FormatCompilerIndependentFileLocation(NULL, -1)); +} + +#if GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_QNX +void* ThreadFunc(void* data) { + internal::Mutex* mutex = static_cast(data); + mutex->Lock(); + mutex->Unlock(); + return NULL; +} + +TEST(GetThreadCountTest, ReturnsCorrectValue) { + const size_t starting_count = GetThreadCount(); + pthread_t thread_id; + + internal::Mutex mutex; + { + internal::MutexLock lock(&mutex); + pthread_attr_t attr; + ASSERT_EQ(0, pthread_attr_init(&attr)); + ASSERT_EQ(0, pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE)); + + const int status = pthread_create(&thread_id, &attr, &ThreadFunc, &mutex); + ASSERT_EQ(0, pthread_attr_destroy(&attr)); + ASSERT_EQ(0, status); + EXPECT_EQ(starting_count + 1, GetThreadCount()); + } + + void* dummy; + ASSERT_EQ(0, pthread_join(thread_id, &dummy)); + + // The OS may not immediately report the updated thread count after + // joining a thread, causing flakiness in this test. To counter that, we + // wait for up to .5 seconds for the OS to report the correct value. 
+ for (int i = 0; i < 5; ++i) { + if (GetThreadCount() == starting_count) + break; + + SleepMilliseconds(100); + } + + EXPECT_EQ(starting_count, GetThreadCount()); +} +#else +TEST(GetThreadCountTest, ReturnsZeroWhenUnableToCountThreads) { + EXPECT_EQ(0U, GetThreadCount()); +} +#endif // GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_QNX + +TEST(GtestCheckDeathTest, DiesWithCorrectOutputOnFailure) { + const bool a_false_condition = false; + const char regex[] = +#ifdef _MSC_VER + "gtest-port_test\\.cc\\(\\d+\\):" +#elif GTEST_USES_POSIX_RE + "gtest-port_test\\.cc:[0-9]+" +#else + "gtest-port_test\\.cc:\\d+" +#endif // _MSC_VER + ".*a_false_condition.*Extra info.*"; + + EXPECT_DEATH_IF_SUPPORTED(GTEST_CHECK_(a_false_condition) << "Extra info", + regex); +} + +#if GTEST_HAS_DEATH_TEST + +TEST(GtestCheckDeathTest, LivesSilentlyOnSuccess) { + EXPECT_EXIT({ + GTEST_CHECK_(true) << "Extra info"; + ::std::cerr << "Success\n"; + exit(0); }, + ::testing::ExitedWithCode(0), "Success"); +} + +#endif // GTEST_HAS_DEATH_TEST + +// Verifies that Google Test choose regular expression engine appropriate to +// the platform. The test will produce compiler errors in case of failure. +// For simplicity, we only cover the most important platforms here. +TEST(RegexEngineSelectionTest, SelectsCorrectRegexEngine) { +#if !GTEST_USES_PCRE +# if GTEST_HAS_POSIX_RE + + EXPECT_TRUE(GTEST_USES_POSIX_RE); + +# else + + EXPECT_TRUE(GTEST_USES_SIMPLE_RE); + +# endif +#endif // !GTEST_USES_PCRE +} + +#if GTEST_USES_POSIX_RE + +# if GTEST_HAS_TYPED_TEST + +template +class RETest : public ::testing::Test {}; + +// Defines StringTypes as the list of all string types that class RE +// supports. +typedef testing::Types< + ::std::string, +# if GTEST_HAS_GLOBAL_STRING + ::string, +# endif // GTEST_HAS_GLOBAL_STRING + const char*> StringTypes; + +TYPED_TEST_CASE(RETest, StringTypes); + +// Tests RE's implicit constructors. +TYPED_TEST(RETest, ImplicitConstructorWorks) { + const RE empty(TypeParam("")); + EXPECT_STREQ("", empty.pattern()); + + const RE simple(TypeParam("hello")); + EXPECT_STREQ("hello", simple.pattern()); + + const RE normal(TypeParam(".*(\\w+)")); + EXPECT_STREQ(".*(\\w+)", normal.pattern()); +} + +// Tests that RE's constructors reject invalid regular expressions. +TYPED_TEST(RETest, RejectsInvalidRegex) { + EXPECT_NONFATAL_FAILURE({ + const RE invalid(TypeParam("?")); + }, "\"?\" is not a valid POSIX Extended regular expression."); +} + +// Tests RE::FullMatch(). +TYPED_TEST(RETest, FullMatchWorks) { + const RE empty(TypeParam("")); + EXPECT_TRUE(RE::FullMatch(TypeParam(""), empty)); + EXPECT_FALSE(RE::FullMatch(TypeParam("a"), empty)); + + const RE re(TypeParam("a.*z")); + EXPECT_TRUE(RE::FullMatch(TypeParam("az"), re)); + EXPECT_TRUE(RE::FullMatch(TypeParam("axyz"), re)); + EXPECT_FALSE(RE::FullMatch(TypeParam("baz"), re)); + EXPECT_FALSE(RE::FullMatch(TypeParam("azy"), re)); +} + +// Tests RE::PartialMatch(). 
+TYPED_TEST(RETest, PartialMatchWorks) { + const RE empty(TypeParam("")); + EXPECT_TRUE(RE::PartialMatch(TypeParam(""), empty)); + EXPECT_TRUE(RE::PartialMatch(TypeParam("a"), empty)); + + const RE re(TypeParam("a.*z")); + EXPECT_TRUE(RE::PartialMatch(TypeParam("az"), re)); + EXPECT_TRUE(RE::PartialMatch(TypeParam("axyz"), re)); + EXPECT_TRUE(RE::PartialMatch(TypeParam("baz"), re)); + EXPECT_TRUE(RE::PartialMatch(TypeParam("azy"), re)); + EXPECT_FALSE(RE::PartialMatch(TypeParam("zza"), re)); +} + +# endif // GTEST_HAS_TYPED_TEST + +#elif GTEST_USES_SIMPLE_RE + +TEST(IsInSetTest, NulCharIsNotInAnySet) { + EXPECT_FALSE(IsInSet('\0', "")); + EXPECT_FALSE(IsInSet('\0', "\0")); + EXPECT_FALSE(IsInSet('\0', "a")); +} + +TEST(IsInSetTest, WorksForNonNulChars) { + EXPECT_FALSE(IsInSet('a', "Ab")); + EXPECT_FALSE(IsInSet('c', "")); + + EXPECT_TRUE(IsInSet('b', "bcd")); + EXPECT_TRUE(IsInSet('b', "ab")); +} + +TEST(IsAsciiDigitTest, IsFalseForNonDigit) { + EXPECT_FALSE(IsAsciiDigit('\0')); + EXPECT_FALSE(IsAsciiDigit(' ')); + EXPECT_FALSE(IsAsciiDigit('+')); + EXPECT_FALSE(IsAsciiDigit('-')); + EXPECT_FALSE(IsAsciiDigit('.')); + EXPECT_FALSE(IsAsciiDigit('a')); +} + +TEST(IsAsciiDigitTest, IsTrueForDigit) { + EXPECT_TRUE(IsAsciiDigit('0')); + EXPECT_TRUE(IsAsciiDigit('1')); + EXPECT_TRUE(IsAsciiDigit('5')); + EXPECT_TRUE(IsAsciiDigit('9')); +} + +TEST(IsAsciiPunctTest, IsFalseForNonPunct) { + EXPECT_FALSE(IsAsciiPunct('\0')); + EXPECT_FALSE(IsAsciiPunct(' ')); + EXPECT_FALSE(IsAsciiPunct('\n')); + EXPECT_FALSE(IsAsciiPunct('a')); + EXPECT_FALSE(IsAsciiPunct('0')); +} + +TEST(IsAsciiPunctTest, IsTrueForPunct) { + for (const char* p = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"; *p; p++) { + EXPECT_PRED1(IsAsciiPunct, *p); + } +} + +TEST(IsRepeatTest, IsFalseForNonRepeatChar) { + EXPECT_FALSE(IsRepeat('\0')); + EXPECT_FALSE(IsRepeat(' ')); + EXPECT_FALSE(IsRepeat('a')); + EXPECT_FALSE(IsRepeat('1')); + EXPECT_FALSE(IsRepeat('-')); +} + +TEST(IsRepeatTest, IsTrueForRepeatChar) { + EXPECT_TRUE(IsRepeat('?')); + EXPECT_TRUE(IsRepeat('*')); + EXPECT_TRUE(IsRepeat('+')); +} + +TEST(IsAsciiWhiteSpaceTest, IsFalseForNonWhiteSpace) { + EXPECT_FALSE(IsAsciiWhiteSpace('\0')); + EXPECT_FALSE(IsAsciiWhiteSpace('a')); + EXPECT_FALSE(IsAsciiWhiteSpace('1')); + EXPECT_FALSE(IsAsciiWhiteSpace('+')); + EXPECT_FALSE(IsAsciiWhiteSpace('_')); +} + +TEST(IsAsciiWhiteSpaceTest, IsTrueForWhiteSpace) { + EXPECT_TRUE(IsAsciiWhiteSpace(' ')); + EXPECT_TRUE(IsAsciiWhiteSpace('\n')); + EXPECT_TRUE(IsAsciiWhiteSpace('\r')); + EXPECT_TRUE(IsAsciiWhiteSpace('\t')); + EXPECT_TRUE(IsAsciiWhiteSpace('\v')); + EXPECT_TRUE(IsAsciiWhiteSpace('\f')); +} + +TEST(IsAsciiWordCharTest, IsFalseForNonWordChar) { + EXPECT_FALSE(IsAsciiWordChar('\0')); + EXPECT_FALSE(IsAsciiWordChar('+')); + EXPECT_FALSE(IsAsciiWordChar('.')); + EXPECT_FALSE(IsAsciiWordChar(' ')); + EXPECT_FALSE(IsAsciiWordChar('\n')); +} + +TEST(IsAsciiWordCharTest, IsTrueForLetter) { + EXPECT_TRUE(IsAsciiWordChar('a')); + EXPECT_TRUE(IsAsciiWordChar('b')); + EXPECT_TRUE(IsAsciiWordChar('A')); + EXPECT_TRUE(IsAsciiWordChar('Z')); +} + +TEST(IsAsciiWordCharTest, IsTrueForDigit) { + EXPECT_TRUE(IsAsciiWordChar('0')); + EXPECT_TRUE(IsAsciiWordChar('1')); + EXPECT_TRUE(IsAsciiWordChar('7')); + EXPECT_TRUE(IsAsciiWordChar('9')); +} + +TEST(IsAsciiWordCharTest, IsTrueForUnderscore) { + EXPECT_TRUE(IsAsciiWordChar('_')); +} + +TEST(IsValidEscapeTest, IsFalseForNonPrintable) { + EXPECT_FALSE(IsValidEscape('\0')); + EXPECT_FALSE(IsValidEscape('\007')); +} + +TEST(IsValidEscapeTest, 
IsFalseForDigit) { + EXPECT_FALSE(IsValidEscape('0')); + EXPECT_FALSE(IsValidEscape('9')); +} + +TEST(IsValidEscapeTest, IsFalseForWhiteSpace) { + EXPECT_FALSE(IsValidEscape(' ')); + EXPECT_FALSE(IsValidEscape('\n')); +} + +TEST(IsValidEscapeTest, IsFalseForSomeLetter) { + EXPECT_FALSE(IsValidEscape('a')); + EXPECT_FALSE(IsValidEscape('Z')); +} + +TEST(IsValidEscapeTest, IsTrueForPunct) { + EXPECT_TRUE(IsValidEscape('.')); + EXPECT_TRUE(IsValidEscape('-')); + EXPECT_TRUE(IsValidEscape('^')); + EXPECT_TRUE(IsValidEscape('$')); + EXPECT_TRUE(IsValidEscape('(')); + EXPECT_TRUE(IsValidEscape(']')); + EXPECT_TRUE(IsValidEscape('{')); + EXPECT_TRUE(IsValidEscape('|')); +} + +TEST(IsValidEscapeTest, IsTrueForSomeLetter) { + EXPECT_TRUE(IsValidEscape('d')); + EXPECT_TRUE(IsValidEscape('D')); + EXPECT_TRUE(IsValidEscape('s')); + EXPECT_TRUE(IsValidEscape('S')); + EXPECT_TRUE(IsValidEscape('w')); + EXPECT_TRUE(IsValidEscape('W')); +} + +TEST(AtomMatchesCharTest, EscapedPunct) { + EXPECT_FALSE(AtomMatchesChar(true, '\\', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, '\\', ' ')); + EXPECT_FALSE(AtomMatchesChar(true, '_', '.')); + EXPECT_FALSE(AtomMatchesChar(true, '.', 'a')); + + EXPECT_TRUE(AtomMatchesChar(true, '\\', '\\')); + EXPECT_TRUE(AtomMatchesChar(true, '_', '_')); + EXPECT_TRUE(AtomMatchesChar(true, '+', '+')); + EXPECT_TRUE(AtomMatchesChar(true, '.', '.')); +} + +TEST(AtomMatchesCharTest, Escaped_d) { + EXPECT_FALSE(AtomMatchesChar(true, 'd', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'd', 'a')); + EXPECT_FALSE(AtomMatchesChar(true, 'd', '.')); + + EXPECT_TRUE(AtomMatchesChar(true, 'd', '0')); + EXPECT_TRUE(AtomMatchesChar(true, 'd', '9')); +} + +TEST(AtomMatchesCharTest, Escaped_D) { + EXPECT_FALSE(AtomMatchesChar(true, 'D', '0')); + EXPECT_FALSE(AtomMatchesChar(true, 'D', '9')); + + EXPECT_TRUE(AtomMatchesChar(true, 'D', '\0')); + EXPECT_TRUE(AtomMatchesChar(true, 'D', 'a')); + EXPECT_TRUE(AtomMatchesChar(true, 'D', '-')); +} + +TEST(AtomMatchesCharTest, Escaped_s) { + EXPECT_FALSE(AtomMatchesChar(true, 's', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 's', 'a')); + EXPECT_FALSE(AtomMatchesChar(true, 's', '.')); + EXPECT_FALSE(AtomMatchesChar(true, 's', '9')); + + EXPECT_TRUE(AtomMatchesChar(true, 's', ' ')); + EXPECT_TRUE(AtomMatchesChar(true, 's', '\n')); + EXPECT_TRUE(AtomMatchesChar(true, 's', '\t')); +} + +TEST(AtomMatchesCharTest, Escaped_S) { + EXPECT_FALSE(AtomMatchesChar(true, 'S', ' ')); + EXPECT_FALSE(AtomMatchesChar(true, 'S', '\r')); + + EXPECT_TRUE(AtomMatchesChar(true, 'S', '\0')); + EXPECT_TRUE(AtomMatchesChar(true, 'S', 'a')); + EXPECT_TRUE(AtomMatchesChar(true, 'S', '9')); +} + +TEST(AtomMatchesCharTest, Escaped_w) { + EXPECT_FALSE(AtomMatchesChar(true, 'w', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'w', '+')); + EXPECT_FALSE(AtomMatchesChar(true, 'w', ' ')); + EXPECT_FALSE(AtomMatchesChar(true, 'w', '\n')); + + EXPECT_TRUE(AtomMatchesChar(true, 'w', '0')); + EXPECT_TRUE(AtomMatchesChar(true, 'w', 'b')); + EXPECT_TRUE(AtomMatchesChar(true, 'w', 'C')); + EXPECT_TRUE(AtomMatchesChar(true, 'w', '_')); +} + +TEST(AtomMatchesCharTest, Escaped_W) { + EXPECT_FALSE(AtomMatchesChar(true, 'W', 'A')); + EXPECT_FALSE(AtomMatchesChar(true, 'W', 'b')); + EXPECT_FALSE(AtomMatchesChar(true, 'W', '9')); + EXPECT_FALSE(AtomMatchesChar(true, 'W', '_')); + + EXPECT_TRUE(AtomMatchesChar(true, 'W', '\0')); + EXPECT_TRUE(AtomMatchesChar(true, 'W', '*')); + EXPECT_TRUE(AtomMatchesChar(true, 'W', '\n')); +} + +TEST(AtomMatchesCharTest, EscapedWhiteSpace) { + 
EXPECT_FALSE(AtomMatchesChar(true, 'f', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'f', '\n')); + EXPECT_FALSE(AtomMatchesChar(true, 'n', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'n', '\r')); + EXPECT_FALSE(AtomMatchesChar(true, 'r', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'r', 'a')); + EXPECT_FALSE(AtomMatchesChar(true, 't', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 't', 't')); + EXPECT_FALSE(AtomMatchesChar(true, 'v', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'v', '\f')); + + EXPECT_TRUE(AtomMatchesChar(true, 'f', '\f')); + EXPECT_TRUE(AtomMatchesChar(true, 'n', '\n')); + EXPECT_TRUE(AtomMatchesChar(true, 'r', '\r')); + EXPECT_TRUE(AtomMatchesChar(true, 't', '\t')); + EXPECT_TRUE(AtomMatchesChar(true, 'v', '\v')); +} + +TEST(AtomMatchesCharTest, UnescapedDot) { + EXPECT_FALSE(AtomMatchesChar(false, '.', '\n')); + + EXPECT_TRUE(AtomMatchesChar(false, '.', '\0')); + EXPECT_TRUE(AtomMatchesChar(false, '.', '.')); + EXPECT_TRUE(AtomMatchesChar(false, '.', 'a')); + EXPECT_TRUE(AtomMatchesChar(false, '.', ' ')); +} + +TEST(AtomMatchesCharTest, UnescapedChar) { + EXPECT_FALSE(AtomMatchesChar(false, 'a', '\0')); + EXPECT_FALSE(AtomMatchesChar(false, 'a', 'b')); + EXPECT_FALSE(AtomMatchesChar(false, '$', 'a')); + + EXPECT_TRUE(AtomMatchesChar(false, '$', '$')); + EXPECT_TRUE(AtomMatchesChar(false, '5', '5')); + EXPECT_TRUE(AtomMatchesChar(false, 'Z', 'Z')); +} + +TEST(ValidateRegexTest, GeneratesFailureAndReturnsFalseForInvalid) { + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex(NULL)), + "NULL is not a valid simple regular expression"); + EXPECT_NONFATAL_FAILURE( + ASSERT_FALSE(ValidateRegex("a\\")), + "Syntax error at index 1 in simple regular expression \"a\\\": "); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a\\")), + "'\\' cannot appear at the end"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("\\n\\")), + "'\\' cannot appear at the end"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("\\s\\hb")), + "invalid escape sequence \"\\h\""); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^^")), + "'^' can only appear at the beginning"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex(".*^b")), + "'^' can only appear at the beginning"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("$$")), + "'$' can only appear at the end"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^$a")), + "'$' can only appear at the end"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a(b")), + "'(' is unsupported"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("ab)")), + "')' is unsupported"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("[ab")), + "'[' is unsupported"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a{2")), + "'{' is unsupported"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("?")), + "'?' 
can only follow a repeatable token"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^*")), + "'*' can only follow a repeatable token"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("5*+")), + "'+' can only follow a repeatable token"); +} + +TEST(ValidateRegexTest, ReturnsTrueForValid) { + EXPECT_TRUE(ValidateRegex("")); + EXPECT_TRUE(ValidateRegex("a")); + EXPECT_TRUE(ValidateRegex(".*")); + EXPECT_TRUE(ValidateRegex("^a_+")); + EXPECT_TRUE(ValidateRegex("^a\\t\\&?")); + EXPECT_TRUE(ValidateRegex("09*$")); + EXPECT_TRUE(ValidateRegex("^Z$")); + EXPECT_TRUE(ValidateRegex("a\\^Z\\$\\(\\)\\|\\[\\]\\{\\}")); +} + +TEST(MatchRepetitionAndRegexAtHeadTest, WorksForZeroOrOne) { + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "a", "ba")); + // Repeating more than once. + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "aab")); + + // Repeating zero times. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "ba")); + // Repeating once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "ab")); + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '#', '?', ".", "##")); +} + +TEST(MatchRepetitionAndRegexAtHeadTest, WorksForZeroOrMany) { + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '*', "a$", "baab")); + + // Repeating zero times. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '*', "b", "bc")); + // Repeating once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '*', "b", "abc")); + // Repeating more than once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(true, 'w', '*', "-", "ab_1-g")); +} + +TEST(MatchRepetitionAndRegexAtHeadTest, WorksForOneOrMany) { + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '+', "a$", "baab")); + // Repeating zero times. + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '+', "b", "bc")); + + // Repeating once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '+', "b", "abc")); + // Repeating more than once. 
+ EXPECT_TRUE(MatchRepetitionAndRegexAtHead(true, 'w', '+', "-", "ab_1-g")); +} + +TEST(MatchRegexAtHeadTest, ReturnsTrueForEmptyRegex) { + EXPECT_TRUE(MatchRegexAtHead("", "")); + EXPECT_TRUE(MatchRegexAtHead("", "ab")); +} + +TEST(MatchRegexAtHeadTest, WorksWhenDollarIsInRegex) { + EXPECT_FALSE(MatchRegexAtHead("$", "a")); + + EXPECT_TRUE(MatchRegexAtHead("$", "")); + EXPECT_TRUE(MatchRegexAtHead("a$", "a")); +} + +TEST(MatchRegexAtHeadTest, WorksWhenRegexStartsWithEscapeSequence) { + EXPECT_FALSE(MatchRegexAtHead("\\w", "+")); + EXPECT_FALSE(MatchRegexAtHead("\\W", "ab")); + + EXPECT_TRUE(MatchRegexAtHead("\\sa", "\nab")); + EXPECT_TRUE(MatchRegexAtHead("\\d", "1a")); +} + +TEST(MatchRegexAtHeadTest, WorksWhenRegexStartsWithRepetition) { + EXPECT_FALSE(MatchRegexAtHead(".+a", "abc")); + EXPECT_FALSE(MatchRegexAtHead("a?b", "aab")); + + EXPECT_TRUE(MatchRegexAtHead(".*a", "bc12-ab")); + EXPECT_TRUE(MatchRegexAtHead("a?b", "b")); + EXPECT_TRUE(MatchRegexAtHead("a?b", "ab")); +} + +TEST(MatchRegexAtHeadTest, + WorksWhenRegexStartsWithRepetionOfEscapeSequence) { + EXPECT_FALSE(MatchRegexAtHead("\\.+a", "abc")); + EXPECT_FALSE(MatchRegexAtHead("\\s?b", " b")); + + EXPECT_TRUE(MatchRegexAtHead("\\(*a", "((((ab")); + EXPECT_TRUE(MatchRegexAtHead("\\^?b", "^b")); + EXPECT_TRUE(MatchRegexAtHead("\\\\?b", "b")); + EXPECT_TRUE(MatchRegexAtHead("\\\\?b", "\\b")); +} + +TEST(MatchRegexAtHeadTest, MatchesSequentially) { + EXPECT_FALSE(MatchRegexAtHead("ab.*c", "acabc")); + + EXPECT_TRUE(MatchRegexAtHead("ab.*c", "ab-fsc")); +} + +TEST(MatchRegexAnywhereTest, ReturnsFalseWhenStringIsNull) { + EXPECT_FALSE(MatchRegexAnywhere("", NULL)); +} + +TEST(MatchRegexAnywhereTest, WorksWhenRegexStartsWithCaret) { + EXPECT_FALSE(MatchRegexAnywhere("^a", "ba")); + EXPECT_FALSE(MatchRegexAnywhere("^$", "a")); + + EXPECT_TRUE(MatchRegexAnywhere("^a", "ab")); + EXPECT_TRUE(MatchRegexAnywhere("^", "ab")); + EXPECT_TRUE(MatchRegexAnywhere("^$", "")); +} + +TEST(MatchRegexAnywhereTest, ReturnsFalseWhenNoMatch) { + EXPECT_FALSE(MatchRegexAnywhere("a", "bcde123")); + EXPECT_FALSE(MatchRegexAnywhere("a.+a", "--aa88888888")); +} + +TEST(MatchRegexAnywhereTest, ReturnsTrueWhenMatchingPrefix) { + EXPECT_TRUE(MatchRegexAnywhere("\\w+", "ab1_ - 5")); + EXPECT_TRUE(MatchRegexAnywhere(".*=", "=")); + EXPECT_TRUE(MatchRegexAnywhere("x.*ab?.*bc", "xaaabc")); +} + +TEST(MatchRegexAnywhereTest, ReturnsTrueWhenMatchingNonPrefix) { + EXPECT_TRUE(MatchRegexAnywhere("\\w+", "$$$ ab1_ - 5")); + EXPECT_TRUE(MatchRegexAnywhere("\\.+=", "= ...=")); +} + +// Tests RE's implicit constructors. +TEST(RETest, ImplicitConstructorWorks) { + const RE empty(""); + EXPECT_STREQ("", empty.pattern()); + + const RE simple("hello"); + EXPECT_STREQ("hello", simple.pattern()); +} + +// Tests that RE's constructors reject invalid regular expressions. +TEST(RETest, RejectsInvalidRegex) { + EXPECT_NONFATAL_FAILURE({ + const RE normal(NULL); + }, "NULL is not a valid simple regular expression"); + + EXPECT_NONFATAL_FAILURE({ + const RE normal(".*(\\w+"); + }, "'(' is unsupported"); + + EXPECT_NONFATAL_FAILURE({ + const RE invalid("^?"); + }, "'?' can only follow a repeatable token"); +} + +// Tests RE::FullMatch(). 
+TEST(RETest, FullMatchWorks) { + const RE empty(""); + EXPECT_TRUE(RE::FullMatch("", empty)); + EXPECT_FALSE(RE::FullMatch("a", empty)); + + const RE re1("a"); + EXPECT_TRUE(RE::FullMatch("a", re1)); + + const RE re("a.*z"); + EXPECT_TRUE(RE::FullMatch("az", re)); + EXPECT_TRUE(RE::FullMatch("axyz", re)); + EXPECT_FALSE(RE::FullMatch("baz", re)); + EXPECT_FALSE(RE::FullMatch("azy", re)); +} + +// Tests RE::PartialMatch(). +TEST(RETest, PartialMatchWorks) { + const RE empty(""); + EXPECT_TRUE(RE::PartialMatch("", empty)); + EXPECT_TRUE(RE::PartialMatch("a", empty)); + + const RE re("a.*z"); + EXPECT_TRUE(RE::PartialMatch("az", re)); + EXPECT_TRUE(RE::PartialMatch("axyz", re)); + EXPECT_TRUE(RE::PartialMatch("baz", re)); + EXPECT_TRUE(RE::PartialMatch("azy", re)); + EXPECT_FALSE(RE::PartialMatch("zza", re)); +} + +#endif // GTEST_USES_POSIX_RE + +#if !GTEST_OS_WINDOWS_MOBILE + +TEST(CaptureTest, CapturesStdout) { + CaptureStdout(); + fprintf(stdout, "abc"); + EXPECT_STREQ("abc", GetCapturedStdout().c_str()); + + CaptureStdout(); + fprintf(stdout, "def%cghi", '\0'); + EXPECT_EQ(::std::string("def\0ghi", 7), ::std::string(GetCapturedStdout())); +} + +TEST(CaptureTest, CapturesStderr) { + CaptureStderr(); + fprintf(stderr, "jkl"); + EXPECT_STREQ("jkl", GetCapturedStderr().c_str()); + + CaptureStderr(); + fprintf(stderr, "jkl%cmno", '\0'); + EXPECT_EQ(::std::string("jkl\0mno", 7), ::std::string(GetCapturedStderr())); +} + +// Tests that stdout and stderr capture don't interfere with each other. +TEST(CaptureTest, CapturesStdoutAndStderr) { + CaptureStdout(); + CaptureStderr(); + fprintf(stdout, "pqr"); + fprintf(stderr, "stu"); + EXPECT_STREQ("pqr", GetCapturedStdout().c_str()); + EXPECT_STREQ("stu", GetCapturedStderr().c_str()); +} + +TEST(CaptureDeathTest, CannotReenterStdoutCapture) { + CaptureStdout(); + EXPECT_DEATH_IF_SUPPORTED(CaptureStdout(), + "Only one stdout capturer can exist at a time"); + GetCapturedStdout(); + + // We cannot test stderr capturing using death tests as they use it + // themselves. +} + +#endif // !GTEST_OS_WINDOWS_MOBILE + +TEST(ThreadLocalTest, DefaultConstructorInitializesToDefaultValues) { + ThreadLocal t1; + EXPECT_EQ(0, t1.get()); + + ThreadLocal t2; + EXPECT_TRUE(t2.get() == NULL); +} + +TEST(ThreadLocalTest, SingleParamConstructorInitializesToParam) { + ThreadLocal t1(123); + EXPECT_EQ(123, t1.get()); + + int i = 0; + ThreadLocal t2(&i); + EXPECT_EQ(&i, t2.get()); +} + +class NoDefaultContructor { + public: + explicit NoDefaultContructor(const char*) {} + NoDefaultContructor(const NoDefaultContructor&) {} +}; + +TEST(ThreadLocalTest, ValueDefaultContructorIsNotRequiredForParamVersion) { + ThreadLocal bar(NoDefaultContructor("foo")); + bar.pointer(); +} + +TEST(ThreadLocalTest, GetAndPointerReturnSameValue) { + ThreadLocal thread_local_string; + + EXPECT_EQ(thread_local_string.pointer(), &(thread_local_string.get())); + + // Verifies the condition still holds after calling set. 
+ thread_local_string.set("foo"); + EXPECT_EQ(thread_local_string.pointer(), &(thread_local_string.get())); +} + +TEST(ThreadLocalTest, PointerAndConstPointerReturnSameValue) { + ThreadLocal thread_local_string; + const ThreadLocal& const_thread_local_string = + thread_local_string; + + EXPECT_EQ(thread_local_string.pointer(), const_thread_local_string.pointer()); + + thread_local_string.set("foo"); + EXPECT_EQ(thread_local_string.pointer(), const_thread_local_string.pointer()); +} + +#if GTEST_IS_THREADSAFE + +void AddTwo(int* param) { *param += 2; } + +TEST(ThreadWithParamTest, ConstructorExecutesThreadFunc) { + int i = 40; + ThreadWithParam thread(&AddTwo, &i, NULL); + thread.Join(); + EXPECT_EQ(42, i); +} + +TEST(MutexDeathTest, AssertHeldShouldAssertWhenNotLocked) { + // AssertHeld() is flaky only in the presence of multiple threads accessing + // the lock. In this case, the test is robust. + EXPECT_DEATH_IF_SUPPORTED({ + Mutex m; + { MutexLock lock(&m); } + m.AssertHeld(); + }, + "thread .*hold"); +} + +TEST(MutexTest, AssertHeldShouldNotAssertWhenLocked) { + Mutex m; + MutexLock lock(&m); + m.AssertHeld(); +} + +class AtomicCounterWithMutex { + public: + explicit AtomicCounterWithMutex(Mutex* mutex) : + value_(0), mutex_(mutex), random_(42) {} + + void Increment() { + MutexLock lock(mutex_); + int temp = value_; + { + // We need to put up a memory barrier to prevent reads and writes to + // value_ rearranged with the call to SleepMilliseconds when observed + // from other threads. +#if GTEST_HAS_PTHREAD + // On POSIX, locking a mutex puts up a memory barrier. We cannot use + // Mutex and MutexLock here or rely on their memory barrier + // functionality as we are testing them here. + pthread_mutex_t memory_barrier_mutex; + GTEST_CHECK_POSIX_SUCCESS_( + pthread_mutex_init(&memory_barrier_mutex, NULL)); + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&memory_barrier_mutex)); + + SleepMilliseconds(random_.Generate(30)); + + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&memory_barrier_mutex)); + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&memory_barrier_mutex)); +#elif GTEST_OS_WINDOWS + // On Windows, performing an interlocked access puts up a memory barrier. + volatile LONG dummy = 0; + ::InterlockedIncrement(&dummy); + SleepMilliseconds(random_.Generate(30)); + ::InterlockedIncrement(&dummy); +#else +# error "Memory barrier not implemented on this platform." +#endif // GTEST_HAS_PTHREAD + } + value_ = temp + 1; + } + int value() const { return value_; } + + private: + volatile int value_; + Mutex* const mutex_; // Protects value_. + Random random_; +}; + +void CountingThreadFunc(pair param) { + for (int i = 0; i < param.second; ++i) + param.first->Increment(); +} + +// Tests that the mutex only lets one thread at a time to lock it. +TEST(MutexTest, OnlyOneThreadCanLockAtATime) { + Mutex mutex; + AtomicCounterWithMutex locked_counter(&mutex); + + typedef ThreadWithParam > ThreadType; + const int kCycleCount = 20; + const int kThreadCount = 7; + scoped_ptr counting_threads[kThreadCount]; + Notification threads_can_start; + // Creates and runs kThreadCount threads that increment locked_counter + // kCycleCount times each. 
+ for (int i = 0; i < kThreadCount; ++i) { + counting_threads[i].reset(new ThreadType(&CountingThreadFunc, + make_pair(&locked_counter, + kCycleCount), + &threads_can_start)); + } + threads_can_start.Notify(); + for (int i = 0; i < kThreadCount; ++i) + counting_threads[i]->Join(); + + // If the mutex lets more than one thread to increment the counter at a + // time, they are likely to encounter a race condition and have some + // increments overwritten, resulting in the lower then expected counter + // value. + EXPECT_EQ(kCycleCount * kThreadCount, locked_counter.value()); +} + +template +void RunFromThread(void (func)(T), T param) { + ThreadWithParam thread(func, param, NULL); + thread.Join(); +} + +void RetrieveThreadLocalValue( + pair*, std::string*> param) { + *param.second = param.first->get(); +} + +TEST(ThreadLocalTest, ParameterizedConstructorSetsDefault) { + ThreadLocal thread_local_string("foo"); + EXPECT_STREQ("foo", thread_local_string.get().c_str()); + + thread_local_string.set("bar"); + EXPECT_STREQ("bar", thread_local_string.get().c_str()); + + std::string result; + RunFromThread(&RetrieveThreadLocalValue, + make_pair(&thread_local_string, &result)); + EXPECT_STREQ("foo", result.c_str()); +} + +// Keeps track of whether of destructors being called on instances of +// DestructorTracker. On Windows, waits for the destructor call reports. +class DestructorCall { + public: + DestructorCall() { + invoked_ = false; +#if GTEST_OS_WINDOWS + wait_event_.Reset(::CreateEvent(NULL, TRUE, FALSE, NULL)); + GTEST_CHECK_(wait_event_.Get() != NULL); +#endif + } + + bool CheckDestroyed() const { +#if GTEST_OS_WINDOWS + if (::WaitForSingleObject(wait_event_.Get(), 1000) != WAIT_OBJECT_0) + return false; +#endif + return invoked_; + } + + void ReportDestroyed() { + invoked_ = true; +#if GTEST_OS_WINDOWS + ::SetEvent(wait_event_.Get()); +#endif + } + + static std::vector& List() { return *list_; } + + static void ResetList() { + for (size_t i = 0; i < list_->size(); ++i) { + delete list_->at(i); + } + list_->clear(); + } + + private: + bool invoked_; +#if GTEST_OS_WINDOWS + AutoHandle wait_event_; +#endif + static std::vector* const list_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(DestructorCall); +}; + +std::vector* const DestructorCall::list_ = + new std::vector; + +// DestructorTracker keeps track of whether its instances have been +// destroyed. +class DestructorTracker { + public: + DestructorTracker() : index_(GetNewIndex()) {} + DestructorTracker(const DestructorTracker& /* rhs */) + : index_(GetNewIndex()) {} + ~DestructorTracker() { + // We never access DestructorCall::List() concurrently, so we don't need + // to protect this acccess with a mutex. + DestructorCall::List()[index_]->ReportDestroyed(); + } + + private: + static size_t GetNewIndex() { + DestructorCall::List().push_back(new DestructorCall); + return DestructorCall::List().size() - 1; + } + const size_t index_; + + GTEST_DISALLOW_ASSIGN_(DestructorTracker); +}; + +typedef ThreadLocal* ThreadParam; + +void CallThreadLocalGet(ThreadParam thread_local_param) { + thread_local_param->get(); +} + +// Tests that when a ThreadLocal object dies in a thread, it destroys +// the managed object for that thread. +TEST(ThreadLocalTest, DestroysManagedObjectForOwnThreadWhenDying) { + DestructorCall::ResetList(); + + { + ThreadLocal thread_local_tracker; + ASSERT_EQ(0U, DestructorCall::List().size()); + + // This creates another DestructorTracker object for the main thread. 
+ thread_local_tracker.get(); + ASSERT_EQ(1U, DestructorCall::List().size()); + ASSERT_FALSE(DestructorCall::List()[0]->CheckDestroyed()); + } + + // Now thread_local_tracker has died. + ASSERT_EQ(1U, DestructorCall::List().size()); + EXPECT_TRUE(DestructorCall::List()[0]->CheckDestroyed()); + + DestructorCall::ResetList(); +} + +// Tests that when a thread exits, the thread-local object for that +// thread is destroyed. +TEST(ThreadLocalTest, DestroysManagedObjectAtThreadExit) { + DestructorCall::ResetList(); + + { + ThreadLocal thread_local_tracker; + ASSERT_EQ(0U, DestructorCall::List().size()); + + // This creates another DestructorTracker object in the new thread. + ThreadWithParam thread( + &CallThreadLocalGet, &thread_local_tracker, NULL); + thread.Join(); + + // The thread has exited, and we should have a DestroyedTracker + // instance created for it. But it may not have been destroyed yet. + ASSERT_EQ(1U, DestructorCall::List().size()); + } + + // The thread has exited and thread_local_tracker has died. + ASSERT_EQ(1U, DestructorCall::List().size()); + EXPECT_TRUE(DestructorCall::List()[0]->CheckDestroyed()); + + DestructorCall::ResetList(); +} + +TEST(ThreadLocalTest, ThreadLocalMutationsAffectOnlyCurrentThread) { + ThreadLocal thread_local_string; + thread_local_string.set("Foo"); + EXPECT_STREQ("Foo", thread_local_string.get().c_str()); + + std::string result; + RunFromThread(&RetrieveThreadLocalValue, + make_pair(&thread_local_string, &result)); + EXPECT_TRUE(result.empty()); +} + +#endif // GTEST_IS_THREADSAFE + +#if GTEST_OS_WINDOWS +TEST(WindowsTypesTest, HANDLEIsVoidStar) { + StaticAssertTypeEq(); +} + +TEST(WindowsTypesTest, CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION) { + StaticAssertTypeEq(); +} +#endif // GTEST_OS_WINDOWS + +} // namespace internal +} // namespace testing +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Test - The Google C++ Testing Framework +// +// This file tests the universal value printer. 
+ +#include "gtest/gtest-printers.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" + +// hash_map and hash_set are available under Visual C++, or on Linux. +#if GTEST_HAS_HASH_MAP_ +# include // NOLINT +#endif // GTEST_HAS_HASH_MAP_ +#if GTEST_HAS_HASH_SET_ +# include // NOLINT +#endif // GTEST_HAS_HASH_SET_ + +#if GTEST_HAS_STD_FORWARD_LIST_ +# include // NOLINT +#endif // GTEST_HAS_STD_FORWARD_LIST_ + +// Some user-defined types for testing the universal value printer. + +// An anonymous enum type. +enum AnonymousEnum { + kAE1 = -1, + kAE2 = 1 +}; + +// An enum without a user-defined printer. +enum EnumWithoutPrinter { + kEWP1 = -2, + kEWP2 = 42 +}; + +// An enum with a << operator. +enum EnumWithStreaming { + kEWS1 = 10 +}; + +std::ostream& operator<<(std::ostream& os, EnumWithStreaming e) { + return os << (e == kEWS1 ? "kEWS1" : "invalid"); +} + +// An enum with a PrintTo() function. +enum EnumWithPrintTo { + kEWPT1 = 1 +}; + +void PrintTo(EnumWithPrintTo e, std::ostream* os) { + *os << (e == kEWPT1 ? "kEWPT1" : "invalid"); +} + +// A class implicitly convertible to BiggestInt. +class BiggestIntConvertible { + public: + operator ::testing::internal::BiggestInt() const { return 42; } +}; + +// A user-defined unprintable class template in the global namespace. +template +class UnprintableTemplateInGlobal { + public: + UnprintableTemplateInGlobal() : value_() {} + private: + T value_; +}; + +// A user-defined streamable type in the global namespace. +class StreamableInGlobal { + public: + virtual ~StreamableInGlobal() {} +}; + +inline void operator<<(::std::ostream& os, const StreamableInGlobal& /* x */) { + os << "StreamableInGlobal"; +} + +void operator<<(::std::ostream& os, const StreamableInGlobal* /* x */) { + os << "StreamableInGlobal*"; +} + +namespace foo { + +// A user-defined unprintable type in a user namespace. +class UnprintableInFoo { + public: + UnprintableInFoo() : z_(0) { memcpy(xy_, "\xEF\x12\x0\x0\x34\xAB\x0\x0", 8); } + double z() const { return z_; } + private: + char xy_[8]; + double z_; +}; + +// A user-defined printable type in a user-chosen namespace. +struct PrintableViaPrintTo { + PrintableViaPrintTo() : value() {} + int value; +}; + +void PrintTo(const PrintableViaPrintTo& x, ::std::ostream* os) { + *os << "PrintableViaPrintTo: " << x.value; +} + +// A type with a user-defined << for printing its pointer. +struct PointerPrintable { +}; + +::std::ostream& operator<<(::std::ostream& os, + const PointerPrintable* /* x */) { + return os << "PointerPrintable*"; +} + +// A user-defined printable class template in a user-chosen namespace. +template +class PrintableViaPrintToTemplate { + public: + explicit PrintableViaPrintToTemplate(const T& a_value) : value_(a_value) {} + + const T& value() const { return value_; } + private: + T value_; +}; + +template +void PrintTo(const PrintableViaPrintToTemplate& x, ::std::ostream* os) { + *os << "PrintableViaPrintToTemplate: " << x.value(); +} + +// A user-defined streamable class template in a user namespace. 
+template +class StreamableTemplateInFoo { + public: + StreamableTemplateInFoo() : value_() {} + + const T& value() const { return value_; } + private: + T value_; +}; + +template +inline ::std::ostream& operator<<(::std::ostream& os, + const StreamableTemplateInFoo& x) { + return os << "StreamableTemplateInFoo: " << x.value(); +} + +} // namespace foo + +namespace testing { +namespace gtest_printers_test { + +using ::std::deque; +using ::std::list; +using ::std::make_pair; +using ::std::map; +using ::std::multimap; +using ::std::multiset; +using ::std::pair; +using ::std::set; +using ::std::vector; +using ::testing::PrintToString; +using ::testing::internal::FormatForComparisonFailureMessage; +using ::testing::internal::ImplicitCast_; +using ::testing::internal::NativeArray; +using ::testing::internal::RE; +using ::testing::internal::RelationToSourceReference; +using ::testing::internal::Strings; +using ::testing::internal::UniversalPrint; +using ::testing::internal::UniversalPrinter; +using ::testing::internal::UniversalTersePrint; +using ::testing::internal::UniversalTersePrintTupleFieldsToStrings; +using ::testing::internal::string; + +// The hash_* classes are not part of the C++ standard. STLport +// defines them in namespace std. MSVC defines them in ::stdext. GCC +// defines them in ::. +#ifdef _STLP_HASH_MAP // We got from STLport. +using ::std::hash_map; +using ::std::hash_set; +using ::std::hash_multimap; +using ::std::hash_multiset; +#elif _MSC_VER +using ::stdext::hash_map; +using ::stdext::hash_set; +using ::stdext::hash_multimap; +using ::stdext::hash_multiset; +#endif + +// Prints a value to a string using the universal value printer. This +// is a helper for testing UniversalPrinter::Print() for various types. +template +string Print(const T& value) { + ::std::stringstream ss; + UniversalPrinter::Print(value, &ss); + return ss.str(); +} + +// Prints a value passed by reference to a string, using the universal +// value printer. This is a helper for testing +// UniversalPrinter::Print() for various types. +template +string PrintByRef(const T& value) { + ::std::stringstream ss; + UniversalPrinter::Print(value, &ss); + return ss.str(); +} + +// Tests printing various enum types. + +TEST(PrintEnumTest, AnonymousEnum) { + EXPECT_EQ("-1", Print(kAE1)); + EXPECT_EQ("1", Print(kAE2)); +} + +TEST(PrintEnumTest, EnumWithoutPrinter) { + EXPECT_EQ("-2", Print(kEWP1)); + EXPECT_EQ("42", Print(kEWP2)); +} + +TEST(PrintEnumTest, EnumWithStreaming) { + EXPECT_EQ("kEWS1", Print(kEWS1)); + EXPECT_EQ("invalid", Print(static_cast(0))); +} + +TEST(PrintEnumTest, EnumWithPrintTo) { + EXPECT_EQ("kEWPT1", Print(kEWPT1)); + EXPECT_EQ("invalid", Print(static_cast(0))); +} + +// Tests printing a class implicitly convertible to BiggestInt. + +TEST(PrintClassTest, BiggestIntConvertible) { + EXPECT_EQ("42", Print(BiggestIntConvertible())); +} + +// Tests printing various char types. + +// char. +TEST(PrintCharTest, PlainChar) { + EXPECT_EQ("'\\0'", Print('\0')); + EXPECT_EQ("'\\'' (39, 0x27)", Print('\'')); + EXPECT_EQ("'\"' (34, 0x22)", Print('"')); + EXPECT_EQ("'?' 
(63, 0x3F)", Print('?')); + EXPECT_EQ("'\\\\' (92, 0x5C)", Print('\\')); + EXPECT_EQ("'\\a' (7)", Print('\a')); + EXPECT_EQ("'\\b' (8)", Print('\b')); + EXPECT_EQ("'\\f' (12, 0xC)", Print('\f')); + EXPECT_EQ("'\\n' (10, 0xA)", Print('\n')); + EXPECT_EQ("'\\r' (13, 0xD)", Print('\r')); + EXPECT_EQ("'\\t' (9)", Print('\t')); + EXPECT_EQ("'\\v' (11, 0xB)", Print('\v')); + EXPECT_EQ("'\\x7F' (127)", Print('\x7F')); + EXPECT_EQ("'\\xFF' (255)", Print('\xFF')); + EXPECT_EQ("' ' (32, 0x20)", Print(' ')); + EXPECT_EQ("'a' (97, 0x61)", Print('a')); +} + +// signed char. +TEST(PrintCharTest, SignedChar) { + EXPECT_EQ("'\\0'", Print(static_cast('\0'))); + EXPECT_EQ("'\\xCE' (-50)", + Print(static_cast(-50))); +} + +// unsigned char. +TEST(PrintCharTest, UnsignedChar) { + EXPECT_EQ("'\\0'", Print(static_cast('\0'))); + EXPECT_EQ("'b' (98, 0x62)", + Print(static_cast('b'))); +} + +// Tests printing other simple, built-in types. + +// bool. +TEST(PrintBuiltInTypeTest, Bool) { + EXPECT_EQ("false", Print(false)); + EXPECT_EQ("true", Print(true)); +} + +// wchar_t. +TEST(PrintBuiltInTypeTest, Wchar_t) { + EXPECT_EQ("L'\\0'", Print(L'\0')); + EXPECT_EQ("L'\\'' (39, 0x27)", Print(L'\'')); + EXPECT_EQ("L'\"' (34, 0x22)", Print(L'"')); + EXPECT_EQ("L'?' (63, 0x3F)", Print(L'?')); + EXPECT_EQ("L'\\\\' (92, 0x5C)", Print(L'\\')); + EXPECT_EQ("L'\\a' (7)", Print(L'\a')); + EXPECT_EQ("L'\\b' (8)", Print(L'\b')); + EXPECT_EQ("L'\\f' (12, 0xC)", Print(L'\f')); + EXPECT_EQ("L'\\n' (10, 0xA)", Print(L'\n')); + EXPECT_EQ("L'\\r' (13, 0xD)", Print(L'\r')); + EXPECT_EQ("L'\\t' (9)", Print(L'\t')); + EXPECT_EQ("L'\\v' (11, 0xB)", Print(L'\v')); + EXPECT_EQ("L'\\x7F' (127)", Print(L'\x7F')); + EXPECT_EQ("L'\\xFF' (255)", Print(L'\xFF')); + EXPECT_EQ("L' ' (32, 0x20)", Print(L' ')); + EXPECT_EQ("L'a' (97, 0x61)", Print(L'a')); + EXPECT_EQ("L'\\x576' (1398)", Print(static_cast(0x576))); + EXPECT_EQ("L'\\xC74D' (51021)", Print(static_cast(0xC74D))); +} + +// Test that Int64 provides more storage than wchar_t. +TEST(PrintTypeSizeTest, Wchar_t) { + EXPECT_LT(sizeof(wchar_t), sizeof(testing::internal::Int64)); +} + +// Various integer types. +TEST(PrintBuiltInTypeTest, Integer) { + EXPECT_EQ("'\\xFF' (255)", Print(static_cast(255))); // uint8 + EXPECT_EQ("'\\x80' (-128)", Print(static_cast(-128))); // int8 + EXPECT_EQ("65535", Print(USHRT_MAX)); // uint16 + EXPECT_EQ("-32768", Print(SHRT_MIN)); // int16 + EXPECT_EQ("4294967295", Print(UINT_MAX)); // uint32 + EXPECT_EQ("-2147483648", Print(INT_MIN)); // int32 + EXPECT_EQ("18446744073709551615", + Print(static_cast(-1))); // uint64 + EXPECT_EQ("-9223372036854775808", + Print(static_cast(1) << 63)); // int64 +} + +// Size types. +TEST(PrintBuiltInTypeTest, Size_t) { + EXPECT_EQ("1", Print(sizeof('a'))); // size_t. +#if !GTEST_OS_WINDOWS + // Windows has no ssize_t type. + EXPECT_EQ("-2", Print(static_cast(-2))); // ssize_t. +#endif // !GTEST_OS_WINDOWS +} + +// Floating-points. +TEST(PrintBuiltInTypeTest, FloatingPoints) { + EXPECT_EQ("1.5", Print(1.5f)); // float + EXPECT_EQ("-2.5", Print(-2.5)); // double +} + +// Since ::std::stringstream::operator<<(const void *) formats the pointer +// output differently with different compilers, we have to create the expected +// output first and use it as our expectation. +static string PrintPointer(const void *p) { + ::std::stringstream expected_result_stream; + expected_result_stream << p; + return expected_result_stream.str(); +} + +// Tests printing C strings. + +// const char*. 
+TEST(PrintCStringTest, Const) { + const char* p = "World"; + EXPECT_EQ(PrintPointer(p) + " pointing to \"World\"", Print(p)); +} + +// char*. +TEST(PrintCStringTest, NonConst) { + char p[] = "Hi"; + EXPECT_EQ(PrintPointer(p) + " pointing to \"Hi\"", + Print(static_cast(p))); +} + +// NULL C string. +TEST(PrintCStringTest, Null) { + const char* p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// Tests that C strings are escaped properly. +TEST(PrintCStringTest, EscapesProperly) { + const char* p = "'\"?\\\a\b\f\n\r\t\v\x7F\xFF a"; + EXPECT_EQ(PrintPointer(p) + " pointing to \"'\\\"?\\\\\\a\\b\\f" + "\\n\\r\\t\\v\\x7F\\xFF a\"", + Print(p)); +} + +// MSVC compiler can be configured to define whar_t as a typedef +// of unsigned short. Defining an overload for const wchar_t* in that case +// would cause pointers to unsigned shorts be printed as wide strings, +// possibly accessing more memory than intended and causing invalid +// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when +// wchar_t is implemented as a native type. +#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) + +// const wchar_t*. +TEST(PrintWideCStringTest, Const) { + const wchar_t* p = L"World"; + EXPECT_EQ(PrintPointer(p) + " pointing to L\"World\"", Print(p)); +} + +// wchar_t*. +TEST(PrintWideCStringTest, NonConst) { + wchar_t p[] = L"Hi"; + EXPECT_EQ(PrintPointer(p) + " pointing to L\"Hi\"", + Print(static_cast(p))); +} + +// NULL wide C string. +TEST(PrintWideCStringTest, Null) { + const wchar_t* p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// Tests that wide C strings are escaped properly. +TEST(PrintWideCStringTest, EscapesProperly) { + const wchar_t s[] = {'\'', '"', '?', '\\', '\a', '\b', '\f', '\n', '\r', + '\t', '\v', 0xD3, 0x576, 0x8D3, 0xC74D, ' ', 'a', '\0'}; + EXPECT_EQ(PrintPointer(s) + " pointing to L\"'\\\"?\\\\\\a\\b\\f" + "\\n\\r\\t\\v\\xD3\\x576\\x8D3\\xC74D a\"", + Print(static_cast(s))); +} +#endif // native wchar_t + +// Tests printing pointers to other char types. + +// signed char*. +TEST(PrintCharPointerTest, SignedChar) { + signed char* p = reinterpret_cast(0x1234); + EXPECT_EQ(PrintPointer(p), Print(p)); + p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// const signed char*. +TEST(PrintCharPointerTest, ConstSignedChar) { + signed char* p = reinterpret_cast(0x1234); + EXPECT_EQ(PrintPointer(p), Print(p)); + p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// unsigned char*. +TEST(PrintCharPointerTest, UnsignedChar) { + unsigned char* p = reinterpret_cast(0x1234); + EXPECT_EQ(PrintPointer(p), Print(p)); + p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// const unsigned char*. +TEST(PrintCharPointerTest, ConstUnsignedChar) { + const unsigned char* p = reinterpret_cast(0x1234); + EXPECT_EQ(PrintPointer(p), Print(p)); + p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// Tests printing pointers to simple, built-in types. + +// bool*. +TEST(PrintPointerToBuiltInTypeTest, Bool) { + bool* p = reinterpret_cast(0xABCD); + EXPECT_EQ(PrintPointer(p), Print(p)); + p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// void*. +TEST(PrintPointerToBuiltInTypeTest, Void) { + void* p = reinterpret_cast(0xABCD); + EXPECT_EQ(PrintPointer(p), Print(p)); + p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// const void*. +TEST(PrintPointerToBuiltInTypeTest, ConstVoid) { + const void* p = reinterpret_cast(0xABCD); + EXPECT_EQ(PrintPointer(p), Print(p)); + p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// Tests printing pointers to pointers. 
+TEST(PrintPointerToPointerTest, IntPointerPointer) { + int** p = reinterpret_cast(0xABCD); + EXPECT_EQ(PrintPointer(p), Print(p)); + p = NULL; + EXPECT_EQ("NULL", Print(p)); +} + +// Tests printing (non-member) function pointers. + +void MyFunction(int /* n */) {} + +TEST(PrintPointerTest, NonMemberFunctionPointer) { + // We cannot directly cast &MyFunction to const void* because the + // standard disallows casting between pointers to functions and + // pointers to objects, and some compilers (e.g. GCC 3.4) enforce + // this limitation. + EXPECT_EQ( + PrintPointer(reinterpret_cast( + reinterpret_cast(&MyFunction))), + Print(&MyFunction)); + int (*p)(bool) = NULL; // NOLINT + EXPECT_EQ("NULL", Print(p)); +} + +// An assertion predicate determining whether a one string is a prefix for +// another. +template +AssertionResult HasPrefix(const StringType& str, const StringType& prefix) { + if (str.find(prefix, 0) == 0) + return AssertionSuccess(); + + const bool is_wide_string = sizeof(prefix[0]) > 1; + const char* const begin_string_quote = is_wide_string ? "L\"" : "\""; + return AssertionFailure() + << begin_string_quote << prefix << "\" is not a prefix of " + << begin_string_quote << str << "\"\n"; +} + +// Tests printing member variable pointers. Although they are called +// pointers, they don't point to a location in the address space. +// Their representation is implementation-defined. Thus they will be +// printed as raw bytes. + +struct Foo { + public: + virtual ~Foo() {} + int MyMethod(char x) { return x + 1; } + virtual char MyVirtualMethod(int /* n */) { return 'a'; } + + int value; +}; + +TEST(PrintPointerTest, MemberVariablePointer) { + EXPECT_TRUE(HasPrefix(Print(&Foo::value), + Print(sizeof(&Foo::value)) + "-byte object ")); + int (Foo::*p) = NULL; // NOLINT + EXPECT_TRUE(HasPrefix(Print(p), + Print(sizeof(p)) + "-byte object ")); +} + +// Tests printing member function pointers. Although they are called +// pointers, they don't point to a location in the address space. +// Their representation is implementation-defined. Thus they will be +// printed as raw bytes. +TEST(PrintPointerTest, MemberFunctionPointer) { + EXPECT_TRUE(HasPrefix(Print(&Foo::MyMethod), + Print(sizeof(&Foo::MyMethod)) + "-byte object ")); + EXPECT_TRUE( + HasPrefix(Print(&Foo::MyVirtualMethod), + Print(sizeof((&Foo::MyVirtualMethod))) + "-byte object ")); + int (Foo::*p)(char) = NULL; // NOLINT + EXPECT_TRUE(HasPrefix(Print(p), + Print(sizeof(p)) + "-byte object ")); +} + +// Tests printing C arrays. + +// The difference between this and Print() is that it ensures that the +// argument is a reference to an array. +template +string PrintArrayHelper(T (&a)[N]) { + return Print(a); +} + +// One-dimensional array. +TEST(PrintArrayTest, OneDimensionalArray) { + int a[5] = { 1, 2, 3, 4, 5 }; + EXPECT_EQ("{ 1, 2, 3, 4, 5 }", PrintArrayHelper(a)); +} + +// Two-dimensional array. +TEST(PrintArrayTest, TwoDimensionalArray) { + int a[2][5] = { + { 1, 2, 3, 4, 5 }, + { 6, 7, 8, 9, 0 } + }; + EXPECT_EQ("{ { 1, 2, 3, 4, 5 }, { 6, 7, 8, 9, 0 } }", PrintArrayHelper(a)); +} + +// Array of const elements. +TEST(PrintArrayTest, ConstArray) { + const bool a[1] = { false }; + EXPECT_EQ("{ false }", PrintArrayHelper(a)); +} + +// char array without terminating NUL. +TEST(PrintArrayTest, CharArrayWithNoTerminatingNul) { + // Array a contains '\0' in the middle and doesn't end with '\0'. 
+ char a[] = { 'H', '\0', 'i' }; + EXPECT_EQ("\"H\\0i\" (no terminating NUL)", PrintArrayHelper(a)); +} + +// const char array with terminating NUL. +TEST(PrintArrayTest, ConstCharArrayWithTerminatingNul) { + const char a[] = "\0Hi"; + EXPECT_EQ("\"\\0Hi\"", PrintArrayHelper(a)); +} + +// const wchar_t array without terminating NUL. +TEST(PrintArrayTest, WCharArrayWithNoTerminatingNul) { + // Array a contains '\0' in the middle and doesn't end with '\0'. + const wchar_t a[] = { L'H', L'\0', L'i' }; + EXPECT_EQ("L\"H\\0i\" (no terminating NUL)", PrintArrayHelper(a)); +} + +// wchar_t array with terminating NUL. +TEST(PrintArrayTest, WConstCharArrayWithTerminatingNul) { + const wchar_t a[] = L"\0Hi"; + EXPECT_EQ("L\"\\0Hi\"", PrintArrayHelper(a)); +} + +// Array of objects. +TEST(PrintArrayTest, ObjectArray) { + string a[3] = { "Hi", "Hello", "Ni hao" }; + EXPECT_EQ("{ \"Hi\", \"Hello\", \"Ni hao\" }", PrintArrayHelper(a)); +} + +// Array with many elements. +TEST(PrintArrayTest, BigArray) { + int a[100] = { 1, 2, 3 }; + EXPECT_EQ("{ 1, 2, 3, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0 }", + PrintArrayHelper(a)); +} + +// Tests printing ::string and ::std::string. + +#if GTEST_HAS_GLOBAL_STRING +// ::string. +TEST(PrintStringTest, StringInGlobalNamespace) { + const char s[] = "'\"?\\\a\b\f\n\0\r\t\v\x7F\xFF a"; + const ::string str(s, sizeof(s)); + EXPECT_EQ("\"'\\\"?\\\\\\a\\b\\f\\n\\0\\r\\t\\v\\x7F\\xFF a\\0\"", + Print(str)); +} +#endif // GTEST_HAS_GLOBAL_STRING + +// ::std::string. +TEST(PrintStringTest, StringInStdNamespace) { + const char s[] = "'\"?\\\a\b\f\n\0\r\t\v\x7F\xFF a"; + const ::std::string str(s, sizeof(s)); + EXPECT_EQ("\"'\\\"?\\\\\\a\\b\\f\\n\\0\\r\\t\\v\\x7F\\xFF a\\0\"", + Print(str)); +} + +TEST(PrintStringTest, StringAmbiguousHex) { + // "\x6BANANA" is ambiguous, it can be interpreted as starting with either of: + // '\x6', '\x6B', or '\x6BA'. + + // a hex escaping sequence following by a decimal digit + EXPECT_EQ("\"0\\x12\" \"3\"", Print(::std::string("0\x12" "3"))); + // a hex escaping sequence following by a hex digit (lower-case) + EXPECT_EQ("\"mm\\x6\" \"bananas\"", Print(::std::string("mm\x6" "bananas"))); + // a hex escaping sequence following by a hex digit (upper-case) + EXPECT_EQ("\"NOM\\x6\" \"BANANA\"", Print(::std::string("NOM\x6" "BANANA"))); + // a hex escaping sequence following by a non-xdigit + EXPECT_EQ("\"!\\x5-!\"", Print(::std::string("!\x5-!"))); +} + +// Tests printing ::wstring and ::std::wstring. + +#if GTEST_HAS_GLOBAL_WSTRING +// ::wstring. +TEST(PrintWideStringTest, StringInGlobalNamespace) { + const wchar_t s[] = L"'\"?\\\a\b\f\n\0\r\t\v\xD3\x576\x8D3\xC74D a"; + const ::wstring str(s, sizeof(s)/sizeof(wchar_t)); + EXPECT_EQ("L\"'\\\"?\\\\\\a\\b\\f\\n\\0\\r\\t\\v" + "\\xD3\\x576\\x8D3\\xC74D a\\0\"", + Print(str)); +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +#if GTEST_HAS_STD_WSTRING +// ::std::wstring. +TEST(PrintWideStringTest, StringInStdNamespace) { + const wchar_t s[] = L"'\"?\\\a\b\f\n\0\r\t\v\xD3\x576\x8D3\xC74D a"; + const ::std::wstring str(s, sizeof(s)/sizeof(wchar_t)); + EXPECT_EQ("L\"'\\\"?\\\\\\a\\b\\f\\n\\0\\r\\t\\v" + "\\xD3\\x576\\x8D3\\xC74D a\\0\"", + Print(str)); +} + +TEST(PrintWideStringTest, StringAmbiguousHex) { + // same for wide strings. 
+ EXPECT_EQ("L\"0\\x12\" L\"3\"", Print(::std::wstring(L"0\x12" L"3"))); + EXPECT_EQ("L\"mm\\x6\" L\"bananas\"", + Print(::std::wstring(L"mm\x6" L"bananas"))); + EXPECT_EQ("L\"NOM\\x6\" L\"BANANA\"", + Print(::std::wstring(L"NOM\x6" L"BANANA"))); + EXPECT_EQ("L\"!\\x5-!\"", Print(::std::wstring(L"!\x5-!"))); +} +#endif // GTEST_HAS_STD_WSTRING + +// Tests printing types that support generic streaming (i.e. streaming +// to std::basic_ostream for any valid Char and +// CharTraits types). + +// Tests printing a non-template type that supports generic streaming. + +class AllowsGenericStreaming {}; + +template +std::basic_ostream& operator<<( + std::basic_ostream& os, + const AllowsGenericStreaming& /* a */) { + return os << "AllowsGenericStreaming"; +} + +TEST(PrintTypeWithGenericStreamingTest, NonTemplateType) { + AllowsGenericStreaming a; + EXPECT_EQ("AllowsGenericStreaming", Print(a)); +} + +// Tests printing a template type that supports generic streaming. + +template +class AllowsGenericStreamingTemplate {}; + +template +std::basic_ostream& operator<<( + std::basic_ostream& os, + const AllowsGenericStreamingTemplate& /* a */) { + return os << "AllowsGenericStreamingTemplate"; +} + +TEST(PrintTypeWithGenericStreamingTest, TemplateType) { + AllowsGenericStreamingTemplate a; + EXPECT_EQ("AllowsGenericStreamingTemplate", Print(a)); +} + +// Tests printing a type that supports generic streaming and can be +// implicitly converted to another printable type. + +template +class AllowsGenericStreamingAndImplicitConversionTemplate { + public: + operator bool() const { return false; } +}; + +template +std::basic_ostream& operator<<( + std::basic_ostream& os, + const AllowsGenericStreamingAndImplicitConversionTemplate& /* a */) { + return os << "AllowsGenericStreamingAndImplicitConversionTemplate"; +} + +TEST(PrintTypeWithGenericStreamingTest, TypeImplicitlyConvertible) { + AllowsGenericStreamingAndImplicitConversionTemplate a; + EXPECT_EQ("AllowsGenericStreamingAndImplicitConversionTemplate", Print(a)); +} + +#if GTEST_HAS_STRING_PIECE_ + +// Tests printing StringPiece. + +TEST(PrintStringPieceTest, SimpleStringPiece) { + const StringPiece sp = "Hello"; + EXPECT_EQ("\"Hello\"", Print(sp)); +} + +TEST(PrintStringPieceTest, UnprintableCharacters) { + const char str[] = "NUL (\0) and \r\t"; + const StringPiece sp(str, sizeof(str) - 1); + EXPECT_EQ("\"NUL (\\0) and \\r\\t\"", Print(sp)); +} + +#endif // GTEST_HAS_STRING_PIECE_ + +// Tests printing STL containers. + +TEST(PrintStlContainerTest, EmptyDeque) { + deque empty; + EXPECT_EQ("{}", Print(empty)); +} + +TEST(PrintStlContainerTest, NonEmptyDeque) { + deque non_empty; + non_empty.push_back(1); + non_empty.push_back(3); + EXPECT_EQ("{ 1, 3 }", Print(non_empty)); +} + +#if GTEST_HAS_HASH_MAP_ + +TEST(PrintStlContainerTest, OneElementHashMap) { + hash_map map1; + map1[1] = 'a'; + EXPECT_EQ("{ (1, 'a' (97, 0x61)) }", Print(map1)); +} + +TEST(PrintStlContainerTest, HashMultiMap) { + hash_multimap map1; + map1.insert(make_pair(5, true)); + map1.insert(make_pair(5, false)); + + // Elements of hash_multimap can be printed in any order. + const string result = Print(map1); + EXPECT_TRUE(result == "{ (5, true), (5, false) }" || + result == "{ (5, false), (5, true) }") + << " where Print(map1) returns \"" << result << "\"."; +} + +#endif // GTEST_HAS_HASH_MAP_ + +#if GTEST_HAS_HASH_SET_ + +TEST(PrintStlContainerTest, HashSet) { + hash_set + +// Clones a 0-terminated C string, allocating memory using new. 
+const char* MyString::CloneCString(const char* a_c_string) { + if (a_c_string == NULL) return NULL; + + const size_t len = strlen(a_c_string); + char* const clone = new char[ len + 1 ]; + memcpy(clone, a_c_string, len + 1); + + return clone; +} + +// Sets the 0-terminated C string this MyString object +// represents. +void MyString::Set(const char* a_c_string) { + // Makes sure this works when c_string == c_string_ + const char* const temp = MyString::CloneCString(a_c_string); + delete[] c_string_; + c_string_ = temp; +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + +#include + +#include "sample4.h" + +// Returns the current counter value, and increments it. +int Counter::Increment() { + return counter_++; +} + +// Prints the current counter value to STDOUT. +void Counter::Print() const { + printf("%d", counter_); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "sample1.h" + +// Returns n! (the factorial of n). For negative n, n! is defined to be 1. +int Factorial(int n) { + int result = 1; + for (int i = 1; i <= n; i++) { + result *= i; + } + + return result; +} + +// Returns true iff n is a prime number. +bool IsPrime(int n) { + // Trivial case 1: small numbers + if (n <= 1) return false; + + // Trivial case 2: even numbers + if (n % 2 == 0) return n == 2; + + // Now, we have that n is odd and n >= 3. + + // Try to divide n by every odd number i, starting from 3 + for (int i = 3; ; i += 2) { + // We only have to try i up to the squre root of n + if (i > n/i) break; + + // Now, we have i <= n/i < n. + // If n is divisible by i, n is not prime. + if (n % i == 0) return false; + } + + // n has no integer factor in the range (1, n), and thus is prime. + return true; +} +// Copyright 2009 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to use Google Test listener API to implement +// a primitive leak checker. 
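A side note on the IsPrime() loop above: it bounds trial division with `i > n/i` rather than `i * i > n`. For positive integers the two conditions are equivalent, but the division form cannot overflow when `i` is near sqrt(INT_MAX). A minimal, hypothetical check of that equivalence (not part of the sample files themselves) might look like this:

#include <cassert>
#include <climits>

// Hypothetical illustration: for positive i and n, (i > n / i) holds exactly
// when i * i > n, but the division form never overflows int.
int main() {
  const int n = INT_MAX;  // worst case for the multiplication form
  for (int i = 46339; i <= 46342; ++i) {  // values straddling sqrt(INT_MAX)
    const bool division_form = (i > n / i);
    const bool exact = (static_cast<long long>(i) * i > n);  // widened, safe
    assert(division_form == exact);
  }
  return 0;
}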
+ +#include +#include + +#include "gtest/gtest.h" + +using ::testing::EmptyTestEventListener; +using ::testing::InitGoogleTest; +using ::testing::Test; +using ::testing::TestCase; +using ::testing::TestEventListeners; +using ::testing::TestInfo; +using ::testing::TestPartResult; +using ::testing::UnitTest; + +namespace { + +// We will track memory used by this class. +class Water { + public: + // Normal Water declarations go here. + + // operator new and operator delete help us control water allocation. + void* operator new(size_t allocation_size) { + allocated_++; + return malloc(allocation_size); + } + + void operator delete(void* block, size_t /* allocation_size */) { + allocated_--; + free(block); + } + + static int allocated() { return allocated_; } + + private: + static int allocated_; +}; + +int Water::allocated_ = 0; + +// This event listener monitors how many Water objects are created and +// destroyed by each test, and reports a failure if a test leaks some Water +// objects. It does this by comparing the number of live Water objects at +// the beginning of a test and at the end of a test. +class LeakChecker : public EmptyTestEventListener { + private: + // Called before a test starts. + virtual void OnTestStart(const TestInfo& /* test_info */) { + initially_allocated_ = Water::allocated(); + } + + // Called after a test ends. + virtual void OnTestEnd(const TestInfo& /* test_info */) { + int difference = Water::allocated() - initially_allocated_; + + // You can generate a failure in any event handler except + // OnTestPartResult. Just use an appropriate Google Test assertion to do + // it. + EXPECT_LE(difference, 0) << "Leaked " << difference << " unit(s) of Water!"; + } + + int initially_allocated_; +}; + +TEST(ListenersTest, DoesNotLeak) { + Water* water = new Water; + delete water; +} + +// This should fail when the --check_for_leaks command line flag is +// specified. +TEST(ListenersTest, LeaksWater) { + Water* water = new Water; + EXPECT_TRUE(water != NULL); +} + +} // namespace + +int main(int argc, char **argv) { + InitGoogleTest(&argc, argv); + + bool check_for_leaks = false; + if (argc > 1 && strcmp(argv[1], "--check_for_leaks") == 0 ) + check_for_leaks = true; + else + printf("%s\n", "Run this program with --check_for_leaks to enable " + "custom leak checking in the tests."); + + // If we are given the --check_for_leaks command line flag, installs the + // leak checker. + if (check_for_leaks) { + TestEventListeners& listeners = UnitTest::GetInstance()->listeners(); + + // Adds the leak checker to the end of the test event listener list, + // after the default text output printer and the default XML report + // generator. + // + // The order is important - it ensures that failures generated in the + // leak checker's OnTestEnd() method are processed by the text and XML + // printers *before* their OnTestEnd() methods are called, such that + // they are attributed to the right test. Remember that a listener + // receives an OnXyzStart event *after* listeners preceding it in the + // list received that event, and receives an OnXyzEnd event *before* + // listeners preceding it. + // + // We don't need to worry about deleting the new listener later, as + // Google Test will do it. + listeners.Append(new LeakChecker); + } + return RUN_ALL_TESTS(); +} +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + + +// This sample shows how to write a simple unit test for a function, +// using Google C++ testing framework. +// +// Writing a unit test using Google C++ testing framework is easy as 1-2-3: + + +// Step 1. Include necessary header files such that the stuff your +// test logic needs is declared. +// +// Don't forget gtest.h, which declares the testing framework. + +#include +#include "sample1.h" +#include "gtest/gtest.h" + + +// Step 2. Use the TEST macro to define your tests. +// +// TEST has two parameters: the test case name and the test name. +// After using the macro, you should define your test logic between a +// pair of braces. You can use a bunch of macros to indicate the +// success or failure of a test. EXPECT_TRUE and EXPECT_EQ are +// examples of such macros. For a complete list, see gtest.h. +// +// +// +// In Google Test, tests are grouped into test cases. This is how we +// keep test code organized. You should put logically related tests +// into the same test case. +// +// The test case name and the test name should both be valid C++ +// identifiers. And you should not use underscore (_) in the names. +// +// Google Test guarantees that each test you define is run exactly +// once, but it makes no guarantee on the order the tests are +// executed. Therefore, you should write your tests in such a way +// that their results don't depend on their order. +// +// + + +// Tests Factorial(). + +// Tests factorial of negative numbers. +TEST(FactorialTest, Negative) { + // This test is named "Negative", and belongs to the "FactorialTest" + // test case. + EXPECT_EQ(1, Factorial(-5)); + EXPECT_EQ(1, Factorial(-1)); + EXPECT_GT(Factorial(-10), 0); + + // + // + // EXPECT_EQ(expected, actual) is the same as + // + // EXPECT_TRUE((expected) == (actual)) + // + // except that it will print both the expected value and the actual + // value when the assertion fails. 
This is very helpful for + // debugging. Therefore in this case EXPECT_EQ is preferred. + // + // On the other hand, EXPECT_TRUE accepts any Boolean expression, + // and is thus more general. + // + // +} + +// Tests factorial of 0. +TEST(FactorialTest, Zero) { + EXPECT_EQ(1, Factorial(0)); +} + +// Tests factorial of positive numbers. +TEST(FactorialTest, Positive) { + EXPECT_EQ(1, Factorial(1)); + EXPECT_EQ(2, Factorial(2)); + EXPECT_EQ(6, Factorial(3)); + EXPECT_EQ(40320, Factorial(8)); +} + + +// Tests IsPrime() + +// Tests negative input. +TEST(IsPrimeTest, Negative) { + // This test belongs to the IsPrimeTest test case. + + EXPECT_FALSE(IsPrime(-1)); + EXPECT_FALSE(IsPrime(-2)); + EXPECT_FALSE(IsPrime(INT_MIN)); +} + +// Tests some trivial cases. +TEST(IsPrimeTest, Trivial) { + EXPECT_FALSE(IsPrime(0)); + EXPECT_FALSE(IsPrime(1)); + EXPECT_TRUE(IsPrime(2)); + EXPECT_TRUE(IsPrime(3)); +} + +// Tests positive input. +TEST(IsPrimeTest, Positive) { + EXPECT_FALSE(IsPrime(4)); + EXPECT_TRUE(IsPrime(5)); + EXPECT_FALSE(IsPrime(6)); + EXPECT_TRUE(IsPrime(23)); +} + +// Step 3. Call RUN_ALL_TESTS() in main(). +// +// We do this by linking in src/gtest_main.cc file, which consists of +// a main() function which calls RUN_ALL_TESTS() for us. +// +// This runs all the tests you've defined, prints the result, and +// returns 0 if successful, or 1 otherwise. +// +// Did you notice that we didn't register the tests? The +// RUN_ALL_TESTS() macro magically knows about all the tests we +// defined. Isn't this convenient? +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, \ No newline at end of file diff --git a/qa/workunits/rgw/jcksum/file-256k b/qa/workunits/rgw/jcksum/file-256k new file mode 100644 index 000000000000..7b52e1829a17 --- /dev/null +++ b/qa/workunits/rgw/jcksum/file-256k @@ -0,0 +1,7216 @@ +// Copyright 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Josh Kelley (joshkel@gmail.com) +// +// Google C++ Testing Framework (Google Test) +// +// C++Builder's IDE cannot build a static library from files with hyphens +// in their name. See http://qc.codegear.com/wc/qcmain.aspx?d=70977 . +// This file serves as a workaround. + +#include "src/gtest-all.cc" +// Copyright 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Josh Kelley (joshkel@gmail.com) +// +// Google C++ Testing Framework (Google Test) +// +// Links gtest.lib and gtest_main.lib into the current project in C++Builder. +// This means that these libraries can't be renamed, but it's the only way to +// ensure that Debug versus Release test builds are linked against the +// appropriate Debug or Release build of the libraries. + +#pragma link "gtest.lib" +#pragma link "gtest_main.lib" +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "sample2.h" + +#include + +// Clones a 0-terminated C string, allocating memory using new. +const char* MyString::CloneCString(const char* a_c_string) { + if (a_c_string == NULL) return NULL; + + const size_t len = strlen(a_c_string); + char* const clone = new char[ len + 1 ]; + memcpy(clone, a_c_string, len + 1); + + return clone; +} + +// Sets the 0-terminated C string this MyString object +// represents. +void MyString::Set(const char* a_c_string) { + // Makes sure this works when c_string == c_string_ + const char* const temp = MyString::CloneCString(a_c_string); + delete[] c_string_; + c_string_ = temp; +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + +#include + +#include "sample4.h" + +// Returns the current counter value, and increments it. +int Counter::Increment() { + return counter_++; +} + +// Prints the current counter value to STDOUT. +void Counter::Print() const { + printf("%d", counter_); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "sample1.h" + +// Returns n! (the factorial of n). For negative n, n! is defined to be 1. +int Factorial(int n) { + int result = 1; + for (int i = 1; i <= n; i++) { + result *= i; + } + + return result; +} + +// Returns true iff n is a prime number. +bool IsPrime(int n) { + // Trivial case 1: small numbers + if (n <= 1) return false; + + // Trivial case 2: even numbers + if (n % 2 == 0) return n == 2; + + // Now, we have that n is odd and n >= 3. + + // Try to divide n by every odd number i, starting from 3 + for (int i = 3; ; i += 2) { + // We only have to try i up to the squre root of n + if (i > n/i) break; + + // Now, we have i <= n/i < n. + // If n is divisible by i, n is not prime. + if (n % i == 0) return false; + } + + // n has no integer factor in the range (1, n), and thus is prime. + return true; +} +// Copyright 2009 Google Inc. All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to use Google Test listener API to implement +// a primitive leak checker. + +#include +#include + +#include "gtest/gtest.h" + +using ::testing::EmptyTestEventListener; +using ::testing::InitGoogleTest; +using ::testing::Test; +using ::testing::TestCase; +using ::testing::TestEventListeners; +using ::testing::TestInfo; +using ::testing::TestPartResult; +using ::testing::UnitTest; + +namespace { + +// We will track memory used by this class. +class Water { + public: + // Normal Water declarations go here. + + // operator new and operator delete help us control water allocation. + void* operator new(size_t allocation_size) { + allocated_++; + return malloc(allocation_size); + } + + void operator delete(void* block, size_t /* allocation_size */) { + allocated_--; + free(block); + } + + static int allocated() { return allocated_; } + + private: + static int allocated_; +}; + +int Water::allocated_ = 0; + +// This event listener monitors how many Water objects are created and +// destroyed by each test, and reports a failure if a test leaks some Water +// objects. It does this by comparing the number of live Water objects at +// the beginning of a test and at the end of a test. +class LeakChecker : public EmptyTestEventListener { + private: + // Called before a test starts. + virtual void OnTestStart(const TestInfo& /* test_info */) { + initially_allocated_ = Water::allocated(); + } + + // Called after a test ends. + virtual void OnTestEnd(const TestInfo& /* test_info */) { + int difference = Water::allocated() - initially_allocated_; + + // You can generate a failure in any event handler except + // OnTestPartResult. Just use an appropriate Google Test assertion to do + // it. 
+ EXPECT_LE(difference, 0) << "Leaked " << difference << " unit(s) of Water!"; + } + + int initially_allocated_; +}; + +TEST(ListenersTest, DoesNotLeak) { + Water* water = new Water; + delete water; +} + +// This should fail when the --check_for_leaks command line flag is +// specified. +TEST(ListenersTest, LeaksWater) { + Water* water = new Water; + EXPECT_TRUE(water != NULL); +} + +} // namespace + +int main(int argc, char **argv) { + InitGoogleTest(&argc, argv); + + bool check_for_leaks = false; + if (argc > 1 && strcmp(argv[1], "--check_for_leaks") == 0 ) + check_for_leaks = true; + else + printf("%s\n", "Run this program with --check_for_leaks to enable " + "custom leak checking in the tests."); + + // If we are given the --check_for_leaks command line flag, installs the + // leak checker. + if (check_for_leaks) { + TestEventListeners& listeners = UnitTest::GetInstance()->listeners(); + + // Adds the leak checker to the end of the test event listener list, + // after the default text output printer and the default XML report + // generator. + // + // The order is important - it ensures that failures generated in the + // leak checker's OnTestEnd() method are processed by the text and XML + // printers *before* their OnTestEnd() methods are called, such that + // they are attributed to the right test. Remember that a listener + // receives an OnXyzStart event *after* listeners preceding it in the + // list received that event, and receives an OnXyzEnd event *before* + // listeners preceding it. + // + // We don't need to worry about deleting the new listener later, as + // Google Test will do it. + listeners.Append(new LeakChecker); + } + return RUN_ALL_TESTS(); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + + +// This sample shows how to write a simple unit test for a function, +// using Google C++ testing framework. 
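Regarding the listener registration in the leak-checker sample above: it appends LeakChecker after the default printers, so the standard console output is kept. A minimal sketch, assuming the standard googletest TestEventListeners API, of the alternative where a custom listener replaces the default printer entirely (MyMinimalPrinter is a hypothetical class, not defined in these samples):

#include "gtest/gtest.h"

// Sketch only: Release() detaches the default console printer and transfers
// ownership to the caller, so we delete it ourselves; anything passed to
// Append() is owned and deleted by Google Test.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  ::testing::TestEventListeners& listeners =
      ::testing::UnitTest::GetInstance()->listeners();
  delete listeners.Release(listeners.default_result_printer());
  // listeners.Append(new MyMinimalPrinter);  // hypothetical custom listener
  return RUN_ALL_TESTS();
}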
+// +// Writing a unit test using Google C++ testing framework is easy as 1-2-3: + + +// Step 1. Include necessary header files such that the stuff your +// test logic needs is declared. +// +// Don't forget gtest.h, which declares the testing framework. + +#include +#include "sample1.h" +#include "gtest/gtest.h" + + +// Step 2. Use the TEST macro to define your tests. +// +// TEST has two parameters: the test case name and the test name. +// After using the macro, you should define your test logic between a +// pair of braces. You can use a bunch of macros to indicate the +// success or failure of a test. EXPECT_TRUE and EXPECT_EQ are +// examples of such macros. For a complete list, see gtest.h. +// +// +// +// In Google Test, tests are grouped into test cases. This is how we +// keep test code organized. You should put logically related tests +// into the same test case. +// +// The test case name and the test name should both be valid C++ +// identifiers. And you should not use underscore (_) in the names. +// +// Google Test guarantees that each test you define is run exactly +// once, but it makes no guarantee on the order the tests are +// executed. Therefore, you should write your tests in such a way +// that their results don't depend on their order. +// +// + + +// Tests Factorial(). + +// Tests factorial of negative numbers. +TEST(FactorialTest, Negative) { + // This test is named "Negative", and belongs to the "FactorialTest" + // test case. + EXPECT_EQ(1, Factorial(-5)); + EXPECT_EQ(1, Factorial(-1)); + EXPECT_GT(Factorial(-10), 0); + + // + // + // EXPECT_EQ(expected, actual) is the same as + // + // EXPECT_TRUE((expected) == (actual)) + // + // except that it will print both the expected value and the actual + // value when the assertion fails. This is very helpful for + // debugging. Therefore in this case EXPECT_EQ is preferred. + // + // On the other hand, EXPECT_TRUE accepts any Boolean expression, + // and is thus more general. + // + // +} + +// Tests factorial of 0. +TEST(FactorialTest, Zero) { + EXPECT_EQ(1, Factorial(0)); +} + +// Tests factorial of positive numbers. +TEST(FactorialTest, Positive) { + EXPECT_EQ(1, Factorial(1)); + EXPECT_EQ(2, Factorial(2)); + EXPECT_EQ(6, Factorial(3)); + EXPECT_EQ(40320, Factorial(8)); +} + + +// Tests IsPrime() + +// Tests negative input. +TEST(IsPrimeTest, Negative) { + // This test belongs to the IsPrimeTest test case. + + EXPECT_FALSE(IsPrime(-1)); + EXPECT_FALSE(IsPrime(-2)); + EXPECT_FALSE(IsPrime(INT_MIN)); +} + +// Tests some trivial cases. +TEST(IsPrimeTest, Trivial) { + EXPECT_FALSE(IsPrime(0)); + EXPECT_FALSE(IsPrime(1)); + EXPECT_TRUE(IsPrime(2)); + EXPECT_TRUE(IsPrime(3)); +} + +// Tests positive input. +TEST(IsPrimeTest, Positive) { + EXPECT_FALSE(IsPrime(4)); + EXPECT_TRUE(IsPrime(5)); + EXPECT_FALSE(IsPrime(6)); + EXPECT_TRUE(IsPrime(23)); +} + +// Step 3. Call RUN_ALL_TESTS() in main(). +// +// We do this by linking in src/gtest_main.cc file, which consists of +// a main() function which calls RUN_ALL_TESTS() for us. +// +// This runs all the tests you've defined, prints the result, and +// returns 0 if successful, or 1 otherwise. +// +// Did you notice that we didn't register the tests? The +// RUN_ALL_TESTS() macro magically knows about all the tests we +// defined. Isn't this convenient? +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + + +// This sample shows how to write a more complex unit test for a class +// that has multiple member functions. +// +// Usually, it's a good idea to have one test for each method in your +// class. You don't have to do that exactly, but it helps to keep +// your tests organized. You may also throw in additional tests as +// needed. + +#include "sample2.h" +#include "gtest/gtest.h" + +// In this example, we test the MyString class (a simple string). + +// Tests the default c'tor. +TEST(MyString, DefaultConstructor) { + const MyString s; + + // Asserts that s.c_string() returns NULL. + // + // + // + // If we write NULL instead of + // + // static_cast(NULL) + // + // in this assertion, it will generate a warning on gcc 3.4. The + // reason is that EXPECT_EQ needs to know the types of its + // arguments in order to print them when it fails. Since NULL is + // #defined as 0, the compiler will use the formatter function for + // int to print it. However, gcc thinks that NULL should be used as + // a pointer, not an int, and therefore complains. + // + // The root of the problem is C++'s lack of distinction between the + // integer number 0 and the null pointer constant. Unfortunately, + // we have to live with this fact. + // + // + EXPECT_STREQ(NULL, s.c_string()); + + EXPECT_EQ(0u, s.Length()); +} + +const char kHelloString[] = "Hello, world!"; + +// Tests the c'tor that accepts a C string. +TEST(MyString, ConstructorFromCString) { + const MyString s(kHelloString); + EXPECT_EQ(0, strcmp(s.c_string(), kHelloString)); + EXPECT_EQ(sizeof(kHelloString)/sizeof(kHelloString[0]) - 1, + s.Length()); +} + +// Tests the copy c'tor. +TEST(MyString, CopyConstructor) { + const MyString s1(kHelloString); + const MyString s2 = s1; + EXPECT_EQ(0, strcmp(s2.c_string(), kHelloString)); +} + +// Tests the Set method. 
+TEST(MyString, Set) { + MyString s; + + s.Set(kHelloString); + EXPECT_EQ(0, strcmp(s.c_string(), kHelloString)); + + // Set should work when the input pointer is the same as the one + // already in the MyString object. + s.Set(s.c_string()); + EXPECT_EQ(0, strcmp(s.c_string(), kHelloString)); + + // Can we set the MyString to NULL? + s.Set(NULL); + EXPECT_STREQ(NULL, s.c_string()); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A sample program demonstrating using Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) + + +// In this example, we use a more advanced feature of Google Test called +// test fixture. +// +// A test fixture is a place to hold objects and functions shared by +// all tests in a test case. Using a test fixture avoids duplicating +// the test code necessary to initialize and cleanup those common +// objects for each test. It is also useful for defining sub-routines +// that your tests need to invoke a lot. +// +// +// +// The tests share the test fixture in the sense of code sharing, not +// data sharing. Each test is given its own fresh copy of the +// fixture. You cannot expect the data modified by one test to be +// passed on to another test, which is a bad idea. +// +// The reason for this design is that tests should be independent and +// repeatable. In particular, a test should not fail as the result of +// another test's failure. If one test depends on info produced by +// another test, then the two tests should really be one big test. +// +// The macros for indicating the success/failure of a test +// (EXPECT_TRUE, FAIL, etc) need to know what the current test is +// (when Google Test prints the test result, it tells you which test +// each failure belongs to). Technically, these macros invoke a +// member function of the Test class. Therefore, you cannot use them +// in a global function. That's why you should put test sub-routines +// in a test fixture. 
+// +// + +#include "sample3-inl.h" +#include "gtest/gtest.h" + +// To use a test fixture, derive a class from testing::Test. +class QueueTest : public testing::Test { + protected: // You should make the members protected s.t. they can be + // accessed from sub-classes. + + // virtual void SetUp() will be called before each test is run. You + // should define it if you need to initialize the varaibles. + // Otherwise, this can be skipped. + virtual void SetUp() { + q1_.Enqueue(1); + q2_.Enqueue(2); + q2_.Enqueue(3); + } + + // virtual void TearDown() will be called after each test is run. + // You should define it if there is cleanup work to do. Otherwise, + // you don't have to provide it. + // + // virtual void TearDown() { + // } + + // A helper function that some test uses. + static int Double(int n) { + return 2*n; + } + + // A helper function for testing Queue::Map(). + void MapTester(const Queue * q) { + // Creates a new queue, where each element is twice as big as the + // corresponding one in q. + const Queue * const new_q = q->Map(Double); + + // Verifies that the new queue has the same size as q. + ASSERT_EQ(q->Size(), new_q->Size()); + + // Verifies the relationship between the elements of the two queues. + for ( const QueueNode * n1 = q->Head(), * n2 = new_q->Head(); + n1 != NULL; n1 = n1->next(), n2 = n2->next() ) { + EXPECT_EQ(2 * n1->element(), n2->element()); + } + + delete new_q; + } + + // Declares the variables your tests want to use. + Queue q0_; + Queue q1_; + Queue q2_; +}; + +// When you have a test fixture, you define a test using TEST_F +// instead of TEST. + +// Tests the default c'tor. +TEST_F(QueueTest, DefaultConstructor) { + // You can access data in the test fixture here. + EXPECT_EQ(0u, q0_.Size()); +} + +// Tests Dequeue(). +TEST_F(QueueTest, Dequeue) { + int * n = q0_.Dequeue(); + EXPECT_TRUE(n == NULL); + + n = q1_.Dequeue(); + ASSERT_TRUE(n != NULL); + EXPECT_EQ(1, *n); + EXPECT_EQ(0u, q1_.Size()); + delete n; + + n = q2_.Dequeue(); + ASSERT_TRUE(n != NULL); + EXPECT_EQ(2, *n); + EXPECT_EQ(1u, q2_.Size()); + delete n; +} + +// Tests the Queue::Map() function. +TEST_F(QueueTest, Map) { + MapTester(&q0_); + MapTester(&q1_); + MapTester(&q2_); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/gtest.h" +#include "sample4.h" + +// Tests the Increment() method. +TEST(Counter, Increment) { + Counter c; + + // EXPECT_EQ() evaluates its arguments exactly once, so they + // can have side effects. + + EXPECT_EQ(0, c.Increment()); + EXPECT_EQ(1, c.Increment()); + EXPECT_EQ(2, c.Increment()); +} +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// This sample teaches how to reuse a test fixture in multiple test +// cases by deriving sub-fixtures from it. +// +// When you define a test fixture, you specify the name of the test +// case that will use this fixture. Therefore, a test fixture can +// be used by only one test case. +// +// Sometimes, more than one test cases may want to use the same or +// slightly different test fixtures. For example, you may want to +// make sure that all tests for a GUI library don't leak important +// system resources like fonts and brushes. In Google Test, you do +// this by putting the shared logic in a super (as in "super class") +// test fixture, and then have each test case use a fixture derived +// from this super fixture. + +#include +#include +#include "sample3-inl.h" +#include "gtest/gtest.h" +#include "sample1.h" + +// In this sample, we want to ensure that every test finishes within +// ~5 seconds. If a test takes longer to run, we consider it a +// failure. 
+// +// We put the code for timing a test in a test fixture called +// "QuickTest". QuickTest is intended to be the super fixture that +// other fixtures derive from, therefore there is no test case with +// the name "QuickTest". This is OK. +// +// Later, we will derive multiple test fixtures from QuickTest. +class QuickTest : public testing::Test { + protected: + // Remember that SetUp() is run immediately before a test starts. + // This is a good place to record the start time. + virtual void SetUp() { + start_time_ = time(NULL); + } + + // TearDown() is invoked immediately after a test finishes. Here we + // check if the test was too slow. + virtual void TearDown() { + // Gets the time when the test finishes + const time_t end_time = time(NULL); + + // Asserts that the test took no more than ~5 seconds. Did you + // know that you can use assertions in SetUp() and TearDown() as + // well? + EXPECT_TRUE(end_time - start_time_ <= 5) << "The test took too long."; + } + + // The UTC time (in seconds) when the test starts + time_t start_time_; +}; + + +// We derive a fixture named IntegerFunctionTest from the QuickTest +// fixture. All tests using this fixture will be automatically +// required to be quick. +class IntegerFunctionTest : public QuickTest { + // We don't need any more logic than already in the QuickTest fixture. + // Therefore the body is empty. +}; + + +// Now we can write tests in the IntegerFunctionTest test case. + +// Tests Factorial() +TEST_F(IntegerFunctionTest, Factorial) { + // Tests factorial of negative numbers. + EXPECT_EQ(1, Factorial(-5)); + EXPECT_EQ(1, Factorial(-1)); + EXPECT_GT(Factorial(-10), 0); + + // Tests factorial of 0. + EXPECT_EQ(1, Factorial(0)); + + // Tests factorial of positive numbers. + EXPECT_EQ(1, Factorial(1)); + EXPECT_EQ(2, Factorial(2)); + EXPECT_EQ(6, Factorial(3)); + EXPECT_EQ(40320, Factorial(8)); +} + + +// Tests IsPrime() +TEST_F(IntegerFunctionTest, IsPrime) { + // Tests negative input. + EXPECT_FALSE(IsPrime(-1)); + EXPECT_FALSE(IsPrime(-2)); + EXPECT_FALSE(IsPrime(INT_MIN)); + + // Tests some trivial cases. + EXPECT_FALSE(IsPrime(0)); + EXPECT_FALSE(IsPrime(1)); + EXPECT_TRUE(IsPrime(2)); + EXPECT_TRUE(IsPrime(3)); + + // Tests positive input. + EXPECT_FALSE(IsPrime(4)); + EXPECT_TRUE(IsPrime(5)); + EXPECT_FALSE(IsPrime(6)); + EXPECT_TRUE(IsPrime(23)); +} + + +// The next test case (named "QueueTest") also needs to be quick, so +// we derive another fixture from QuickTest. +// +// The QueueTest test fixture has some logic and shared objects in +// addition to what's in QuickTest already. We define the additional +// stuff inside the body of the test fixture, as usual. +class QueueTest : public QuickTest { + protected: + virtual void SetUp() { + // First, we need to set up the super fixture (QuickTest). + QuickTest::SetUp(); + + // Second, some additional setup for this fixture. + q1_.Enqueue(1); + q2_.Enqueue(2); + q2_.Enqueue(3); + } + + // By default, TearDown() inherits the behavior of + // QuickTest::TearDown(). As we have no additional cleaning work + // for QueueTest, we omit it here. + // + // virtual void TearDown() { + // QuickTest::TearDown(); + // } + + Queue q0_; + Queue q1_; + Queue q2_; +}; + + +// Now, let's write tests using the QueueTest fixture. + +// Tests the default constructor. +TEST_F(QueueTest, DefaultConstructor) { + EXPECT_EQ(0u, q0_.Size()); +} + +// Tests Dequeue(). 
+TEST_F(QueueTest, Dequeue) { + int* n = q0_.Dequeue(); + EXPECT_TRUE(n == NULL); + + n = q1_.Dequeue(); + EXPECT_TRUE(n != NULL); + EXPECT_EQ(1, *n); + EXPECT_EQ(0u, q1_.Size()); + delete n; + + n = q2_.Dequeue(); + EXPECT_TRUE(n != NULL); + EXPECT_EQ(2, *n); + EXPECT_EQ(1u, q2_.Size()); + delete n; +} + +// If necessary, you can derive further test fixtures from a derived +// fixture itself. For example, you can derive another fixture from +// QueueTest. Google Test imposes no limit on how deep the hierarchy +// can be. In practice, however, you probably don't want it to be too +// deep as to be confusing. +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// This sample shows how to test common properties of multiple +// implementations of the same interface (aka interface tests). + +// The interface and its implementations are in this header. +#include "prime_tables.h" + +#include "gtest/gtest.h" + +// First, we define some factory functions for creating instances of +// the implementations. You may be able to skip this step if all your +// implementations can be constructed the same way. + +template <class T> +PrimeTable* CreatePrimeTable(); + +template <> +PrimeTable* CreatePrimeTable<OnTheFlyPrimeTable>() { + return new OnTheFlyPrimeTable; +} + +template <> +PrimeTable* CreatePrimeTable<PreCalculatedPrimeTable>() { + return new PreCalculatedPrimeTable(10000); +} + +// Then we define a test fixture class template. +template <class T> +class PrimeTableTest : public testing::Test { + protected: + // The ctor calls the factory function to create a prime table + // implemented by T. + PrimeTableTest() : table_(CreatePrimeTable<T>()) {} + + virtual ~PrimeTableTest() { delete table_; } + + // Note that we test an implementation via the base interface + // instead of the actual implementation class. This is important + // for keeping the tests close to the real world scenario, where the + // implementation is invoked via the base interface.
It avoids + // got-yas where the implementation class has a method that shadows + // a method with the same name (but slightly different argument + // types) in the base interface, for example. + PrimeTable* const table_; +}; + +#if GTEST_HAS_TYPED_TEST + +using testing::Types; + +// Google Test offers two ways for reusing tests for different types. +// The first is called "typed tests". You should use it if you +// already know *all* the types you are gonna exercise when you write +// the tests. + +// To write a typed test case, first use +// +// TYPED_TEST_CASE(TestCaseName, TypeList); +// +// to declare it and specify the type parameters. As with TEST_F, +// TestCaseName must match the test fixture name. + +// The list of types we want to test. +typedef Types Implementations; + +TYPED_TEST_CASE(PrimeTableTest, Implementations); + +// Then use TYPED_TEST(TestCaseName, TestName) to define a typed test, +// similar to TEST_F. +TYPED_TEST(PrimeTableTest, ReturnsFalseForNonPrimes) { + // Inside the test body, you can refer to the type parameter by + // TypeParam, and refer to the fixture class by TestFixture. We + // don't need them in this example. + + // Since we are in the template world, C++ requires explicitly + // writing 'this->' when referring to members of the fixture class. + // This is something you have to learn to live with. + EXPECT_FALSE(this->table_->IsPrime(-5)); + EXPECT_FALSE(this->table_->IsPrime(0)); + EXPECT_FALSE(this->table_->IsPrime(1)); + EXPECT_FALSE(this->table_->IsPrime(4)); + EXPECT_FALSE(this->table_->IsPrime(6)); + EXPECT_FALSE(this->table_->IsPrime(100)); +} + +TYPED_TEST(PrimeTableTest, ReturnsTrueForPrimes) { + EXPECT_TRUE(this->table_->IsPrime(2)); + EXPECT_TRUE(this->table_->IsPrime(3)); + EXPECT_TRUE(this->table_->IsPrime(5)); + EXPECT_TRUE(this->table_->IsPrime(7)); + EXPECT_TRUE(this->table_->IsPrime(11)); + EXPECT_TRUE(this->table_->IsPrime(131)); +} + +TYPED_TEST(PrimeTableTest, CanGetNextPrime) { + EXPECT_EQ(2, this->table_->GetNextPrime(0)); + EXPECT_EQ(3, this->table_->GetNextPrime(2)); + EXPECT_EQ(5, this->table_->GetNextPrime(3)); + EXPECT_EQ(7, this->table_->GetNextPrime(5)); + EXPECT_EQ(11, this->table_->GetNextPrime(7)); + EXPECT_EQ(131, this->table_->GetNextPrime(128)); +} + +// That's it! Google Test will repeat each TYPED_TEST for each type +// in the type list specified in TYPED_TEST_CASE. Sit back and be +// happy that you don't have to define them multiple times. + +#endif // GTEST_HAS_TYPED_TEST + +#if GTEST_HAS_TYPED_TEST_P + +using testing::Types; + +// Sometimes, however, you don't yet know all the types that you want +// to test when you write the tests. For example, if you are the +// author of an interface and expect other people to implement it, you +// might want to write a set of tests to make sure each implementation +// conforms to some basic requirements, but you don't know what +// implementations will be written in the future. +// +// How can you write the tests without committing to the type +// parameters? That's what "type-parameterized tests" can do for you. +// It is a bit more involved than typed tests, but in return you get a +// test pattern that can be reused in many contexts, which is a big +// win. Here's how you do it: + +// First, define a test fixture class template. Here we just reuse +// the PrimeTableTest fixture defined earlier: + +template +class PrimeTableTest2 : public PrimeTableTest { +}; + +// Then, declare the test case. 
The argument is the name of the test +// fixture, and also the name of the test case (as usual). The _P +// suffix is for "parameterized" or "pattern". +TYPED_TEST_CASE_P(PrimeTableTest2); + +// Next, use TYPED_TEST_P(TestCaseName, TestName) to define a test, +// similar to what you do with TEST_F. +TYPED_TEST_P(PrimeTableTest2, ReturnsFalseForNonPrimes) { + EXPECT_FALSE(this->table_->IsPrime(-5)); + EXPECT_FALSE(this->table_->IsPrime(0)); + EXPECT_FALSE(this->table_->IsPrime(1)); + EXPECT_FALSE(this->table_->IsPrime(4)); + EXPECT_FALSE(this->table_->IsPrime(6)); + EXPECT_FALSE(this->table_->IsPrime(100)); +} + +TYPED_TEST_P(PrimeTableTest2, ReturnsTrueForPrimes) { + EXPECT_TRUE(this->table_->IsPrime(2)); + EXPECT_TRUE(this->table_->IsPrime(3)); + EXPECT_TRUE(this->table_->IsPrime(5)); + EXPECT_TRUE(this->table_->IsPrime(7)); + EXPECT_TRUE(this->table_->IsPrime(11)); + EXPECT_TRUE(this->table_->IsPrime(131)); +} + +TYPED_TEST_P(PrimeTableTest2, CanGetNextPrime) { + EXPECT_EQ(2, this->table_->GetNextPrime(0)); + EXPECT_EQ(3, this->table_->GetNextPrime(2)); + EXPECT_EQ(5, this->table_->GetNextPrime(3)); + EXPECT_EQ(7, this->table_->GetNextPrime(5)); + EXPECT_EQ(11, this->table_->GetNextPrime(7)); + EXPECT_EQ(131, this->table_->GetNextPrime(128)); +} + +// Type-parameterized tests involve one extra step: you have to +// enumerate the tests you defined: +REGISTER_TYPED_TEST_CASE_P( + PrimeTableTest2, // The first argument is the test case name. + // The rest of the arguments are the test names. + ReturnsFalseForNonPrimes, ReturnsTrueForPrimes, CanGetNextPrime); + +// At this point the test pattern is done. However, you don't have +// any real test yet as you haven't said which types you want to run +// the tests with. + +// To turn the abstract test pattern into real tests, you instantiate +// it with a list of types. Usually the test pattern will be defined +// in a .h file, and anyone can #include and instantiate it. You can +// even instantiate it more than once in the same program. To tell +// different instances apart, you give each of them a name, which will +// become part of the test case name and can be used in test filters. + +// The list of types we want to test. Note that it doesn't have to be +// defined at the time we write the TYPED_TEST_P()s. +typedef Types + PrimeTableImplementations; +INSTANTIATE_TYPED_TEST_CASE_P(OnTheFlyAndPreCalculated, // Instance name + PrimeTableTest2, // Test case name + PrimeTableImplementations); // Type list + +#endif // GTEST_HAS_TYPED_TEST_P +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to test common properties of multiple +// implementations of an interface (aka interface tests) using +// value-parameterized tests. Each test in the test case has +// a parameter that is an interface pointer to an implementation +// tested. + +// The interface and its implementations are in this header. +#include "prime_tables.h" + +#include "gtest/gtest.h" + +#if GTEST_HAS_PARAM_TEST + +using ::testing::TestWithParam; +using ::testing::Values; + +// As a general rule, to prevent a test from affecting the tests that come +// after it, you should create and destroy the tested objects for each test +// instead of reusing them. In this sample we will define a simple factory +// function for PrimeTable objects. We will instantiate objects in test's +// SetUp() method and delete them in TearDown() method. +typedef PrimeTable* CreatePrimeTableFunc(); + +PrimeTable* CreateOnTheFlyPrimeTable() { + return new OnTheFlyPrimeTable(); +} + +template +PrimeTable* CreatePreCalculatedPrimeTable() { + return new PreCalculatedPrimeTable(max_precalculated); +} + +// Inside the test body, fixture constructor, SetUp(), and TearDown() you +// can refer to the test parameter by GetParam(). In this case, the test +// parameter is a factory function which we call in fixture's SetUp() to +// create and store an instance of PrimeTable. +class PrimeTableTest : public TestWithParam { + public: + virtual ~PrimeTableTest() { delete table_; } + virtual void SetUp() { table_ = (*GetParam())(); } + virtual void TearDown() { + delete table_; + table_ = NULL; + } + + protected: + PrimeTable* table_; +}; + +TEST_P(PrimeTableTest, ReturnsFalseForNonPrimes) { + EXPECT_FALSE(table_->IsPrime(-5)); + EXPECT_FALSE(table_->IsPrime(0)); + EXPECT_FALSE(table_->IsPrime(1)); + EXPECT_FALSE(table_->IsPrime(4)); + EXPECT_FALSE(table_->IsPrime(6)); + EXPECT_FALSE(table_->IsPrime(100)); +} + +TEST_P(PrimeTableTest, ReturnsTrueForPrimes) { + EXPECT_TRUE(table_->IsPrime(2)); + EXPECT_TRUE(table_->IsPrime(3)); + EXPECT_TRUE(table_->IsPrime(5)); + EXPECT_TRUE(table_->IsPrime(7)); + EXPECT_TRUE(table_->IsPrime(11)); + EXPECT_TRUE(table_->IsPrime(131)); +} + +TEST_P(PrimeTableTest, CanGetNextPrime) { + EXPECT_EQ(2, table_->GetNextPrime(0)); + EXPECT_EQ(3, table_->GetNextPrime(2)); + EXPECT_EQ(5, table_->GetNextPrime(3)); + EXPECT_EQ(7, table_->GetNextPrime(5)); + EXPECT_EQ(11, table_->GetNextPrime(7)); + EXPECT_EQ(131, table_->GetNextPrime(128)); +} + +// In order to run value-parameterized tests, you need to instantiate them, +// or bind them to a list of values which will be used as test parameters. 
+// You can instantiate them in a different translation module, or even +// instantiate them several times. +// +// Here, we instantiate our tests with a list of two PrimeTable object +// factory functions: +INSTANTIATE_TEST_CASE_P( + OnTheFlyAndPreCalculated, + PrimeTableTest, + Values(&CreateOnTheFlyPrimeTable, &CreatePreCalculatedPrimeTable<1000>)); + +#else + +// Google Test may not support value-parameterized tests with some +// compilers. If we use conditional compilation to compile out all +// code referring to the gtest_main library, MSVC linker will not link +// that library at all and consequently complain about missing entry +// point defined in that library (fatal error LNK1561: entry point +// must be defined). This dummy test keeps gtest_main linked in. +TEST(DummyTest, ValueParameterizedTestsAreNotSupportedOnThisPlatform) {} + +#endif // GTEST_HAS_PARAM_TEST +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to test code relying on some global flag variables. +// Combine() helps with generating all possible combinations of such flags, +// and each test is given one combination as a parameter. + +// Use class definitions to test from this header. +#include "prime_tables.h" + +#include "gtest/gtest.h" + +#if GTEST_HAS_COMBINE + +// Suppose we want to introduce a new, improved implementation of PrimeTable +// which combines speed of PrecalcPrimeTable and versatility of +// OnTheFlyPrimeTable (see prime_tables.h). Inside it instantiates both +// PrecalcPrimeTable and OnTheFlyPrimeTable and uses the one that is more +// appropriate under the circumstances. But in low memory conditions, it can be +// told to instantiate without PrecalcPrimeTable instance at all and use only +// OnTheFlyPrimeTable. +class HybridPrimeTable : public PrimeTable { + public: + HybridPrimeTable(bool force_on_the_fly, int max_precalculated) + : on_the_fly_impl_(new OnTheFlyPrimeTable), + precalc_impl_(force_on_the_fly ? 
NULL : + new PreCalculatedPrimeTable(max_precalculated)), + max_precalculated_(max_precalculated) {} + virtual ~HybridPrimeTable() { + delete on_the_fly_impl_; + delete precalc_impl_; + } + + virtual bool IsPrime(int n) const { + if (precalc_impl_ != NULL && n < max_precalculated_) + return precalc_impl_->IsPrime(n); + else + return on_the_fly_impl_->IsPrime(n); + } + + virtual int GetNextPrime(int p) const { + int next_prime = -1; + if (precalc_impl_ != NULL && p < max_precalculated_) + next_prime = precalc_impl_->GetNextPrime(p); + + return next_prime != -1 ? next_prime : on_the_fly_impl_->GetNextPrime(p); + } + + private: + OnTheFlyPrimeTable* on_the_fly_impl_; + PreCalculatedPrimeTable* precalc_impl_; + int max_precalculated_; +}; + +using ::testing::TestWithParam; +using ::testing::Bool; +using ::testing::Values; +using ::testing::Combine; + +// To test all code paths for HybridPrimeTable we must test it with numbers +// both within and outside PreCalculatedPrimeTable's capacity and also with +// PreCalculatedPrimeTable disabled. We do this by defining fixture which will +// accept different combinations of parameters for instantiating a +// HybridPrimeTable instance. +class PrimeTableTest : public TestWithParam< ::testing::tuple > { + protected: + virtual void SetUp() { + // This can be written as + // + // bool force_on_the_fly; + // int max_precalculated; + // tie(force_on_the_fly, max_precalculated) = GetParam(); + // + // once the Google C++ Style Guide allows use of ::std::tr1::tie. + // + bool force_on_the_fly = ::testing::get<0>(GetParam()); + int max_precalculated = ::testing::get<1>(GetParam()); + table_ = new HybridPrimeTable(force_on_the_fly, max_precalculated); + } + virtual void TearDown() { + delete table_; + table_ = NULL; + } + HybridPrimeTable* table_; +}; + +TEST_P(PrimeTableTest, ReturnsFalseForNonPrimes) { + // Inside the test body, you can refer to the test parameter by GetParam(). + // In this case, the test parameter is a PrimeTable interface pointer which + // we can use directly. + // Please note that you can also save it in the fixture's SetUp() method + // or constructor and use saved copy in the tests. + + EXPECT_FALSE(table_->IsPrime(-5)); + EXPECT_FALSE(table_->IsPrime(0)); + EXPECT_FALSE(table_->IsPrime(1)); + EXPECT_FALSE(table_->IsPrime(4)); + EXPECT_FALSE(table_->IsPrime(6)); + EXPECT_FALSE(table_->IsPrime(100)); +} + +TEST_P(PrimeTableTest, ReturnsTrueForPrimes) { + EXPECT_TRUE(table_->IsPrime(2)); + EXPECT_TRUE(table_->IsPrime(3)); + EXPECT_TRUE(table_->IsPrime(5)); + EXPECT_TRUE(table_->IsPrime(7)); + EXPECT_TRUE(table_->IsPrime(11)); + EXPECT_TRUE(table_->IsPrime(131)); +} + +TEST_P(PrimeTableTest, CanGetNextPrime) { + EXPECT_EQ(2, table_->GetNextPrime(0)); + EXPECT_EQ(3, table_->GetNextPrime(2)); + EXPECT_EQ(5, table_->GetNextPrime(3)); + EXPECT_EQ(7, table_->GetNextPrime(5)); + EXPECT_EQ(11, table_->GetNextPrime(7)); + EXPECT_EQ(131, table_->GetNextPrime(128)); +} + +// In order to run value-parameterized tests, you need to instantiate them, +// or bind them to a list of values which will be used as test parameters. +// You can instantiate them in a different translation module, or even +// instantiate them several times. +// +// Here, we instantiate our tests with a list of parameters. We must combine +// all variations of the boolean flag suppressing PrecalcPrimeTable and some +// meaningful values for tests. 
We choose a small value (1), and a value that +// will put some of the tested numbers beyond the capability of the +// PrecalcPrimeTable instance and some inside it (10). Combine will produce all +// possible combinations. +INSTANTIATE_TEST_CASE_P(MeaningfulTestParameters, + PrimeTableTest, + Combine(Bool(), Values(1, 10))); + +#else + +// Google Test may not support Combine() with some compilers. If we +// use conditional compilation to compile out all code referring to +// the gtest_main library, MSVC linker will not link that library at +// all and consequently complain about missing entry point defined in +// that library (fatal error LNK1561: entry point must be +// defined). This dummy test keeps gtest_main linked in. +TEST(DummyTest, CombineIsNotSupportedOnThisPlatform) {} + +#endif // GTEST_HAS_COMBINE +// Copyright 2009 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// This sample shows how to use Google Test listener API to implement +// an alternative console output and how to use the UnitTest reflection API +// to enumerate test cases and tests and to inspect their results. + +#include + +#include "gtest/gtest.h" + +using ::testing::EmptyTestEventListener; +using ::testing::InitGoogleTest; +using ::testing::Test; +using ::testing::TestCase; +using ::testing::TestEventListeners; +using ::testing::TestInfo; +using ::testing::TestPartResult; +using ::testing::UnitTest; + +namespace { + +// Provides alternative output mode which produces minimal amount of +// information about tests. +class TersePrinter : public EmptyTestEventListener { + private: + // Called before any test activity starts. + virtual void OnTestProgramStart(const UnitTest& /* unit_test */) {} + + // Called after all test activities have ended. + virtual void OnTestProgramEnd(const UnitTest& unit_test) { + fprintf(stdout, "TEST %s\n", unit_test.Passed() ? "PASSED" : "FAILED"); + fflush(stdout); + } + + // Called before a test starts. 
+ virtual void OnTestStart(const TestInfo& test_info) { + fprintf(stdout, + "*** Test %s.%s starting.\n", + test_info.test_case_name(), + test_info.name()); + fflush(stdout); + } + + // Called after a failed assertion or a SUCCEED() invocation. + virtual void OnTestPartResult(const TestPartResult& test_part_result) { + fprintf(stdout, + "%s in %s:%d\n%s\n", + test_part_result.failed() ? "*** Failure" : "Success", + test_part_result.file_name(), + test_part_result.line_number(), + test_part_result.summary()); + fflush(stdout); + } + + // Called after a test ends. + virtual void OnTestEnd(const TestInfo& test_info) { + fprintf(stdout, + "*** Test %s.%s ending.\n", + test_info.test_case_name(), + test_info.name()); + fflush(stdout); + } +}; // class TersePrinter + +TEST(CustomOutputTest, PrintsMessage) { + printf("Printing something from the test body...\n"); +} + +TEST(CustomOutputTest, Succeeds) { + SUCCEED() << "SUCCEED() has been invoked from here"; +} + +TEST(CustomOutputTest, Fails) { + EXPECT_EQ(1, 2) + << "This test fails in order to demonstrate alternative failure messages"; +} + +} // namespace + +int main(int argc, char **argv) { + InitGoogleTest(&argc, argv); + + bool terse_output = false; + if (argc > 1 && strcmp(argv[1], "--terse_output") == 0 ) + terse_output = true; + else + printf("%s\n", "Run this program with --terse_output to change the way " + "it prints its output."); + + UnitTest& unit_test = *UnitTest::GetInstance(); + + // If we are given the --terse_output command line flag, suppresses the + // standard output and attaches own result printer. + if (terse_output) { + TestEventListeners& listeners = unit_test.listeners(); + + // Removes the default console output listener from the list so it will + // not receive events from Google Test and won't print any output. Since + // this operation transfers ownership of the listener to the caller we + // have to delete it as well. + delete listeners.Release(listeners.default_result_printer()); + + // Adds the custom output listener to the list. It will now receive + // events from Google Test and print the alternative output. We don't + // have to worry about deleting it since Google Test assumes ownership + // over it after adding it to the list. + listeners.Append(new TersePrinter); + } + int ret_val = RUN_ALL_TESTS(); + + // This is an example of using the UnitTest reflection API to inspect test + // results. Here we discount failures from the tests we expected to fail. + int unexpectedly_failed_tests = 0; + for (int i = 0; i < unit_test.total_test_case_count(); ++i) { + const TestCase& test_case = *unit_test.GetTestCase(i); + for (int j = 0; j < test_case.total_test_count(); ++j) { + const TestInfo& test_info = *test_case.GetTestInfo(j); + // Counts failed tests that were not meant to fail (those without + // 'Fails' in the name). + if (test_info.result()->Failed() && + strcmp(test_info.name(), "Fails") != 0) { + unexpectedly_failed_tests++; + } + } + } + + // Test that were meant to fail should not affect the test program outcome. + if (unexpectedly_failed_tests == 0) + ret_val = 0; + + return ret_val; +} +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// +// Google C++ Testing Framework (Google Test) +// +// Sometimes it's desirable to build Google Test by compiling a single file. +// This file serves this purpose. + +// This line ensures that gtest.h can be compiled on its own, even +// when it's fused. +#include "gtest/gtest.h" + +// The following lines pull in the real gtest *.cc files. +#include "src/gtest.cc" +#include "src/gtest-death-test.cc" +#include "src/gtest-filepath.cc" +#include "src/gtest-port.cc" +#include "src/gtest-printers.cc" +#include "src/gtest-test-part.cc" +#include "src/gtest-typed-test.cc" +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev) +// +// This file implements death tests. 
+ +#include "gtest/gtest-death-test.h" +#include "gtest/internal/gtest-port.h" +#include "gtest/internal/custom/gtest.h" + +#if GTEST_HAS_DEATH_TEST + +# if GTEST_OS_MAC +# include <crt_externs.h> +# endif // GTEST_OS_MAC + +# include <errno.h> +# include <fcntl.h> +# include <limits.h> + +# if GTEST_OS_LINUX +# include <signal.h> +# endif // GTEST_OS_LINUX + +# include <stdarg.h> + +# if GTEST_OS_WINDOWS +# include <windows.h> +# else +# include <sys/mman.h> +# include <sys/wait.h> +# endif // GTEST_OS_WINDOWS + +# if GTEST_OS_QNX +# include <spawn.h> +# endif // GTEST_OS_QNX + +#endif // GTEST_HAS_DEATH_TEST + +#include "gtest/gtest-message.h" +#include "gtest/internal/gtest-string.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick exists to +// prevent the accidental inclusion of gtest-internal-inl.h in the +// user's code. +#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +namespace testing { + +// Constants. + +// The default death test style. +static const char kDefaultDeathTestStyle[] = "fast"; + +GTEST_DEFINE_string_( + death_test_style, + internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle), + "Indicates how to run a death test in a forked child process: " + "\"threadsafe\" (child process re-executes the test binary " + "from the beginning, running only the specific death test) or " + "\"fast\" (child process runs the death test immediately " + "after forking)."); + +GTEST_DEFINE_bool_( + death_test_use_fork, + internal::BoolFromGTestEnv("death_test_use_fork", false), + "Instructs to use fork()/_exit() instead of clone() in death tests. " + "Ignored and always uses fork() on POSIX systems where clone() is not " + "implemented. Useful when running under valgrind or similar tools if " + "those do not support clone(). Valgrind 3.3.1 will just fail if " + "it sees an unsupported combination of clone() flags. " + "It is not recommended to use this flag w/o valgrind though it will " + "work in 99% of the cases. Once valgrind is fixed, this flag will " + "most likely be removed."); + +namespace internal { +GTEST_DEFINE_string_( + internal_run_death_test, "", + "Indicates the file, line number, temporal index of " + "the single death test to run, and a file descriptor to " + "which a success code may be sent, all separated by " + "the '|' characters. This flag is specified if and only if the current " + "process is a sub-process launched for running a thread-safe " + "death test. FOR INTERNAL USE ONLY."); +} // namespace internal + +#if GTEST_HAS_DEATH_TEST + +namespace internal { + +// Valid only for fast death tests. Indicates the code is running in the +// child process of a fast style death test. +# if !GTEST_OS_WINDOWS +static bool g_in_fast_death_test_child = false; +# endif + +// Returns a Boolean value indicating whether the caller is currently +// executing in the context of the death test child process. Tools such as +// Valgrind heap checkers may need this to modify their behavior in death +// tests. IMPORTANT: This is an internal utility. Using it may break the +// implementation of death tests. User code MUST NOT use it. +bool InDeathTestChild() { +# if GTEST_OS_WINDOWS + + // On Windows, death tests are thread-safe regardless of the value of the + // death_test_style flag.
+ return !GTEST_FLAG(internal_run_death_test).empty(); + +# else + + if (GTEST_FLAG(death_test_style) == "threadsafe") + return !GTEST_FLAG(internal_run_death_test).empty(); + else + return g_in_fast_death_test_child; +#endif +} + +} // namespace internal + +// ExitedWithCode constructor. +ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) { +} + +// ExitedWithCode function-call operator. +bool ExitedWithCode::operator()(int exit_status) const { +# if GTEST_OS_WINDOWS + + return exit_status == exit_code_; + +# else + + return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_; + +# endif // GTEST_OS_WINDOWS +} + +# if !GTEST_OS_WINDOWS +// KilledBySignal constructor. +KilledBySignal::KilledBySignal(int signum) : signum_(signum) { +} + +// KilledBySignal function-call operator. +bool KilledBySignal::operator()(int exit_status) const { +# if defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_) + { + bool result; + if (GTEST_KILLED_BY_SIGNAL_OVERRIDE_(signum_, exit_status, &result)) { + return result; + } + } +# endif // defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_) + return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_; +} +# endif // !GTEST_OS_WINDOWS + +namespace internal { + +// Utilities needed for death tests. + +// Generates a textual description of a given exit code, in the format +// specified by wait(2). +static std::string ExitSummary(int exit_code) { + Message m; + +# if GTEST_OS_WINDOWS + + m << "Exited with exit status " << exit_code; + +# else + + if (WIFEXITED(exit_code)) { + m << "Exited with exit status " << WEXITSTATUS(exit_code); + } else if (WIFSIGNALED(exit_code)) { + m << "Terminated by signal " << WTERMSIG(exit_code); + } +# ifdef WCOREDUMP + if (WCOREDUMP(exit_code)) { + m << " (core dumped)"; + } +# endif +# endif // GTEST_OS_WINDOWS + + return m.GetString(); +} + +// Returns true if exit_status describes a process that was terminated +// by a signal, or exited normally with a nonzero exit code. +bool ExitedUnsuccessfully(int exit_status) { + return !ExitedWithCode(0)(exit_status); +} + +# if !GTEST_OS_WINDOWS +// Generates a textual failure message when a death test finds more than +// one thread running, or cannot determine the number of threads, prior +// to executing the given statement. It is the responsibility of the +// caller not to pass a thread_count of 1. +static std::string DeathTestThreadWarning(size_t thread_count) { + Message msg; + msg << "Death tests use fork(), which is unsafe particularly" + << " in a threaded context. For this test, " << GTEST_NAME_ << " "; + if (thread_count == 0) + msg << "couldn't detect the number of threads."; + else + msg << "detected " << thread_count << " threads."; + return msg.GetString(); +} +# endif // !GTEST_OS_WINDOWS + +// Flag characters for reporting a death test that did not die. +static const char kDeathTestLived = 'L'; +static const char kDeathTestReturned = 'R'; +static const char kDeathTestThrew = 'T'; +static const char kDeathTestInternalError = 'I'; + +// An enumeration describing all of the possible ways that a death test can +// conclude. DIED means that the process died while executing the test +// code; LIVED means that process lived beyond the end of the test code; +// RETURNED means that the test statement attempted to execute a return +// statement, which is not allowed; THREW means that the test statement +// returned control by throwing an exception. IN_PROGRESS means the test +// has not yet concluded. 
+// TODO(vladl@google.com): Unify names and possibly values for +// AbortReason, DeathTestOutcome, and flag characters above. +enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW }; + +// Routine for aborting the program which is safe to call from an +// exec-style death test child process, in which case the error +// message is propagated back to the parent process. Otherwise, the +// message is simply printed to stderr. In either case, the program +// then exits with status 1. +void DeathTestAbort(const std::string& message) { + // On a POSIX system, this function may be called from a threadsafe-style + // death test child process, which operates on a very small stack. Use + // the heap for any additional non-minuscule memory requirements. + const InternalRunDeathTestFlag* const flag = + GetUnitTestImpl()->internal_run_death_test_flag(); + if (flag != NULL) { + FILE* parent = posix::FDOpen(flag->write_fd(), "w"); + fputc(kDeathTestInternalError, parent); + fprintf(parent, "%s", message.c_str()); + fflush(parent); + _exit(1); + } else { + fprintf(stderr, "%s", message.c_str()); + fflush(stderr); + posix::Abort(); + } +} + +// A replacement for CHECK that calls DeathTestAbort if the assertion +// fails. +# define GTEST_DEATH_TEST_CHECK_(expression) \ + do { \ + if (!::testing::internal::IsTrue(expression)) { \ + DeathTestAbort( \ + ::std::string("CHECK failed: File ") + __FILE__ + ", line " \ + + ::testing::internal::StreamableToString(__LINE__) + ": " \ + + #expression); \ + } \ + } while (::testing::internal::AlwaysFalse()) + +// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for +// evaluating any system call that fulfills two conditions: it must return +// -1 on failure, and set errno to EINTR when it is interrupted and +// should be tried again. The macro expands to a loop that repeatedly +// evaluates the expression as long as it evaluates to -1 and sets +// errno to EINTR. If the expression evaluates to -1 but errno is +// something other than EINTR, DeathTestAbort is called. +# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \ + do { \ + int gtest_retval; \ + do { \ + gtest_retval = (expression); \ + } while (gtest_retval == -1 && errno == EINTR); \ + if (gtest_retval == -1) { \ + DeathTestAbort( \ + ::std::string("CHECK failed: File ") + __FILE__ + ", line " \ + + ::testing::internal::StreamableToString(__LINE__) + ": " \ + + #expression + " != -1"); \ + } \ + } while (::testing::internal::AlwaysFalse()) + +// Returns the message describing the last system error in errno. +std::string GetLastErrnoDescription() { + return errno == 0 ? "" : posix::StrError(errno); +} + +// This is called from a death test parent process to read a failure +// message from the death test child process and log it with the FATAL +// severity. On Windows, the message is read from a pipe handle. On other +// platforms, it is read from a file descriptor. +static void FailFromInternalError(int fd) { + Message error; + char buffer[256]; + int num_read; + + do { + while ((num_read = posix::Read(fd, buffer, 255)) > 0) { + buffer[num_read] = '\0'; + error << buffer; + } + } while (num_read == -1 && errno == EINTR); + + if (num_read == 0) { + GTEST_LOG_(FATAL) << error.GetString(); + } else { + const int last_error = errno; + GTEST_LOG_(FATAL) << "Error while reading death test internal: " + << GetLastErrnoDescription() << " [" << last_error << "]"; + } +} + +// Death test constructor. Increments the running death test count +// for the current test. 
+DeathTest::DeathTest() { + TestInfo* const info = GetUnitTestImpl()->current_test_info(); + if (info == NULL) { + DeathTestAbort("Cannot run a death test outside of a TEST or " + "TEST_F construct"); + } +} + +// Creates and returns a death test by dispatching to the current +// death test factory. +bool DeathTest::Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test) { + return GetUnitTestImpl()->death_test_factory()->Create( + statement, regex, file, line, test); +} + +const char* DeathTest::LastMessage() { + return last_death_test_message_.c_str(); +} + +void DeathTest::set_last_death_test_message(const std::string& message) { + last_death_test_message_ = message; +} + +std::string DeathTest::last_death_test_message_; + +// Provides cross platform implementation for some death functionality. +class DeathTestImpl : public DeathTest { + protected: + DeathTestImpl(const char* a_statement, const RE* a_regex) + : statement_(a_statement), + regex_(a_regex), + spawned_(false), + status_(-1), + outcome_(IN_PROGRESS), + read_fd_(-1), + write_fd_(-1) {} + + // read_fd_ is expected to be closed and cleared by a derived class. + ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); } + + void Abort(AbortReason reason); + virtual bool Passed(bool status_ok); + + const char* statement() const { return statement_; } + const RE* regex() const { return regex_; } + bool spawned() const { return spawned_; } + void set_spawned(bool is_spawned) { spawned_ = is_spawned; } + int status() const { return status_; } + void set_status(int a_status) { status_ = a_status; } + DeathTestOutcome outcome() const { return outcome_; } + void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; } + int read_fd() const { return read_fd_; } + void set_read_fd(int fd) { read_fd_ = fd; } + int write_fd() const { return write_fd_; } + void set_write_fd(int fd) { write_fd_ = fd; } + + // Called in the parent process only. Reads the result code of the death + // test child process via a pipe, interprets it to set the outcome_ + // member, and closes read_fd_. Outputs diagnostics and terminates in + // case of unexpected codes. + void ReadAndInterpretStatusByte(); + + private: + // The textual content of the code this object is testing. This class + // doesn't own this string and should not attempt to delete it. + const char* const statement_; + // The regular expression which test output must match. DeathTestImpl + // doesn't own this object and should not attempt to delete it. + const RE* const regex_; + // True if the death test child process has been successfully spawned. + bool spawned_; + // The exit status of the child process. + int status_; + // How the death test concluded. + DeathTestOutcome outcome_; + // Descriptor to the read end of the pipe to the child process. It is + // always -1 in the child process. The child keeps its write end of the + // pipe in write_fd_. + int read_fd_; + // Descriptor to the child's write end of the pipe to the parent process. + // It is always -1 in the parent process. The parent keeps its end of the + // pipe in read_fd_. + int write_fd_; +}; + +// Called in the parent process only. Reads the result code of the death +// test child process via a pipe, interprets it to set the outcome_ +// member, and closes read_fd_. Outputs diagnostics and terminates in +// case of unexpected codes. 
+void DeathTestImpl::ReadAndInterpretStatusByte() { + char flag; + int bytes_read; + + // The read() here blocks until data is available (signifying the + // failure of the death test) or until the pipe is closed (signifying + // its success), so it's okay to call this in the parent before + // the child process has exited. + do { + bytes_read = posix::Read(read_fd(), &flag, 1); + } while (bytes_read == -1 && errno == EINTR); + + if (bytes_read == 0) { + set_outcome(DIED); + } else if (bytes_read == 1) { + switch (flag) { + case kDeathTestReturned: + set_outcome(RETURNED); + break; + case kDeathTestThrew: + set_outcome(THREW); + break; + case kDeathTestLived: + set_outcome(LIVED); + break; + case kDeathTestInternalError: + FailFromInternalError(read_fd()); // Does not return. + break; + default: + GTEST_LOG_(FATAL) << "Death test child process reported " + << "unexpected status byte (" + << static_cast(flag) << ")"; + } + } else { + GTEST_LOG_(FATAL) << "Read from death test child process failed: " + << GetLastErrnoDescription(); + } + GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd())); + set_read_fd(-1); +} + +// Signals that the death test code which should have exited, didn't. +// Should be called only in a death test child process. +// Writes a status byte to the child's status file descriptor, then +// calls _exit(1). +void DeathTestImpl::Abort(AbortReason reason) { + // The parent process considers the death test to be a failure if + // it finds any data in our pipe. So, here we write a single flag byte + // to the pipe, then exit. + const char status_ch = + reason == TEST_DID_NOT_DIE ? kDeathTestLived : + reason == TEST_THREW_EXCEPTION ? kDeathTestThrew : kDeathTestReturned; + + GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1)); + // We are leaking the descriptor here because on some platforms (i.e., + // when built as Windows DLL), destructors of global objects will still + // run after calling _exit(). On such systems, write_fd_ will be + // indirectly closed from the destructor of UnitTestImpl, causing double + // close if it is also closed here. On debug configurations, double close + // may assert. As there are no in-process buffers to flush here, we are + // relying on the OS to close the descriptor after the process terminates + // when the destructors are not run. + _exit(1); // Exits w/o any normal exit hooks (we were supposed to crash) +} + +// Returns an indented copy of stderr output for a death test. +// This makes distinguishing death test output lines from regular log lines +// much easier. +static ::std::string FormatDeathTestOutput(const ::std::string& output) { + ::std::string ret; + for (size_t at = 0; ; ) { + const size_t line_end = output.find('\n', at); + ret += "[ DEATH ] "; + if (line_end == ::std::string::npos) { + ret += output.substr(at); + break; + } + ret += output.substr(at, line_end + 1 - at); + at = line_end + 1; + } + return ret; +} + +// Assesses the success or failure of a death test, using both private +// members which have previously been set, and one argument: +// +// Private data members: +// outcome: An enumeration describing how the death test +// concluded: DIED, LIVED, THREW, or RETURNED. The death test +// fails in the latter three cases. +// status: The exit status of the child process. On *nix, it is in the +// in the format specified by wait(2). On Windows, this is the +// value supplied to the ExitProcess() API or a numeric code +// of the exception that terminated the program. 
+// regex: A regular expression object to be applied to +// the test's captured standard error output; the death test +// fails if it does not match. +// +// Argument: +// status_ok: true if exit_status is acceptable in the context of +// this particular death test, which fails if it is false +// +// Returns true iff all of the above conditions are met. Otherwise, the +// first failing condition, in the order given above, is the one that is +// reported. Also sets the last death test message string. +bool DeathTestImpl::Passed(bool status_ok) { + if (!spawned()) + return false; + + const std::string error_message = GetCapturedStderr(); + + bool success = false; + Message buffer; + + buffer << "Death test: " << statement() << "\n"; + switch (outcome()) { + case LIVED: + buffer << " Result: failed to die.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case THREW: + buffer << " Result: threw an exception.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case RETURNED: + buffer << " Result: illegal return in test statement.\n" + << " Error msg:\n" << FormatDeathTestOutput(error_message); + break; + case DIED: + if (status_ok) { + const bool matched = RE::PartialMatch(error_message.c_str(), *regex()); + if (matched) { + success = true; + } else { + buffer << " Result: died but not with expected error.\n" + << " Expected: " << regex()->pattern() << "\n" + << "Actual msg:\n" << FormatDeathTestOutput(error_message); + } + } else { + buffer << " Result: died but not with expected exit code:\n" + << " " << ExitSummary(status()) << "\n" + << "Actual msg:\n" << FormatDeathTestOutput(error_message); + } + break; + case IN_PROGRESS: + default: + GTEST_LOG_(FATAL) + << "DeathTest::Passed somehow called before conclusion of test"; + } + + DeathTest::set_last_death_test_message(buffer.GetString()); + return success; +} + +# if GTEST_OS_WINDOWS +// WindowsDeathTest implements death tests on Windows. Due to the +// specifics of starting new processes on Windows, death tests there are +// always threadsafe, and Google Test considers the +// --gtest_death_test_style=fast setting to be equivalent to +// --gtest_death_test_style=threadsafe there. +// +// A few implementation notes: Like the Linux version, the Windows +// implementation uses pipes for child-to-parent communication. But due to +// the specifics of pipes on Windows, some extra steps are required: +// +// 1. The parent creates a communication pipe and stores handles to both +// ends of it. +// 2. The parent starts the child and provides it with the information +// necessary to acquire the handle to the write end of the pipe. +// 3. The child acquires the write end of the pipe and signals the parent +// using a Windows event. +// 4. Now the parent can release the write end of the pipe on its side. If +// this is done before step 3, the object's reference count goes down to +// 0 and it is destroyed, preventing the child from acquiring it. The +// parent now has to release it, or read operations on the read end of +// the pipe will not return when the child terminates. +// 5. The parent reads child's output through the pipe (outcome code and +// any possible error messages) from the pipe, and its stderr and then +// determines whether to fail the test. +// +// Note: to distinguish Win32 API calls from the local method and function +// calls, the former are explicitly resolved in the global namespace. 
+// +class WindowsDeathTest : public DeathTestImpl { + public: + WindowsDeathTest(const char* a_statement, + const RE* a_regex, + const char* file, + int line) + : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {} + + // All of these virtual functions are inherited from DeathTest. + virtual int Wait(); + virtual TestRole AssumeRole(); + + private: + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. + const int line_; + // Handle to the write end of the pipe to the child process. + AutoHandle write_handle_; + // Child process handle. + AutoHandle child_handle_; + // Event the child process uses to signal the parent that it has + // acquired the handle to the write end of the pipe. After seeing this + // event the parent can release its own handles to make sure its + // ReadFile() calls return when the child terminates. + AutoHandle event_handle_; +}; + +// Waits for the child in a death test to exit, returning its exit +// status, or 0 if no child process exists. As a side effect, sets the +// outcome data member. +int WindowsDeathTest::Wait() { + if (!spawned()) + return 0; + + // Wait until the child either signals that it has acquired the write end + // of the pipe or it dies. + const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() }; + switch (::WaitForMultipleObjects(2, + wait_handles, + FALSE, // Waits for any of the handles. + INFINITE)) { + case WAIT_OBJECT_0: + case WAIT_OBJECT_0 + 1: + break; + default: + GTEST_DEATH_TEST_CHECK_(false); // Should not get here. + } + + // The child has acquired the write end of the pipe or exited. + // We release the handle on our side and continue. + write_handle_.Reset(); + event_handle_.Reset(); + + ReadAndInterpretStatusByte(); + + // Waits for the child process to exit if it haven't already. This + // returns immediately if the child has already exited, regardless of + // whether previous calls to WaitForMultipleObjects synchronized on this + // handle or not. + GTEST_DEATH_TEST_CHECK_( + WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(), + INFINITE)); + DWORD status_code; + GTEST_DEATH_TEST_CHECK_( + ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE); + child_handle_.Reset(); + set_status(static_cast(status_code)); + return status(); +} + +// The AssumeRole process for a Windows death test. It creates a child +// process with the same executable as the current process to run the +// death test. The child process is given the --gtest_filter and +// --gtest_internal_run_death_test flags such that it knows to run the +// current death test only. +DeathTest::TestRole WindowsDeathTest::AssumeRole() { + const UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const TestInfo* const info = impl->current_test_info(); + const int death_test_index = info->result()->death_test_count(); + + if (flag != NULL) { + // ParseInternalRunDeathTestFlag() has performed all the necessary + // processing. + set_write_fd(flag->write_fd()); + return EXECUTE_TEST; + } + + // WindowsDeathTest uses an anonymous pipe to communicate results of + // a death test. + SECURITY_ATTRIBUTES handles_are_inheritable = { + sizeof(SECURITY_ATTRIBUTES), NULL, TRUE }; + HANDLE read_handle, write_handle; + GTEST_DEATH_TEST_CHECK_( + ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable, + 0) // Default buffer size. 
+      != FALSE);
+  set_read_fd(::_open_osfhandle(reinterpret_cast<intptr_t>(read_handle),
+                                O_RDONLY));
+  write_handle_.Reset(write_handle);
+  event_handle_.Reset(::CreateEvent(
+      &handles_are_inheritable,
+      TRUE,    // The event will automatically reset to non-signaled state.
+      FALSE,   // The initial state is non-signalled.
+      NULL));  // The event is unnamed.
+  GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL);
+  const std::string filter_flag =
+      std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" +
+      info->test_case_name() + "." + info->name();
+  const std::string internal_flag =
+      std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag +
+      "=" + file_ + "|" + StreamableToString(line_) + "|" +
+      StreamableToString(death_test_index) + "|" +
+      StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) +
+      // size_t has the same width as pointers on both 32-bit and 64-bit
+      // Windows platforms.
+      // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx.
+      "|" + StreamableToString(reinterpret_cast<size_t>(write_handle)) +
+      "|" + StreamableToString(reinterpret_cast<size_t>(event_handle_.Get()));
+
+  char executable_path[_MAX_PATH + 1];  // NOLINT
+  GTEST_DEATH_TEST_CHECK_(
+      _MAX_PATH + 1 != ::GetModuleFileNameA(NULL,
+                                            executable_path,
+                                            _MAX_PATH));
+
+  std::string command_line =
+      std::string(::GetCommandLineA()) + " " + filter_flag + " \"" +
+      internal_flag + "\"";
+
+  DeathTest::set_last_death_test_message("");
+
+  CaptureStderr();
+  // Flush the log buffers since the log streams are shared with the child.
+  FlushInfoLog();
+
+  // The child process will share the standard handles with the parent.
+  STARTUPINFOA startup_info;
+  memset(&startup_info, 0, sizeof(STARTUPINFO));
+  startup_info.dwFlags = STARTF_USESTDHANDLES;
+  startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE);
+  startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE);
+  startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);
+
+  PROCESS_INFORMATION process_info;
+  GTEST_DEATH_TEST_CHECK_(::CreateProcessA(
+      executable_path,
+      const_cast<char*>(command_line.c_str()),
+      NULL,   // Returned process handle is not inheritable.
+      NULL,   // Returned thread handle is not inheritable.
+      TRUE,   // Child inherits all inheritable handles (for write_handle_).
+      0x0,    // Default creation flags.
+      NULL,   // Inherit the parent's environment.
+      UnitTest::GetInstance()->original_working_dir(),
+      &startup_info,
+      &process_info) != FALSE);
+  child_handle_.Reset(process_info.hProcess);
+  ::CloseHandle(process_info.hThread);
+  set_spawned(true);
+  return OVERSEE_TEST;
+}
+# else  // We are not on Windows.
+
+// ForkingDeathTest provides implementations for most of the abstract
+// methods of the DeathTest interface. Only the AssumeRole method is
+// left undefined.
+class ForkingDeathTest : public DeathTestImpl {
+ public:
+  ForkingDeathTest(const char* statement, const RE* regex);
+
+  // All of these virtual functions are inherited from DeathTest.
+  virtual int Wait();
+
+ protected:
+  void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; }
+
+ private:
+  // PID of child process during death test; 0 in the child process itself.
+  pid_t child_pid_;
+};
+
+// Constructs a ForkingDeathTest.
+ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex)
+    : DeathTestImpl(a_statement, a_regex),
+      child_pid_(-1) {}
+
+// Waits for the child in a death test to exit, returning its exit
+// status, or 0 if no child process exists. As a side effect, sets the
+// outcome data member.
+int ForkingDeathTest::Wait() { + if (!spawned()) + return 0; + + ReadAndInterpretStatusByte(); + + int status_value; + GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0)); + set_status(status_value); + return status_value; +} + +// A concrete death test class that forks, then immediately runs the test +// in the child process. +class NoExecDeathTest : public ForkingDeathTest { + public: + NoExecDeathTest(const char* a_statement, const RE* a_regex) : + ForkingDeathTest(a_statement, a_regex) { } + virtual TestRole AssumeRole(); +}; + +// The AssumeRole process for a fork-and-run death test. It implements a +// straightforward fork, with a simple pipe to transmit the status byte. +DeathTest::TestRole NoExecDeathTest::AssumeRole() { + const size_t thread_count = GetThreadCount(); + if (thread_count != 1) { + GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count); + } + + int pipe_fd[2]; + GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); + + DeathTest::set_last_death_test_message(""); + CaptureStderr(); + // When we fork the process below, the log file buffers are copied, but the + // file descriptors are shared. We flush all log files here so that closing + // the file descriptors in the child process doesn't throw off the + // synchronization between descriptors and buffers in the parent process. + // This is as close to the fork as possible to avoid a race condition in case + // there are multiple threads running before the death test, and another + // thread writes to the log file. + FlushInfoLog(); + + const pid_t child_pid = fork(); + GTEST_DEATH_TEST_CHECK_(child_pid != -1); + set_child_pid(child_pid); + if (child_pid == 0) { + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0])); + set_write_fd(pipe_fd[1]); + // Redirects all logging to stderr in the child process to prevent + // concurrent writes to the log files. We capture stderr in the parent + // process and append the child process' output to a log. + LogToStderr(); + // Event forwarding to the listeners of event listener API mush be shut + // down in death test subprocesses. + GetUnitTestImpl()->listeners()->SuppressEventForwarding(); + g_in_fast_death_test_child = true; + return EXECUTE_TEST; + } else { + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); + set_read_fd(pipe_fd[0]); + set_spawned(true); + return OVERSEE_TEST; + } +} + +// A concrete death test class that forks and re-executes the main +// program from the beginning, with command-line flags set that cause +// only this specific death test to be run. +class ExecDeathTest : public ForkingDeathTest { + public: + ExecDeathTest(const char* a_statement, const RE* a_regex, + const char* file, int line) : + ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { } + virtual TestRole AssumeRole(); + private: + static ::std::vector + GetArgvsForDeathTestChildProcess() { + ::std::vector args = GetInjectableArgvs(); +# if defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_) + ::std::vector extra_args = + GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_(); + args.insert(args.end(), extra_args.begin(), extra_args.end()); +# endif // defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_) + return args; + } + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. + const int line_; +}; + +// Utility class for accumulating command-line arguments. 
+class Arguments {
+ public:
+  Arguments() {
+    args_.push_back(NULL);
+  }
+
+  ~Arguments() {
+    for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
+         ++i) {
+      free(*i);
+    }
+  }
+  void AddArgument(const char* argument) {
+    args_.insert(args_.end() - 1, posix::StrDup(argument));
+  }
+
+  template <typename Str>
+  void AddArguments(const ::std::vector<Str>& arguments) {
+    for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
+         i != arguments.end();
+         ++i) {
+      args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
+    }
+  }
+  char* const* Argv() {
+    return &args_[0];
+  }
+
+ private:
+  std::vector<char*> args_;
+};
+
+// A struct that encompasses the arguments to the child process of a
+// threadsafe-style death test process.
+struct ExecDeathTestArgs {
+  char* const* argv;  // Command-line arguments for the child's call to exec
+  int close_fd;       // File descriptor to close; the read end of a pipe
+};
+
+# if GTEST_OS_MAC
+inline char** GetEnviron() {
+  // When Google Test is built as a framework on MacOS X, the environ variable
+  // is unavailable. Apple's documentation (man environ) recommends using
+  // _NSGetEnviron() instead.
+  return *_NSGetEnviron();
+}
+# else
+// Some POSIX platforms expect you to declare environ. extern "C" makes
+// it reside in the global namespace.
+extern "C" char** environ;
+inline char** GetEnviron() { return environ; }
+# endif  // GTEST_OS_MAC
+
+# if !GTEST_OS_QNX
+// The main function for a threadsafe-style death test child process.
+// This function is called in a clone()-ed process and thus must avoid
+// any potentially unsafe operations like malloc or libc functions.
+static int ExecDeathTestChildMain(void* child_arg) {
+  ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg);
+  GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd));
+
+  // We need to execute the test program in the same environment where
+  // it was originally invoked. Therefore we change to the original
+  // working directory first.
+  const char* const original_dir =
+      UnitTest::GetInstance()->original_working_dir();
+  // We can safely call chdir() as it's a direct system call.
+  if (chdir(original_dir) != 0) {
+    DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " +
+                   GetLastErrnoDescription());
+    return EXIT_FAILURE;
+  }
+
+  // We can safely call execve() as it's a direct system call. We
+  // cannot use execvp() as it's a libc function and thus potentially
+  // unsafe. Since execve() doesn't search the PATH, the user must
+  // invoke the test program via a valid path that contains at least
+  // one path separator.
+  execve(args->argv[0], args->argv, GetEnviron());
+  DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " +
+                 original_dir + " failed: " +
+                 GetLastErrnoDescription());
+  return EXIT_FAILURE;
+}
+# endif  // !GTEST_OS_QNX
+
+// Two utility routines that together determine the direction the stack
+// grows.
+// This could be accomplished more elegantly by a single recursive
+// function, but we want to guard against the unlikely possibility of
+// a smart compiler optimizing the recursion away.
+//
+// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
+// StackLowerThanAddress into StackGrowsDown, which then doesn't give
+// correct answer.
+void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_;
+void StackLowerThanAddress(const void* ptr, bool* result) {
+  int dummy;
+  *result = (&dummy < ptr);
+}
+
+// Make sure AddressSanitizer does not tamper with the stack here.
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +bool StackGrowsDown() { + int dummy; + bool result; + StackLowerThanAddress(&dummy, &result); + return result; +} + +// Spawns a child process with the same executable as the current process in +// a thread-safe manner and instructs it to run the death test. The +// implementation uses fork(2) + exec. On systems where clone(2) is +// available, it is used instead, being slightly more thread-safe. On QNX, +// fork supports only single-threaded environments, so this function uses +// spawn(2) there instead. The function dies with an error message if +// anything goes wrong. +static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) { + ExecDeathTestArgs args = { argv, close_fd }; + pid_t child_pid = -1; + +# if GTEST_OS_QNX + // Obtains the current directory and sets it to be closed in the child + // process. + const int cwd_fd = open(".", O_RDONLY); + GTEST_DEATH_TEST_CHECK_(cwd_fd != -1); + GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC)); + // We need to execute the test program in the same environment where + // it was originally invoked. Therefore we change to the original + // working directory first. + const char* const original_dir = + UnitTest::GetInstance()->original_working_dir(); + // We can safely call chdir() as it's a direct system call. + if (chdir(original_dir) != 0) { + DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; + } + + int fd_flags; + // Set close_fd to be closed after spawn. + GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD)); + GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(close_fd, F_SETFD, + fd_flags | FD_CLOEXEC)); + struct inheritance inherit = {0}; + // spawn is a system call. + child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron()); + // Restores the current working directory. + GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd)); + +# else // GTEST_OS_QNX +# if GTEST_OS_LINUX + // When a SIGPROF signal is received while fork() or clone() are executing, + // the process may hang. To avoid this, we ignore SIGPROF here and re-enable + // it after the call to fork()/clone() is complete. + struct sigaction saved_sigprof_action; + struct sigaction ignore_sigprof_action; + memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action)); + sigemptyset(&ignore_sigprof_action.sa_mask); + ignore_sigprof_action.sa_handler = SIG_IGN; + GTEST_DEATH_TEST_CHECK_SYSCALL_(sigaction( + SIGPROF, &ignore_sigprof_action, &saved_sigprof_action)); +# endif // GTEST_OS_LINUX + +# if GTEST_HAS_CLONE + const bool use_fork = GTEST_FLAG(death_test_use_fork); + + if (!use_fork) { + static const bool stack_grows_down = StackGrowsDown(); + const size_t stack_size = getpagesize(); + // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead. + void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED); + + // Maximum stack alignment in bytes: For a downward-growing stack, this + // amount is subtracted from size of the stack space to get an address + // that is within the stack space and is aligned on all systems we care + // about. As far as I know there is no ABI with stack alignment greater + // than 64. We assume stack and stack_size already have alignment of + // kMaxStackAlignment. 
+ const size_t kMaxStackAlignment = 64; + void* const stack_top = + static_cast(stack) + + (stack_grows_down ? stack_size - kMaxStackAlignment : 0); + GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment && + reinterpret_cast(stack_top) % kMaxStackAlignment == 0); + + child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args); + + GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1); + } +# else + const bool use_fork = true; +# endif // GTEST_HAS_CLONE + + if (use_fork && (child_pid = fork()) == 0) { + ExecDeathTestChildMain(&args); + _exit(0); + } +# endif // GTEST_OS_QNX +# if GTEST_OS_LINUX + GTEST_DEATH_TEST_CHECK_SYSCALL_( + sigaction(SIGPROF, &saved_sigprof_action, NULL)); +# endif // GTEST_OS_LINUX + + GTEST_DEATH_TEST_CHECK_(child_pid != -1); + return child_pid; +} + +// The AssumeRole process for a fork-and-exec death test. It re-executes the +// main program from the beginning, setting the --gtest_filter +// and --gtest_internal_run_death_test flags to cause only the current +// death test to be re-run. +DeathTest::TestRole ExecDeathTest::AssumeRole() { + const UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const TestInfo* const info = impl->current_test_info(); + const int death_test_index = info->result()->death_test_count(); + + if (flag != NULL) { + set_write_fd(flag->write_fd()); + return EXECUTE_TEST; + } + + int pipe_fd[2]; + GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); + // Clear the close-on-exec flag on the write end of the pipe, lest + // it be closed when the child process does an exec: + GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1); + + const std::string filter_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" + + info->test_case_name() + "." + info->name(); + const std::string internal_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "=" + + file_ + "|" + StreamableToString(line_) + "|" + + StreamableToString(death_test_index) + "|" + + StreamableToString(pipe_fd[1]); + Arguments args; + args.AddArguments(GetArgvsForDeathTestChildProcess()); + args.AddArgument(filter_flag.c_str()); + args.AddArgument(internal_flag.c_str()); + + DeathTest::set_last_death_test_message(""); + + CaptureStderr(); + // See the comment in NoExecDeathTest::AssumeRole for why the next line + // is necessary. + FlushInfoLog(); + + const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); + set_child_pid(child_pid); + set_read_fd(pipe_fd[0]); + set_spawned(true); + return OVERSEE_TEST; +} + +# endif // !GTEST_OS_WINDOWS + +// Creates a concrete DeathTest-derived class that depends on the +// --gtest_death_test_style flag, and sets the pointer pointed to +// by the "test" argument to its address. If the test should be +// skipped, sets that pointer to NULL. Returns true, unless the +// flag is set to an invalid value. 
+bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, + const char* file, int line, + DeathTest** test) { + UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const int death_test_index = impl->current_test_info() + ->increment_death_test_count(); + + if (flag != NULL) { + if (death_test_index > flag->index()) { + DeathTest::set_last_death_test_message( + "Death test count (" + StreamableToString(death_test_index) + + ") somehow exceeded expected maximum (" + + StreamableToString(flag->index()) + ")"); + return false; + } + + if (!(flag->file() == file && flag->line() == line && + flag->index() == death_test_index)) { + *test = NULL; + return true; + } + } + +# if GTEST_OS_WINDOWS + + if (GTEST_FLAG(death_test_style) == "threadsafe" || + GTEST_FLAG(death_test_style) == "fast") { + *test = new WindowsDeathTest(statement, regex, file, line); + } + +# else + + if (GTEST_FLAG(death_test_style) == "threadsafe") { + *test = new ExecDeathTest(statement, regex, file, line); + } else if (GTEST_FLAG(death_test_style) == "fast") { + *test = new NoExecDeathTest(statement, regex); + } + +# endif // GTEST_OS_WINDOWS + + else { // NOLINT - this is more readable than unbalanced brackets inside #if. + DeathTest::set_last_death_test_message( + "Unknown death test style \"" + GTEST_FLAG(death_test_style) + + "\" encountered"); + return false; + } + + return true; +} + +# if GTEST_OS_WINDOWS +// Recreates the pipe and event handles from the provided parameters, +// signals the event, and returns a file descriptor wrapped around the pipe +// handle. This function is called in the child process only. +int GetStatusFileDescriptor(unsigned int parent_process_id, + size_t write_handle_as_size_t, + size_t event_handle_as_size_t) { + AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE, + FALSE, // Non-inheritable. + parent_process_id)); + if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) { + DeathTestAbort("Unable to open parent process " + + StreamableToString(parent_process_id)); + } + + // TODO(vladl@google.com): Replace the following check with a + // compile-time assertion when available. + GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t)); + + const HANDLE write_handle = + reinterpret_cast(write_handle_as_size_t); + HANDLE dup_write_handle; + + // The newly initialized handle is accessible only in in the parent + // process. To obtain one accessible within the child, we need to use + // DuplicateHandle. + if (!::DuplicateHandle(parent_process_handle.Get(), write_handle, + ::GetCurrentProcess(), &dup_write_handle, + 0x0, // Requested privileges ignored since + // DUPLICATE_SAME_ACCESS is used. + FALSE, // Request non-inheritable handler. 
+ DUPLICATE_SAME_ACCESS)) { + DeathTestAbort("Unable to duplicate the pipe handle " + + StreamableToString(write_handle_as_size_t) + + " from the parent process " + + StreamableToString(parent_process_id)); + } + + const HANDLE event_handle = reinterpret_cast(event_handle_as_size_t); + HANDLE dup_event_handle; + + if (!::DuplicateHandle(parent_process_handle.Get(), event_handle, + ::GetCurrentProcess(), &dup_event_handle, + 0x0, + FALSE, + DUPLICATE_SAME_ACCESS)) { + DeathTestAbort("Unable to duplicate the event handle " + + StreamableToString(event_handle_as_size_t) + + " from the parent process " + + StreamableToString(parent_process_id)); + } + + const int write_fd = + ::_open_osfhandle(reinterpret_cast(dup_write_handle), O_APPEND); + if (write_fd == -1) { + DeathTestAbort("Unable to convert pipe handle " + + StreamableToString(write_handle_as_size_t) + + " to a file descriptor"); + } + + // Signals the parent that the write end of the pipe has been acquired + // so the parent can release its own write end. + ::SetEvent(dup_event_handle); + + return write_fd; +} +# endif // GTEST_OS_WINDOWS + +// Returns a newly created InternalRunDeathTestFlag object with fields +// initialized from the GTEST_FLAG(internal_run_death_test) flag if +// the flag is specified; otherwise returns NULL. +InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() { + if (GTEST_FLAG(internal_run_death_test) == "") return NULL; + + // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we + // can use it here. + int line = -1; + int index = -1; + ::std::vector< ::std::string> fields; + SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields); + int write_fd = -1; + +# if GTEST_OS_WINDOWS + + unsigned int parent_process_id = 0; + size_t write_handle_as_size_t = 0; + size_t event_handle_as_size_t = 0; + + if (fields.size() != 6 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index) + || !ParseNaturalNumber(fields[3], &parent_process_id) + || !ParseNaturalNumber(fields[4], &write_handle_as_size_t) + || !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + write_fd = GetStatusFileDescriptor(parent_process_id, + write_handle_as_size_t, + event_handle_as_size_t); +# else + + if (fields.size() != 4 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index) + || !ParseNaturalNumber(fields[3], &write_fd)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + +# endif // GTEST_OS_WINDOWS + + return new InternalRunDeathTestFlag(fields[0], line, index, write_fd); +} + +} // namespace internal + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: keith.ray@gmail.com (Keith Ray)
+
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-filepath.h"
+#include "gtest/internal/gtest-port.h"
+
+#include <stdlib.h>
+
+#if GTEST_OS_WINDOWS_MOBILE
+# include <windows.h>
+#elif GTEST_OS_WINDOWS
+# include <direct.h>
+# include <io.h>
+#elif GTEST_OS_SYMBIAN
+// Symbian OpenC has PATH_MAX in sys/syslimits.h
+# include <sys/syslimits.h>
+#else
+# include <limits.h>
+# include <climits>  // Some Linux distributions define PATH_MAX here.
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+#if GTEST_OS_WINDOWS
+# define GTEST_PATH_MAX_ _MAX_PATH
+#elif defined(PATH_MAX)
+# define GTEST_PATH_MAX_ PATH_MAX
+#elif defined(_XOPEN_PATH_MAX)
+# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX
+#else
+# define GTEST_PATH_MAX_ _POSIX_PATH_MAX
+#endif  // GTEST_OS_WINDOWS
+
+#include "gtest/internal/gtest-string.h"
+
+namespace testing {
+namespace internal {
+
+#if GTEST_OS_WINDOWS
+// On Windows, '\\' is the standard path separator, but many tools and the
+// Windows API also accept '/' as an alternate path separator. Unless otherwise
+// noted, a file path can contain either kind of path separators, or a mixture
+// of them.
+const char kPathSeparator = '\\';
+const char kAlternatePathSeparator = '/';
+const char kAlternatePathSeparatorString[] = "/";
+# if GTEST_OS_WINDOWS_MOBILE
+// Windows CE doesn't have a current directory. You should not use
+// the current directory in tests on Windows CE, but this at least
+// provides a reasonable fallback.
+const char kCurrentDirectoryString[] = "\\";
+// Windows CE doesn't define INVALID_FILE_ATTRIBUTES
+const DWORD kInvalidFileAttributes = 0xffffffff;
+# else
+const char kCurrentDirectoryString[] = ".\\";
+# endif  // GTEST_OS_WINDOWS_MOBILE
+#else
+const char kPathSeparator = '/';
+const char kCurrentDirectoryString[] = "./";
+#endif  // GTEST_OS_WINDOWS
+
+// Returns whether the given character is a valid path separator.
+static bool IsPathSeparator(char c) {
+#if GTEST_HAS_ALT_PATH_SEP_
+  return (c == kPathSeparator) || (c == kAlternatePathSeparator);
+#else
+  return c == kPathSeparator;
+#endif
+}
+
+// Returns the current working directory, or "" if unsuccessful.
+FilePath FilePath::GetCurrentDir() {
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
+  // Windows CE doesn't have a current directory, so we just return
+  // something reasonable.
+  return FilePath(kCurrentDirectoryString);
+#elif GTEST_OS_WINDOWS
+  char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
+  return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ?
"" : cwd); +#else + char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; + char* result = getcwd(cwd, sizeof(cwd)); +# if GTEST_OS_NACL + // getcwd will likely fail in NaCl due to the sandbox, so return something + // reasonable. The user may have provided a shim implementation for getcwd, + // however, so fallback only when failure is detected. + return FilePath(result == NULL ? kCurrentDirectoryString : cwd); +# endif // GTEST_OS_NACL + return FilePath(result == NULL ? "" : cwd); +#endif // GTEST_OS_WINDOWS_MOBILE +} + +// Returns a copy of the FilePath with the case-insensitive extension removed. +// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns +// FilePath("dir/file"). If a case-insensitive extension is not +// found, returns a copy of the original FilePath. +FilePath FilePath::RemoveExtension(const char* extension) const { + const std::string dot_extension = std::string(".") + extension; + if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) { + return FilePath(pathname_.substr( + 0, pathname_.length() - dot_extension.length())); + } + return *this; +} + +// Returns a pointer to the last occurence of a valid path separator in +// the FilePath. On Windows, for example, both '/' and '\' are valid path +// separators. Returns NULL if no path separator was found. +const char* FilePath::FindLastPathSeparator() const { + const char* const last_sep = strrchr(c_str(), kPathSeparator); +#if GTEST_HAS_ALT_PATH_SEP_ + const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator); + // Comparing two pointers of which only one is NULL is undefined. + if (last_alt_sep != NULL && + (last_sep == NULL || last_alt_sep > last_sep)) { + return last_alt_sep; + } +#endif + return last_sep; +} + +// Returns a copy of the FilePath with the directory part removed. +// Example: FilePath("path/to/file").RemoveDirectoryName() returns +// FilePath("file"). If there is no directory part ("just_a_file"), it returns +// the FilePath unmodified. If there is no file part ("just_a_dir/") it +// returns an empty FilePath (""). +// On Windows platform, '\' is the path separator, otherwise it is '/'. +FilePath FilePath::RemoveDirectoryName() const { + const char* const last_sep = FindLastPathSeparator(); + return last_sep ? FilePath(last_sep + 1) : *this; +} + +// RemoveFileName returns the directory path with the filename removed. +// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". +// If the FilePath is "a_file" or "/a_file", RemoveFileName returns +// FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does +// not have a file, like "just/a/dir/", it returns the FilePath unmodified. +// On Windows platform, '\' is the path separator, otherwise it is '/'. +FilePath FilePath::RemoveFileName() const { + const char* const last_sep = FindLastPathSeparator(); + std::string dir; + if (last_sep) { + dir = std::string(c_str(), last_sep + 1 - c_str()); + } else { + dir = kCurrentDirectoryString; + } + return FilePath(dir); +} + +// Helper functions for naming files in a directory for xml output. + +// Given directory = "dir", base_name = "test", number = 0, +// extension = "xml", returns "dir/test.xml". If number is greater +// than zero (e.g., 12), returns "dir/test_12.xml". +// On Windows platform, uses \ as the separator rather than /. +FilePath FilePath::MakeFileName(const FilePath& directory, + const FilePath& base_name, + int number, + const char* extension) { + std::string file; + if (number == 0) { + file = base_name.string() + "." 
+ extension; + } else { + file = base_name.string() + "_" + StreamableToString(number) + + "." + extension; + } + return ConcatPaths(directory, FilePath(file)); +} + +// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml". +// On Windows, uses \ as the separator rather than /. +FilePath FilePath::ConcatPaths(const FilePath& directory, + const FilePath& relative_path) { + if (directory.IsEmpty()) + return relative_path; + const FilePath dir(directory.RemoveTrailingPathSeparator()); + return FilePath(dir.string() + kPathSeparator + relative_path.string()); +} + +// Returns true if pathname describes something findable in the file-system, +// either a file, directory, or whatever. +bool FilePath::FileOrDirectoryExists() const { +#if GTEST_OS_WINDOWS_MOBILE + LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str()); + const DWORD attributes = GetFileAttributes(unicode); + delete [] unicode; + return attributes != kInvalidFileAttributes; +#else + posix::StatStruct file_stat; + return posix::Stat(pathname_.c_str(), &file_stat) == 0; +#endif // GTEST_OS_WINDOWS_MOBILE +} + +// Returns true if pathname describes a directory in the file-system +// that exists. +bool FilePath::DirectoryExists() const { + bool result = false; +#if GTEST_OS_WINDOWS + // Don't strip off trailing separator if path is a root directory on + // Windows (like "C:\\"). + const FilePath& path(IsRootDirectory() ? *this : + RemoveTrailingPathSeparator()); +#else + const FilePath& path(*this); +#endif + +#if GTEST_OS_WINDOWS_MOBILE + LPCWSTR unicode = String::AnsiToUtf16(path.c_str()); + const DWORD attributes = GetFileAttributes(unicode); + delete [] unicode; + if ((attributes != kInvalidFileAttributes) && + (attributes & FILE_ATTRIBUTE_DIRECTORY)) { + result = true; + } +#else + posix::StatStruct file_stat; + result = posix::Stat(path.c_str(), &file_stat) == 0 && + posix::IsDir(file_stat); +#endif // GTEST_OS_WINDOWS_MOBILE + + return result; +} + +// Returns true if pathname describes a root directory. (Windows has one +// root directory per disk drive.) +bool FilePath::IsRootDirectory() const { +#if GTEST_OS_WINDOWS + // TODO(wan@google.com): on Windows a network share like + // \\server\share can be a root directory, although it cannot be the + // current directory. Handle this properly. + return pathname_.length() == 3 && IsAbsolutePath(); +#else + return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]); +#endif +} + +// Returns true if pathname describes an absolute path. +bool FilePath::IsAbsolutePath() const { + const char* const name = pathname_.c_str(); +#if GTEST_OS_WINDOWS + return pathname_.length() >= 3 && + ((name[0] >= 'a' && name[0] <= 'z') || + (name[0] >= 'A' && name[0] <= 'Z')) && + name[1] == ':' && + IsPathSeparator(name[2]); +#else + return IsPathSeparator(name[0]); +#endif +} + +// Returns a pathname for a file that does not currently exist. The pathname +// will be directory/base_name.extension or +// directory/base_name_.extension if directory/base_name.extension +// already exists. The number will be incremented until a pathname is found +// that does not already exist. +// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. +// There could be a race condition if two or more processes are calling this +// function at the same time -- they could both pick the same filename. 
+FilePath FilePath::GenerateUniqueFileName(const FilePath& directory, + const FilePath& base_name, + const char* extension) { + FilePath full_pathname; + int number = 0; + do { + full_pathname.Set(MakeFileName(directory, base_name, number++, extension)); + } while (full_pathname.FileOrDirectoryExists()); + return full_pathname; +} + +// Returns true if FilePath ends with a path separator, which indicates that +// it is intended to represent a directory. Returns false otherwise. +// This does NOT check that a directory (or file) actually exists. +bool FilePath::IsDirectory() const { + return !pathname_.empty() && + IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]); +} + +// Create directories so that path exists. Returns true if successful or if +// the directories already exist; returns false if unable to create directories +// for any reason. +bool FilePath::CreateDirectoriesRecursively() const { + if (!this->IsDirectory()) { + return false; + } + + if (pathname_.length() == 0 || this->DirectoryExists()) { + return true; + } + + const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName()); + return parent.CreateDirectoriesRecursively() && this->CreateFolder(); +} + +// Create the directory so that path exists. Returns true if successful or +// if the directory already exists; returns false if unable to create the +// directory for any reason, including if the parent directory does not +// exist. Not named "CreateDirectory" because that's a macro on Windows. +bool FilePath::CreateFolder() const { +#if GTEST_OS_WINDOWS_MOBILE + FilePath removed_sep(this->RemoveTrailingPathSeparator()); + LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str()); + int result = CreateDirectory(unicode, NULL) ? 0 : -1; + delete [] unicode; +#elif GTEST_OS_WINDOWS + int result = _mkdir(pathname_.c_str()); +#else + int result = mkdir(pathname_.c_str(), 0777); +#endif // GTEST_OS_WINDOWS_MOBILE + + if (result == -1) { + return this->DirectoryExists(); // An error is OK if the directory exists. + } + return true; // No error. +} + +// If input name has a trailing separator character, remove it and return the +// name, otherwise return the name string unmodified. +// On Windows platform, uses \ as the separator, other platforms use /. +FilePath FilePath::RemoveTrailingPathSeparator() const { + return IsDirectory() + ? FilePath(pathname_.substr(0, pathname_.length() - 1)) + : *this; +} + +// Removes any redundant separators that might be in the pathname. +// For example, "bar///foo" becomes "bar/foo". Does not eliminate other +// redundancies that might be in a pathname involving "." or "..". +// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share). +void FilePath::Normalize() { + if (pathname_.c_str() == NULL) { + pathname_ = ""; + return; + } + const char* src = pathname_.c_str(); + char* const dest = new char[pathname_.length() + 1]; + char* dest_ptr = dest; + memset(dest_ptr, 0, pathname_.length() + 1); + + while (*src != '\0') { + *dest_ptr = *src; + if (!IsPathSeparator(*src)) { + src++; + } else { +#if GTEST_HAS_ALT_PATH_SEP_ + if (*dest_ptr == kAlternatePathSeparator) { + *dest_ptr = kPathSeparator; + } +#endif + while (IsPathSeparator(*src)) + src++; + } + dest_ptr++; + } + *dest_ptr = '\0'; + pathname_ = dest; + delete[] dest; +} + +} // namespace internal +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+#include "gtest/internal/gtest-port.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fstream>
+
+#if GTEST_OS_WINDOWS
+# include <windows.h>
+# include <io.h>
+# include <sys/stat.h>
+# include <map>  // Used in ThreadLocal.
+#else
+# include <unistd.h>
+#endif  // GTEST_OS_WINDOWS
+
+#if GTEST_OS_MAC
+# include <mach/mach_init.h>
+# include <mach/task.h>
+# include <mach/vm_map.h>
+#endif  // GTEST_OS_MAC
+
+#if GTEST_OS_QNX
+# include <devctl.h>
+# include <fcntl.h>
+# include <sys/procfs.h>
+#endif  // GTEST_OS_QNX
+
+#if GTEST_OS_AIX
+# include <procinfo.h>
+# include <sys/types.h>
+#endif  // GTEST_OS_AIX
+
+#include "gtest/gtest-spi.h"
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-string.h"
+
+// Indicates that this translation unit is part of Google Test's
+// implementation. It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error. This trick exists to
+// prevent the accidental inclusion of gtest-internal-inl.h in the
+// user's code.
+#define GTEST_IMPLEMENTATION_ 1
+#include "src/gtest-internal-inl.h"
+#undef GTEST_IMPLEMENTATION_
+
+namespace testing {
+namespace internal {
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
+const int kStdOutFileno = 1;
+const int kStdErrFileno = 2;
+#else
+const int kStdOutFileno = STDOUT_FILENO;
+const int kStdErrFileno = STDERR_FILENO;
+#endif  // _MSC_VER
+
+#if GTEST_OS_LINUX
+
+namespace {
+template <typename T>
+T ReadProcFileField(const string& filename, int field) {
+  std::string dummy;
+  std::ifstream file(filename.c_str());
+  while (field-- > 0) {
+    file >> dummy;
+  }
+  T output = 0;
+  file >> output;
+  return output;
+}
+}  // namespace
+
+// Returns the number of active threads, or 0 when there is an error.
+size_t GetThreadCount() { + const string filename = + (Message() << "/proc/" << getpid() << "/stat").GetString(); + return ReadProcFileField(filename, 19); +} + +#elif GTEST_OS_MAC + +size_t GetThreadCount() { + const task_t task = mach_task_self(); + mach_msg_type_number_t thread_count; + thread_act_array_t thread_list; + const kern_return_t status = task_threads(task, &thread_list, &thread_count); + if (status == KERN_SUCCESS) { + // task_threads allocates resources in thread_list and we need to free them + // to avoid leaks. + vm_deallocate(task, + reinterpret_cast(thread_list), + sizeof(thread_t) * thread_count); + return static_cast(thread_count); + } else { + return 0; + } +} + +#elif GTEST_OS_QNX + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +size_t GetThreadCount() { + const int fd = open("/proc/self/as", O_RDONLY); + if (fd < 0) { + return 0; + } + procfs_info process_info; + const int status = + devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL); + close(fd); + if (status == EOK) { + return static_cast(process_info.num_threads); + } else { + return 0; + } +} + +#elif GTEST_OS_AIX + +size_t GetThreadCount() { + struct procentry64 entry; + pid_t pid = getpid(); + int status = getprocs64(&entry, sizeof(entry), NULL, 0, &pid, 1); + if (status == 1) { + return entry.pi_thcount; + } else { + return 0; + } +} + +#else + +size_t GetThreadCount() { + // There's no portable way to detect the number of threads, so we just + // return 0 to indicate that we cannot detect it. + return 0; +} + +#endif // GTEST_OS_LINUX + +#if GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS + +void SleepMilliseconds(int n) { + ::Sleep(n); +} + +AutoHandle::AutoHandle() + : handle_(INVALID_HANDLE_VALUE) {} + +AutoHandle::AutoHandle(Handle handle) + : handle_(handle) {} + +AutoHandle::~AutoHandle() { + Reset(); +} + +AutoHandle::Handle AutoHandle::Get() const { + return handle_; +} + +void AutoHandle::Reset() { + Reset(INVALID_HANDLE_VALUE); +} + +void AutoHandle::Reset(HANDLE handle) { + // Resetting with the same handle we already own is invalid. + if (handle_ != handle) { + if (IsCloseable()) { + ::CloseHandle(handle_); + } + handle_ = handle; + } else { + GTEST_CHECK_(!IsCloseable()) + << "Resetting a valid handle to itself is likely a programmer error " + "and thus not allowed."; + } +} + +bool AutoHandle::IsCloseable() const { + // Different Windows APIs may use either of these values to represent an + // invalid handle. + return handle_ != NULL && handle_ != INVALID_HANDLE_VALUE; +} + +Notification::Notification() + : event_(::CreateEvent(NULL, // Default security attributes. + TRUE, // Do not reset automatically. + FALSE, // Initially unset. + NULL)) { // Anonymous event. + GTEST_CHECK_(event_.Get() != NULL); +} + +void Notification::Notify() { + GTEST_CHECK_(::SetEvent(event_.Get()) != FALSE); +} + +void Notification::WaitForNotification() { + GTEST_CHECK_( + ::WaitForSingleObject(event_.Get(), INFINITE) == WAIT_OBJECT_0); +} + +Mutex::Mutex() + : owner_thread_id_(0), + type_(kDynamic), + critical_section_init_phase_(0), + critical_section_(new CRITICAL_SECTION) { + ::InitializeCriticalSection(critical_section_); +} + +Mutex::~Mutex() { + // Static mutexes are leaked intentionally. It is not thread-safe to try + // to clean them up. + // TODO(yukawa): Switch to Slim Reader/Writer (SRW) Locks, which requires + // nothing to clean it up but is available only on Vista and later. 
+ // http://msdn.microsoft.com/en-us/library/windows/desktop/aa904937.aspx + if (type_ == kDynamic) { + ::DeleteCriticalSection(critical_section_); + delete critical_section_; + critical_section_ = NULL; + } +} + +void Mutex::Lock() { + ThreadSafeLazyInit(); + ::EnterCriticalSection(critical_section_); + owner_thread_id_ = ::GetCurrentThreadId(); +} + +void Mutex::Unlock() { + ThreadSafeLazyInit(); + // We don't protect writing to owner_thread_id_ here, as it's the + // caller's responsibility to ensure that the current thread holds the + // mutex when this is called. + owner_thread_id_ = 0; + ::LeaveCriticalSection(critical_section_); +} + +// Does nothing if the current thread holds the mutex. Otherwise, crashes +// with high probability. +void Mutex::AssertHeld() { + ThreadSafeLazyInit(); + GTEST_CHECK_(owner_thread_id_ == ::GetCurrentThreadId()) + << "The current thread is not holding the mutex @" << this; +} + +// Initializes owner_thread_id_ and critical_section_ in static mutexes. +void Mutex::ThreadSafeLazyInit() { + // Dynamic mutexes are initialized in the constructor. + if (type_ == kStatic) { + switch ( + ::InterlockedCompareExchange(&critical_section_init_phase_, 1L, 0L)) { + case 0: + // If critical_section_init_phase_ was 0 before the exchange, we + // are the first to test it and need to perform the initialization. + owner_thread_id_ = 0; + critical_section_ = new CRITICAL_SECTION; + ::InitializeCriticalSection(critical_section_); + // Updates the critical_section_init_phase_ to 2 to signal + // initialization complete. + GTEST_CHECK_(::InterlockedCompareExchange( + &critical_section_init_phase_, 2L, 1L) == + 1L); + break; + case 1: + // Somebody else is already initializing the mutex; spin until they + // are done. + while (::InterlockedCompareExchange(&critical_section_init_phase_, + 2L, + 2L) != 2L) { + // Possibly yields the rest of the thread's time slice to other + // threads. + ::Sleep(0); + } + break; + + case 2: + break; // The mutex is already initialized and ready for use. + + default: + GTEST_CHECK_(false) + << "Unexpected value of critical_section_init_phase_ " + << "while initializing a static mutex."; + } + } +} + +namespace { + +class ThreadWithParamSupport : public ThreadWithParamBase { + public: + static HANDLE CreateThread(Runnable* runnable, + Notification* thread_can_start) { + ThreadMainParam* param = new ThreadMainParam(runnable, thread_can_start); + DWORD thread_id; + // TODO(yukawa): Consider to use _beginthreadex instead. + HANDLE thread_handle = ::CreateThread( + NULL, // Default security. + 0, // Default stack size. + &ThreadWithParamSupport::ThreadMain, + param, // Parameter to ThreadMainStatic + 0x0, // Default creation flags. + &thread_id); // Need a valid pointer for the call to work under Win98. + GTEST_CHECK_(thread_handle != NULL) << "CreateThread failed with error " + << ::GetLastError() << "."; + if (thread_handle == NULL) { + delete param; + } + return thread_handle; + } + + private: + struct ThreadMainParam { + ThreadMainParam(Runnable* runnable, Notification* thread_can_start) + : runnable_(runnable), + thread_can_start_(thread_can_start) { + } + scoped_ptr runnable_; + // Does not own. + Notification* thread_can_start_; + }; + + static DWORD WINAPI ThreadMain(void* ptr) { + // Transfers ownership. + scoped_ptr param(static_cast(ptr)); + if (param->thread_can_start_ != NULL) + param->thread_can_start_->WaitForNotification(); + param->runnable_->Run(); + return 0; + } + + // Prohibit instantiation. 
+ ThreadWithParamSupport(); + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParamSupport); +}; + +} // namespace + +ThreadWithParamBase::ThreadWithParamBase(Runnable *runnable, + Notification* thread_can_start) + : thread_(ThreadWithParamSupport::CreateThread(runnable, + thread_can_start)) { +} + +ThreadWithParamBase::~ThreadWithParamBase() { + Join(); +} + +void ThreadWithParamBase::Join() { + GTEST_CHECK_(::WaitForSingleObject(thread_.Get(), INFINITE) == WAIT_OBJECT_0) + << "Failed to join the thread with error " << ::GetLastError() << "."; +} + +// Maps a thread to a set of ThreadIdToThreadLocals that have values +// instantiated on that thread and notifies them when the thread exits. A +// ThreadLocal instance is expected to persist until all threads it has +// values on have terminated. +class ThreadLocalRegistryImpl { + public: + // Registers thread_local_instance as having value on the current thread. + // Returns a value that can be used to identify the thread from other threads. + static ThreadLocalValueHolderBase* GetValueOnCurrentThread( + const ThreadLocalBase* thread_local_instance) { + DWORD current_thread = ::GetCurrentThreadId(); + MutexLock lock(&mutex_); + ThreadIdToThreadLocals* const thread_to_thread_locals = + GetThreadLocalsMapLocked(); + ThreadIdToThreadLocals::iterator thread_local_pos = + thread_to_thread_locals->find(current_thread); + if (thread_local_pos == thread_to_thread_locals->end()) { + thread_local_pos = thread_to_thread_locals->insert( + std::make_pair(current_thread, ThreadLocalValues())).first; + StartWatcherThreadFor(current_thread); + } + ThreadLocalValues& thread_local_values = thread_local_pos->second; + ThreadLocalValues::iterator value_pos = + thread_local_values.find(thread_local_instance); + if (value_pos == thread_local_values.end()) { + value_pos = + thread_local_values + .insert(std::make_pair( + thread_local_instance, + linked_ptr( + thread_local_instance->NewValueForCurrentThread()))) + .first; + } + return value_pos->second.get(); + } + + static void OnThreadLocalDestroyed( + const ThreadLocalBase* thread_local_instance) { + std::vector > value_holders; + // Clean up the ThreadLocalValues data structure while holding the lock, but + // defer the destruction of the ThreadLocalValueHolderBases. + { + MutexLock lock(&mutex_); + ThreadIdToThreadLocals* const thread_to_thread_locals = + GetThreadLocalsMapLocked(); + for (ThreadIdToThreadLocals::iterator it = + thread_to_thread_locals->begin(); + it != thread_to_thread_locals->end(); + ++it) { + ThreadLocalValues& thread_local_values = it->second; + ThreadLocalValues::iterator value_pos = + thread_local_values.find(thread_local_instance); + if (value_pos != thread_local_values.end()) { + value_holders.push_back(value_pos->second); + thread_local_values.erase(value_pos); + // This 'if' can only be successful at most once, so theoretically we + // could break out of the loop here, but we don't bother doing so. + } + } + } + // Outside the lock, let the destructor for 'value_holders' deallocate the + // ThreadLocalValueHolderBases. + } + + static void OnThreadExit(DWORD thread_id) { + GTEST_CHECK_(thread_id != 0) << ::GetLastError(); + std::vector > value_holders; + // Clean up the ThreadIdToThreadLocals data structure while holding the + // lock, but defer the destruction of the ThreadLocalValueHolderBases. 
+    {
+      MutexLock lock(&mutex_);
+      ThreadIdToThreadLocals* const thread_to_thread_locals =
+          GetThreadLocalsMapLocked();
+      ThreadIdToThreadLocals::iterator thread_local_pos =
+          thread_to_thread_locals->find(thread_id);
+      if (thread_local_pos != thread_to_thread_locals->end()) {
+        ThreadLocalValues& thread_local_values = thread_local_pos->second;
+        for (ThreadLocalValues::iterator value_pos =
+                 thread_local_values.begin();
+             value_pos != thread_local_values.end();
+             ++value_pos) {
+          value_holders.push_back(value_pos->second);
+        }
+        thread_to_thread_locals->erase(thread_local_pos);
+      }
+    }
+    // Outside the lock, let the destructor for 'value_holders' deallocate the
+    // ThreadLocalValueHolderBases.
+  }
+
+ private:
+  // In a particular thread, maps a ThreadLocal object to its value.
+  typedef std::map<const ThreadLocalBase*,
+                   linked_ptr<ThreadLocalValueHolderBase> > ThreadLocalValues;
+  // Stores all ThreadIdToThreadLocals having values in a thread, indexed by
+  // thread's ID.
+  typedef std::map<DWORD, ThreadLocalValues> ThreadIdToThreadLocals;
+
+  // Holds the thread id and thread handle that we pass from
+  // StartWatcherThreadFor to WatcherThreadFunc.
+  typedef std::pair<DWORD, HANDLE> ThreadIdAndHandle;
+
+  static void StartWatcherThreadFor(DWORD thread_id) {
+    // The returned handle will be kept in thread_map and closed by
+    // watcher_thread in WatcherThreadFunc.
+    HANDLE thread = ::OpenThread(SYNCHRONIZE | THREAD_QUERY_INFORMATION,
+                                 FALSE,
+                                 thread_id);
+    GTEST_CHECK_(thread != NULL);
+    // We need to to pass a valid thread ID pointer into CreateThread for it
+    // to work correctly under Win98.
+    DWORD watcher_thread_id;
+    HANDLE watcher_thread = ::CreateThread(
+        NULL,   // Default security.
+        0,      // Default stack size
+        &ThreadLocalRegistryImpl::WatcherThreadFunc,
+        reinterpret_cast<LPVOID>(new ThreadIdAndHandle(thread_id, thread)),
+        CREATE_SUSPENDED,
+        &watcher_thread_id);
+    GTEST_CHECK_(watcher_thread != NULL);
+    // Give the watcher thread the same priority as ours to avoid being
+    // blocked by it.
+    ::SetThreadPriority(watcher_thread,
+                        ::GetThreadPriority(::GetCurrentThread()));
+    ::ResumeThread(watcher_thread);
+    ::CloseHandle(watcher_thread);
+  }
+
+  // Monitors exit from a given thread and notifies those
+  // ThreadIdToThreadLocals about thread termination.
+  static DWORD WINAPI WatcherThreadFunc(LPVOID param) {
+    const ThreadIdAndHandle* tah =
+        reinterpret_cast<const ThreadIdAndHandle*>(param);
+    GTEST_CHECK_(
+        ::WaitForSingleObject(tah->second, INFINITE) == WAIT_OBJECT_0);
+    OnThreadExit(tah->first);
+    ::CloseHandle(tah->second);
+    delete tah;
+    return 0;
+  }
+
+  // Returns map of thread local instances.
+  static ThreadIdToThreadLocals* GetThreadLocalsMapLocked() {
+    mutex_.AssertHeld();
+    static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals;
+    return map;
+  }
+
+  // Protects access to GetThreadLocalsMapLocked() and its return value.
+  static Mutex mutex_;
+  // Protects access to GetThreadMapLocked() and its return value.
+  static Mutex thread_map_mutex_;
+};
+
+Mutex ThreadLocalRegistryImpl::mutex_(Mutex::kStaticMutex);
+Mutex ThreadLocalRegistryImpl::thread_map_mutex_(Mutex::kStaticMutex);
+
+ThreadLocalValueHolderBase* ThreadLocalRegistry::GetValueOnCurrentThread(
+      const ThreadLocalBase* thread_local_instance) {
+  return ThreadLocalRegistryImpl::GetValueOnCurrentThread(
+      thread_local_instance);
+}
+
+void ThreadLocalRegistry::OnThreadLocalDestroyed(
+      const ThreadLocalBase* thread_local_instance) {
+  ThreadLocalRegistryImpl::OnThreadLocalDestroyed(thread_local_instance);
+}
+
+#endif  // GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
+
+#if GTEST_USES_POSIX_RE
+
+// Implements RE.
Currently only needed for death tests. + +RE::~RE() { + if (is_valid_) { + // regfree'ing an invalid regex might crash because the content + // of the regex is undefined. Since the regex's are essentially + // the same, one cannot be valid (or invalid) without the other + // being so too. + regfree(&partial_regex_); + regfree(&full_regex_); + } + free(const_cast(pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.full_regex_, str, 1, &match, 0) == 0; +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.partial_regex_, str, 1, &match, 0) == 0; +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = posix::StrDup(regex); + + // Reserves enough bytes to hold the regular expression used for a + // full match. + const size_t full_regex_len = strlen(regex) + 10; + char* const full_pattern = new char[full_regex_len]; + + snprintf(full_pattern, full_regex_len, "^(%s)$", regex); + is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0; + // We want to call regcomp(&partial_regex_, ...) even if the + // previous expression returns false. Otherwise partial_regex_ may + // not be properly initialized can may cause trouble when it's + // freed. + // + // Some implementation of POSIX regex (e.g. on at least some + // versions of Cygwin) doesn't accept the empty string as a valid + // regex. We change it to an equivalent form "()" to be safe. + if (is_valid_) { + const char* const partial_regex = (*regex == '\0') ? "()" : regex; + is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0; + } + EXPECT_TRUE(is_valid_) + << "Regular expression \"" << regex + << "\" is not a valid POSIX Extended regular expression."; + + delete[] full_pattern; +} + +#elif GTEST_USES_SIMPLE_RE + +// Returns true iff ch appears anywhere in str (excluding the +// terminating '\0' character). +bool IsInSet(char ch, const char* str) { + return ch != '\0' && strchr(str, ch) != NULL; +} + +// Returns true iff ch belongs to the given classification. Unlike +// similar functions in , these aren't affected by the +// current locale. +bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; } +bool IsAsciiPunct(char ch) { + return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"); +} +bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); } +bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); } +bool IsAsciiWordChar(char ch) { + return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || + ('0' <= ch && ch <= '9') || ch == '_'; +} + +// Returns true iff "\\c" is a supported escape sequence. +bool IsValidEscape(char c) { + return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW")); +} + +// Returns true iff the given atom (specified by escaped and pattern) +// matches ch. The result is undefined if the atom is invalid. +bool AtomMatchesChar(bool escaped, char pattern_char, char ch) { + if (escaped) { // "\\p" where p is pattern_char. 
+ switch (pattern_char) { + case 'd': return IsAsciiDigit(ch); + case 'D': return !IsAsciiDigit(ch); + case 'f': return ch == '\f'; + case 'n': return ch == '\n'; + case 'r': return ch == '\r'; + case 's': return IsAsciiWhiteSpace(ch); + case 'S': return !IsAsciiWhiteSpace(ch); + case 't': return ch == '\t'; + case 'v': return ch == '\v'; + case 'w': return IsAsciiWordChar(ch); + case 'W': return !IsAsciiWordChar(ch); + } + return IsAsciiPunct(pattern_char) && pattern_char == ch; + } + + return (pattern_char == '.' && ch != '\n') || pattern_char == ch; +} + +// Helper function used by ValidateRegex() to format error messages. +std::string FormatRegexSyntaxError(const char* regex, int index) { + return (Message() << "Syntax error at index " << index + << " in simple regular expression \"" << regex << "\": ").GetString(); +} + +// Generates non-fatal failures and returns false if regex is invalid; +// otherwise returns true. +bool ValidateRegex(const char* regex) { + if (regex == NULL) { + // TODO(wan@google.com): fix the source file location in the + // assertion failures to match where the regex is used in user + // code. + ADD_FAILURE() << "NULL is not a valid simple regular expression."; + return false; + } + + bool is_valid = true; + + // True iff ?, *, or + can follow the previous atom. + bool prev_repeatable = false; + for (int i = 0; regex[i]; i++) { + if (regex[i] == '\\') { // An escape sequence + i++; + if (regex[i] == '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "'\\' cannot appear at the end."; + return false; + } + + if (!IsValidEscape(regex[i])) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "invalid escape sequence \"\\" << regex[i] << "\"."; + is_valid = false; + } + prev_repeatable = true; + } else { // Not an escape sequence. + const char ch = regex[i]; + + if (ch == '^' && i > 0) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'^' can only appear at the beginning."; + is_valid = false; + } else if (ch == '$' && regex[i + 1] != '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'$' can only appear at the end."; + is_valid = false; + } else if (IsInSet(ch, "()[]{}|")) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' is unsupported."; + is_valid = false; + } else if (IsRepeat(ch) && !prev_repeatable) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' can only follow a repeatable token."; + is_valid = false; + } + + prev_repeatable = !IsInSet(ch, "^$?*+"); + } + } + + return is_valid; +} + +// Matches a repeated regex atom followed by a valid simple regular +// expression. The regex atom is defined as c if escaped is false, +// or \c otherwise. repeat is the repetition meta character (?, *, +// or +). The behavior is undefined if str contains too many +// characters to be indexable by size_t, in which case the test will +// probably time out anyway. We are fine with this limitation as +// std::string has it too. +bool MatchRepetitionAndRegexAtHead( + bool escaped, char c, char repeat, const char* regex, + const char* str) { + const size_t min_count = (repeat == '+') ? 1 : 0; + const size_t max_count = (repeat == '?') ? 1 : + static_cast(-1) - 1; + // We cannot call numeric_limits::max() as it conflicts with the + // max() macro on Windows. + + for (size_t i = 0; i <= max_count; ++i) { + // We know that the atom matches each of the first i characters in str. 
+ if (i >= min_count && MatchRegexAtHead(regex, str + i)) { + // We have enough matches at the head, and the tail matches too. + // Since we only care about *whether* the pattern matches str + // (as opposed to *how* it matches), there is no need to find a + // greedy match. + return true; + } + if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i])) + return false; + } + return false; +} + +// Returns true iff regex matches a prefix of str. regex must be a +// valid simple regular expression and not start with "^", or the +// result is undefined. +bool MatchRegexAtHead(const char* regex, const char* str) { + if (*regex == '\0') // An empty regex matches a prefix of anything. + return true; + + // "$" only matches the end of a string. Note that regex being + // valid guarantees that there's nothing after "$" in it. + if (*regex == '$') + return *str == '\0'; + + // Is the first thing in regex an escape sequence? + const bool escaped = *regex == '\\'; + if (escaped) + ++regex; + if (IsRepeat(regex[1])) { + // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so + // here's an indirect recursion. It terminates as the regex gets + // shorter in each recursion. + return MatchRepetitionAndRegexAtHead( + escaped, regex[0], regex[1], regex + 2, str); + } else { + // regex isn't empty, isn't "$", and doesn't start with a + // repetition. We match the first atom of regex with the first + // character of str and recurse. + return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) && + MatchRegexAtHead(regex + 1, str + 1); + } +} + +// Returns true iff regex matches any substring of str. regex must be +// a valid simple regular expression, or the result is undefined. +// +// The algorithm is recursive, but the recursion depth doesn't exceed +// the regex length, so we won't need to worry about running out of +// stack space normally. In rare cases the time complexity can be +// exponential with respect to the regex length + the string length, +// but usually it's must faster (often close to linear). +bool MatchRegexAnywhere(const char* regex, const char* str) { + if (regex == NULL || str == NULL) + return false; + + if (*regex == '^') + return MatchRegexAtHead(regex + 1, str); + + // A successful match can be anywhere in str. + do { + if (MatchRegexAtHead(regex, str)) + return true; + } while (*str++ != '\0'); + return false; +} + +// Implements the RE class. + +RE::~RE() { + free(const_cast(pattern_)); + free(const_cast(full_pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str); +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str); +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = full_pattern_ = NULL; + if (regex != NULL) { + pattern_ = posix::StrDup(regex); + } + + is_valid_ = ValidateRegex(regex); + if (!is_valid_) { + // No need to calculate the full pattern when the regex is invalid. + return; + } + + const size_t len = strlen(regex); + // Reserves enough bytes to hold the regular expression used for a + // full match: we need space to prepend a '^', append a '$', and + // terminate the string with '\0'. 
+ char* buffer = static_cast(malloc(len + 3)); + full_pattern_ = buffer; + + if (*regex != '^') + *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'. + + // We don't use snprintf or strncpy, as they trigger a warning when + // compiled with VC++ 8.0. + memcpy(buffer, regex, len); + buffer += len; + + if (len == 0 || regex[len - 1] != '$') + *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'. + + *buffer = '\0'; +} + +#endif // GTEST_USES_POSIX_RE + +const char kUnknownFile[] = "unknown file"; + +// Formats a source file path and a line number as they would appear +// in an error message from the compiler used to compile this code. +GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) { + const std::string file_name(file == NULL ? kUnknownFile : file); + + if (line < 0) { + return file_name + ":"; + } +#ifdef _MSC_VER + return file_name + "(" + StreamableToString(line) + "):"; +#else + return file_name + ":" + StreamableToString(line) + ":"; +#endif // _MSC_VER +} + +// Formats a file location for compiler-independent XML output. +// Although this function is not platform dependent, we put it next to +// FormatFileLocation in order to contrast the two functions. +// Note that FormatCompilerIndependentFileLocation() does NOT append colon +// to the file location it produces, unlike FormatFileLocation(). +GTEST_API_ ::std::string FormatCompilerIndependentFileLocation( + const char* file, int line) { + const std::string file_name(file == NULL ? kUnknownFile : file); + + if (line < 0) + return file_name; + else + return file_name + ":" + StreamableToString(line); +} + +GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line) + : severity_(severity) { + const char* const marker = + severity == GTEST_INFO ? "[ INFO ]" : + severity == GTEST_WARNING ? "[WARNING]" : + severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]"; + GetStream() << ::std::endl << marker << " " + << FormatFileLocation(file, line).c_str() << ": "; +} + +// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. +GTestLog::~GTestLog() { + GetStream() << ::std::endl; + if (severity_ == GTEST_FATAL) { + fflush(stderr); + posix::Abort(); + } +} +// Disable Microsoft deprecation warnings for POSIX functions called from +// this class (creat, dup, dup2, and close) +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996) + +#if GTEST_HAS_STREAM_REDIRECTION + +// Object that captures an output stream (stdout/stderr). +class CapturedStream { + public: + // The ctor redirects the stream to a temporary file. + explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) { +# if GTEST_OS_WINDOWS + char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT + char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT + + ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path); + const UINT success = ::GetTempFileNameA(temp_dir_path, + "gtest_redir", + 0, // Generate unique file name. + temp_file_path); + GTEST_CHECK_(success != 0) + << "Unable to create a temporary file in " << temp_dir_path; + const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE); + GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file " + << temp_file_path; + filename_ = temp_file_path; +# else + // There's no guarantee that a test has write access to the current + // directory, so we create the temporary file in the /tmp directory + // instead. We use /tmp on most systems, and /sdcard on Android. + // That's because Android doesn't have /tmp. 
+# if GTEST_OS_LINUX_ANDROID + // Note: Android applications are expected to call the framework's + // Context.getExternalStorageDirectory() method through JNI to get + // the location of the world-writable SD Card directory. However, + // this requires a Context handle, which cannot be retrieved + // globally from native code. Doing so also precludes running the + // code as part of a regular standalone executable, which doesn't + // run in a Dalvik process (e.g. when running it through 'adb shell'). + // + // The location /sdcard is directly accessible from native code + // and is the only location (unofficially) supported by the Android + // team. It's generally a symlink to the real SD Card mount point + // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or + // other OEM-customized locations. Never rely on these, and always + // use /sdcard. + char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX"; +# else + char name_template[] = "/tmp/captured_stream.XXXXXX"; +# endif // GTEST_OS_LINUX_ANDROID + const int captured_fd = mkstemp(name_template); + filename_ = name_template; +# endif // GTEST_OS_WINDOWS + fflush(NULL); + dup2(captured_fd, fd_); + close(captured_fd); + } + + ~CapturedStream() { + remove(filename_.c_str()); + } + + std::string GetCapturedString() { + if (uncaptured_fd_ != -1) { + // Restores the original stream. + fflush(NULL); + dup2(uncaptured_fd_, fd_); + close(uncaptured_fd_); + uncaptured_fd_ = -1; + } + + FILE* const file = posix::FOpen(filename_.c_str(), "r"); + const std::string content = ReadEntireFile(file); + posix::FClose(file); + return content; + } + + private: + const int fd_; // A stream to capture. + int uncaptured_fd_; + // Name of the temporary file holding the stderr output. + ::std::string filename_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream); +}; + +GTEST_DISABLE_MSC_WARNINGS_POP_() + +static CapturedStream* g_captured_stderr = NULL; +static CapturedStream* g_captured_stdout = NULL; + +// Starts capturing an output stream (stdout/stderr). +void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) { + if (*stream != NULL) { + GTEST_LOG_(FATAL) << "Only one " << stream_name + << " capturer can exist at a time."; + } + *stream = new CapturedStream(fd); +} + +// Stops capturing the output stream and returns the captured string. +std::string GetCapturedStream(CapturedStream** captured_stream) { + const std::string content = (*captured_stream)->GetCapturedString(); + + delete *captured_stream; + *captured_stream = NULL; + + return content; +} + +// Starts capturing stdout. +void CaptureStdout() { + CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout); +} + +// Starts capturing stderr. +void CaptureStderr() { + CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr); +} + +// Stops capturing stdout and returns the captured string. +std::string GetCapturedStdout() { + return GetCapturedStream(&g_captured_stdout); +} + +// Stops capturing stderr and returns the captured string. 
+std::string GetCapturedStderr() {
+  return GetCapturedStream(&g_captured_stderr);
+}
+
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+
+std::string TempDir() {
+#if GTEST_OS_WINDOWS_MOBILE
+  return "\\temp\\";
+#elif GTEST_OS_WINDOWS
+  const char* temp_dir = posix::GetEnv("TEMP");
+  if (temp_dir == NULL || temp_dir[0] == '\0')
+    return "\\temp\\";
+  else if (temp_dir[strlen(temp_dir) - 1] == '\\')
+    return temp_dir;
+  else
+    return std::string(temp_dir) + "\\";
+#elif GTEST_OS_LINUX_ANDROID
+  return "/sdcard/";
+#else
+  return "/tmp/";
+#endif  // GTEST_OS_WINDOWS_MOBILE
+}
+
+size_t GetFileSize(FILE* file) {
+  fseek(file, 0, SEEK_END);
+  return static_cast<size_t>(ftell(file));
+}
+
+std::string ReadEntireFile(FILE* file) {
+  const size_t file_size = GetFileSize(file);
+  char* const buffer = new char[file_size];
+
+  size_t bytes_last_read = 0;  // # of bytes read in the last fread()
+  size_t bytes_read = 0;       // # of bytes read so far
+
+  fseek(file, 0, SEEK_SET);
+
+  // Keeps reading the file until we cannot read further or the
+  // pre-determined file size is reached.
+  do {
+    bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
+    bytes_read += bytes_last_read;
+  } while (bytes_last_read > 0 && bytes_read < file_size);
+
+  const std::string content(buffer, bytes_read);
+  delete[] buffer;
+
+  return content;
+}
+
+#if GTEST_HAS_DEATH_TEST
+
+static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
+    NULL;  // Owned.
+
+void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
+  if (g_injected_test_argvs != argvs)
+    delete g_injected_test_argvs;
+  g_injected_test_argvs = argvs;
+}
+
+const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
+  if (g_injected_test_argvs != NULL) {
+    return *g_injected_test_argvs;
+  }
+  return GetArgvs();
+}
+#endif  // GTEST_HAS_DEATH_TEST
+
+#if GTEST_OS_WINDOWS_MOBILE
+namespace posix {
+void Abort() {
+  DebugBreak();
+  TerminateProcess(GetCurrentProcess(), 1);
+}
+}  // namespace posix
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+// Returns the name of the environment variable corresponding to the
+// given flag.  For example, FlagToEnvVar("foo") will return
+// "GTEST_FOO" in the open-source version.
+static std::string FlagToEnvVar(const char* flag) {
+  const std::string full_flag =
+      (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
+
+  Message env_var;
+  for (size_t i = 0; i != full_flag.length(); i++) {
+    env_var << ToUpper(full_flag.c_str()[i]);
+  }
+
+  return env_var.GetString();
+}
+
+// Parses 'str' for a 32-bit signed integer.  If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged and returns false.
+bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
+  // Parses the environment variable as a decimal integer.
+  char* end = NULL;
+  const long long_value = strtol(str, &end, 10);  // NOLINT
+
+  // Has strtol() consumed all characters in the string?
+  if (*end != '\0') {
+    // No - an invalid character was encountered.
+    Message msg;
+    msg << "WARNING: " << src_text
+        << " is expected to be a 32-bit integer, but actually"
+        << " has value \"" << str << "\".\n";
+    printf("%s", msg.GetString().c_str());
+    fflush(stdout);
+    return false;
+  }
+
+  // Is the parsed value in the range of an Int32?
+  const Int32 result = static_cast<Int32>(long_value);
+  if (long_value == LONG_MAX || long_value == LONG_MIN ||
+      // The parsed value overflows as a long.  (strtol() returns
+      // LONG_MAX or LONG_MIN when the input overflows.)
+      result != long_value
+      // The parsed value overflows as an Int32.
+ ) { + Message msg; + msg << "WARNING: " << src_text + << " is expected to be a 32-bit integer, but actually" + << " has value " << str << ", which overflows.\n"; + printf("%s", msg.GetString().c_str()); + fflush(stdout); + return false; + } + + *value = result; + return true; +} + +// Reads and returns the Boolean environment variable corresponding to +// the given flag; if it's not set, returns default_value. +// +// The value is considered true iff it's not "0". +bool BoolFromGTestEnv(const char* flag, bool default_value) { +#if defined(GTEST_GET_BOOL_FROM_ENV_) + return GTEST_GET_BOOL_FROM_ENV_(flag, default_value); +#endif // defined(GTEST_GET_BOOL_FROM_ENV_) + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = posix::GetEnv(env_var.c_str()); + return string_value == NULL ? + default_value : strcmp(string_value, "0") != 0; +} + +// Reads and returns a 32-bit integer stored in the environment +// variable corresponding to the given flag; if it isn't set or +// doesn't represent a valid 32-bit integer, returns default_value. +Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) { +#if defined(GTEST_GET_INT32_FROM_ENV_) + return GTEST_GET_INT32_FROM_ENV_(flag, default_value); +#endif // defined(GTEST_GET_INT32_FROM_ENV_) + const std::string env_var = FlagToEnvVar(flag); + const char* const string_value = posix::GetEnv(env_var.c_str()); + if (string_value == NULL) { + // The environment variable is not set. + return default_value; + } + + Int32 result = default_value; + if (!ParseInt32(Message() << "Environment variable " << env_var, + string_value, &result)) { + printf("The default value %s is used.\n", + (Message() << default_value).GetString().c_str()); + fflush(stdout); + return default_value; + } + + return result; +} + +// Reads and returns the string environment variable corresponding to +// the given flag; if it's not set, returns default_value. +std::string StringFromGTestEnv(const char* flag, const char* default_value) { +#if defined(GTEST_GET_STRING_FROM_ENV_) + return GTEST_GET_STRING_FROM_ENV_(flag, default_value); +#endif // defined(GTEST_GET_STRING_FROM_ENV_) + const std::string env_var = FlagToEnvVar(flag); + const char* value = posix::GetEnv(env_var.c_str()); + if (value != NULL) { + return value; + } + + // As a special case for the 'output' flag, if GTEST_OUTPUT is not + // set, we look for XML_OUTPUT_FILE, which is set by the Bazel build + // system. The value of XML_OUTPUT_FILE is a filename without the + // "xml:" prefix of GTEST_OUTPUT. + // + // The net priority order after flag processing is thus: + // --gtest_output command line flag + // GTEST_OUTPUT environment variable + // XML_OUTPUT_FILE environment variable + // 'default_value' + if (strcmp(flag, "output") == 0) { + value = posix::GetEnv("XML_OUTPUT_FILE"); + if (value != NULL) { + return std::string("xml:") + value; + } + } + return default_value; +} + +} // namespace internal +} // namespace testing +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Test - The Google C++ Testing Framework +// +// This file implements a universal value printer that can print a +// value of any type T: +// +// void ::testing::internal::UniversalPrinter::Print(value, ostream_ptr); +// +// It uses the << operator when possible, and prints the bytes in the +// object otherwise. A user can override its behavior for a class +// type Foo by defining either operator<<(::std::ostream&, const Foo&) +// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that +// defines Foo. + +#include "gtest/gtest-printers.h" +#include +#include +#include +#include // NOLINT +#include +#include "gtest/internal/gtest-port.h" + +namespace testing { + +namespace { + +using ::std::ostream; + +// Prints a segment of bytes in the given object. +GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ +GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ +void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start, + size_t count, ostream* os) { + char text[5] = ""; + for (size_t i = 0; i != count; i++) { + const size_t j = start + i; + if (i != 0) { + // Organizes the bytes into groups of 2 for easy parsing by + // human. + if ((j % 2) == 0) + *os << ' '; + else + *os << '-'; + } + GTEST_SNPRINTF_(text, sizeof(text), "%02X", obj_bytes[j]); + *os << text; + } +} + +// Prints the bytes in the given value to the given ostream. +void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count, + ostream* os) { + // Tells the user how big the object is. + *os << count << "-byte object <"; + + const size_t kThreshold = 132; + const size_t kChunkSize = 64; + // If the object size is bigger than kThreshold, we'll have to omit + // some details by printing only the first and the last kChunkSize + // bytes. + // TODO(wan): let the user control the threshold using a flag. + if (count < kThreshold) { + PrintByteSegmentInObjectTo(obj_bytes, 0, count, os); + } else { + PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os); + *os << " ... "; + // Rounds up to 2-byte boundary. 
+    const size_t resume_pos = (count - kChunkSize + 1)/2*2;
+    PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os);
+  }
+  *os << ">";
+}
+
+}  // namespace
+
+namespace internal2 {
+
+// Delegates to PrintBytesInObjectToImpl() to print the bytes in the
+// given object.  The delegation simplifies the implementation, which
+// uses the << operator and thus is easier done outside of the
+// ::testing::internal namespace, which contains a << operator that
+// sometimes conflicts with the one in STL.
+void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
+                          ostream* os) {
+  PrintBytesInObjectToImpl(obj_bytes, count, os);
+}
+
+}  // namespace internal2
+
+namespace internal {
+
+// Depending on the value of a char (or wchar_t), we print it in one
+// of three formats:
+//   - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
+//   - as a hexidecimal escape sequence (e.g. '\x7F'), or
+//   - as a special escape sequence (e.g. '\r', '\n').
+enum CharFormat {
+  kAsIs,
+  kHexEscape,
+  kSpecialEscape
+};
+
+// Returns true if c is a printable ASCII character.  We test the
+// value of c directly instead of calling isprint(), which is buggy on
+// Windows Mobile.
+inline bool IsPrintableAscii(wchar_t c) {
+  return 0x20 <= c && c <= 0x7E;
+}
+
+// Prints a wide or narrow char c as a character literal without the
+// quotes, escaping it when necessary; returns how c was formatted.
+// The template argument UnsignedChar is the unsigned version of Char,
+// which is the type of c.
+template <typename UnsignedChar, typename Char>
+static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
+  switch (static_cast<wchar_t>(c)) {
+    case L'\0':
+      *os << "\\0";
+      break;
+    case L'\'':
+      *os << "\\'";
+      break;
+    case L'\\':
+      *os << "\\\\";
+      break;
+    case L'\a':
+      *os << "\\a";
+      break;
+    case L'\b':
+      *os << "\\b";
+      break;
+    case L'\f':
+      *os << "\\f";
+      break;
+    case L'\n':
+      *os << "\\n";
+      break;
+    case L'\r':
+      *os << "\\r";
+      break;
+    case L'\t':
+      *os << "\\t";
+      break;
+    case L'\v':
+      *os << "\\v";
+      break;
+    default:
+      if (IsPrintableAscii(c)) {
+        *os << static_cast<char>(c);
+        return kAsIs;
+      } else {
+        *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
+        return kHexEscape;
+      }
+  }
+  return kSpecialEscape;
+}
+
+// Prints a wchar_t c as if it's part of a string literal, escaping it when
+// necessary; returns how c was formatted.
+static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
+  switch (c) {
+    case L'\'':
+      *os << "'";
+      return kAsIs;
+    case L'"':
+      *os << "\\\"";
+      return kSpecialEscape;
+    default:
+      return PrintAsCharLiteralTo<wchar_t>(c, os);
+  }
+}
+
+// Prints a char c as if it's part of a string literal, escaping it when
+// necessary; returns how c was formatted.
+static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
+  return PrintAsStringLiteralTo(
+      static_cast<wchar_t>(static_cast<unsigned char>(c)), os);
+}
+
+// Prints a wide or narrow character c and its code.  '\0' is printed
+// as "'\\0'", other unprintable characters are also properly escaped
+// using the standard C++ escape sequence.  The template argument
+// UnsignedChar is the unsigned version of Char, which is the type of c.
+template <typename UnsignedChar, typename Char>
+void PrintCharAndCodeTo(Char c, ostream* os) {
+  // First, print c as a literal in the most readable form we can find.
+  *os << ((sizeof(c) > 1) ? "L'" : "'");
+  const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
+  *os << "'";
+
+  // To aid user debugging, we also print c's code in decimal, unless
+  // it's 0 (in which case c was printed as '\\0', making the code
+  // obvious).
+ if (c == 0) + return; + *os << " (" << static_cast(c); + + // For more convenience, we print c's code again in hexidecimal, + // unless c was already printed in the form '\x##' or the code is in + // [1, 9]. + if (format == kHexEscape || (1 <= c && c <= 9)) { + // Do nothing. + } else { + *os << ", 0x" << String::FormatHexInt(static_cast(c)); + } + *os << ")"; +} + +void PrintTo(unsigned char c, ::std::ostream* os) { + PrintCharAndCodeTo(c, os); +} +void PrintTo(signed char c, ::std::ostream* os) { + PrintCharAndCodeTo(c, os); +} + +// Prints a wchar_t as a symbol if it is printable or as its internal +// code otherwise and also as its code. L'\0' is printed as "L'\\0'". +void PrintTo(wchar_t wc, ostream* os) { + PrintCharAndCodeTo(wc, os); +} + +// Prints the given array of characters to the ostream. CharType must be either +// char or wchar_t. +// The array starts at begin, the length is len, it may include '\0' characters +// and may not be NUL-terminated. +template +GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ +GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ +static void PrintCharsAsStringTo( + const CharType* begin, size_t len, ostream* os) { + const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\""; + *os << kQuoteBegin; + bool is_previous_hex = false; + for (size_t index = 0; index < len; ++index) { + const CharType cur = begin[index]; + if (is_previous_hex && IsXDigit(cur)) { + // Previous character is of '\x..' form and this character can be + // interpreted as another hexadecimal digit in its number. Break string to + // disambiguate. + *os << "\" " << kQuoteBegin; + } + is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape; + } + *os << "\""; +} + +// Prints a (const) char/wchar_t array of 'len' elements, starting at address +// 'begin'. CharType must be either char or wchar_t. +template +GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ +GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ +GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ +static void UniversalPrintCharArray( + const CharType* begin, size_t len, ostream* os) { + // The code + // const char kFoo[] = "foo"; + // generates an array of 4, not 3, elements, with the last one being '\0'. + // + // Therefore when printing a char array, we don't print the last element if + // it's '\0', such that the output matches the string literal as it's + // written in the source code. + if (len > 0 && begin[len - 1] == '\0') { + PrintCharsAsStringTo(begin, len - 1, os); + return; + } + + // If, however, the last element in the array is not '\0', e.g. + // const char kFoo[] = { 'f', 'o', 'o' }; + // we must print the entire array. We also print a message to indicate + // that the array is not NUL-terminated. + PrintCharsAsStringTo(begin, len, os); + *os << " (no terminating NUL)"; +} + +// Prints a (const) char array of 'len' elements, starting at address 'begin'. +void UniversalPrintArray(const char* begin, size_t len, ostream* os) { + UniversalPrintCharArray(begin, len, os); +} + +// Prints a (const) wchar_t array of 'len' elements, starting at address +// 'begin'. +void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) { + UniversalPrintCharArray(begin, len, os); +} + +// Prints the given C string to the ostream. +void PrintTo(const char* s, ostream* os) { + if (s == NULL) { + *os << "NULL"; + } else { + *os << ImplicitCast_(s) << " pointing to "; + PrintCharsAsStringTo(s, strlen(s), os); + } +} + +// MSVC compiler can be configured to define whar_t as a typedef +// of unsigned short. 
Defining an overload for const wchar_t* in that case +// would cause pointers to unsigned shorts be printed as wide strings, +// possibly accessing more memory than intended and causing invalid +// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when +// wchar_t is implemented as a native type. +#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) +// Prints the given wide C string to the ostream. +void PrintTo(const wchar_t* s, ostream* os) { + if (s == NULL) { + *os << "NULL"; + } else { + *os << ImplicitCast_(s) << " pointing to "; + PrintCharsAsStringTo(s, std::wcslen(s), os); + } +} +#endif // wchar_t is native + +// Prints a ::string object. +#if GTEST_HAS_GLOBAL_STRING +void PrintStringTo(const ::string& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_GLOBAL_STRING + +void PrintStringTo(const ::std::string& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} + +// Prints a ::wstring object. +#if GTEST_HAS_GLOBAL_WSTRING +void PrintWideStringTo(const ::wstring& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +#if GTEST_HAS_STD_WSTRING +void PrintWideStringTo(const ::std::wstring& s, ostream* os) { + PrintCharsAsStringTo(s.data(), s.size(), os); +} +#endif // GTEST_HAS_STD_WSTRING + +} // namespace internal + +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// +// The Google C++ Testing Framework (Google Test) + +#include "gtest/gtest-test-part.h" + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick exists to +// prevent the accidental inclusion of gtest-internal-inl.h in the +// user's code. 
+#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +namespace testing { + +using internal::GetUnitTestImpl; + +// Gets the summary of the failure message by omitting the stack trace +// in it. +std::string TestPartResult::ExtractSummary(const char* message) { + const char* const stack_trace = strstr(message, internal::kStackTraceMarker); + return stack_trace == NULL ? message : + std::string(message, stack_trace); +} + +// Prints a TestPartResult object. +std::ostream& operator<<(std::ostream& os, const TestPartResult& result) { + return os + << result.file_name() << ":" << result.line_number() << ": " + << (result.type() == TestPartResult::kSuccess ? "Success" : + result.type() == TestPartResult::kFatalFailure ? "Fatal failure" : + "Non-fatal failure") << ":\n" + << result.message() << std::endl; +} + +// Appends a TestPartResult to the array. +void TestPartResultArray::Append(const TestPartResult& result) { + array_.push_back(result); +} + +// Returns the TestPartResult at the given index (0-based). +const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const { + if (index < 0 || index >= size()) { + printf("\nInvalid index (%d) into TestPartResultArray.\n", index); + internal::posix::Abort(); + } + + return array_[index]; +} + +// Returns the number of TestPartResult objects in the array. +int TestPartResultArray::size() const { + return static_cast(array_.size()); +} + +namespace internal { + +HasNewFatalFailureHelper::HasNewFatalFailureHelper() + : has_new_fatal_failure_(false), + original_reporter_(GetUnitTestImpl()-> + GetTestPartResultReporterForCurrentThread()) { + GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this); +} + +HasNewFatalFailureHelper::~HasNewFatalFailureHelper() { + GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( + original_reporter_); +} + +void HasNewFatalFailureHelper::ReportTestPartResult( + const TestPartResult& result) { + if (result.fatally_failed()) + has_new_fatal_failure_ = true; + original_reporter_->ReportTestPartResult(result); +} + +} // namespace internal + +} // namespace testing +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#include "gtest/gtest-typed-test.h" +#include "gtest/gtest.h" + +namespace testing { +namespace internal { + +#if GTEST_HAS_TYPED_TEST_P + +// Skips to the first non-space char in str. Returns an empty string if str +// contains only whitespace characters. +static const char* SkipSpaces(const char* str) { + while (IsSpace(*str)) + str++; + return str; +} + +static std::vector SplitIntoTestNames(const char* src) { + std::vector name_vec; + src = SkipSpaces(src); + for (; src != NULL; src = SkipComma(src)) { + name_vec.push_back(StripTrailingSpaces(GetPrefixUntilComma(src))); + } + return name_vec; +} + +// Verifies that registered_tests match the test names in +// registered_tests_; returns registered_tests if successful, or +// aborts the program otherwise. +const char* TypedTestCasePState::VerifyRegisteredTestNames( + const char* file, int line, const char* registered_tests) { + typedef RegisteredTestsMap::const_iterator RegisteredTestIter; + registered_ = true; + + std::vector name_vec = SplitIntoTestNames(registered_tests); + + Message errors; + + std::set tests; + for (std::vector::const_iterator name_it = name_vec.begin(); + name_it != name_vec.end(); ++name_it) { + const std::string& name = *name_it; + if (tests.count(name) != 0) { + errors << "Test " << name << " is listed more than once.\n"; + continue; + } + + bool found = false; + for (RegisteredTestIter it = registered_tests_.begin(); + it != registered_tests_.end(); + ++it) { + if (name == it->first) { + found = true; + break; + } + } + + if (found) { + tests.insert(name); + } else { + errors << "No test named " << name + << " can be found in this test case.\n"; + } + } + + for (RegisteredTestIter it = registered_tests_.begin(); + it != registered_tests_.end(); + ++it) { + if (tests.count(it->first) == 0) { + errors << "You forgot to list test " << it->first << ".\n"; + } + } + + const std::string& errors_str = errors.GetString(); + if (errors_str != "") { + fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(), + errors_str.c_str()); + fflush(stderr); + posix::Abort(); + } + + return registered_tests; +} + +#endif // GTEST_HAS_TYPED_TEST_P + +} // namespace internal +} // namespace testing +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) + +#include "gtest/gtest.h" +#include "gtest/internal/custom/gtest.h" +#include "gtest/gtest-spi.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include // NOLINT +#include +#include + +#if GTEST_OS_LINUX + +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +# define GTEST_HAS_GETTIMEOFDAY_ 1 + +# include // NOLINT +# include // NOLINT +# include // NOLINT +// Declares vsnprintf(). This header is not available on Windows. +# include // NOLINT +# include // NOLINT +# include // NOLINT +# include // NOLINT +# include + +#elif GTEST_OS_SYMBIAN +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include // NOLINT + +#elif GTEST_OS_ZOS +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include // NOLINT + +// On z/OS we additionally need strings.h for strcasecmp. +# include // NOLINT + +#elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE. + +# include // NOLINT +# undef min + +#elif GTEST_OS_WINDOWS // We are on Windows proper. + +# include // NOLINT +# include // NOLINT +# include // NOLINT +# include // NOLINT + +# if GTEST_OS_WINDOWS_MINGW +// MinGW has gettimeofday() but not _ftime64(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +// TODO(kenton@google.com): There are other ways to get the time on +// Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW +// supports these. consider using them instead. +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include // NOLINT +# endif // GTEST_OS_WINDOWS_MINGW + +// cpplint thinks that the header is already included, so we want to +// silence it. +# include // NOLINT +# undef min + +#else + +// Assume other platforms have gettimeofday(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +# define GTEST_HAS_GETTIMEOFDAY_ 1 + +// cpplint thinks that the header is already included, so we want to +// silence it. +# include // NOLINT +# include // NOLINT + +#endif // GTEST_OS_LINUX + +#if GTEST_HAS_EXCEPTIONS +# include +#endif + +#if GTEST_CAN_STREAM_RESULTS_ +# include // NOLINT +# include // NOLINT +# include // NOLINT +# include // NOLINT +#endif + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. 
+#define GTEST_IMPLEMENTATION_ 1 +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION_ + +#if GTEST_OS_WINDOWS +# define vsnprintf _vsnprintf +#endif // GTEST_OS_WINDOWS + +namespace testing { + +using internal::CountIf; +using internal::ForEach; +using internal::GetElementOr; +using internal::Shuffle; + +// Constants. + +// A test whose test case name or test name matches this filter is +// disabled and not run. +static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*"; + +// A test case whose name matches this filter is considered a death +// test case and will be run before test cases whose name doesn't +// match this filter. +static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*"; + +// A test filter that matches everything. +static const char kUniversalFilter[] = "*"; + +// The default output file for XML output. +static const char kDefaultOutputFile[] = "test_detail.xml"; + +// The environment variable name for the test shard index. +static const char kTestShardIndex[] = "GTEST_SHARD_INDEX"; +// The environment variable name for the total number of test shards. +static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS"; +// The environment variable name for the test shard status file. +static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE"; + +namespace internal { + +// The text used in failure messages to indicate the start of the +// stack trace. +const char kStackTraceMarker[] = "\nStack trace:\n"; + +// g_help_flag is true iff the --help flag or an equivalent form is +// specified on the command line. +bool g_help_flag = false; + +} // namespace internal + +static const char* GetDefaultFilter() { +#ifdef GTEST_TEST_FILTER_ENV_VAR_ + const char* const testbridge_test_only = getenv(GTEST_TEST_FILTER_ENV_VAR_); + if (testbridge_test_only != NULL) { + return testbridge_test_only; + } +#endif // GTEST_TEST_FILTER_ENV_VAR_ + return kUniversalFilter; +} + +GTEST_DEFINE_bool_( + also_run_disabled_tests, + internal::BoolFromGTestEnv("also_run_disabled_tests", false), + "Run disabled tests too, in addition to the tests normally being run."); + +GTEST_DEFINE_bool_( + break_on_failure, + internal::BoolFromGTestEnv("break_on_failure", false), + "True iff a failed assertion should be a debugger break-point."); + +GTEST_DEFINE_bool_( + catch_exceptions, + internal::BoolFromGTestEnv("catch_exceptions", true), + "True iff " GTEST_NAME_ + " should catch exceptions and treat them as test failures."); + +GTEST_DEFINE_string_( + color, + internal::StringFromGTestEnv("color", "auto"), + "Whether to use colors in the output. Valid values: yes, no, " + "and auto. 'auto' means to use colors if the output is " + "being sent to a terminal and the TERM environment variable " + "is set to a terminal type that supports colors."); + +GTEST_DEFINE_string_( + filter, + internal::StringFromGTestEnv("filter", GetDefaultFilter()), + "A colon-separated list of glob (not regex) patterns " + "for filtering the tests to run, optionally followed by a " + "'-' and a : separated list of negative patterns (tests to " + "exclude). A test is run if it matches one of the positive " + "patterns and does not match any of the negative patterns."); + +GTEST_DEFINE_bool_(list_tests, false, + "List all tests without running them."); + +GTEST_DEFINE_string_( + output, + internal::StringFromGTestEnv("output", ""), + "A format (currently must be \"xml\"), optionally followed " + "by a colon and an output file name or directory. 
A directory " + "is indicated by a trailing pathname separator. " + "Examples: \"xml:filename.xml\", \"xml::directoryname/\". " + "If a directory is specified, output files will be created " + "within that directory, with file-names based on the test " + "executable's name and, if necessary, made unique by adding " + "digits."); + +GTEST_DEFINE_bool_( + print_time, + internal::BoolFromGTestEnv("print_time", true), + "True iff " GTEST_NAME_ + " should display elapsed time in text output."); + +GTEST_DEFINE_int32_( + random_seed, + internal::Int32FromGTestEnv("random_seed", 0), + "Random number seed to use when shuffling test orders. Must be in range " + "[1, 99999], or 0 to use a seed based on the current time."); + +GTEST_DEFINE_int32_( + repeat, + internal::Int32FromGTestEnv("repeat", 1), + "How many times to repeat each test. Specify a negative number " + "for repeating forever. Useful for shaking out flaky tests."); + +GTEST_DEFINE_bool_( + show_internal_stack_frames, false, + "True iff " GTEST_NAME_ " should include internal stack frames when " + "printing test failure stack traces."); + +GTEST_DEFINE_bool_( + shuffle, + internal::BoolFromGTestEnv("shuffle", false), + "True iff " GTEST_NAME_ + " should randomize tests' order on every run."); + +GTEST_DEFINE_int32_( + stack_trace_depth, + internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth), + "The maximum number of stack frames to print when an " + "assertion fails. The valid range is 0 through 100, inclusive."); + +GTEST_DEFINE_string_( + stream_result_to, + internal::StringFromGTestEnv("stream_result_to", ""), + "This flag specifies the host name and the port number on which to stream " + "test results. Example: \"localhost:555\". The flag is effective only on " + "Linux."); + +GTEST_DEFINE_bool_( + throw_on_failure, + internal::BoolFromGTestEnv("throw_on_failure", false), + "When this flag is specified, a failed assertion will throw an exception " + "if exceptions are enabled or exit the program with a non-zero code " + "otherwise."); + +#if GTEST_USE_OWN_FLAGFILE_FLAG_ +GTEST_DEFINE_string_( + flagfile, + internal::StringFromGTestEnv("flagfile", ""), + "This flag specifies the flagfile to read command-line flags from."); +#endif // GTEST_USE_OWN_FLAGFILE_FLAG_ + +namespace internal { + +// Generates a random number from [0, range), using a Linear +// Congruential Generator (LCG). Crashes if 'range' is 0 or greater +// than kMaxRange. +UInt32 Random::Generate(UInt32 range) { + // These constants are the same as are used in glibc's rand(3). + state_ = (1103515245U*state_ + 12345U) % kMaxRange; + + GTEST_CHECK_(range > 0) + << "Cannot generate a number in the range [0, 0)."; + GTEST_CHECK_(range <= kMaxRange) + << "Generation of a number in [0, " << range << ") was requested, " + << "but this can only generate numbers in [0, " << kMaxRange << ")."; + + // Converting via modulus introduces a bit of downward bias, but + // it's simple, and a linear congruential generator isn't too good + // to begin with. + return state_ % range; +} + +// GTestIsInitialized() returns true iff the user has initialized +// Google Test. Useful for catching the user mistake of not initializing +// Google Test before calling RUN_ALL_TESTS(). +static bool GTestIsInitialized() { return GetArgvs().size() > 0; } + +// Iterates over a vector of TestCases, keeping a running sum of the +// results of calling a given int-returning method on each. +// Returns the sum. 
+static int SumOverTestCaseList(const std::vector& case_list, + int (TestCase::*method)() const) { + int sum = 0; + for (size_t i = 0; i < case_list.size(); i++) { + sum += (case_list[i]->*method)(); + } + return sum; +} + +// Returns true iff the test case passed. +static bool TestCasePassed(const TestCase* test_case) { + return test_case->should_run() && test_case->Passed(); +} + +// Returns true iff the test case failed. +static bool TestCaseFailed(const TestCase* test_case) { + return test_case->should_run() && test_case->Failed(); +} + +// Returns true iff test_case contains at least one test that should +// run. +static bool ShouldRunTestCase(const TestCase* test_case) { + return test_case->should_run(); +} + +// AssertHelper constructor. +AssertHelper::AssertHelper(TestPartResult::Type type, + const char* file, + int line, + const char* message) + : data_(new AssertHelperData(type, file, line, message)) { +} + +AssertHelper::~AssertHelper() { + delete data_; +} + +// Message assignment, for assertion streaming support. +void AssertHelper::operator=(const Message& message) const { + UnitTest::GetInstance()-> + AddTestPartResult(data_->type, data_->file, data_->line, + AppendUserMessage(data_->message, message), + UnitTest::GetInstance()->impl() + ->CurrentOsStackTraceExceptTop(1) + // Skips the stack frame for this function itself. + ); // NOLINT +} + +// Mutex for linked pointers. +GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex); + +// A copy of all command line arguments. Set by InitGoogleTest(). +::std::vector g_argvs; + +const ::std::vector& GetArgvs() { +#if defined(GTEST_CUSTOM_GET_ARGVS_) + return GTEST_CUSTOM_GET_ARGVS_(); +#else // defined(GTEST_CUSTOM_GET_ARGVS_) + return g_argvs; +#endif // defined(GTEST_CUSTOM_GET_ARGVS_) +} + +// Returns the current application's name, removing directory path if that +// is present. +FilePath GetCurrentExecutableName() { + FilePath result; + +#if GTEST_OS_WINDOWS + result.Set(FilePath(GetArgvs()[0]).RemoveExtension("exe")); +#else + result.Set(FilePath(GetArgvs()[0])); +#endif // GTEST_OS_WINDOWS + + return result.RemoveDirectoryName(); +} + +// Functions for processing the gtest_output flag. + +// Returns the output format, or "" for normal printed output. +std::string UnitTestOptions::GetOutputFormat() { + const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); + if (gtest_output_flag == NULL) return std::string(""); + + const char* const colon = strchr(gtest_output_flag, ':'); + return (colon == NULL) ? + std::string(gtest_output_flag) : + std::string(gtest_output_flag, colon - gtest_output_flag); +} + +// Returns the name of the requested output file, or the default if none +// was explicitly specified. +std::string UnitTestOptions::GetAbsolutePathToOutputFile() { + const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); + if (gtest_output_flag == NULL) + return ""; + + const char* const colon = strchr(gtest_output_flag, ':'); + if (colon == NULL) + return internal::FilePath::ConcatPaths( + internal::FilePath( + UnitTest::GetInstance()->original_working_dir()), + internal::FilePath(kDefaultOutputFile)).string(); + + internal::FilePath output_name(colon + 1); + if (!output_name.IsAbsolutePath()) + // TODO(wan@google.com): on Windows \some\path is not an absolute + // path (as its meaning depends on the current drive), yet the + // following logic for turning it into an absolute path is wrong. + // Fix it. 
+ output_name = internal::FilePath::ConcatPaths( + internal::FilePath(UnitTest::GetInstance()->original_working_dir()), + internal::FilePath(colon + 1)); + + if (!output_name.IsDirectory()) + return output_name.string(); + + internal::FilePath result(internal::FilePath::GenerateUniqueFileName( + output_name, internal::GetCurrentExecutableName(), + GetOutputFormat().c_str())); + return result.string(); +} + +// Returns true iff the wildcard pattern matches the string. The +// first ':' or '\0' character in pattern marks the end of it. +// +// This recursive algorithm isn't very efficient, but is clear and +// works well enough for matching test names, which are short. +bool UnitTestOptions::PatternMatchesString(const char *pattern, + const char *str) { + switch (*pattern) { + case '\0': + case ':': // Either ':' or '\0' marks the end of the pattern. + return *str == '\0'; + case '?': // Matches any single character. + return *str != '\0' && PatternMatchesString(pattern + 1, str + 1); + case '*': // Matches any string (possibly empty) of characters. + return (*str != '\0' && PatternMatchesString(pattern, str + 1)) || + PatternMatchesString(pattern + 1, str); + default: // Non-special character. Matches itself. + return *pattern == *str && + PatternMatchesString(pattern + 1, str + 1); + } +} + +bool UnitTestOptions::MatchesFilter( + const std::string& name, const char* filter) { + const char *cur_pattern = filter; + for (;;) { + if (PatternMatchesString(cur_pattern, name.c_str())) { + return true; + } + + // Finds the next pattern in the filter. + cur_pattern = strchr(cur_pattern, ':'); + + // Returns if no more pattern can be found. + if (cur_pattern == NULL) { + return false; + } + + // Skips the pattern separater (the ':' character). + cur_pattern++; + } +} + +// Returns true iff the user-specified filter matches the test case +// name and the test name. +bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name, + const std::string &test_name) { + const std::string& full_name = test_case_name + "." + test_name.c_str(); + + // Split --gtest_filter at '-', if there is one, to separate into + // positive filter and negative filter portions + const char* const p = GTEST_FLAG(filter).c_str(); + const char* const dash = strchr(p, '-'); + std::string positive; + std::string negative; + if (dash == NULL) { + positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter + negative = ""; + } else { + positive = std::string(p, dash); // Everything up to the dash + negative = std::string(dash + 1); // Everything after the dash + if (positive.empty()) { + // Treat '-test1' as the same as '*-test1' + positive = kUniversalFilter; + } + } + + // A filter is a colon-separated list of patterns. It matches a + // test if any pattern in it matches the test. + return (MatchesFilter(full_name, positive.c_str()) && + !MatchesFilter(full_name, negative.c_str())); +} + +#if GTEST_HAS_SEH +// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the +// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. +// This function is useful as an __except condition. +int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) { + // Google Test should handle a SEH exception if: + // 1. the user wants it to, AND + // 2. this is not a breakpoint exception, AND + // 3. this is not a C++ exception (VC++ implements them via SEH, + // apparently). + // + // SEH exception code for C++ exceptions. + // (see http://support.microsoft.com/kb/185294 for more information). 
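As an illustration of the filter grammar implemented by PatternMatchesString() and FilterMatchesTest() above (not patch content; the test names are invented), the same filter can be set programmatically before RUN_ALL_TESTS():

    // Equivalent to --gtest_filter=FooTest.*:BarTest.Baz-*DeathTest*
    // Positive patterns come first; an optional '-' introduces the negative
    // patterns; each side is a ':'-separated list of '*' / '?' globs.
    ::testing::GTEST_FLAG(filter) = "FooTest.*:BarTest.Baz-*DeathTest*";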
+ const DWORD kCxxExceptionCode = 0xe06d7363; + + bool should_handle = true; + + if (!GTEST_FLAG(catch_exceptions)) + should_handle = false; + else if (exception_code == EXCEPTION_BREAKPOINT) + should_handle = false; + else if (exception_code == kCxxExceptionCode) + should_handle = false; + + return should_handle ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH; +} +#endif // GTEST_HAS_SEH + +} // namespace internal + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. Intercepts only failures from the current thread. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + TestPartResultArray* result) + : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD), + result_(result) { + Init(); +} + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + InterceptMode intercept_mode, TestPartResultArray* result) + : intercept_mode_(intercept_mode), + result_(result) { + Init(); +} + +void ScopedFakeTestPartResultReporter::Init() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + if (intercept_mode_ == INTERCEPT_ALL_THREADS) { + old_reporter_ = impl->GetGlobalTestPartResultReporter(); + impl->SetGlobalTestPartResultReporter(this); + } else { + old_reporter_ = impl->GetTestPartResultReporterForCurrentThread(); + impl->SetTestPartResultReporterForCurrentThread(this); + } +} + +// The d'tor restores the test part result reporter used by Google Test +// before. +ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + if (intercept_mode_ == INTERCEPT_ALL_THREADS) { + impl->SetGlobalTestPartResultReporter(old_reporter_); + } else { + impl->SetTestPartResultReporterForCurrentThread(old_reporter_); + } +} + +// Increments the test part result count and remembers the result. +// This method is from the TestPartResultReporterInterface interface. +void ScopedFakeTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + result_->Append(result); +} + +namespace internal { + +// Returns the type ID of ::testing::Test. We should always call this +// instead of GetTypeId< ::testing::Test>() to get the type ID of +// testing::Test. This is to work around a suspected linker bug when +// using Google Test as a framework on Mac OS X. The bug causes +// GetTypeId< ::testing::Test>() to return different values depending +// on whether the call is from the Google Test framework itself or +// from user test code. GetTestTypeId() is guaranteed to always +// return the same value, as it always calls GetTypeId<>() from the +// gtest.cc, which is within the Google Test framework. +TypeId GetTestTypeId() { + return GetTypeId(); +} + +// The value of GetTestTypeId() as seen from within the Google Test +// library. This is solely for testing GetTestTypeId(). +extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId(); + +// This predicate-formatter checks that 'results' contains a test part +// failure of the given type and that the failure message contains the +// given substring. 
+AssertionResult HasOneFailure(const char* /* results_expr */, + const char* /* type_expr */, + const char* /* substr_expr */, + const TestPartResultArray& results, + TestPartResult::Type type, + const string& substr) { + const std::string expected(type == TestPartResult::kFatalFailure ? + "1 fatal failure" : + "1 non-fatal failure"); + Message msg; + if (results.size() != 1) { + msg << "Expected: " << expected << "\n" + << " Actual: " << results.size() << " failures"; + for (int i = 0; i < results.size(); i++) { + msg << "\n" << results.GetTestPartResult(i); + } + return AssertionFailure() << msg; + } + + const TestPartResult& r = results.GetTestPartResult(0); + if (r.type() != type) { + return AssertionFailure() << "Expected: " << expected << "\n" + << " Actual:\n" + << r; + } + + if (strstr(r.message(), substr.c_str()) == NULL) { + return AssertionFailure() << "Expected: " << expected << " containing \"" + << substr << "\"\n" + << " Actual:\n" + << r; + } + + return AssertionSuccess(); +} + +// The constructor of SingleFailureChecker remembers where to look up +// test part results, what type of failure we expect, and what +// substring the failure message should contain. +SingleFailureChecker:: SingleFailureChecker( + const TestPartResultArray* results, + TestPartResult::Type type, + const string& substr) + : results_(results), + type_(type), + substr_(substr) {} + +// The destructor of SingleFailureChecker verifies that the given +// TestPartResultArray contains exactly one failure that has the given +// type and contains the given substring. If that's not the case, a +// non-fatal failure will be generated. +SingleFailureChecker::~SingleFailureChecker() { + EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_); +} + +DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter( + UnitTestImpl* unit_test) : unit_test_(unit_test) {} + +void DefaultGlobalTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + unit_test_->current_test_result()->AddTestPartResult(result); + unit_test_->listeners()->repeater()->OnTestPartResult(result); +} + +DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter( + UnitTestImpl* unit_test) : unit_test_(unit_test) {} + +void DefaultPerThreadTestPartResultReporter::ReportTestPartResult( + const TestPartResult& result) { + unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result); +} + +// Returns the global test part result reporter. +TestPartResultReporterInterface* +UnitTestImpl::GetGlobalTestPartResultReporter() { + internal::MutexLock lock(&global_test_part_result_reporter_mutex_); + return global_test_part_result_repoter_; +} + +// Sets the global test part result reporter. +void UnitTestImpl::SetGlobalTestPartResultReporter( + TestPartResultReporterInterface* reporter) { + internal::MutexLock lock(&global_test_part_result_reporter_mutex_); + global_test_part_result_repoter_ = reporter; +} + +// Returns the test part result reporter for the current thread. +TestPartResultReporterInterface* +UnitTestImpl::GetTestPartResultReporterForCurrentThread() { + return per_thread_test_part_result_reporter_.get(); +} + +// Sets the test part result reporter for the current thread. +void UnitTestImpl::SetTestPartResultReporterForCurrentThread( + TestPartResultReporterInterface* reporter) { + per_thread_test_part_result_reporter_.set(reporter); +} + +// Gets the number of successful test cases. 
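HasOneFailure() and SingleFailureChecker are the machinery behind the EXPECT_FATAL_FAILURE()/EXPECT_NONFATAL_FAILURE() macros from gtest-spi.h. A minimal sketch of typical use, assuming nothing beyond that header:

    #include "gtest/gtest-spi.h"

    TEST(FailureCheckerDemo, CatchesExactlyOneNonFatalFailure) {
      // Passes only if the statement produces exactly one non-fatal failure
      // whose message contains the given substring.
      EXPECT_NONFATAL_FAILURE(EXPECT_EQ(1, 2), "Expected");
    }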
+int UnitTestImpl::successful_test_case_count() const { + return CountIf(test_cases_, TestCasePassed); +} + +// Gets the number of failed test cases. +int UnitTestImpl::failed_test_case_count() const { + return CountIf(test_cases_, TestCaseFailed); +} + +// Gets the number of all test cases. +int UnitTestImpl::total_test_case_count() const { + return static_cast(test_cases_.size()); +} + +// Gets the number of all test cases that contain at least one test +// that should run. +int UnitTestImpl::test_case_to_run_count() const { + return CountIf(test_cases_, ShouldRunTestCase); +} + +// Gets the number of successful tests. +int UnitTestImpl::successful_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count); +} + +// Gets the number of failed tests. +int UnitTestImpl::failed_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count); +} + +// Gets the number of disabled tests that will be reported in the XML report. +int UnitTestImpl::reportable_disabled_test_count() const { + return SumOverTestCaseList(test_cases_, + &TestCase::reportable_disabled_test_count); +} + +// Gets the number of disabled tests. +int UnitTestImpl::disabled_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count); +} + +// Gets the number of tests to be printed in the XML report. +int UnitTestImpl::reportable_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count); +} + +// Gets the number of all tests. +int UnitTestImpl::total_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::total_test_count); +} + +// Gets the number of tests that should run. +int UnitTestImpl::test_to_run_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count); +} + +// Returns the current OS stack trace as an std::string. +// +// The maximum number of stack frames to be included is specified by +// the gtest_stack_trace_depth flag. The skip_count parameter +// specifies the number of top frames to be skipped, which doesn't +// count against the number of frames to be included. +// +// For example, if Foo() calls Bar(), which in turn calls +// CurrentOsStackTraceExceptTop(1), Foo() will be included in the +// trace but Bar() and CurrentOsStackTraceExceptTop() won't. +std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) { + return os_stack_trace_getter()->CurrentStackTrace( + static_cast(GTEST_FLAG(stack_trace_depth)), + skip_count + 1 + // Skips the user-specified number of frames plus this function + // itself. + ); // NOLINT +} + +// Returns the current time in milliseconds. +TimeInMillis GetTimeInMillis() { +#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__) + // Difference between 1970-01-01 and 1601-01-01 in milliseconds. + // http://analogous.blogspot.com/2005/04/epoch.html + const TimeInMillis kJavaEpochToWinFileTimeDelta = + static_cast(116444736UL) * 100000UL; + const DWORD kTenthMicrosInMilliSecond = 10000; + + SYSTEMTIME now_systime; + FILETIME now_filetime; + ULARGE_INTEGER now_int64; + // TODO(kenton@google.com): Shouldn't this just use + // GetSystemTimeAsFileTime()? 
+ GetSystemTime(&now_systime); + if (SystemTimeToFileTime(&now_systime, &now_filetime)) { + now_int64.LowPart = now_filetime.dwLowDateTime; + now_int64.HighPart = now_filetime.dwHighDateTime; + now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) - + kJavaEpochToWinFileTimeDelta; + return now_int64.QuadPart; + } + return 0; +#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_ + __timeb64 now; + + // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996 + // (deprecated function) there. + // TODO(kenton@google.com): Use GetTickCount()? Or use + // SystemTimeToFileTime() + GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996) + _ftime64(&now); + GTEST_DISABLE_MSC_WARNINGS_POP_() + + return static_cast(now.time) * 1000 + now.millitm; +#elif GTEST_HAS_GETTIMEOFDAY_ + struct timeval now; + gettimeofday(&now, NULL); + return static_cast(now.tv_sec) * 1000 + now.tv_usec / 1000; +#else +# error "Don't know how to get the current time on your system." +#endif +} + +// Utilities + +// class String. + +#if GTEST_OS_WINDOWS_MOBILE +// Creates a UTF-16 wide string from the given ANSI string, allocating +// memory using new. The caller is responsible for deleting the return +// value using delete[]. Returns the wide string, or NULL if the +// input is NULL. +LPCWSTR String::AnsiToUtf16(const char* ansi) { + if (!ansi) return NULL; + const int length = strlen(ansi); + const int unicode_length = + MultiByteToWideChar(CP_ACP, 0, ansi, length, + NULL, 0); + WCHAR* unicode = new WCHAR[unicode_length + 1]; + MultiByteToWideChar(CP_ACP, 0, ansi, length, + unicode, unicode_length); + unicode[unicode_length] = 0; + return unicode; +} + +// Creates an ANSI string from the given wide string, allocating +// memory using new. The caller is responsible for deleting the return +// value using delete[]. Returns the ANSI string, or NULL if the +// input is NULL. +const char* String::Utf16ToAnsi(LPCWSTR utf16_str) { + if (!utf16_str) return NULL; + const int ansi_length = + WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, + NULL, 0, NULL, NULL); + char* ansi = new char[ansi_length + 1]; + WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, + ansi, ansi_length, NULL, NULL); + ansi[ansi_length] = 0; + return ansi; +} + +#endif // GTEST_OS_WINDOWS_MOBILE + +// Compares two C strings. Returns true iff they have the same content. +// +// Unlike strcmp(), this function can handle NULL argument(s). A NULL +// C string is considered different to any non-NULL C string, +// including the empty string. +bool String::CStringEquals(const char * lhs, const char * rhs) { + if ( lhs == NULL ) return rhs == NULL; + + if ( rhs == NULL ) return false; + + return strcmp(lhs, rhs) == 0; +} + +#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +// Converts an array of wide chars to a narrow string using the UTF-8 +// encoding, and streams the result to the given Message object. 
+static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length, + Message* msg) { + for (size_t i = 0; i != length; ) { // NOLINT + if (wstr[i] != L'\0') { + *msg << WideStringToUtf8(wstr + i, static_cast(length - i)); + while (i != length && wstr[i] != L'\0') + i++; + } else { + *msg << '\0'; + i++; + } + } +} + +#endif // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +void SplitString(const ::std::string& str, char delimiter, + ::std::vector< ::std::string>* dest) { + ::std::vector< ::std::string> parsed; + ::std::string::size_type pos = 0; + while (::testing::internal::AlwaysTrue()) { + const ::std::string::size_type colon = str.find(delimiter, pos); + if (colon == ::std::string::npos) { + parsed.push_back(str.substr(pos)); + break; + } else { + parsed.push_back(str.substr(pos, colon - pos)); + pos = colon + 1; + } + } + dest->swap(parsed); +} + +} // namespace internal + +// Constructs an empty Message. +// We allocate the stringstream separately because otherwise each use of +// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's +// stack frame leading to huge stack frames in some cases; gcc does not reuse +// the stack space. +Message::Message() : ss_(new ::std::stringstream) { + // By default, we want there to be enough precision when printing + // a double to a Message. + *ss_ << std::setprecision(std::numeric_limits::digits10 + 2); +} + +// These two overloads allow streaming a wide C string to a Message +// using the UTF-8 encoding. +Message& Message::operator <<(const wchar_t* wide_c_str) { + return *this << internal::String::ShowWideCString(wide_c_str); +} +Message& Message::operator <<(wchar_t* wide_c_str) { + return *this << internal::String::ShowWideCString(wide_c_str); +} + +#if GTEST_HAS_STD_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. +Message& Message::operator <<(const ::std::wstring& wstr) { + internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); + return *this; +} +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. +Message& Message::operator <<(const ::wstring& wstr) { + internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this); + return *this; +} +#endif // GTEST_HAS_GLOBAL_WSTRING + +// Gets the text streamed to this object so far as an std::string. +// Each '\0' character in the buffer is replaced with "\\0". +std::string Message::GetString() const { + return internal::StringStreamToString(ss_.get()); +} + +// AssertionResult constructors. +// Used in EXPECT_TRUE/FALSE(assertion_result). +AssertionResult::AssertionResult(const AssertionResult& other) + : success_(other.success_), + message_(other.message_.get() != NULL ? + new ::std::string(*other.message_) : + static_cast< ::std::string*>(NULL)) { +} + +// Swaps two AssertionResults. +void AssertionResult::swap(AssertionResult& other) { + using std::swap; + swap(success_, other.success_); + swap(message_, other.message_); +} + +// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. +AssertionResult AssertionResult::operator!() const { + AssertionResult negation(!success_); + if (message_.get() != NULL) + negation << *message_; + return negation; +} + +// Makes a successful assertion result. +AssertionResult AssertionSuccess() { + return AssertionResult(true); +} + +// Makes a failed assertion result. 
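AssertionSuccess(), AssertionFailure() and the Message streaming above are the building blocks for user-defined predicates. A short illustrative sketch (not part of the patch):

    ::testing::AssertionResult IsEven(int n) {
      if (n % 2 == 0) return ::testing::AssertionSuccess();
      return ::testing::AssertionFailure() << n << " is odd";
    }

    TEST(AssertionResultDemo, StreamsDiagnosticOnFailure) {
      EXPECT_TRUE(IsEven(4));
      // EXPECT_TRUE(IsEven(3)) would fail and print the streamed "3 is odd".
      EXPECT_FALSE(IsEven(3));
    }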
+AssertionResult AssertionFailure() { + return AssertionResult(false); +} + +// Makes a failed assertion result with the given failure message. +// Deprecated; use AssertionFailure() << message. +AssertionResult AssertionFailure(const Message& message) { + return AssertionFailure() << message; +} + +namespace internal { + +namespace edit_distance { +std::vector CalculateOptimalEdits(const std::vector& left, + const std::vector& right) { + std::vector > costs( + left.size() + 1, std::vector(right.size() + 1)); + std::vector > best_move( + left.size() + 1, std::vector(right.size() + 1)); + + // Populate for empty right. + for (size_t l_i = 0; l_i < costs.size(); ++l_i) { + costs[l_i][0] = static_cast(l_i); + best_move[l_i][0] = kRemove; + } + // Populate for empty left. + for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) { + costs[0][r_i] = static_cast(r_i); + best_move[0][r_i] = kAdd; + } + + for (size_t l_i = 0; l_i < left.size(); ++l_i) { + for (size_t r_i = 0; r_i < right.size(); ++r_i) { + if (left[l_i] == right[r_i]) { + // Found a match. Consume it. + costs[l_i + 1][r_i + 1] = costs[l_i][r_i]; + best_move[l_i + 1][r_i + 1] = kMatch; + continue; + } + + const double add = costs[l_i + 1][r_i]; + const double remove = costs[l_i][r_i + 1]; + const double replace = costs[l_i][r_i]; + if (add < remove && add < replace) { + costs[l_i + 1][r_i + 1] = add + 1; + best_move[l_i + 1][r_i + 1] = kAdd; + } else if (remove < add && remove < replace) { + costs[l_i + 1][r_i + 1] = remove + 1; + best_move[l_i + 1][r_i + 1] = kRemove; + } else { + // We make replace a little more expensive than add/remove to lower + // their priority. + costs[l_i + 1][r_i + 1] = replace + 1.00001; + best_move[l_i + 1][r_i + 1] = kReplace; + } + } + } + + // Reconstruct the best path. We do it in reverse order. + std::vector best_path; + for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) { + EditType move = best_move[l_i][r_i]; + best_path.push_back(move); + l_i -= move != kAdd; + r_i -= move != kRemove; + } + std::reverse(best_path.begin(), best_path.end()); + return best_path; +} + +namespace { + +// Helper class to convert string into ids with deduplication. +class InternalStrings { + public: + size_t GetId(const std::string& str) { + IdMap::iterator it = ids_.find(str); + if (it != ids_.end()) return it->second; + size_t id = ids_.size(); + return ids_[str] = id; + } + + private: + typedef std::map IdMap; + IdMap ids_; +}; + +} // namespace + +std::vector CalculateOptimalEdits( + const std::vector& left, + const std::vector& right) { + std::vector left_ids, right_ids; + { + InternalStrings intern_table; + for (size_t i = 0; i < left.size(); ++i) { + left_ids.push_back(intern_table.GetId(left[i])); + } + for (size_t i = 0; i < right.size(); ++i) { + right_ids.push_back(intern_table.GetId(right[i])); + } + } + return CalculateOptimalEdits(left_ids, right_ids); +} + +namespace { + +// Helper class that holds the state for one hunk and prints it out to the +// stream. +// It reorders adds/removes when possible to group all removes before all +// adds. It also adds the hunk header before printint into the stream. 
+class Hunk { + public: + Hunk(size_t left_start, size_t right_start) + : left_start_(left_start), + right_start_(right_start), + adds_(), + removes_(), + common_() {} + + void PushLine(char edit, const char* line) { + switch (edit) { + case ' ': + ++common_; + FlushEdits(); + hunk_.push_back(std::make_pair(' ', line)); + break; + case '-': + ++removes_; + hunk_removes_.push_back(std::make_pair('-', line)); + break; + case '+': + ++adds_; + hunk_adds_.push_back(std::make_pair('+', line)); + break; + } + } + + void PrintTo(std::ostream* os) { + PrintHeader(os); + FlushEdits(); + for (std::list >::const_iterator it = + hunk_.begin(); + it != hunk_.end(); ++it) { + *os << it->first << it->second << "\n"; + } + } + + bool has_edits() const { return adds_ || removes_; } + + private: + void FlushEdits() { + hunk_.splice(hunk_.end(), hunk_removes_); + hunk_.splice(hunk_.end(), hunk_adds_); + } + + // Print a unified diff header for one hunk. + // The format is + // "@@ -, +, @@" + // where the left/right parts are ommitted if unnecessary. + void PrintHeader(std::ostream* ss) const { + *ss << "@@ "; + if (removes_) { + *ss << "-" << left_start_ << "," << (removes_ + common_); + } + if (removes_ && adds_) { + *ss << " "; + } + if (adds_) { + *ss << "+" << right_start_ << "," << (adds_ + common_); + } + *ss << " @@\n"; + } + + size_t left_start_, right_start_; + size_t adds_, removes_, common_; + std::list > hunk_, hunk_adds_, hunk_removes_; +}; + +} // namespace + +// Create a list of diff hunks in Unified diff format. +// Each hunk has a header generated by PrintHeader above plus a body with +// lines prefixed with ' ' for no change, '-' for deletion and '+' for +// addition. +// 'context' represents the desired unchanged prefix/suffix around the diff. +// If two hunks are close enough that their contexts overlap, then they are +// joined into one hunk. +std::string CreateUnifiedDiff(const std::vector& left, + const std::vector& right, + size_t context) { + const std::vector edits = CalculateOptimalEdits(left, right); + + size_t l_i = 0, r_i = 0, edit_i = 0; + std::stringstream ss; + while (edit_i < edits.size()) { + // Find first edit. + while (edit_i < edits.size() && edits[edit_i] == kMatch) { + ++l_i; + ++r_i; + ++edit_i; + } + + // Find the first line to include in the hunk. + const size_t prefix_context = std::min(l_i, context); + Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1); + for (size_t i = prefix_context; i > 0; --i) { + hunk.PushLine(' ', left[l_i - i].c_str()); + } + + // Iterate the edits until we found enough suffix for the hunk or the input + // is over. + size_t n_suffix = 0; + for (; edit_i < edits.size(); ++edit_i) { + if (n_suffix >= context) { + // Continue only if the next hunk is very close. + std::vector::const_iterator it = edits.begin() + edit_i; + while (it != edits.end() && *it == kMatch) ++it; + if (it == edits.end() || (it - edits.begin()) - edit_i >= context) { + // There is no next edit or it is too far away. + break; + } + } + + EditType edit = edits[edit_i]; + // Reset count when a non match is found. + n_suffix = edit == kMatch ? n_suffix + 1 : 0; + + if (edit == kMatch || edit == kRemove || edit == kReplace) { + hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str()); + } + if (edit == kAdd || edit == kReplace) { + hunk.PushLine('+', right[r_i].c_str()); + } + + // Advance indices, depending on edit type. + l_i += edit != kAdd; + r_i += edit != kRemove; + } + + if (!hunk.has_edits()) { + // We are done. We don't want this hunk. 
+ break; + } + + hunk.PrintTo(&ss); + } + return ss.str(); +} + +} // namespace edit_distance + +namespace { + +// The string representation of the values received in EqFailure() are already +// escaped. Split them on escaped '\n' boundaries. Leave all other escaped +// characters the same. +std::vector SplitEscapedString(const std::string& str) { + std::vector lines; + size_t start = 0, end = str.size(); + if (end > 2 && str[0] == '"' && str[end - 1] == '"') { + ++start; + --end; + } + bool escaped = false; + for (size_t i = start; i + 1 < end; ++i) { + if (escaped) { + escaped = false; + if (str[i] == 'n') { + lines.push_back(str.substr(start, i - start - 1)); + start = i + 1; + } + } else { + escaped = str[i] == '\\'; + } + } + lines.push_back(str.substr(start, end - start)); + return lines; +} + +} // namespace + +// Constructs and returns the message for an equality assertion +// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. +// +// The first four parameters are the expressions used in the assertion +// and their values, as strings. For example, for ASSERT_EQ(foo, bar) +// where foo is 5 and bar is 6, we have: +// +// lhs_expression: "foo" +// rhs_expression: "bar" +// lhs_value: "5" +// rhs_value: "6" +// +// The ignoring_case parameter is true iff the assertion is a +// *_STRCASEEQ*. When it's true, the string "Ignoring case" will +// be inserted into the message. +AssertionResult EqFailure(const char* lhs_expression, + const char* rhs_expression, + const std::string& lhs_value, + const std::string& rhs_value, + bool ignoring_case) { + Message msg; + msg << " Expected: " << lhs_expression; + if (lhs_value != lhs_expression) { + msg << "\n Which is: " << lhs_value; + } + msg << "\nTo be equal to: " << rhs_expression; + if (rhs_value != rhs_expression) { + msg << "\n Which is: " << rhs_value; + } + + if (ignoring_case) { + msg << "\nIgnoring case"; + } + + if (!lhs_value.empty() && !rhs_value.empty()) { + const std::vector lhs_lines = + SplitEscapedString(lhs_value); + const std::vector rhs_lines = + SplitEscapedString(rhs_value); + if (lhs_lines.size() > 1 || rhs_lines.size() > 1) { + msg << "\nWith diff:\n" + << edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines); + } + } + + return AssertionFailure() << msg; +} + +// Constructs a failure message for Boolean assertions such as EXPECT_TRUE. +std::string GetBoolAssertionFailureMessage( + const AssertionResult& assertion_result, + const char* expression_text, + const char* actual_predicate_value, + const char* expected_predicate_value) { + const char* actual_message = assertion_result.message(); + Message msg; + msg << "Value of: " << expression_text + << "\n Actual: " << actual_predicate_value; + if (actual_message[0] != '\0') + msg << " (" << actual_message << ")"; + msg << "\nExpected: " << expected_predicate_value; + return msg.GetString(); +} + +// Helper function for implementing ASSERT_NEAR. +AssertionResult DoubleNearPredFormat(const char* expr1, + const char* expr2, + const char* abs_error_expr, + double val1, + double val2, + double abs_error) { + const double diff = fabs(val1 - val2); + if (diff <= abs_error) return AssertionSuccess(); + + // TODO(wan): do not print the value of an expression if it's + // already a literal. 
+  return AssertionFailure()
+      << "The difference between " << expr1 << " and " << expr2
+      << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
+      << expr1 << " evaluates to " << val1 << ",\n"
+      << expr2 << " evaluates to " << val2 << ", and\n"
+      << abs_error_expr << " evaluates to " << abs_error << ".";
+}
+
+
+// Helper template for implementing FloatLE() and DoubleLE().
+template <typename RawType>
+AssertionResult FloatingPointLE(const char* expr1,
+                                const char* expr2,
+                                RawType val1,
+                                RawType val2) {
+  // Returns success if val1 is less than val2,
+  if (val1 < val2) {
+    return AssertionSuccess();
+  }
+
+  // or if val1 is almost equal to val2.
+  const FloatingPoint<RawType> lhs(val1), rhs(val2);
+  if (lhs.AlmostEquals(rhs)) {
+    return AssertionSuccess();
+  }
+
+  // Note that the above two checks will both fail if either val1 or
+  // val2 is NaN, as the IEEE floating-point standard requires that
+  // any predicate involving a NaN must return false.
+
+  ::std::stringstream val1_ss;
+  val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+          << val1;
+
+  ::std::stringstream val2_ss;
+  val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
+          << val2;
+
+  return AssertionFailure()
+      << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
+      << "  Actual: " << StringStreamToString(&val1_ss) << " vs "
+      << StringStreamToString(&val2_ss);
+}
+
+}  // namespace internal
+
+// Asserts that val1 is less than, or almost equal to, val2. Fails
+// otherwise. In particular, it fails if either val1 or val2 is NaN.
+AssertionResult FloatLE(const char* expr1, const char* expr2,
+                        float val1, float val2) {
+  return internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
+}
+
+// Asserts that val1 is less than, or almost equal to, val2. Fails
+// otherwise. In particular, it fails if either val1 or val2 is NaN.
+AssertionResult DoubleLE(const char* expr1, const char* expr2,
+                         double val1, double val2) {
+  return internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
+}
+
+namespace internal {
+
+// The helper function for {ASSERT|EXPECT}_EQ with int or enum
+// arguments.
+AssertionResult CmpHelperEQ(const char* lhs_expression,
+                            const char* rhs_expression,
+                            BiggestInt lhs,
+                            BiggestInt rhs) {
+  if (lhs == rhs) {
+    return AssertionSuccess();
+  }
+
+  return EqFailure(lhs_expression,
+                   rhs_expression,
+                   FormatForComparisonFailureMessage(lhs, rhs),
+                   FormatForComparisonFailureMessage(rhs, lhs),
+                   false);
+}
+
+// A macro for implementing the helper functions needed to implement
+// ASSERT_?? and EXPECT_?? with integer or enum arguments. It is here
+// just to avoid copy-and-paste of similar code.
+#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
+AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
+                                   BiggestInt val1, BiggestInt val2) {\
+  if (val1 op val2) {\
+    return AssertionSuccess();\
+  } else {\
+    return AssertionFailure() \
+        << "Expected: (" << expr1 << ") " #op " (" << expr2\
+        << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
+        << " vs " << FormatForComparisonFailureMessage(val2, val1);\
+  }\
+}
+
+// Implements the helper function for {ASSERT|EXPECT}_NE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(NE, !=)
+// Implements the helper function for {ASSERT|EXPECT}_LE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(LE, <=)
+// Implements the helper function for {ASSERT|EXPECT}_LT with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(LT, < )
+// Implements the helper function for {ASSERT|EXPECT}_GE with int or
+// enum arguments.
+GTEST_IMPL_CMP_HELPER_(GE, >=) +// Implements the helper function for {ASSERT|EXPECT}_GT with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(GT, > ) + +#undef GTEST_IMPL_CMP_HELPER_ + +// The helper function for {ASSERT|EXPECT}_STREQ. +AssertionResult CmpHelperSTREQ(const char* lhs_expression, + const char* rhs_expression, + const char* lhs, + const char* rhs) { + if (String::CStringEquals(lhs, rhs)) { + return AssertionSuccess(); + } + + return EqFailure(lhs_expression, + rhs_expression, + PrintToString(lhs), + PrintToString(rhs), + false); +} + +// The helper function for {ASSERT|EXPECT}_STRCASEEQ. +AssertionResult CmpHelperSTRCASEEQ(const char* lhs_expression, + const char* rhs_expression, + const char* lhs, + const char* rhs) { + if (String::CaseInsensitiveCStringEquals(lhs, rhs)) { + return AssertionSuccess(); + } + + return EqFailure(lhs_expression, + rhs_expression, + PrintToString(lhs), + PrintToString(rhs), + true); +} + +// The helper function for {ASSERT|EXPECT}_STRNE. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2) { + if (!String::CStringEquals(s1, s2)) { + return AssertionSuccess(); + } else { + return AssertionFailure() << "Expected: (" << s1_expression << ") != (" + << s2_expression << "), actual: \"" + << s1 << "\" vs \"" << s2 << "\""; + } +} + +// The helper function for {ASSERT|EXPECT}_STRCASENE. +AssertionResult CmpHelperSTRCASENE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2) { + if (!String::CaseInsensitiveCStringEquals(s1, s2)) { + return AssertionSuccess(); + } else { + return AssertionFailure() + << "Expected: (" << s1_expression << ") != (" + << s2_expression << ") (ignoring case), actual: \"" + << s1 << "\" vs \"" << s2 << "\""; + } +} + +} // namespace internal + +namespace { + +// Helper functions for implementing IsSubString() and IsNotSubstring(). + +// This group of overloaded functions return true iff needle is a +// substring of haystack. NULL is considered a substring of itself +// only. + +bool IsSubstringPred(const char* needle, const char* haystack) { + if (needle == NULL || haystack == NULL) + return needle == haystack; + + return strstr(haystack, needle) != NULL; +} + +bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) { + if (needle == NULL || haystack == NULL) + return needle == haystack; + + return wcsstr(haystack, needle) != NULL; +} + +// StringType here can be either ::std::string or ::std::wstring. +template +bool IsSubstringPred(const StringType& needle, + const StringType& haystack) { + return haystack.find(needle) != StringType::npos; +} + +// This function implements either IsSubstring() or IsNotSubstring(), +// depending on the value of the expected_to_be_substring parameter. +// StringType here can be const char*, const wchar_t*, ::std::string, +// or ::std::wstring. +template +AssertionResult IsSubstringImpl( + bool expected_to_be_substring, + const char* needle_expr, const char* haystack_expr, + const StringType& needle, const StringType& haystack) { + if (IsSubstringPred(needle, haystack) == expected_to_be_substring) + return AssertionSuccess(); + + const bool is_wide_string = sizeof(needle[0]) > 1; + const char* const begin_string_quote = is_wide_string ? "L\"" : "\""; + return AssertionFailure() + << "Value of: " << needle_expr << "\n" + << " Actual: " << begin_string_quote << needle << "\"\n" + << "Expected: " << (expected_to_be_substring ? 
"" : "not ") + << "a substring of " << haystack_expr << "\n" + << "Which is: " << begin_string_quote << haystack << "\""; +} + +} // namespace + +// IsSubstring() and IsNotSubstring() check whether needle is a +// substring of haystack (NULL is considered a substring of itself +// only), and return an appropriate error message when they fail. + +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} + +#if GTEST_HAS_STD_WSTRING +AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack) { + return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); +} + +AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack) { + return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); +} +#endif // GTEST_HAS_STD_WSTRING + +namespace internal { + +#if GTEST_OS_WINDOWS + +namespace { + +// Helper function for IsHRESULT{SuccessFailure} predicates +AssertionResult HRESULTFailureHelper(const char* expr, + const char* expected, + long hr) { // NOLINT +# if GTEST_OS_WINDOWS_MOBILE + + // Windows CE doesn't support FormatMessage. + const char error_text[] = ""; + +# else + + // Looks up the human-readable system message for the HRESULT code + // and since we're not passing any params to FormatMessage, we don't + // want inserts expanded. + const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS; + const DWORD kBufSize = 4096; + // Gets the system's human readable message string for this HRESULT. 
+ char error_text[kBufSize] = { '\0' }; + DWORD message_length = ::FormatMessageA(kFlags, + 0, // no source, we're asking system + hr, // the error + 0, // no line width restrictions + error_text, // output buffer + kBufSize, // buf size + NULL); // no arguments for inserts + // Trims tailing white space (FormatMessage leaves a trailing CR-LF) + for (; message_length && IsSpace(error_text[message_length - 1]); + --message_length) { + error_text[message_length - 1] = '\0'; + } + +# endif // GTEST_OS_WINDOWS_MOBILE + + const std::string error_hex("0x" + String::FormatHexInt(hr)); + return ::testing::AssertionFailure() + << "Expected: " << expr << " " << expected << ".\n" + << " Actual: " << error_hex << " " << error_text << "\n"; +} + +} // namespace + +AssertionResult IsHRESULTSuccess(const char* expr, long hr) { // NOLINT + if (SUCCEEDED(hr)) { + return AssertionSuccess(); + } + return HRESULTFailureHelper(expr, "succeeds", hr); +} + +AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT + if (FAILED(hr)) { + return AssertionSuccess(); + } + return HRESULTFailureHelper(expr, "fails", hr); +} + +#endif // GTEST_OS_WINDOWS + +// Utility functions for encoding Unicode text (wide strings) in +// UTF-8. + +// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8 +// like this: +// +// Code-point length Encoding +// 0 - 7 bits 0xxxxxxx +// 8 - 11 bits 110xxxxx 10xxxxxx +// 12 - 16 bits 1110xxxx 10xxxxxx 10xxxxxx +// 17 - 21 bits 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + +// The maximum code-point a one-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint1 = (static_cast(1) << 7) - 1; + +// The maximum code-point a two-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint2 = (static_cast(1) << (5 + 6)) - 1; + +// The maximum code-point a three-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint3 = (static_cast(1) << (4 + 2*6)) - 1; + +// The maximum code-point a four-byte UTF-8 sequence can represent. +const UInt32 kMaxCodePoint4 = (static_cast(1) << (3 + 3*6)) - 1; + +// Chops off the n lowest bits from a bit pattern. Returns the n +// lowest bits. As a side effect, the original bit pattern will be +// shifted to the right by n bits. +inline UInt32 ChopLowBits(UInt32* bits, int n) { + const UInt32 low_bits = *bits & ((static_cast(1) << n) - 1); + *bits >>= n; + return low_bits; +} + +// Converts a Unicode code point to a narrow string in UTF-8 encoding. +// code_point parameter is of type UInt32 because wchar_t may not be +// wide enough to contain a code point. +// If the code_point is not a valid Unicode code point +// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted +// to "(Invalid Unicode 0xXXXXXXXX)". +std::string CodePointToUtf8(UInt32 code_point) { + if (code_point > kMaxCodePoint4) { + return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")"; + } + + char str[5]; // Big enough for the largest valid code point. 
+ if (code_point <= kMaxCodePoint1) { + str[1] = '\0'; + str[0] = static_cast(code_point); // 0xxxxxxx + } else if (code_point <= kMaxCodePoint2) { + str[2] = '\0'; + str[1] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[0] = static_cast(0xC0 | code_point); // 110xxxxx + } else if (code_point <= kMaxCodePoint3) { + str[3] = '\0'; + str[2] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[1] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[0] = static_cast(0xE0 | code_point); // 1110xxxx + } else { // code_point <= kMaxCodePoint4 + str[4] = '\0'; + str[3] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[2] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[1] = static_cast(0x80 | ChopLowBits(&code_point, 6)); // 10xxxxxx + str[0] = static_cast(0xF0 | code_point); // 11110xxx + } + return str; +} + +// The following two functions only make sense if the the system +// uses UTF-16 for wide string encoding. All supported systems +// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16. + +// Determines if the arguments constitute UTF-16 surrogate pair +// and thus should be combined into a single Unicode code point +// using CreateCodePointFromUtf16SurrogatePair. +inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) { + return sizeof(wchar_t) == 2 && + (first & 0xFC00) == 0xD800 && (second & 0xFC00) == 0xDC00; +} + +// Creates a Unicode code point from UTF16 surrogate pair. +inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first, + wchar_t second) { + const UInt32 mask = (1 << 10) - 1; + return (sizeof(wchar_t) == 2) ? + (((first & mask) << 10) | (second & mask)) + 0x10000 : + // This function should not be called when the condition is + // false, but we provide a sensible default in case it is. + static_cast(first); +} + +// Converts a wide string to a narrow string in UTF-8 encoding. +// The wide string is assumed to have the following encoding: +// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS) +// UTF-32 if sizeof(wchar_t) == 4 (on Linux) +// Parameter str points to a null-terminated wide string. +// Parameter num_chars may additionally limit the number +// of wchar_t characters processed. -1 is used when the entire string +// should be processed. +// If the string contains code points that are not valid Unicode code points +// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output +// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding +// and contains invalid UTF-16 surrogate pairs, values in those pairs +// will be encoded as individual Unicode characters from Basic Normal Plane. 
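A worked example of the encoding table above (an editor's illustration, not patch content; CodePointToUtf8() is internal API and referenced only because it appears in this hunk):

    // U+0041 'A'  -> 0x41                   (1 byte,  fits in 7 bits)
    // U+00E9      -> 0xC3 0xA9              (2 bytes, fits in 11 bits)
    // U+20AC      -> 0xE2 0x82 0xAC         (3 bytes, fits in 16 bits)
    // U+1F600     -> 0xF0 0x9F 0x98 0x80    (4 bytes, fits in 21 bits)
    EXPECT_EQ("\xE2\x82\xAC", ::testing::internal::CodePointToUtf8(0x20AC));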
+std::string WideStringToUtf8(const wchar_t* str, int num_chars) { + if (num_chars == -1) + num_chars = static_cast(wcslen(str)); + + ::std::stringstream stream; + for (int i = 0; i < num_chars; ++i) { + UInt32 unicode_code_point; + + if (str[i] == L'\0') { + break; + } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) { + unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i], + str[i + 1]); + i++; + } else { + unicode_code_point = static_cast(str[i]); + } + + stream << CodePointToUtf8(unicode_code_point); + } + return St \ No newline at end of file diff --git a/qa/workunits/rgw/jcksum/file-8b b/qa/workunits/rgw/jcksum/file-8b new file mode 100644 index 000000000000..540606b9d0e9 --- /dev/null +++ b/qa/workunits/rgw/jcksum/file-8b @@ -0,0 +1 @@ +// Copy diff --git a/qa/workunits/rgw/jcksum/mvnw b/qa/workunits/rgw/jcksum/mvnw new file mode 100755 index 000000000000..8d937f4c14f1 --- /dev/null +++ b/qa/workunits/rgw/jcksum/mvnw @@ -0,0 +1,308 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Apache Maven Wrapper startup batch script, version 3.2.0 +# +# Required ENV vars: +# ------------------ +# JAVA_HOME - location of a JDK home dir +# +# Optional ENV vars +# ----------------- +# MAVEN_OPTS - parameters passed to the Java VM when running Maven +# e.g. to debug Maven itself, use +# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +# MAVEN_SKIP_RC - flag to disable loading of mavenrc files +# ---------------------------------------------------------------------------- + +if [ -z "$MAVEN_SKIP_RC" ] ; then + + if [ -f /usr/local/etc/mavenrc ] ; then + . /usr/local/etc/mavenrc + fi + + if [ -f /etc/mavenrc ] ; then + . /etc/mavenrc + fi + + if [ -f "$HOME/.mavenrc" ] ; then + . "$HOME/.mavenrc" + fi + +fi + +# OS specific support. $var _must_ be set to either true or false. 
+cygwin=false; +darwin=false; +mingw=false +case "$(uname)" in + CYGWIN*) cygwin=true ;; + MINGW*) mingw=true;; + Darwin*) darwin=true + # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home + # See https://developer.apple.com/library/mac/qa/qa1170/_index.html + if [ -z "$JAVA_HOME" ]; then + if [ -x "/usr/libexec/java_home" ]; then + JAVA_HOME="$(/usr/libexec/java_home)"; export JAVA_HOME + else + JAVA_HOME="/Library/Java/Home"; export JAVA_HOME + fi + fi + ;; +esac + +if [ -z "$JAVA_HOME" ] ; then + if [ -r /etc/gentoo-release ] ; then + JAVA_HOME=$(java-config --jre-home) + fi +fi + +# For Cygwin, ensure paths are in UNIX format before anything is touched +if $cygwin ; then + [ -n "$JAVA_HOME" ] && + JAVA_HOME=$(cygpath --unix "$JAVA_HOME") + [ -n "$CLASSPATH" ] && + CLASSPATH=$(cygpath --path --unix "$CLASSPATH") +fi + +# For Mingw, ensure paths are in UNIX format before anything is touched +if $mingw ; then + [ -n "$JAVA_HOME" ] && [ -d "$JAVA_HOME" ] && + JAVA_HOME="$(cd "$JAVA_HOME" || (echo "cannot cd into $JAVA_HOME."; exit 1); pwd)" +fi + +if [ -z "$JAVA_HOME" ]; then + javaExecutable="$(which javac)" + if [ -n "$javaExecutable" ] && ! [ "$(expr "\"$javaExecutable\"" : '\([^ ]*\)')" = "no" ]; then + # readlink(1) is not available as standard on Solaris 10. + readLink=$(which readlink) + if [ ! "$(expr "$readLink" : '\([^ ]*\)')" = "no" ]; then + if $darwin ; then + javaHome="$(dirname "\"$javaExecutable\"")" + javaExecutable="$(cd "\"$javaHome\"" && pwd -P)/javac" + else + javaExecutable="$(readlink -f "\"$javaExecutable\"")" + fi + javaHome="$(dirname "\"$javaExecutable\"")" + javaHome=$(expr "$javaHome" : '\(.*\)/bin') + JAVA_HOME="$javaHome" + export JAVA_HOME + fi + fi +fi + +if [ -z "$JAVACMD" ] ; then + if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + else + JAVACMD="$(\unset -f command 2>/dev/null; \command -v java)" + fi +fi + +if [ ! -x "$JAVACMD" ] ; then + echo "Error: JAVA_HOME is not defined correctly." >&2 + echo " We cannot execute $JAVACMD" >&2 + exit 1 +fi + +if [ -z "$JAVA_HOME" ] ; then + echo "Warning: JAVA_HOME environment variable is not set." +fi + +# traverses directory structure from process work directory to filesystem root +# first directory with .mvn subdirectory is considered project base directory +find_maven_basedir() { + if [ -z "$1" ] + then + echo "Path not specified to find_maven_basedir" + return 1 + fi + + basedir="$1" + wdir="$1" + while [ "$wdir" != '/' ] ; do + if [ -d "$wdir"/.mvn ] ; then + basedir=$wdir + break + fi + # workaround for JBEAP-8937 (on Solaris 10/Sparc) + if [ -d "${wdir}" ]; then + wdir=$(cd "$wdir/.." || exit 1; pwd) + fi + # end of workaround + done + printf '%s' "$(cd "$basedir" || exit 1; pwd)" +} + +# concatenates all lines of a file +concat_lines() { + if [ -f "$1" ]; then + # Remove \r in case we run on Windows within Git Bash + # and check out the repository with auto CRLF management + # enabled. Otherwise, we may read lines that are delimited with + # \r\n and produce $'-Xarg\r' rather than -Xarg due to word + # splitting rules. 
+ tr -s '\r\n' ' ' < "$1" + fi +} + +log() { + if [ "$MVNW_VERBOSE" = true ]; then + printf '%s\n' "$1" + fi +} + +BASE_DIR=$(find_maven_basedir "$(dirname "$0")") +if [ -z "$BASE_DIR" ]; then + exit 1; +fi + +MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}; export MAVEN_PROJECTBASEDIR +log "$MAVEN_PROJECTBASEDIR" + +########################################################################################## +# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central +# This allows using the maven wrapper in projects that prohibit checking in binary data. +########################################################################################## +wrapperJarPath="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" +if [ -r "$wrapperJarPath" ]; then + log "Found $wrapperJarPath" +else + log "Couldn't find $wrapperJarPath, downloading it ..." + + if [ -n "$MVNW_REPOURL" ]; then + wrapperUrl="$MVNW_REPOURL/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" + else + wrapperUrl="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" + fi + while IFS="=" read -r key value; do + # Remove '\r' from value to allow usage on windows as IFS does not consider '\r' as a separator ( considers space, tab, new line ('\n'), and custom '=' ) + safeValue=$(echo "$value" | tr -d '\r') + case "$key" in (wrapperUrl) wrapperUrl="$safeValue"; break ;; + esac + done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" + log "Downloading from: $wrapperUrl" + + if $cygwin; then + wrapperJarPath=$(cygpath --path --windows "$wrapperJarPath") + fi + + if command -v wget > /dev/null; then + log "Found wget ... using wget" + [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--quiet" + if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then + wget $QUIET "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" + else + wget $QUIET --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" + fi + elif command -v curl > /dev/null; then + log "Found curl ... using curl" + [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--silent" + if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then + curl $QUIET -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" + else + curl $QUIET --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" + fi + else + log "Falling back to using Java to download" + javaSource="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.java" + javaClass="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.class" + # For Cygwin, switch paths to Windows format before running javac + if $cygwin; then + javaSource=$(cygpath --path --windows "$javaSource") + javaClass=$(cygpath --path --windows "$javaClass") + fi + if [ -e "$javaSource" ]; then + if [ ! -e "$javaClass" ]; then + log " - Compiling MavenWrapperDownloader.java ..." + ("$JAVA_HOME/bin/javac" "$javaSource") + fi + if [ -e "$javaClass" ]; then + log " - Running MavenWrapperDownloader.java ..." 
+ ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$wrapperUrl" "$wrapperJarPath") || rm -f "$wrapperJarPath" + fi + fi + fi +fi +########################################################################################## +# End of extension +########################################################################################## + +# If specified, validate the SHA-256 sum of the Maven wrapper jar file +wrapperSha256Sum="" +while IFS="=" read -r key value; do + case "$key" in (wrapperSha256Sum) wrapperSha256Sum=$value; break ;; + esac +done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" +if [ -n "$wrapperSha256Sum" ]; then + wrapperSha256Result=false + if command -v sha256sum > /dev/null; then + if echo "$wrapperSha256Sum $wrapperJarPath" | sha256sum -c > /dev/null 2>&1; then + wrapperSha256Result=true + fi + elif command -v shasum > /dev/null; then + if echo "$wrapperSha256Sum $wrapperJarPath" | shasum -a 256 -c > /dev/null 2>&1; then + wrapperSha256Result=true + fi + else + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." + echo "Please install either command, or disable validation by removing 'wrapperSha256Sum' from your maven-wrapper.properties." + exit 1 + fi + if [ $wrapperSha256Result = false ]; then + echo "Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised." >&2 + echo "Investigate or delete $wrapperJarPath to attempt a clean download." >&2 + echo "If you updated your Maven version, you need to update the specified wrapperSha256Sum property." >&2 + exit 1 + fi +fi + +MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" + +# For Cygwin, switch paths to Windows format before running java +if $cygwin; then + [ -n "$JAVA_HOME" ] && + JAVA_HOME=$(cygpath --path --windows "$JAVA_HOME") + [ -n "$CLASSPATH" ] && + CLASSPATH=$(cygpath --path --windows "$CLASSPATH") + [ -n "$MAVEN_PROJECTBASEDIR" ] && + MAVEN_PROJECTBASEDIR=$(cygpath --path --windows "$MAVEN_PROJECTBASEDIR") +fi + +# Provide a "standardized" way to retrieve the CLI args that will +# work with both Windows and non-Windows executions. +MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $*" +export MAVEN_CMD_LINE_ARGS + +WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +# shellcheck disable=SC2086 # safe args +exec "$JAVACMD" \ + $MAVEN_OPTS \ + $MAVEN_DEBUG_OPTS \ + -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ + "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ + ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" diff --git a/qa/workunits/rgw/jcksum/mvnw.cmd b/qa/workunits/rgw/jcksum/mvnw.cmd new file mode 100644 index 000000000000..c4586b564e6f --- /dev/null +++ b/qa/workunits/rgw/jcksum/mvnw.cmd @@ -0,0 +1,205 @@ +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. 
You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. +@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Apache Maven Wrapper startup batch script, version 3.2.0 +@REM +@REM Required ENV vars: +@REM JAVA_HOME - location of a JDK home dir +@REM +@REM Optional ENV vars +@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands +@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending +@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven +@REM e.g. to debug Maven itself, use +@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files +@REM ---------------------------------------------------------------------------- + +@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' +@echo off +@REM set title of command window +title %0 +@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on' +@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% + +@REM set %HOME% to equivalent of $HOME +if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") + +@REM Execute a user defined script before this one +if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre +@REM check for pre script, once with legacy .bat ending and once with .cmd ending +if exist "%USERPROFILE%\mavenrc_pre.bat" call "%USERPROFILE%\mavenrc_pre.bat" %* +if exist "%USERPROFILE%\mavenrc_pre.cmd" call "%USERPROFILE%\mavenrc_pre.cmd" %* +:skipRcPre + +@setlocal + +set ERROR_CODE=0 + +@REM To isolate internal variables from possible post scripts, we use another setlocal +@setlocal + +@REM ==== START VALIDATION ==== +if not "%JAVA_HOME%" == "" goto OkJHome + +echo. +echo Error: JAVA_HOME not found in your environment. >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +:OkJHome +if exist "%JAVA_HOME%\bin\java.exe" goto init + +echo. +echo Error: JAVA_HOME is set to an invalid directory. >&2 +echo JAVA_HOME = "%JAVA_HOME%" >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +@REM ==== END VALIDATION ==== + +:init + +@REM Find the project base dir, i.e. the directory that contains the folder ".mvn". +@REM Fallback to current working directory if not found. + +set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% +IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir + +set EXEC_DIR=%CD% +set WDIR=%EXEC_DIR% +:findBaseDir +IF EXIST "%WDIR%"\.mvn goto baseDirFound +cd .. 
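+@REM if "cd .." did not change the directory, the drive root was reached
+@REM without finding a .mvn folder; fall back to the invocation directory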
+IF "%WDIR%"=="%CD%" goto baseDirNotFound +set WDIR=%CD% +goto findBaseDir + +:baseDirFound +set MAVEN_PROJECTBASEDIR=%WDIR% +cd "%EXEC_DIR%" +goto endDetectBaseDir + +:baseDirNotFound +set MAVEN_PROJECTBASEDIR=%EXEC_DIR% +cd "%EXEC_DIR%" + +:endDetectBaseDir + +IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig + +@setlocal EnableExtensions EnableDelayedExpansion +for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a +@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% + +:endReadAdditionalConfig + +SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" +set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" +set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +set WRAPPER_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" + +FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( + IF "%%A"=="wrapperUrl" SET WRAPPER_URL=%%B +) + +@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central +@REM This allows using the maven wrapper in projects that prohibit checking in binary data. +if exist %WRAPPER_JAR% ( + if "%MVNW_VERBOSE%" == "true" ( + echo Found %WRAPPER_JAR% + ) +) else ( + if not "%MVNW_REPOURL%" == "" ( + SET WRAPPER_URL="%MVNW_REPOURL%/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" + ) + if "%MVNW_VERBOSE%" == "true" ( + echo Couldn't find %WRAPPER_JAR%, downloading it ... + echo Downloading from: %WRAPPER_URL% + ) + + powershell -Command "&{"^ + "$webclient = new-object System.Net.WebClient;"^ + "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ + "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ + "}"^ + "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%WRAPPER_URL%', '%WRAPPER_JAR%')"^ + "}" + if "%MVNW_VERBOSE%" == "true" ( + echo Finished downloading %WRAPPER_JAR% + ) +) +@REM End of extension + +@REM If specified, validate the SHA-256 sum of the Maven wrapper jar file +SET WRAPPER_SHA_256_SUM="" +FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( + IF "%%A"=="wrapperSha256Sum" SET WRAPPER_SHA_256_SUM=%%B +) +IF NOT %WRAPPER_SHA_256_SUM%=="" ( + powershell -Command "&{"^ + "$hash = (Get-FileHash \"%WRAPPER_JAR%\" -Algorithm SHA256).Hash.ToLower();"^ + "If('%WRAPPER_SHA_256_SUM%' -ne $hash){"^ + " Write-Output 'Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised.';"^ + " Write-Output 'Investigate or delete %WRAPPER_JAR% to attempt a clean download.';"^ + " Write-Output 'If you updated your Maven version, you need to update the specified wrapperSha256Sum property.';"^ + " exit 1;"^ + "}"^ + "}" + if ERRORLEVEL 1 goto error +) + +@REM Provide a "standardized" way to retrieve the CLI args that will +@REM work with both Windows and non-Windows executions. 
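+@REM (the same MAVEN_CMD_LINE_ARGS variable is exported by the POSIX mvnw script)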
+set MAVEN_CMD_LINE_ARGS=%* + +%MAVEN_JAVA_EXE% ^ + %JVM_CONFIG_MAVEN_PROPS% ^ + %MAVEN_OPTS% ^ + %MAVEN_DEBUG_OPTS% ^ + -classpath %WRAPPER_JAR% ^ + "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" ^ + %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* +if ERRORLEVEL 1 goto error +goto end + +:error +set ERROR_CODE=1 + +:end +@endlocal & set ERROR_CODE=%ERROR_CODE% + +if not "%MAVEN_SKIP_RC%"=="" goto skipRcPost +@REM check for post script, once with legacy .bat ending and once with .cmd ending +if exist "%USERPROFILE%\mavenrc_post.bat" call "%USERPROFILE%\mavenrc_post.bat" +if exist "%USERPROFILE%\mavenrc_post.cmd" call "%USERPROFILE%\mavenrc_post.cmd" +:skipRcPost + +@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' +if "%MAVEN_BATCH_PAUSE%"=="on" pause + +if "%MAVEN_TERMINATE_CMD%"=="on" exit %ERROR_CODE% + +cmd /C exit /B %ERROR_CODE% diff --git a/qa/workunits/rgw/jcksum/pom-SNAPSHOT.xml b/qa/workunits/rgw/jcksum/pom-SNAPSHOT.xml new file mode 100644 index 000000000000..637f36f7499b --- /dev/null +++ b/qa/workunits/rgw/jcksum/pom-SNAPSHOT.xml @@ -0,0 +1,56 @@ + + + 4.0.0 + + com.example + junit5-jupiter-starter-maven-snapshot + 1.0-SNAPSHOT + + + UTF-8 + 1.8 + ${maven.compiler.source} + + + + + + org.junit + junit-bom + 5.10.1-SNAPSHOT + pom + import + + + + + + + org.junit.jupiter + junit-jupiter + test + + + + + + oss-sonatype + oss-sonatype + https://oss.sonatype.org/content/repositories/snapshots/ + + true + + + + + + + + maven-surefire-plugin + 3.0.0 + + + + + diff --git a/qa/workunits/rgw/jcksum/pom.xml b/qa/workunits/rgw/jcksum/pom.xml new file mode 100644 index 000000000000..ab874e57638e --- /dev/null +++ b/qa/workunits/rgw/jcksum/pom.xml @@ -0,0 +1,145 @@ + + + 4.0.0 + + io.ceph + jcksum + 1.0 + + + UTF-8 + 1.9 + ${maven.compiler.source} + 2.20.43 + 1.7.28 + 5.10.1 + 1.10.1 + + + + + + org.junit + junit-bom + ${junit5.version} + pom + import + + + + software.amazon.awssdk + bom + ${aws.java.sdk.version} + pom + import + + + + + + + + + software.amazon.awssdk + s3 + + + software.amazon.awssdk + netty-nio-client + + + software.amazon.awssdk + apache-client + + + + + + software.amazon.awssdk + apache-client + + + commons-logging + commons-logging + + + + + + commons-io + commons-io + 2.15.1 + + + + commons-codec + commons-codec + 1.15 + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + org.junit.jupiter + junit-jupiter + ${junit5.version} + test + + + + org.junit.jupiter + junit-jupiter-api + ${junit5.version} + test + + + + + org.junit.jupiter + junit-jupiter-engine + ${junit5.version} + test + + + + org.slf4j + slf4j-simple + ${slf4j.version} + + + + + org.slf4j + jcl-over-slf4j + ${slf4j.version} + + + + org.junit.platform + junit-platform-suite + ${junit.platform.version} + test + + + + + + + + maven-surefire-plugin + 3.2.3 + + + maven-failsafe-plugin + 3.2.3 + + + + + diff --git a/qa/workunits/rgw/jcksum/src/main/java/io/ceph/jcksum/jcksum.java b/qa/workunits/rgw/jcksum/src/main/java/io/ceph/jcksum/jcksum.java new file mode 100644 index 000000000000..8ac68a27e3c3 --- /dev/null +++ b/qa/workunits/rgw/jcksum/src/main/java/io/ceph/jcksum/jcksum.java @@ -0,0 +1,371 @@ +package io.ceph.jcksum; + +import java.io.*; +import java.util.*; +import java.net.*; // HTTP, URI, ... 
+import java.util.stream.*; + +import software.amazon.awssdk.auth.credentials.*; +import software.amazon.awssdk.http.SdkHttpClient; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.*; +import software.amazon.awssdk.services.s3.model.*; +import software.amazon.awssdk.services.s3.waiters.*; +import software.amazon.awssdk.utils.*; // AttributeMap +import software.amazon.awssdk.http.SdkHttpConfigurationOption; +import software.amazon.awssdk.core.sync.*; // RequestBody +import software.amazon.awssdk.core.checksums.*; +import software.amazon.awssdk.core.checksums.Algorithm; +import software.amazon.awssdk.core.waiters.*; + +/* MD5Sum */ +import java.nio.file.Files; +import java.nio.file.Paths; +import org.apache.commons.codec.digest.DigestUtils; + +public class jcksum { + + static Region region = Region.US_EAST_1; + static S3Client client, ssl_client; + + static String bucket_name = "sheik"; + static String object_name = "jerbuti"; + static String access_key = "0555b35654ad1656d804"; + static String secret_key = "h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="; + + static String http_endpoint = "http://192.168.111.1:8000"; + static String ssl_endpoint = "https://192.168.111.1:8443"; + + static int mpu_size = 5 * 1024 * 1024; + + /* files containing test data of the corresponding names/sizes */ + public static Stream inputFileNames() { + return Stream.of( + "file-0b", + "file-8b", + "file-200b", + "file-21983b", + "file-5519b", + "file-204329b", + "file-256k", + "file-1m", + "file-1038757b" + ); + } /* inputFileNames */ + + public static Stream mpuFileNames() { + return Stream.of( + "file-5m", + "file-10m", + "file-100m" + ); + } /* mpuFileNames */ + + public static void createBucket(S3Client s3Client, String bucket_name) { + try { + S3Waiter s3Waiter = s3Client.waiter(); + CreateBucketRequest bucketRequest = CreateBucketRequest.builder() + .bucket(bucket_name) + .build(); + + s3Client.createBucket(bucketRequest); + HeadBucketRequest bucketRequestWait = HeadBucketRequest.builder() + .bucket(bucket_name) + .build(); + + // Wait until the bucket is created and print out the response. 
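+            // (the waiter re-issues the HeadBucket request built above until the
+            // bucket is visible or the waiter's retry/timeout budget is exhausted)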
+ WaiterResponse waiterResponse = s3Waiter.waitUntilBucketExists(bucketRequestWait); + waiterResponse.matched().response().ifPresent(System.out::println); + System.out.println(bucket_name +" is ready"); + + } catch (S3Exception e) { + System.err.println(e.awsErrorDetails().errorMessage()); + System.exit(1); + } + } /* createBucket */ + + public static void listBucket(S3Client s3) { + try { + ListObjectsRequest listObjects = ListObjectsRequest.builder() + .bucket(bucket_name) + .build(); + + ListObjectsResponse res = s3.listObjects(listObjects); + List objects = res.contents(); + for (S3Object obj: objects) { + System.out.println( + String.format("obj key: %s owner: %s size: %d", obj.key(), obj.owner(), obj.size())); + } + + } catch (S3Exception e) { + System.err.println(e.awsErrorDetails().errorMessage()); + System.exit(1); + } + } + + public static GetObjectResponse GetObject(S3Client s3, String in_key_name, String out_file_name) { + GetObjectResponse resp = null; + + GetObjectRequest get_req = + GetObjectRequest.builder() + .bucket(bucket_name) + .key(in_key_name) + .build(); + try { + File f = new File(out_file_name); + if (f.exists()) { + f.delete(); + } + resp = s3.getObject(get_req, ResponseTransformer.toFile(f)); + } catch (S3Exception e) { + System.err.println(e.awsErrorDetails().errorMessage()); + System.exit(1); + } catch (Exception e) { + e.printStackTrace(); + } + + return resp; + } + + public static CompleteMultipartUploadResponse mpuObjectFromFile(S3Client s3, String in_file_path, String out_key_name) { + File f = new File(in_file_path); + CompleteMultipartUploadResponse completedUploadResponse = null; + CreateMultipartUploadRequest create_req = + CreateMultipartUploadRequest.builder() + .bucket(bucket_name) + .key(out_key_name) + .checksumAlgorithm(ChecksumAlgorithm.SHA256) + .build(); + + CreateMultipartUploadResponse createdUpload = s3.createMultipartUpload(create_req); + + /* the file streaming method shown in aws-doc-sdk-examples/.../CheckObjectIntegrity.java + * creates a FileInputStream from a file, but then copies each chunk into a ByteBuffer by + * hand before uploading--which per code comments, forces RequestBody to copy the buffer + * again before sending it--let's see if we can use RequestBody.fromInputStream() instead, + * it seems to be designed for this purpose (I'm not clear why you would share the InputStream, + * and the only apparent reason to prefer the buffer even with an async client seems to be + * avoid a deferred close on it) */ + + try { + InputStream in = new FileInputStream(f); + List completedParts = new ArrayList(); + int partNumber = 1; + + for (long resid = f.length(); resid > 0;) { + long bytes = Math.min(mpu_size, resid); + UploadPartRequest uploadPartRequest = UploadPartRequest.builder() + .partNumber(partNumber) + .uploadId(createdUpload.uploadId()) + .bucket(bucket_name) + .key(out_key_name) + .checksumAlgorithm(ChecksumAlgorithm.SHA256) + .build(); + UploadPartResponse uploadedPart = s3.uploadPart(uploadPartRequest, + RequestBody.fromInputStream(in, bytes)); + CompletedPart part = CompletedPart.builder(). 
+ partNumber(partNumber) + .checksumSHA256(uploadedPart.checksumSHA256()) + .eTag(uploadedPart.eTag()).build(); + completedParts.add(part); + partNumber++; + resid -= bytes; + } /* for all chunks * bytes */ + + CompletedMultipartUpload completedMultipartUpload = + CompletedMultipartUpload.builder().parts(completedParts).build(); + completedUploadResponse = s3.completeMultipartUpload( + CompleteMultipartUploadRequest.builder() + .bucket(bucket_name) + .key(out_key_name) + .uploadId(createdUpload.uploadId()) + .multipartUpload(completedMultipartUpload).build()); + } catch (Exception e) { + e.printStackTrace(); + } + return completedUploadResponse; + } /* mpuObjectFromFile */ + + public static CompleteMultipartUploadResponse mpuObjectFromFileNoCksum(S3Client s3, String in_file_path, String out_key_name) { + File f = new File(in_file_path); + CompleteMultipartUploadResponse completedUploadResponse = null; + CreateMultipartUploadRequest create_req = + CreateMultipartUploadRequest.builder() + .bucket(bucket_name) + .key(out_key_name) + /* .checksumAlgorithm(ChecksumAlgorithm.SHA256) */ + .build(); + + CreateMultipartUploadResponse createdUpload = s3.createMultipartUpload(create_req); + + /* the file streaming method shown in aws-doc-sdk-examples/.../CheckObjectIntegrity.java + * creates a FileInputStream from a file, but then copies each chunk into a ByteBuffer by + * hand before uploading--which per code comments, forces RequestBody to copy the buffer + * again before sending it--let's see if we can use RequestBody.fromInputStream() instead, + * it seems to be designed for this purpose (I'm not clear why you would share the InputStream, + * and the only apparent reason to prefer the buffer even with an async client seems to be + * avoid a deferred close on it) */ + + try { + InputStream in = new FileInputStream(f); + List completedParts = new ArrayList(); + int partNumber = 1; + + for (long resid = f.length(); resid > 0;) { + long bytes = Math.min(mpu_size, resid); + UploadPartRequest uploadPartRequest = UploadPartRequest.builder() + .partNumber(partNumber) + .uploadId(createdUpload.uploadId()) + .bucket(bucket_name) + .key(out_key_name) + /* .checksumAlgorithm(ChecksumAlgorithm.SHA256) */ + .build(); + UploadPartResponse uploadedPart = s3.uploadPart(uploadPartRequest, + RequestBody.fromInputStream(in, bytes)); + CompletedPart part = CompletedPart.builder(). + partNumber(partNumber) + .checksumSHA256(uploadedPart.checksumSHA256()) + .eTag(uploadedPart.eTag()).build(); + completedParts.add(part); + partNumber++; + resid -= bytes; + } /* for all chunks * bytes */ + + CompletedMultipartUpload completedMultipartUpload = + CompletedMultipartUpload.builder().parts(completedParts).build(); + completedUploadResponse = s3.completeMultipartUpload( + CompleteMultipartUploadRequest.builder() + .bucket(bucket_name) + .key(out_key_name) + .uploadId(createdUpload.uploadId()) + .multipartUpload(completedMultipartUpload).build()); + } catch (Exception e) { + e.printStackTrace(); + } + return completedUploadResponse; + } /* mpuObjectFromFileNoCksum */ + + /* without mpu and without explicit checksum request, chunked encoding is + * not (automatically?) 
sent; with a checksum specified, it is */ + public static PutObjectResponse putObjectFromFileNoCksum(S3Client s3, String in_file_path, String out_key_name) { + PutObjectResponse resp = null; + try { + Map metadata = new HashMap<>(); + metadata.put("x-amz-meta-wax", "ahatchee"); + PutObjectRequest putOb = PutObjectRequest.builder() + .bucket(bucket_name) + .key(out_key_name) + .metadata(metadata) + .build(); + + resp = s3.putObject(putOb, RequestBody.fromFile(new File(in_file_path))); // "using the full contents of the specified file" + + } catch (S3Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + return resp; + } /* putObjectFromFileNoCksum */ + + /* without mpu and without explicit checksum request, chunked encoding is + * not (automatically?) sent; with a checksum specified, it is */ + public static PutObjectResponse putObjectFromFile(S3Client s3, String in_file_path, String out_key_name) { + PutObjectResponse resp = null; + try { + Map metadata = new HashMap<>(); + metadata.put("x-amz-meta-wax", "ahatchee"); + PutObjectRequest putOb = PutObjectRequest.builder() + .bucket(bucket_name) + .key(out_key_name) + .metadata(metadata) + .checksumAlgorithm(ChecksumAlgorithm.SHA256) + .build(); + + RequestBody rbody = RequestBody.fromFile(new File(in_file_path)); + resp = s3.putObject(putOb, rbody); // "using the full contents of the specified file" + System.out.println("PutObjectResponse"); + } catch (S3Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + return resp; + } /* putObjectFromFile */ + + + public static String getMD5Sum(String filePath) throws IOException { + try (InputStream is = Files.newInputStream(Paths.get(filePath))) { + return DigestUtils.md5Hex(is); + } + } + + public static String getSHA512Sum(String filePath) throws IOException { + try (InputStream is = Files.newInputStream(Paths.get(filePath))) { + return DigestUtils.sha512Hex(is); + } + } + + public static void main(String[] args) throws URISyntaxException { + + AwsCredentials creds = AwsBasicCredentials.create(access_key, secret_key); + URI http_uri = new URI(http_endpoint); + + /* ah, yeah. so many options. + * https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/http/apache/ApacheHttpClient.Builder.html + */ + SdkHttpClient apacheHttpClient = ApacheHttpClient.builder() + .buildWithDefaults(AttributeMap.builder().put(SdkHttpConfigurationOption.TRUST_ALL_CERTIFICATES, true).build()); + + /* https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/s3/S3Client.html */ + client = S3Client.builder() + .endpointOverride(http_uri) + .credentialsProvider(StaticCredentialsProvider.create(creds)) + .region(region) + .build(); + + URI ssl_uri = new URI(ssl_endpoint); + ssl_client = S3Client.builder() + .httpClient(apacheHttpClient) + .endpointOverride(ssl_uri) + .credentialsProvider(StaticCredentialsProvider.create(creds)) + .region(region) + .build(); + + //listBucket(client); + //listBucket(ssl_client); + + String out_name = "object_out"; + + // if !ssl, we see x-amz-trailer-signature (in the trailer) + //putObjectFromFile(client, "file-8b", out_name); // minimal STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER case + putObjectFromFile(client, "file-200b", out_name); // STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER (multi) (200) (checksum?) + //putObjectFromFile(client, "file-21983b", out_name); // STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER (multi) (200) (checksum?) 
+ //putObjectFromFile(client, "file-256k", out_name); // x-amz-content-sha256:STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER (multi) (200) (checksum?) + //putObjectFromFile(client, "file-1M", out_name); // x-amz-content-sha256:STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER (multi) (200) (checksum?) + + /* ok to here! */ + + // XXXX minimal streaming unsigned checksum trailer case + //putObjectFromFile(ssl_client, "file-8b", out_name); + //putObjectFromFile(ssl_client, "file-200b", out_name); // STREAMING-UNSIGNED-PAYLOAD-TRAILER (400) + //putObjectFromFile(ssl_client, "file-21983b", out_name); // x-amz-content-sha256:STREAMING-UNSIGNED-PAYLOAD-TRAILER (400) + //putObjectFromFile(ssl_client, "file-256k", out_name); //x-amz-content-sha256:STREAMING-UNSIGNED-PAYLOAD-TRAILER (multi) (400) + + // minimal, traditional awssigv4 streaming hmac sha256 case (works) + //putObjectFromFileNoCksum(client, "file-8b", out_name); + + //putObjectFromFileNoCksum(client, "file-200b", object_name); // STREAMING-AWS4-HMAC-SHA256-PAYLOAD (multi) 200 + //putObjectFromFileNoCksum(ssl_client, "file-200b", out_name); // UNSIGNED-PAYLOAD (no completer) 200 + + //mpuObjectFromFile(client, "file-200b", out_name); // STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER(multi) 400 (no completer?) + //mpuObjectFromFile(client, "file-256k", out_name); + //mpuObjectFromFile(ssl_client, "file-200b", out_name); // STREAMING-UNSIGNED-PAYLOAD-TRAILER (no completer) 400 + //mpuObjectFromFile(ssl_client, "file-256k", out_name); + + //mpuObjectFromFileNoCksum(client, "file-200b", out_name); // AWS4-HMAC-SHA256-PAYLOAD (no completer?) 200 + //mpuObjectFromFileNoCksum(client, "file-256k", out_name); + //mpuObjectFromFileNoCksum(ssl_client, "file-200b", out_name); //x-amz-content-sha256:UNSIGNED-PAYLOAD (no completer) 200 + //mpuObjectFromFileNoCksum(ssl_client, "file-256k", out_name); + System.out.println("all that way..."); + } /* main */ +} /* jcksum */ diff --git a/qa/workunits/rgw/jcksum/src/test/java/io/ceph/jcksum/PutObjects.java b/qa/workunits/rgw/jcksum/src/test/java/io/ceph/jcksum/PutObjects.java new file mode 100644 index 000000000000..9f9d3475c04f --- /dev/null +++ b/qa/workunits/rgw/jcksum/src/test/java/io/ceph/jcksum/PutObjects.java @@ -0,0 +1,289 @@ +/** + * + */ +package io.ceph.jcksum; + +import static org.junit.jupiter.api.Assertions.*; + +import java.io.*; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.stream.*; +import java.nio.*; // ByteBuffer +import java.nio.file.Files.*; //newByteChannel +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.nio.channels.*; +import java.lang.Math.*; + +import io.ceph.jcksum.*; +import software.amazon.awssdk.auth.credentials.*; +import software.amazon.awssdk.http.*; +import software.amazon.awssdk.http.apache.ApacheHttpClient; + +import software.amazon.awssdk.services.s3.*; +import software.amazon.awssdk.services.s3.model.*; +import software.amazon.awssdk.utils.*; // AttributeMap +import software.amazon.awssdk.http.SdkHttpConfigurationOption; +import software.amazon.awssdk.core.sync.*; // RequestBody +import software.amazon.awssdk.core.checksums.*; +import software.amazon.awssdk.core.checksums.Algorithm; + +import org.junit.jupiter.api.*; /* BeforeAll, Test, &c */ +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.TestInstance.*; + +import org.junit.jupiter.params.*; +import org.junit.jupiter.params.provider.*; + +/** + * + */ +@TestInstance(Lifecycle.PER_CLASS) 
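+// PER_CLASS lifecycle allows the non-static @BeforeAll setup() method below to
+// run once for the whole class: one S3 client, one set of generated test files.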
+class PutObjects { + + public AwsCredentials creds; + public URI http_uri; + static S3Client client; + + void generateFile(String in_file_path, String out_file_path, long length) { + try { + System.out.println("DEBUG: Generating File"); + Path ifp = Paths.get(in_file_path); + File f = ifp.toFile(); + + long if_size = f.length(); + if (if_size < (1024 * 1024)) { + throw new IOException("in_file_path is supposed to be file-1m (i.e., a 1Mb file"); + } + + File of = new File(out_file_path); + if (of.exists()) { + of.delete(); + } + + FileOutputStream fout = new FileOutputStream(of); + FileChannel wch = fout.getChannel(); + + long resid = length; + long r_offset = 0; + long f_resid = 0; + + FileInputStream fin = new FileInputStream(f); + FileChannel rch = fin.getChannel(); + + while (resid > 0) { + long to_write = Long.min(resid, f_resid); + while (to_write > 0) { + long written = rch.transferTo(r_offset, to_write, wch); + r_offset += written; + to_write -= written; + resid -= written; + f_resid -= written; + } + if (f_resid < 0) { + throw new IOException("read overrun (logic error)"); + } + if (f_resid == 0) { + rch.position(0); + f_resid = 1024 * 1024; + r_offset = 0; + + } + } + if (rch != null) { + rch.close(); + } + if (wch != null) { + wch.close(); + } + System.out.println("DEBUG: File Generated"); + } catch (IOException e) { + System.err.println(e.getMessage()); + } + } /* generateFile */ + + + String get_envvar(String key, String defstr) { + String var = System.getenv(key); + if (var == null) { + return defstr; + } + return var; + } + + void readEnvironmentVars() { + jcksum.access_key = get_envvar("AWS_ACCESS_KEY_ID", "0555b35654ad1656d804"); + jcksum.secret_key = get_envvar("AWS_SECRET_ACCESS_KEY", "h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="); + jcksum.http_endpoint = get_envvar("RGW_HTTP_ENDPOINT_URL", ""); + } /* readEnvironmentVArs */ + + void generateBigFiles() { + generateFile("file-1m", "file-5m", 5 * 1024 * 1024); + generateFile("file-1m", "file-10m", 10 * 1024 * 1024); + generateFile("file-1m", "file-100m", 100 * 1024 * 1024); + /* the next lengths happen to be prime */ + generateFile("file-1m", "file-5519b", 5519); + generateFile("file-1m", "file-204329b", 204329); + generateFile("file-1m", "file-1038757b", 1038757); + } + + @BeforeAll + void setup() throws URISyntaxException { + + readEnvironmentVars(); + + System.out.println("PutObjects.java: starting test run:"); + System.out.println("\tAccessKey=" + jcksum.access_key); + System.out.println("\tSecretKey=" + jcksum.secret_key); + System.out.println("\tEndpointUrl=" + jcksum.http_endpoint); + + creds = AwsBasicCredentials.create(jcksum.access_key, jcksum.secret_key); + http_uri = new URI(jcksum.http_endpoint); + + SdkHttpClient apacheHttpClient = ApacheHttpClient.builder() + .buildWithDefaults(AttributeMap.builder().put(SdkHttpConfigurationOption.TRUST_ALL_CERTIFICATES, true).build()); + + /* https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/s3/S3Client.html */ + + System.out.println("DEBUG: Environment Variables Read"); + try { + client = S3Client.builder() + .endpointOverride(http_uri) + .credentialsProvider(StaticCredentialsProvider.create(creds)) + .region(jcksum.region) + .forcePathStyle(true) /* XXX change in future */ + .build(); + } catch (Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + System.out.println("DEBUG: S3 Client Initialized"); + generateBigFiles(); + System.out.println("DEBUG: Generated Big Files"); + + /* create test bucket if it 
doesn't exist yet */ + try { + jcksum.createBucket(client, jcksum.bucket_name); + } catch (Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + + System.out.println("DEBUG: Test Bucket Created"); + } /* setup */ + + /* TODO: zap */ + @ParameterizedTest + @MethodSource("io.ceph.jcksum.jcksum#inputFileNames") + void testWithExplicitLocalMethodSource(String argument) { + assertNotNull(argument); + System.out.println("arg: " + argument); + } + + boolean compareFileDigests(String lhp, String rhp) throws IOException { + String lh5 = jcksum.getSHA512Sum(lhp); + String rh5 = jcksum.getSHA512Sum(rhp); + return lh5.equals(rh5); + } + + boolean putAndVerifyCksum(S3Client s3, String in_file_path) { + boolean md5_check = false; + try { + String out_key_name = "out_key_name"; // name we'll give the object in S3 + PutObjectResponse put_rsp = jcksum.putObjectFromFile(s3, in_file_path, out_key_name); + String out_file_path = "out_file_name"; // name of the temp object when we download it back + GetObjectResponse get_rsp = jcksum.GetObject(s3, out_key_name, out_file_path); + md5_check = compareFileDigests(in_file_path, out_file_path); + } catch (Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + return md5_check; + } + + boolean putAndVerifyNoCksum(S3Client s3, String in_file_path) { + boolean md5_check = false; + try { + String out_key_name = "out_key_name"; // name we'll give the object in S3 + PutObjectResponse put_rsp = jcksum.putObjectFromFileNoCksum(s3, in_file_path, out_key_name); + String out_file_path = "out_file_name"; // name of the temp object when we download it back + GetObjectResponse get_rsp = jcksum.GetObject(s3, out_key_name, out_file_path); + md5_check = compareFileDigests(in_file_path, out_file_path); + } catch (Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + return md5_check; + } + + boolean mpuAndVerifyCksum(S3Client s3, String in_file_path) { + boolean md5_check = false; + try { + String out_key_name = "out_key_name"; // name we'll give the object in S3 + CompleteMultipartUploadResponse put_rsp = jcksum.mpuObjectFromFile(s3, in_file_path, out_key_name); + String out_file_path = "out_file_name"; // name of the temp object when we download it back + GetObjectResponse get_rsp = jcksum.GetObject(s3, out_key_name, out_file_path); + md5_check = compareFileDigests(in_file_path, out_file_path); + } catch (Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + return md5_check; + } + + boolean mpuAndVerifyNoCksum(S3Client s3, String in_file_path) { + boolean md5_check = false; + try { + String out_key_name = "out_key_name"; // name we'll give the object in S3 + CompleteMultipartUploadResponse put_rsp = jcksum.mpuObjectFromFileNoCksum(s3, in_file_path, out_key_name); + String out_file_path = "out_file_name"; // name of the temp object when we download it back + GetObjectResponse get_rsp = jcksum.GetObject(s3, out_key_name, out_file_path); + md5_check = compareFileDigests(in_file_path, out_file_path); + } catch (Exception e) { + System.err.println(e.getMessage()); + System.exit(1); + } + return md5_check; + } + + @ParameterizedTest + @MethodSource("io.ceph.jcksum.jcksum#inputFileNames") + void putObjectFromFileCksum(String in_file_path) { + boolean rslt = false; + System.out.println("putObjectFromFileCksum called with " + in_file_path); + rslt = putAndVerifyCksum(client, in_file_path); + assertTrue(rslt); + } + + @ParameterizedTest + @MethodSource("io.ceph.jcksum.jcksum#inputFileNames") + void 
putObjectFromFileNoCksum(String in_file_path) { + boolean rslt = false; + System.out.println("putObjectFromFileNoCksum called with " + in_file_path); + rslt = putAndVerifyNoCksum(client, in_file_path); + assertTrue(rslt); + } + + @ParameterizedTest + @MethodSource("io.ceph.jcksum.jcksum#mpuFileNames") + void mpuObjectFromFileCksum(String in_file_path) { + boolean rslt = false; + System.out.println("mpuObjectFromFileCksum called with " + in_file_path); + rslt = mpuAndVerifyCksum(client, in_file_path); + assertTrue(rslt); + } + + @ParameterizedTest + @MethodSource("io.ceph.jcksum.jcksum#mpuFileNames") + void mpuObjectFromFileNoCksum(String in_file_path) { + boolean rslt = false; + System.out.println("mpuObjectFromFileNoCksum called with " + in_file_path); + rslt = mpuAndVerifyNoCksum(client, in_file_path); + assertTrue(rslt); + } + +} /* class PutObjects */ diff --git a/qa/workunits/rgw/s3_bucket_quota.pl b/qa/workunits/rgw/s3_bucket_quota.pl index 7f5476ef676c..6f0552ab7018 100755 --- a/qa/workunits/rgw/s3_bucket_quota.pl +++ b/qa/workunits/rgw/s3_bucket_quota.pl @@ -16,7 +16,7 @@ =head1 SYNOPSIS =head1 DESCRIPTION -This script intends to test the rgw bucket quota funcionality using s3 interface +This script intends to test the rgw bucket quota functionality using s3 interface and reports the test results =head1 ARGUMENTS diff --git a/qa/workunits/rgw/s3_user_quota.pl b/qa/workunits/rgw/s3_user_quota.pl index 6d5c02a9a015..04546eac8404 100755 --- a/qa/workunits/rgw/s3_user_quota.pl +++ b/qa/workunits/rgw/s3_user_quota.pl @@ -16,7 +16,7 @@ =head1 SYNOPSIS =head1 DESCRIPTION -This script intends to test the rgw user quota funcionality using s3 interface +This script intends to test the rgw user quota functionality using s3 interface and reports the test results =head1 ARGUMENTS diff --git a/qa/workunits/rgw/s3_utilities.pm b/qa/workunits/rgw/s3_utilities.pm index 3c3fae900e83..5a91db9d1fdd 100644 --- a/qa/workunits/rgw/s3_utilities.pm +++ b/qa/workunits/rgw/s3_utilities.pm @@ -21,7 +21,7 @@ sub get_timestamp { if ($min < 10) { $min = "0$min"; } if ($sec < 10) { $sec = "0$sec"; } $year=$year+1900; - return $year . '_' . $mon . '_' . $mday . '_' . $hour . '_' . $min . '_' . $sec; + return $year . '-' . $mon . '-' . $mday . '-' . $hour . '-' . $min . '-' . $sec; } # Function to check if radosgw is already running @@ -195,11 +195,12 @@ sub run_s3 host => $hostname, secure => 0, retry => 1, + dns_bucket_names => 0, } ); } -our $bucketname = 'buck_'.get_timestamp(); +our $bucketname = 'buck-'.get_timestamp(); # create a new bucket (the test bucket) our $bucket = $s3->add_bucket( { bucket => $bucketname } ) or die $s3->err. "bucket $bucketname create failed\n". $s3->errstr; diff --git a/qa/workunits/rgw/test_awssdkv4_sig.sh b/qa/workunits/rgw/test_awssdkv4_sig.sh new file mode 100755 index 000000000000..0f4782260c49 --- /dev/null +++ b/qa/workunits/rgw/test_awssdkv4_sig.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# +# To run this test script with a cluster created via vstart.sh: +# $PATH needs to be set for radosgw-admin executables. 
+# $CEPH_ROOT needs to be set to the path of the Ceph source code +# $RGW_HTTP_ENDPOINT_URL needs to be set to the endpoint of the RGW +# +# Example when ceph source is cloned into $HOME and a vstart cluster is already running with a radosgw: +# $ PATH=~/ceph/build/bin/:$PATH CEPH_ROOT=~/ceph RGW_HTTP_ENDPOINT=http://localhost:8000 ~/ceph/qa/workunits/rgw/test_awssdkv4_sig.sh +# + +set -x + +if [ -z ${AWS_ACCESS_KEY_ID} ] +then + export AWS_ACCESS_KEY_ID="lNCnR47C2g+ZidCWBAUuwfSAA7Q=" + export AWS_SECRET_ACCESS_KEY="tYuA2Y+Uu1ow2l9Xe59tWKVml3gMuVfyhUjjJwfwEI0vFFONIcqf4g==" + + radosgw-admin user create --uid ceph-test-maven \ + --access-key $AWS_ACCESS_KEY_ID \ + --secret $AWS_SECRET_ACCESS_KEY \ + --display-name "maven test user" \ + --email sigv4@example.com || echo "sigv4 maven user exists" +fi + +if [ -z ${RGW_HTTP_ENDPOINT_URL} ] +then + # TESTDIR and this block are meant for when this script is run in a teuthology environment + if [ -z ${TESTDIR} ] + then + echo "TESTDIR is not defined, cannot set RGW_HTTP_ENDPOINT_URL in teuthology" + exit + else + export RGW_HTTP_ENDPOINT_URL=$(cat ${TESTDIR}/url_file) + fi +fi + +if [ -z ${CEPH_ROOT} ] +then + echo "CEPH_ROOT is not defined" + exit +else + cd $CEPH_ROOT/qa/workunits/rgw/jcksum +fi + +./mvnw clean package +./mvnw test -Dtest=PutObjects + +exit diff --git a/qa/workunits/rgw/test_d4n.sh b/qa/workunits/rgw/test_d4n.sh new file mode 100755 index 000000000000..bee47d540179 --- /dev/null +++ b/qa/workunits/rgw/test_d4n.sh @@ -0,0 +1,9 @@ +#!/bin/sh -e + +# run d4n workunits that depend on a running redis server +ceph_test_rgw_d4n_directory +ceph_test_rgw_d4n_policy +ceph_test_rgw_redis_driver +ceph_test_rgw_ssd_driver + +exit 0 diff --git a/qa/workunits/rgw/test_librgw_file.sh b/qa/workunits/rgw/test_librgw_file.sh index 1371ff711075..8a0f952ad63c 100755 --- a/qa/workunits/rgw/test_librgw_file.sh +++ b/qa/workunits/rgw/test_librgw_file.sh @@ -1,5 +1,11 @@ #!/bin/sh -e - +# +# To run this test script with a cluster created via vstart.sh: +# $PATH needs to be set for radosgw-admin and ceph_test_librgw executables. +# $KEYRING need to be set as the path for a vstart clusters Ceph keyring. 
+# +# Example when ceph source is cloned into $HOME and a vstart cluster is already running with a radosgw: +# $ PATH=~/ceph/build/bin/:$PATH KEYRING=~/ceph/build/keyring ~/ceph/qa/workunits/rgw/test_librgw_file.sh if [ -z ${AWS_ACCESS_KEY_ID} ] then @@ -13,13 +19,16 @@ then --email librgw@example.com || echo "librgw user exists" # keyring override for teuthology env - KEYRING="/etc/ceph/ceph.keyring" + if [ -z ${KEYRING} ] + then + KEYRING="/etc/ceph/ceph.keyring" + fi K="-k ${KEYRING}" fi # nfsns is the main suite -# create herarchy, and then list it +# create hierarchy, and then list it echo "phase 1.1" ceph_test_librgw_file_nfsns ${K} --hier1 --dirs1 --create --rename --verbose @@ -56,4 +65,8 @@ ceph_test_librgw_file_gp ${K} --get --stat --put --create echo "phase 5.2" ceph_test_librgw_file_gp ${K} --delete +# rename tests +echo "phase 6.1" +ceph_test_librgw_file_rename ${K} --create + exit 0 diff --git a/qa/workunits/rgw/test_rgw_bucket_check.py b/qa/workunits/rgw/test_rgw_bucket_check.py index bfa6d65d6e77..33936df2401f 100755 --- a/qa/workunits/rgw/test_rgw_bucket_check.py +++ b/qa/workunits/rgw/test_rgw_bucket_check.py @@ -173,6 +173,7 @@ def main(): exec_cmd(f'radosgw-admin bucket check --fix --bucket {BUCKET_NAME}') out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys') json_out = json.loads(out) + log.info(f'"bucket check unlinked" returned {json_out}, expecting {unlinked_keys}') assert len(json_out) == len(unlinked_keys) bucket.object_versions.all().delete() out = exec_cmd(f'radosgw-admin bucket stats --bucket {BUCKET_NAME}') diff --git a/qa/workunits/rgw/test_rgw_d4n.py b/qa/workunits/rgw/test_rgw_d4n.py index 85d0dc23907e..c9e08bd439c8 100644 --- a/qa/workunits/rgw/test_rgw_d4n.py +++ b/qa/workunits/rgw/test_rgw_d4n.py @@ -1,11 +1,19 @@ #!/usr/bin/python3 +''' +This workunits tests the functionality of the D4N read workflow on a small object of size 4. +''' + import logging as log from configobj import ConfigObj import boto3 import redis import subprocess import json +import os +import hashlib +import string +import random log.basicConfig(level=log.DEBUG) @@ -72,113 +80,239 @@ def create_s3cmd_config(path, proto): f.close() log.info("s3cmd config written") +def generate_random(size, part_size=5*1024*1024): + """ + Generate the specified number random data. 
+ (actually each MB is a repetition of the first KB) + """ + chunk = 1024 + allowed = string.ascii_letters + for x in range(0, size, part_size): + strpart = ''.join([allowed[random.randint(0, len(allowed) - 1)] for _ in range(chunk)]) + s = '' + left = size - x + this_part_size = min(left, part_size) + for y in range(this_part_size // chunk): + s = s + strpart + if this_part_size > len(s): + s = s + strpart[0:this_part_size - len(s)] + yield s + if (x == size): + return + +def _multipart_upload(bucket_name, key, size, part_size=5*1024*1024, client=None, content_type=None, metadata=None, resend_parts=[]): + """ + generate a multi-part upload for a random file of specifed size, + if requested, generate a list of the parts + return the upload descriptor + """ + + if content_type == None and metadata == None: + response = client.create_multipart_upload(Bucket=bucket_name, Key=key) + else: + response = client.create_multipart_upload(Bucket=bucket_name, Key=key, Metadata=metadata, ContentType=content_type) + + upload_id = response['UploadId'] + s = '' + parts = [] + for i, part in enumerate(generate_random(size, part_size)): + # part_num is necessary because PartNumber for upload_part and in parts must start at 1 and i starts at 0 + part_num = i+1 + s += part + response = client.upload_part(UploadId=upload_id, Bucket=bucket_name, Key=key, PartNumber=part_num, Body=part) + parts.append({'ETag': response['ETag'].strip('"'), 'PartNumber': part_num}) + if i in resend_parts: + client.upload_part(UploadId=upload_id, Bucket=bucket_name, Key=key, PartNumber=part_num, Body=part) + + return (upload_id, s, parts) + def get_cmd_output(cmd_out): out = cmd_out.decode('utf8') out = out.strip('\n') return out -def test_directory_methods(r, client, obj): - test_txt = b'test' +def get_body(response): + body = response['Body'] + got = body.read() + if type(got) is bytes: + got = got.decode() + return got - # setValue call - response_put = obj.put(Body=test_txt) +def test_small_object(r, client, obj): + test_txt = 'test' + response_put = obj.put(Body=test_txt) assert(response_put.get('ResponseMetadata').get('HTTPStatusCode') == 200) - data = r.hgetall('rgw-object:test.txt:directory') + # first get call + response_get = obj.get() + assert(response_get.get('ResponseMetadata').get('HTTPStatusCode') == 200) - assert(data.get('key') == 'rgw-object:test.txt:directory') - assert(data.get('size') == '4') - assert(data.get('bucket_name') == 'bkt') - assert(data.get('obj_name') == 'test.txt') - assert(data.get('hosts') == '127.0.0.1:6379') + # check logs to ensure object was retrieved from storage backend + res = subprocess.call(['grep', '"D4NFilterObject::iterate:: iterate(): Fetching object from backend store"', '/var/log/ceph/rgw.ceph.client.0.log']) + assert(res >= 1) - # getValue call - response_get = obj.get() + # retrieve and compare cache contents + body = get_body(response_get) + assert(body == "test") - assert(response_get.get('ResponseMetadata').get('HTTPStatusCode') == 200) + data = subprocess.check_output(['ls', '/tmp/rgw_d4n_datacache/']) + data = data.decode('latin-1').strip() + output = subprocess.check_output(['md5sum', '/tmp/rgw_d4n_datacache/' + data]).decode('latin-1') - data = r.hgetall('rgw-object:test.txt:directory') + assert(output.splitlines()[0].split()[0] == hashlib.md5("test".encode('utf-8')).hexdigest()) - assert(data.get('key') == 'rgw-object:test.txt:directory') - assert(data.get('size') == '4') - assert(data.get('bucket_name') == 'bkt') - assert(data.get('obj_name') == 'test.txt') - 
assert(data.get('hosts') == '127.0.0.1:6379') + data = r.hgetall('bkt_test.txt_0_4') + output = subprocess.check_output(['radosgw-admin', 'object', 'stat', '--bucket=bkt', '--object=test.txt']) + attrs = json.loads(output.decode('latin-1')) - # delValue call - response_del = obj.delete() + # directory entry comparisons + assert(data.get('blockID') == '0') + assert(data.get('version') == attrs.get('tag')) + assert(data.get('size') == '4') + assert(data.get('globalWeight') == '0') + assert(data.get('blockHosts') == '127.0.0.1:6379') + assert(data.get('objName') == 'test.txt') + assert(data.get('bucketName') == 'bkt') + assert(data.get('creationTime') == attrs.get('mtime')) + assert(data.get('dirty') == '0') + assert(data.get('objHosts') == '') + + # repopulate cache + response_put = obj.put(Body=test_txt) + assert(response_put.get('ResponseMetadata').get('HTTPStatusCode') == 200) - assert(response_del.get('ResponseMetadata').get('HTTPStatusCode') == 204) - assert(r.exists('rgw-object:test.txt:directory') == False) + # second get call + response_get = obj.get() + assert(response_get.get('ResponseMetadata').get('HTTPStatusCode') == 200) - r.flushall() + # check logs to ensure object was retrieved from cache + res = subprocess.call(['grep', '"SSDCache: get_async(): ::aio_read(), ret=0"', '/var/log/ceph/rgw.ceph.client.0.log']) + assert(res >= 1) -def test_cache_methods(r, client, obj): - test_txt = b'test' + # retrieve and compare cache contents + body = get_body(response_get) + assert(body == "test") - # setObject call - response_put = obj.put(Body=test_txt) + data = subprocess.check_output(['ls', '/tmp/rgw_d4n_datacache/']) + data = data.decode('latin-1').strip() + output = subprocess.check_output(['md5sum', '/tmp/rgw_d4n_datacache/' + data]).decode('latin-1') - assert(response_put.get('ResponseMetadata').get('HTTPStatusCode') == 200) + assert(output.splitlines()[0].split()[0] == hashlib.md5("test".encode('utf-8')).hexdigest()) - data = r.hgetall('rgw-object:test.txt:cache') + data = r.hgetall('bkt_test.txt_0_4') output = subprocess.check_output(['radosgw-admin', 'object', 'stat', '--bucket=bkt', '--object=test.txt']) attrs = json.loads(output.decode('latin-1')) - assert((data.get(b'user.rgw.tail_tag')) == attrs.get('attrs').get('user.rgw.tail_tag').encode("latin-1") + b'\x00') - assert((data.get(b'user.rgw.idtag')) == attrs.get('tag').encode("latin-1") + b'\x00') - assert((data.get(b'user.rgw.etag')) == attrs.get('etag').encode("latin-1")) - assert((data.get(b'user.rgw.x-amz-content-sha256')) == attrs.get('attrs').get('user.rgw.x-amz-content-sha256').encode("latin-1") + b'\x00') - assert((data.get(b'user.rgw.x-amz-date')) == attrs.get('attrs').get('user.rgw.x-amz-date').encode("latin-1") + b'\x00') - - tmp1 = '\x08\x06L\x01\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x06\x84\x00\x00\x00\n\nj\x00\x00\x00\x03\x00\x00\x00bkt+\x00\x00\x00' - tmp2 = '+\x00\x00\x00' - tmp3 = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\b\x00\x00\x00test.txt\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00!\x00\x00\x00' - tmp4 = '\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x01 \x00\x00\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \ - '\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00default-placement\x11\x00\x00\x00default-placement\x00\x00\x00\x00\x02\x02\x18' \ - '\x00\x00\x00\x04\x00\x00\x00none\x01\x01\t\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - assert(data.get(b'user.rgw.manifest') == 
tmp1.encode("latin-1") + attrs.get('manifest').get('tail_placement').get('bucket').get('bucket_id').encode("utf-8") - + tmp2.encode("latin-1") + attrs.get('manifest').get('tail_placement').get('bucket').get('bucket_id').encode("utf-8") - + tmp3.encode("latin-1") + attrs.get('manifest').get('prefix').encode("utf-8") - + tmp4.encode("latin-1")) - - tmp5 = '\x02\x02\x81\x00\x00\x00\x03\x02\x11\x00\x00\x00\x06\x00\x00\x00s3main\x03\x00\x00\x00Foo\x04\x03d\x00\x00\x00\x01\x01\x00\x00\x00\x06\x00\x00' \ - '\x00s3main\x0f\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x00s3main\x05\x035\x00\x00\x00\x02\x02\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00s3main' \ - '\x00\x00\x00\x00\x00\x00\x00\x00\x02\x02\x04\x00\x00\x00\x0f\x00\x00\x00\x03\x00\x00\x00Foo\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' \ - '\x00\x00\x00' - assert((data.get(b'user.rgw.acl')) == tmp5.encode("latin-1")) - - # getObject call - response_get = obj.get() + # directory entries should remain consistent + assert(data.get('blockID') == '0') + assert(data.get('version') == attrs.get('tag')) + assert(data.get('size') == '4') + assert(data.get('globalWeight') == '0') + assert(data.get('blockHosts') == '127.0.0.1:6379') + assert(data.get('objName') == 'test.txt') + assert(data.get('bucketName') == 'bkt') + assert(data.get('creationTime') == attrs.get('mtime')) + assert(data.get('dirty') == '0') + assert(data.get('objHosts') == '') - assert(response_get.get('ResponseMetadata').get('HTTPStatusCode') == 200) + r.flushall() + +def test_large_object(r, client, s3): + key="mymultipart" + bucket_name="bkt" + content_type='text/bla' + objlen = 30 * 1024 * 1024 + metadata = {'foo': 'bar'} - # Copy to new object with 'COPY' directive; metadata value should not change - obj.metadata.update({'test':'value'}) - m = obj.metadata - m['test'] = 'value_replace' + (upload_id, data, parts) = _multipart_upload(bucket_name=bucket_name, key=key, size=objlen, client=client, content_type=content_type, metadata=metadata) + client.complete_multipart_upload(Bucket=bucket_name, Key=key, UploadId=upload_id, MultipartUpload={'Parts': parts}) - # copyObject call - client.copy_object(Bucket='bkt', Key='test_copy.txt', CopySource='bkt/test.txt', Metadata = m, MetadataDirective='COPY') + file_path = os.path.dirname(__file__)+'mymultipart' - assert(r.hexists('rgw-object:test_copy.txt:cache', b'user.rgw.x-amz-meta-test') == 0) + # first get + s3.Object(bucket_name, key).download_file(file_path) - # Update object with 'REPLACE' directive; metadata value should change - client.copy_object(Bucket='bkt', Key='test.txt', CopySource='bkt/test.txt', Metadata = m, MetadataDirective='REPLACE') + # check logs to ensure object was retrieved from storage backend + res = subprocess.call(['grep', '"D4NFilterObject::iterate:: iterate(): Fetching object from backend store"', '/var/log/ceph/rgw.ceph.client.0.log']) + assert(res >= 1) - data = r.hget('rgw-object:test.txt:cache', b'user.rgw.x-amz-meta-test') + # retrieve and compare cache contents + with open(file_path, 'r') as body: + assert(body.read() == data) - assert(data == b'value_replace\x00') + datacache_path = '/tmp/rgw_d4n_datacache/' + datacache = subprocess.check_output(['ls', datacache_path]) + datacache = datacache.decode('latin-1').splitlines() - # Ensure cache entry exists in cache before deletion - assert(r.exists('rgw-object:test.txt:cache') == True) + for file in datacache: + ofs = int(file.split("_")[3]) + size = int(file.split("_")[4]) + output = subprocess.check_output(['md5sum', datacache_path + 
file]).decode('latin-1') + assert(output.splitlines()[0].split()[0] == hashlib.md5(data[ofs:ofs+size].encode('utf-8')).hexdigest()) - # delObject call - response_del = obj.delete() + output = subprocess.check_output(['radosgw-admin', 'object', 'stat', '--bucket=bkt', '--object=mymultipart']) + attrs = json.loads(output.decode('latin-1')) + + for entry in r.scan_iter("bkt_mymultipart_*"): + data = r.hgetall(entry) + name = entry.split("_") + + # directory entry comparisons + assert(data.get('blockID') == name[2]) + assert(data.get('version') == attrs.get('tag')) + assert(data.get('size') == name[3]) + assert(data.get('globalWeight') == '0') + assert(data.get('blockHosts') == '127.0.0.1:6379') + assert(data.get('objName') == 'mymultipart') + assert(data.get('bucketName') == 'bkt') + assert(data.get('creationTime') == attrs.get('mtime')) + assert(data.get('dirty') == '0') + assert(data.get('objHosts') == '') + + # repopulate cache + (upload_id, data, parts) = _multipart_upload(bucket_name=bucket_name, key=key, size=objlen, client=client, content_type=content_type, metadata=metadata) + client.complete_multipart_upload(Bucket=bucket_name, Key=key, UploadId=upload_id, MultipartUpload={'Parts': parts}) + + #second get + s3.Object(bucket_name, key).download_file(file_path) + + # check logs to ensure object was retrieved from cache + res = subprocess.call(['grep', '"SSDCache: get_async(): ::aio_read(), ret=0"', '/var/log/ceph/rgw.ceph.client.0.log']) + assert(res >= 1) + + # retrieve and compare cache contents + with open(file_path, 'r') as body: + assert(body.read() == data) + + datacache_path = '/tmp/rgw_d4n_datacache/' + datacache = subprocess.check_output(['ls', datacache_path]) + datacache = datacache.decode('latin-1').splitlines() + + for file in datacache: + ofs = int(file.split("_")[3]) + size = int(file.split("_")[4]) + output = subprocess.check_output(['md5sum', datacache_path + file]).decode('latin-1') + assert(output.splitlines()[0].split()[0] == hashlib.md5(data[ofs:ofs+size].encode('utf-8')).hexdigest()) + + output = subprocess.check_output(['radosgw-admin', 'object', 'stat', '--bucket=bkt', '--object=mymultipart']) + attrs = json.loads(output.decode('latin-1')) - assert(response_del.get('ResponseMetadata').get('HTTPStatusCode') == 204) - assert(r.exists('rgw-object:test.txt:cache') == False) + for key in r.scan_iter("bkt_mymultipart_*"): + data = r.hgetall(key) + name = key.split("_") + + # directory entry comparisons + assert(data.get('blockID') == name[2]) + assert(data.get('version') == attrs.get('tag')) + assert(data.get('size') == name[3]) + assert(data.get('globalWeight') == '0') + assert(data.get('blockHosts') == '127.0.0.1:6379') + assert(data.get('objName') == 'mymultipart') + assert(data.get('bucketName') == 'bkt') + assert(data.get('creationTime') == attrs.get('mtime')) + assert(data.get('dirty') == '0') + assert(data.get('objHosts') == '') r.flushall() @@ -228,14 +362,13 @@ def main(): r = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True) - test_directory_methods(r, client, obj) - - # Responses should not be decoded - r = redis.Redis(host='localhost', port=6379, db=0) + # Run small object test + test_small_object(r, client, obj) - test_cache_methods(r, client, obj) + # Run large object test + test_large_object(r, client, s3) - log.info("D4NFilterTest successfully completed.") + log.info("D4NFilterTest completed.") main() log.info("Completed D4N tests") diff --git a/qa/workunits/rgw/test_rgw_datacache.py b/qa/workunits/rgw/test_rgw_datacache.py index 
f070ec0f1799..2af2a0d3aa3d 100755 --- a/qa/workunits/rgw/test_rgw_datacache.py +++ b/qa/workunits/rgw/test_rgw_datacache.py @@ -202,7 +202,7 @@ def main(): # remove datacache dir #cmd = exec_cmd('rm -rf %s' % (cache_dir)) #log.debug("RGW Datacache dir deleted") - #^ commenting for future refrence - the work unit will continue running tests and if the cache_dir is removed + #^ commenting for future reference - the work unit will continue running tests and if the cache_dir is removed # all the writes to cache will fail with errno 2 ENOENT No such file or directory. main() diff --git a/qa/workunits/rgw/test_rgw_orphan_list.sh b/qa/workunits/rgw/test_rgw_orphan_list.sh index 34d550ceade6..00d8ed64670b 100755 --- a/qa/workunits/rgw/test_rgw_orphan_list.sh +++ b/qa/workunits/rgw/test_rgw_orphan_list.sh @@ -6,7 +6,8 @@ set -e # if defined, debug messages will be displayed and prepended with the string # debug="DEBUG" -huge_size=5100 # in megabytes +#huge_size=5100 # in megabytes +huge_size=51 # in megabytes big_size=7 # in megabytes huge_obj=/tmp/huge_obj.temp.$$ @@ -376,6 +377,95 @@ done mys3cmd rb --recursive s3://$o_bkt +############################################################ +# copy multipart objects and delete destination + +o_bkt="orig-mp-bkt-5" +d_bkt="copy-mp-bkt-5" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://$d_bkt + +##################################################################### +# FORCE GARBAGE COLLECTION +sleep 6 # since for testing age at which gc can happen is 5 secs +radosgw-admin gc process --include-all +##################################################################### + +############################################################ +# copy multipart objects and delete original then destination + +o_bkt="orig-mp-bkt-6" +d_bkt="copy-mp-bkt-6" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://$o_bkt +mys3cmd rb --recursive s3://$d_bkt + +############################################################ +# copy multipart objects and delete destination then original + +o_bkt="orig-mp-bkt-7" +d_bkt="copy-mp-bkt-7" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://$d_bkt +mys3cmd rb --recursive s3://$o_bkt + +##################################################################### +# FORCE GARBAGE COLLECTION +sleep 6 # since for testing age at which gc can happen is 5 secs +radosgw-admin gc process --include-all +##################################################################### + 
######################################################################## # SWIFT TESTS diff --git a/qa/workunits/rgw/test_rgw_reshard.py b/qa/workunits/rgw/test_rgw_reshard.py index 6326e7b173cf..18ffb1022507 100755 --- a/qa/workunits/rgw/test_rgw_reshard.py +++ b/qa/workunits/rgw/test_rgw_reshard.py @@ -76,6 +76,16 @@ def get_bucket_num_shards(bucket_name, bucket_id): num_shards = json_op['data']['bucket_info']['num_shards'] return num_shards +def get_bucket_reshard_status(bucket_name): + """ + function to get bucket reshard status + """ + cmd = exec_cmd("radosgw-admin bucket stats --bucket {}".format(bucket_name)) + json_op = json.loads(cmd) + #print(json.dumps(json_op, indent = 4, sort_keys=True)) + reshard_status = json_op['reshard_status'] + return reshard_status + def run_bucket_reshard_cmd(bucket_name, num_shards, **kwargs): cmd = 'radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(bucket_name, num_shards) cmd += ' --rgw-reshard-bucket-lock-duration 30' # reduce to minimum @@ -104,7 +114,7 @@ def test_bucket_reshard(conn, name, **fault): # try reshard with fault injection _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False, **fault) - if fault.get('error_code') == errno.ECANCELED: + if fault.get('error_code') == errno.ECANCELED or fault.get('error_code') == errno.EOPNOTSUPP: assert(ret == 0) # expect ECANCELED to retry and succeed else: assert(ret != 0 and ret != errno.EBUSY) @@ -139,6 +149,11 @@ def test_bucket_reshard(conn, name, **fault): bucket.delete_objects(Delete={'Objects':[{'Key':o.key} for o in objs]}) bucket.delete() +def calc_reshardlog_count(json_op): + cnt = 0 + for shard in json_op: + cnt += len(shard['shard_entries']) + return cnt def main(): """ @@ -163,9 +178,14 @@ def main(): cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected)) cmd = exec_cmd('radosgw-admin reshard list') json_op = json.loads(cmd) - log.debug('bucket name {}'.format(json_op[0]['bucket_name'])) - assert json_op[0]['bucket_name'] == BUCKET_NAME - assert json_op[0]['tentative_new_num_shards'] == num_shards_expected + if (len(json_op) >= 1): + log.debug('bucket name {}'.format(json_op[0]['bucket_name'])) + assert json_op[0]['bucket_name'] == BUCKET_NAME + assert json_op[0]['tentative_new_num_shards'] == num_shards_expected + else: + cmd = exec_cmd('radosgw-admin bucket stats --bucket {}'.format(BUCKET_NAME)) + json_op = json.loads(cmd) + assert json_op['num_shards'] == num_shards_expected # TESTCASE 'reshard-process','reshard','','process bucket resharding','succeeds' log.debug('TEST: reshard process\n') @@ -187,8 +207,14 @@ def main(): cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected)) cmd = exec_cmd('radosgw-admin reshard list') json_op = json.loads(cmd) - assert json_op[0]['bucket_name'] == BUCKET_NAME - assert json_op[0]['tentative_new_num_shards'] == num_shards_expected + if (len(json_op) >= 1): + log.debug('bucket name {}'.format(json_op[0]['bucket_name'])) + assert json_op[0]['bucket_name'] == BUCKET_NAME + assert json_op[0]['tentative_new_num_shards'] == num_shards_expected + else: + cmd = exec_cmd('radosgw-admin bucket stats --bucket {}'.format(BUCKET_NAME)) + json_op = json.loads(cmd) + assert json_op['num_shards'] == num_shards_expected # TESTCASE 'reshard process ,'reshard','process','reshard non empty bucket','succeeds' log.debug('TEST: reshard process non empty bucket\n') @@ -199,6 +225,13 @@ def main(): log.error("Resharding 
failed on bucket {}. Expected number of shards are not created\n".format(BUCKET_NAME)) # TESTCASE 'manual bucket resharding','inject error','fail','check bucket accessibility', 'retry reshard' + log.debug('TEST: reshard bucket with EIO injected at init_index\n') + test_bucket_reshard(connection, 'error-at-init-index', error_at='init_index') + log.debug('TEST: reshard bucket with EOPNOTSUPP injected at init_index\n') + test_bucket_reshard(connection, 'error-at-init-index', error_at='init_index', error_code=errno.EOPNOTSUPP) + log.debug('TEST: reshard bucket with abort at init_index\n') + test_bucket_reshard(connection, 'abort-at-init-indext', abort_at='init_index') + log.debug('TEST: reshard bucket with EIO injected at set_target_layout\n') test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout') log.debug('TEST: reshard bucket with ECANCELED injected at set_target_layout\n') @@ -206,6 +239,13 @@ def main(): log.debug('TEST: reshard bucket with abort at set_target_layout\n') test_bucket_reshard(connection, 'abort-at-set-target-layout', abort_at='set_target_layout') + log.debug('TEST: reshard bucket with EIO injected at trim_reshard_log_entries\n') + test_bucket_reshard(connection, 'error-at-trim-reshard-log-entries', error_at='trim_reshard_log_entries') + log.debug('TEST: reshard bucket with EOPNOTSUPP injected at trim_reshard_log_entries\n') + test_bucket_reshard(connection, 'error-at-trim-reshard-log-entries', error_at='trim_reshard_log_entries', error_code=errno.EOPNOTSUPP) + log.debug('TEST: reshard bucket with abort at trim_reshard_log_entries\n') + test_bucket_reshard(connection, 'abort-at-trim-reshard-log-entries', abort_at='trim_reshard_log_entries') + log.debug('TEST: reshard bucket with EIO injected at block_writes\n') test_bucket_reshard(connection, 'error-at-block-writes', error_at='block_writes') log.debug('TEST: reshard bucket with abort at block_writes\n') @@ -223,6 +263,80 @@ def main(): log.debug('TEST: reshard bucket with abort at do_reshard\n') test_bucket_reshard(connection, 'abort-at-do-reshard', abort_at='do_reshard') + log.debug('TEST: reshard bucket with EIO injected at logrecord_writes\n') + test_bucket_reshard(connection, 'error-at-logrecord-writes', error_at='logrecord_writes') + log.debug('TEST: reshard bucket with abort at logrecord_writes\n') + test_bucket_reshard(connection, 'abort-at-logrecord-writes', abort_at='logrecord_writes') + + log.debug('TEST: reshard bucket with EIO injected at change_reshard_state\n') + test_bucket_reshard(connection, 'error-at-change-reshard-state', error_at='change_reshard_state') + log.debug('TEST: reshard bucket with ECANCELED injected at change_reshard_state\n') + test_bucket_reshard(connection, 'error-at-change-reshard-state', error_at='change_reshard_state', error_code=errno.ECANCELED) + log.debug('TEST: reshard bucket with abort at change_reshard_state\n') + test_bucket_reshard(connection, 'abort-at-change-reshard-state', abort_at='change_reshard_state') + + # TESTCASE 'logrecord could be stopped after reshard failed' + log.debug(' test: logrecord could be stopped after reshard failed') + num_shards = get_bucket_stats(BUCKET_NAME).num_shards + assert "None" == get_bucket_reshard_status(BUCKET_NAME) + _, ret = run_bucket_reshard_cmd(BUCKET_NAME, num_shards + 1, check_retcode=False, abort_at='change_reshard_state') + assert(ret != 0 and ret != errno.EBUSY) + assert "InLogrecord" == get_bucket_reshard_status(BUCKET_NAME) + + bucket.put_object(Key='put_during_logrecord', Body=b"some_data") + 
cmd = exec_cmd('radosgw-admin reshardlog list --bucket %s' % BUCKET_NAME) +    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80 +    assert calc_reshardlog_count(json_op) == 1 + +    # end up with logrecord status, the logrecord will be purged +    time.sleep(30) +    assert "InLogrecord" == get_bucket_reshard_status(BUCKET_NAME) +    bucket.put_object(Key='put_during_logrecord1', Body=b"some_data1") +    cmd = exec_cmd('radosgw-admin reshardlog list --bucket %s' % BUCKET_NAME) +    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80 +    assert calc_reshardlog_count(json_op) == 0 +    assert "None" == get_bucket_reshard_status(BUCKET_NAME) + +    # TESTCASE 'duplicated entries should be purged before reshard' +    log.debug(' test: duplicated entries should be purged before reshard') +    num_shards = get_bucket_stats(BUCKET_NAME).num_shards +    _, ret = run_bucket_reshard_cmd(BUCKET_NAME, num_shards + 1, check_retcode=False, abort_at='do_reshard') +    assert(ret != 0 and ret != errno.EBUSY) +    assert "InLogrecord" == get_bucket_reshard_status(BUCKET_NAME) + +    bucket.put_object(Key='put_during_logrecord2', Body=b"some_data2") +    cmd = exec_cmd('radosgw-admin reshardlog list --bucket %s' % BUCKET_NAME) +    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80 +    assert calc_reshardlog_count(json_op) == 1 + +    # begin to reshard again, the duplicated entries will be purged +    time.sleep(30) +    _, ret = run_bucket_reshard_cmd(BUCKET_NAME, num_shards + 1, check_retcode=False, abort_at='logrecord_writes') +    assert(ret != 0 and ret != errno.EBUSY) +    cmd = exec_cmd('radosgw-admin reshardlog list --bucket %s' % BUCKET_NAME) +    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80 +    assert calc_reshardlog_count(json_op) == 0 + +    # TESTCASE 'duplicated entries can be purged manually' +    log.debug(' test: duplicated entries can be purged manually') +    time.sleep(30) +    num_shards = get_bucket_stats(BUCKET_NAME).num_shards +    _, ret = run_bucket_reshard_cmd(BUCKET_NAME, num_shards + 1, check_retcode=False, abort_at='do_reshard') +    assert(ret != 0 and ret != errno.EBUSY) +    assert "InLogrecord" == get_bucket_reshard_status(BUCKET_NAME) + +    bucket.put_object(Key='put_during_logrecord3', Body=b"some_data3") +    cmd = exec_cmd('radosgw-admin reshardlog list --bucket %s' % BUCKET_NAME) +    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80 +    assert calc_reshardlog_count(json_op) == 1 + +    time.sleep(30) +    exec_cmd('radosgw-admin reshardlog purge --bucket %s' % BUCKET_NAME) +    cmd = exec_cmd('radosgw-admin reshardlog list --bucket %s' % BUCKET_NAME) +    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80 +    assert calc_reshardlog_count(json_op) == 0 +    log.debug('check reshard logrecord successfully') +     # TESTCASE 'versioning reshard-','bucket', reshard','versioning reshard','succeeds'     log.debug(' test: reshard versioned bucket')     num_shards_expected = get_bucket_stats(VER_BUCKET_NAME).num_shards + 1 @@ -276,6 +390,8 @@ def main():         time.sleep(1)     ver_bucket.put_object(Key='put_during_reshard', Body=b"some_data")     log.debug('put object successful') +    # wait for the delayed reshard to finish +    time.sleep(5)  # TESTCASE 'check that bucket stats are correct after reshard with unlinked entries'     log.debug('TEST: check that bucket stats are correct after reshard with unlinked entries\n') diff --git a/qa/workunits/rgw/test_rgw_versioning.py b/qa/workunits/rgw/test_rgw_versioning.py index 
fc69e138d41f..f175203ea0bf 100755 --- a/qa/workunits/rgw/test_rgw_versioning.py +++ b/qa/workunits/rgw/test_rgw_versioning.py @@ -5,6 +5,7 @@ import uuid import botocore import time +import threading from common import exec_cmd, create_user, boto_connect from botocore.config import Config @@ -100,7 +101,33 @@ def main(): exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err') get_resp = bucket.Object(key).get() assert put_resp.e_tag == get_resp['ETag'], 'get did not return null version with correct etag' - + + # TESTCASE 'verify that concurrent delete requests do not leave behind olh entries' + log.debug('TEST: verify that concurrent delete requests do not leave behind olh entries\n') + bucket.object_versions.all().delete() + + key = 'concurrent-delete' + # create a delete marker + resp = bucket.Object(key).delete() + version_id = resp['ResponseMetadata']['HTTPHeaders']['x-amz-version-id'] + try: + exec_cmd('ceph config set client rgw_debug_inject_latency_bi_unlink 2') + time.sleep(1) + + def do_delete(): + connection.ObjectVersion(bucket.name, key, version_id).delete() + + t2 = threading.Thread(target=do_delete) + t2.start() + do_delete() + t2.join() + finally: + exec_cmd('ceph config rm client rgw_debug_inject_latency_bi_unlink') + out = exec_cmd(f'radosgw-admin bucket check olh --bucket {bucket.name} --dump-keys') + num_leftover_olh_entries = len(json.loads(out)) + assert num_leftover_olh_entries == 0, \ + 'Found leftover olh entries after concurrent deletes' + # Clean up log.debug("Deleting bucket {}".format(BUCKET_NAME)) bucket.object_versions.all().delete() diff --git a/qa/workunits/suites/cephfs_journal_tool_smoke.sh b/qa/workunits/suites/cephfs_journal_tool_smoke.sh index a24dac532d5a..6a5379e1b477 100755 --- a/qa/workunits/suites/cephfs_journal_tool_smoke.sh +++ b/qa/workunits/suites/cephfs_journal_tool_smoke.sh @@ -50,7 +50,7 @@ if [ ! -s $JOURNAL_FILE ] ; then fi # Can we execute a journal reset? -$BIN journal reset +$BIN journal reset --yes-i-really-really-mean-it $BIN journal inspect $BIN header get @@ -86,6 +86,6 @@ $BIN event splice summary # Metadata objects have been modified by the 'event recover_dentries' command. # Journal is no long consistent with respect to metadata objects (especially inotable). # To ensure mds successfully replays its journal, we need to do journal reset. -$BIN journal reset +$BIN journal reset --yes-i-really-really-mean-it cephfs-table-tool all reset session diff --git a/qa/workunits/suites/fsx.sh b/qa/workunits/suites/fsx.sh index 0d5ba3a58baf..9eac07119d3e 100755 --- a/qa/workunits/suites/fsx.sh +++ b/qa/workunits/suites/fsx.sh @@ -4,7 +4,8 @@ set -e git clone https://git.ceph.com/xfstests-dev.git cd xfstests-dev -git checkout 12973fc04fd10d4af086901e10ffa8e48866b735 +# This sha1 is the latest master head and works well for our tests. +git checkout 0e5c12dfd008efc2848c98108c9237487e91ef35 make -j4 cd .. cp xfstests-dev/ltp/fsx . 
diff --git a/qa/workunits/test_telemetry_pacific.sh b/qa/workunits/test_telemetry_pacific.sh deleted file mode 100755 index a971f5883f03..000000000000 --- a/qa/workunits/test_telemetry_pacific.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -ex - -# Set up ident details for cluster -ceph config set mgr mgr/telemetry/channel_ident true -ceph config set mgr mgr/telemetry/organization 'ceph-qa' -ceph config set mgr mgr/telemetry/description 'upgrade test cluster' - -# Opt-in -ceph telemetry on --license sharing-1-0 - -# Check last_opt_revision -LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) -if [ $LAST_OPT_REVISION -ne 3 ]; then - echo "last_opt_revision is incorrect." - exit 1 -fi - -# Check reports -ceph telemetry show -ceph telemetry show-device -ceph telemetry show-all - -echo OK diff --git a/qa/workunits/test_telemetry_pacific_x.sh b/qa/workunits/test_telemetry_pacific_x.sh deleted file mode 100755 index 0e4a832db64f..000000000000 --- a/qa/workunits/test_telemetry_pacific_x.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash -ex - -# Assert that we're still opted in -LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) -if [ $LAST_OPT_REVISION -ne 3 ]; then - echo "last_opt_revision is incorrect" - exit 1 -fi - -# Check the warning: -STATUS=$(ceph -s) -if ! [[ $STATUS == *"Telemetry requires re-opt-in"* ]] -then - echo "STATUS does not contain re-opt-in warning" - exit 1 -fi - -# Check new collections -COLLECTIONS=$(ceph telemetry collection ls) -NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") -for col in ${NEW_COLLECTIONS[@]}; do - if ! [[ $COLLECTIONS == *$col* ]]; - then - echo "COLLECTIONS does not contain" "'"$col"'." - exit 1 - fi -done - -# Run preview commands -ceph telemetry preview -ceph telemetry preview-device -ceph telemetry preview-all - -# Opt in to new collections -ceph telemetry on --license sharing-1-0 -ceph telemetry enable channel perf - -# Check the warning: -timeout=60 -STATUS=$(ceph -s) -until [[ $STATUS != *"Telemetry requires re-opt-in"* ]] || [ $timeout -le 0 ]; do - STATUS=$(ceph -s) - sleep 1 - timeout=$(( timeout - 1 )) -done -if [ $timeout -le 0 ]; then - echo "STATUS should not contain re-opt-in warning at this point" - exit 1 -fi - -# Run show commands -ceph telemetry show -ceph telemetry show-device -ceph telemetry show - -# Opt out -ceph telemetry off - -echo OK diff --git a/qa/workunits/test_telemetry_quincy.sh b/qa/workunits/test_telemetry_quincy.sh index e8b07ec13032..2ce268eadbbc 100755 --- a/qa/workunits/test_telemetry_quincy.sh +++ b/qa/workunits/test_telemetry_quincy.sh @@ -13,7 +13,8 @@ ceph telemetry preview-all # Assert that new collections are available COLLECTIONS=$(ceph telemetry collection ls) -NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" + "basic_rook_v01" "perf_memory_metrics" "basic_pool_options_bluestore") for col in ${NEW_COLLECTIONS[@]}; do if ! [[ $COLLECTIONS == *$col* ]]; then diff --git a/qa/workunits/test_telemetry_quincy_x.sh b/qa/workunits/test_telemetry_quincy_x.sh index 4734132d024c..1ed5f28d55be 100755 --- a/qa/workunits/test_telemetry_quincy_x.sh +++ b/qa/workunits/test_telemetry_quincy_x.sh @@ -1,5 +1,9 @@ #!/bin/bash -ex +# Opt in to new collections right away to avoid "TELEMETRY_CHANGED" +# health warning (see https://tracker.ceph.com/issues/64458). 
+# Currently, no new collections between latest quincy and reef (dev) + # For quincy, the last_opt_revision remains at 1 since last_opt_revision # was phased out for fresh installs of quincy. LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) @@ -12,7 +16,8 @@ fi ceph -s COLLECTIONS=$(ceph telemetry collection ls) -NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" + "basic_rook_v01" "perf_memory_metrics" "basic_pool_options_bluestore") for col in ${NEW_COLLECTIONS[@]}; do if ! [[ $COLLECTIONS == *$col* ]]; then @@ -26,9 +31,6 @@ ceph telemetry preview ceph telemetry preview-device ceph telemetry preview-all -# Opt in to new collections -# Currently, no new collections between latest quincy and reef (dev) - # Run show commands ceph telemetry show ceph telemetry show-device diff --git a/qa/workunits/test_telemetry_reef.sh b/qa/workunits/test_telemetry_reef.sh new file mode 100755 index 000000000000..e8b07ec13032 --- /dev/null +++ b/qa/workunits/test_telemetry_reef.sh @@ -0,0 +1,44 @@ +#!/bin/bash -ex + +# Set up ident details for cluster +ceph config set mgr mgr/telemetry/channel_ident true +ceph config set mgr mgr/telemetry/organization 'ceph-qa' +ceph config set mgr mgr/telemetry/description 'upgrade test cluster' + + +#Run preview commands +ceph telemetry preview +ceph telemetry preview-device +ceph telemetry preview-all + +# Assert that new collections are available +COLLECTIONS=$(ceph telemetry collection ls) +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +for col in ${NEW_COLLECTIONS[@]}; do + if ! [[ $COLLECTIONS == *$col* ]]; + then + echo "COLLECTIONS does not contain" "'"$col"'." + exit 1 + fi +done + +# Opt-in +ceph telemetry on --license sharing-1-0 + +# Enable perf channel +ceph telemetry enable channel perf + +# For quincy, the last_opt_revision remains at 1 since last_opt_revision +# was phased out for fresh installs of quincy. +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 1 ]; then + echo "last_opt_revision is incorrect" + exit 1 +fi + +# Run show commands +ceph telemetry show +ceph telemetry show-device +ceph telemetry show-all + +echo OK diff --git a/qa/workunits/test_telemetry_reef_x.sh b/qa/workunits/test_telemetry_reef_x.sh new file mode 100755 index 000000000000..ced20aea2e49 --- /dev/null +++ b/qa/workunits/test_telemetry_reef_x.sh @@ -0,0 +1,41 @@ +#!/bin/bash -ex + +# Opt in to new collections right away to avoid "TELEMETRY_CHANGED" +# warning (see https://tracker.ceph.com/issues/64458) +ceph telemetry on --license sharing-1-0 + +# For quincy, the last_opt_revision remains at 1 since last_opt_revision +# was phased out for fresh installs of quincy. +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 1 ]; then + echo "last_opt_revision is incorrect" + exit 1 +fi + +# Check the warning: +ceph -s + +COLLECTIONS=$(ceph telemetry collection ls) +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics" "basic_pool_flags") +for col in ${NEW_COLLECTIONS[@]}; do + if ! [[ $COLLECTIONS == *$col* ]]; + then + echo "COLLECTIONS does not contain" "'"$col"'." 
+ exit 1 + fi +done + +#Run preview commands +ceph telemetry preview +ceph telemetry preview-device +ceph telemetry preview-all + +# Run show commands +ceph telemetry show +ceph telemetry show-device +ceph telemetry show + +# Opt out +ceph telemetry off + +echo OK diff --git a/qa/workunits/windows/py_tests/__init__.py b/qa/workunits/windows/py_tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/qa/workunits/windows/py_tests/internal/__init__.py b/qa/workunits/windows/py_tests/internal/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/qa/workunits/windows/py_tests/internal/exception.py b/qa/workunits/windows/py_tests/internal/exception.py new file mode 100644 index 000000000000..27a02dbe8cb6 --- /dev/null +++ b/qa/workunits/windows/py_tests/internal/exception.py @@ -0,0 +1,27 @@ +# Copyright (C) 2023 Cloudbase Solutions +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation (see LICENSE). + +class CephTestException(Exception): + msg_fmt = "An exception has been encountered." + + def __init__(self, message: str = '', **kwargs): + self.kwargs = kwargs + if not message: + message = self.msg_fmt % kwargs + self.message = message + super(CephTestException, self).__init__(message) + + +class CommandFailed(CephTestException): + msg_fmt = ( + "Command failed: %(command)s. " + "Return code: %(returncode)s. " + "Stdout: %(stdout)s. Stderr: %(stderr)s.") + + +class CephTestTimeout(CephTestException): + msg_fmt = "Operation timeout." diff --git a/qa/workunits/windows/py_tests/internal/rbd_image.py b/qa/workunits/windows/py_tests/internal/rbd_image.py new file mode 100644 index 000000000000..e7599383321c --- /dev/null +++ b/qa/workunits/windows/py_tests/internal/rbd_image.py @@ -0,0 +1,286 @@ +# Copyright (C) 2023 Cloudbase Solutions +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation (see LICENSE). + +import json +import logging +import os +import time + +from py_tests.internal import exception +from py_tests.internal.tracer import Tracer +from py_tests.internal import utils + +LOG = logging.getLogger() + + +class RbdImage(object): + def __init__(self, + name: str, + size_mb: int, + is_shared: bool = True, + disk_number: int = -1, + mapped: bool = False): + self.name = name + self.size_mb = size_mb + self.is_shared = is_shared + self.disk_number = disk_number + self.mapped = mapped + self.removed = False + self.drive_letter = "" + + @classmethod + @Tracer.trace + def create(cls, + name: str, + size_mb: int = 1024, + is_shared: bool = True): + LOG.info("Creating image: %s. 
Size: %s.", name, "%sM" % size_mb) + cmd = ["rbd", "create", name, "--size", "%sM" % size_mb] + if is_shared: + cmd += ["--image-shared"] + utils.execute(*cmd) + + return RbdImage(name, size_mb, is_shared) + + @Tracer.trace + def get_disk_number(self, + timeout: int = 60, + retry_interval: int = 2): + @utils.retry_decorator( + retried_exceptions=exception.CephTestException, + timeout=timeout, + retry_interval=retry_interval) + def _get_disk_number(): + LOG.info("Retrieving disk number: %s", self.name) + + result = utils.execute( + "rbd-wnbd", "show", self.name, "--format=json") + disk_info = json.loads(result.stdout) + disk_number = disk_info["disk_number"] + if disk_number > 0: + LOG.debug("Image %s disk number: %d", self.name, disk_number) + return disk_number + + raise exception.CephTestException( + f"Could not get disk number: {self.name}.") + + return _get_disk_number() + + @Tracer.trace + def _wait_for_disk(self, + timeout: int = 60, + retry_interval: int = 2): + @utils.retry_decorator( + retried_exceptions=(FileNotFoundError, OSError), + additional_details="the mapped disk isn't available yet", + timeout=timeout, + retry_interval=retry_interval) + def wait_for_disk(): + LOG.debug("Waiting for disk to be accessible: %s %s", + self.name, self.path) + + with open(self.path, 'rb'): + pass + + return wait_for_disk() + + @Tracer.trace + def _wait_for_fs(self, + timeout: int = 60, + retry_interval: int = 2): + @utils.retry_decorator( + retried_exceptions=exception.CephTestException, + additional_details="the mapped fs isn't available yet", + timeout=timeout, + retry_interval=retry_interval) + def wait_for_fs(): + drive_letter = self._get_drive_letter() + path = f"{drive_letter}:\\" + + LOG.debug("Waiting for disk to be accessible: %s %s", + self.name, self.path) + + if not os.path.exists(path): + raise exception.CephTestException( + f"path not available yet: {path}") + + return wait_for_fs() + + @property + def path(self): + return f"\\\\.\\PhysicalDrive{self.disk_number}" + + @Tracer.trace + @utils.retry_decorator( + additional_details="couldn't clear disk read-only flag") + def set_writable(self): + utils.ps_execute( + "Set-Disk", "-Number", str(self.disk_number), + "-IsReadOnly", "$false") + + @Tracer.trace + @utils.retry_decorator(additional_details="couldn't bring the disk online") + def set_online(self): + utils.ps_execute( + "Set-Disk", "-Number", str(self.disk_number), + "-IsOffline", "$false") + + @Tracer.trace + def map(self, timeout: int = 60): + LOG.info("Mapping image: %s", self.name) + tstart = time.time() + + utils.execute("rbd-wnbd", "map", self.name) + self.mapped = True + + self.disk_number = self.get_disk_number(timeout=timeout) + + elapsed = time.time() - tstart + self._wait_for_disk(timeout=timeout - elapsed) + + @Tracer.trace + def refresh_after_remap(self, timeout: int = 60): + tstart = time.time() + + # The disk number may change after a remap, we need to refresh it. + self.disk_number = self.get_disk_number(timeout=timeout) + + elapsed = time.time() - tstart + self._wait_for_disk(timeout=timeout - elapsed) + + if self.drive_letter: + elapsed = time.time() - tstart + self._wait_for_fs(timeout=timeout - elapsed) + + drive_letter = self._get_drive_letter() + + # We expect the drive letter to remain the same after a remap. 
+ assert self.drive_letter == drive_letter + + @Tracer.trace + def unmap(self): + if self.mapped: + LOG.info("Unmapping image: %s", self.name) + utils.execute("rbd-wnbd", "unmap", self.name) + self.mapped = False + + @Tracer.trace + @utils.retry_decorator() + def remove(self): + if not self.removed: + LOG.info("Removing image: %s", self.name) + utils.execute("rbd", "rm", self.name) + self.removed = True + + def cleanup(self): + try: + self.unmap() + finally: + self.remove() + + @Tracer.trace + @utils.retry_decorator() + def _init_disk(self): + cmd = (f"Get-Disk -Number {self.disk_number} | " + "Initialize-Disk -PartitionStyle MBR") + utils.ps_execute(cmd) + + @Tracer.trace + @utils.retry_decorator() + def _create_partition(self): + cmd = (f"Get-Disk -Number {self.disk_number} | " + "New-Partition -AssignDriveLetter -UseMaximumSize") + utils.ps_execute(cmd) + + @Tracer.trace + @utils.retry_decorator() + def _format_volume(self): + cmd = ( + f"(Get-Partition -DiskNumber {self.disk_number}" + " | ? { $_.DriveLetter }) | Format-Volume -Force -Confirm:$false") + utils.ps_execute(cmd) + + @Tracer.trace + @utils.retry_decorator() + def _get_drive_letter(self): + cmd = (f"(Get-Partition -DiskNumber {self.disk_number}" + " | ? { $_.DriveLetter }).DriveLetter") + result = utils.ps_execute(cmd) + + # The PowerShell command will place a null character if no drive letter + # is available. For example, we can receive "\x00\r\n". + drive_letter = result.stdout.decode().strip() + if not drive_letter.isalpha() or len(drive_letter) != 1: + raise exception.CephTestException( + "Invalid drive letter received: %s" % drive_letter) + return drive_letter + + @Tracer.trace + def init_fs(self): + if not self.mapped: + raise exception.CephTestException( + "Unable to create fs, image not mapped.") + + LOG.info("Initializing fs, image: %s.", self.name) + + self._init_disk() + self._create_partition() + self._format_volume() + self.drive_letter = self._get_drive_letter() + + @Tracer.trace + def get_fs_capacity(self): + if not self.drive_letter: + raise exception.CephTestException("No drive letter available") + + cmd = f"(Get-Volume -DriveLetter {self.drive_letter}).Size" + result = utils.ps_execute(cmd) + + return int(result.stdout.decode().strip()) + + @Tracer.trace + def resize(self, new_size_mb, allow_shrink=False): + LOG.info( + "Resizing image: %s. New size: %s MB, old size: %s MB", + self.name, new_size_mb, self.size_mb) + + cmd = ["rbd", "resize", self.name, + "--size", f"{new_size_mb}M", "--no-progress"] + if allow_shrink: + cmd.append("--allow-shrink") + + utils.execute(*cmd) + + self.size_mb = new_size_mb + + @Tracer.trace + def get_disk_size(self): + """Retrieve the virtual disk size (bytes) reported by Windows.""" + cmd = f"(Get-Disk -Number {self.disk_number}).Size" + result = utils.ps_execute(cmd) + + disk_size = result.stdout.decode().strip() + if not disk_size.isdigit(): + raise exception.CephTestException( + "Invalid disk size received: %s" % disk_size) + + return int(disk_size) + + @Tracer.trace + @utils.retry_decorator(timeout=30) + def wait_for_disk_resize(self): + # After resizing the rbd image, the daemon is expected to receive + # the notification, inform the WNBD driver and then trigger a disk + # rescan (IOCTL_DISK_UPDATE_PROPERTIES). This might take a few seconds, + # so we'll need to do some polling. + disk_size = self.get_disk_size() + disk_size_mb = disk_size // (1 << 20) + + if disk_size_mb != self.size_mb: + raise exception.CephTestException( + "The disk size hasn't been updated yet. 
Retrieved size: " + f"{disk_size_mb}MB. Expected size: {self.size_mb}MB.") diff --git a/qa/workunits/windows/py_tests/internal/task_group.py b/qa/workunits/windows/py_tests/internal/task_group.py new file mode 100644 index 000000000000..ccdba44233d1 --- /dev/null +++ b/qa/workunits/windows/py_tests/internal/task_group.py @@ -0,0 +1,63 @@ +from concurrent import futures +import logging +import threading + + +LOG = logging.getLogger() + + +class TaskGroup(object): + def __init__(self, max_workers=1, stop_on_error=True): + self._executor = futures.ThreadPoolExecutor(max_workers=max_workers) + self._lock = threading.Lock() + + self.errors = 0 + self.completed = 0 + self.pending = 0 + + self.stopped = False + self.stop_on_error = stop_on_error + + self._submitted_tasks = [] + + def _wrap_task(self, task): + def wrapper(): + with self._lock: + if self.stopped: + self.pending -= 1 + return + + try: + task() + except Exception as ex: + with self._lock: + if self.stop_on_error: + self.stopped = True + + self.errors += 1 + LOG.exception( + "Task exception: %s. Total exceptions: %d", + ex, self.errors) + finally: + with self._lock: + self.completed += 1 + self.pending -= 1 + LOG.info("Completed tasks: %d. Pending: %d", + self.completed, self.pending) + + return wrapper + + def submit(self, task): + task_wrapper = self._wrap_task(task) + + with self._lock: + self.pending += 1 + + submitted_task = self._executor.submit(task_wrapper) + self._submitted_tasks.append(submitted_task) + + def join(self): + LOG.info("Waiting for %d tasks to complete.", + len(self._submitted_tasks)) + futures.wait(self._submitted_tasks) + LOG.info("Tasks completed.") diff --git a/qa/workunits/windows/py_tests/internal/tracer.py b/qa/workunits/windows/py_tests/internal/tracer.py new file mode 100644 index 000000000000..d80b0a5ffe95 --- /dev/null +++ b/qa/workunits/windows/py_tests/internal/tracer.py @@ -0,0 +1,75 @@ +# Copyright (C) 2023 Cloudbase Solutions +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation (see LICENSE). 
+ +import collections +import prettytable +import threading +import time + +from py_tests.internal import utils + + +class Tracer: + data: collections.OrderedDict = collections.OrderedDict() + lock = threading.Lock() + + @classmethod + def trace(cls, func): + def wrapper(*args, **kwargs): + tstart = time.time() + exc_str = None + + # Preserve call order + with cls.lock: + if func.__qualname__ not in cls.data: + cls.data[func.__qualname__] = list() + + try: + return func(*args, **kwargs) + except Exception as exc: + exc_str = "%r: %s" % (exc, exc) + raise + finally: + tend = time.time() + + with cls.lock: + cls.data[func.__qualname__] += [{ + "duration": tend - tstart, + "error": exc_str, + }] + + return wrapper + + @classmethod + def get_results(cls): + stats = collections.OrderedDict() + for f in cls.data.keys(): + stats[f] = utils.array_stats([i['duration'] for i in cls.data[f]]) + errors = [] + for i in cls.data[f]: + if i['error']: + errors.append(i['error']) + + stats[f]['errors'] = errors + return stats + + @classmethod + def print_results(cls): + r = cls.get_results() + + table = prettytable.PrettyTable(title="Duration (s)") + table.field_names = [ + "function", "min", "max", "total", + "mean", "median", "std_dev", + "max 90%", "min 90%", "count", "errors"] + table.float_format = ".4" + for f, s in r.items(): + table.add_row([f, s['min'], s['max'], s['sum'], + s['mean'], s['median'], s['std_dev'], + s['max_90'], s['min_90'], + s['count'], len(s['errors'])]) + print(table) diff --git a/qa/workunits/windows/py_tests/internal/utils.py b/qa/workunits/windows/py_tests/internal/utils.py new file mode 100644 index 000000000000..0fb5d328961e --- /dev/null +++ b/qa/workunits/windows/py_tests/internal/utils.py @@ -0,0 +1,119 @@ +# Copyright (C) 2023 Cloudbase Solutions +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation (see LICENSE). + +import collections +import functools +import logging +import math +import subprocess +import time +import typing + +from py_tests.internal import exception + +LOG = logging.getLogger() + + +def setup_logging(log_level: int = logging.INFO): + handler = logging.StreamHandler() + handler.setLevel(log_level) + + log_fmt = '[%(asctime)s] %(levelname)s - %(message)s' + formatter = logging.Formatter(log_fmt) + handler.setFormatter(formatter) + + LOG.addHandler(handler) + LOG.setLevel(logging.DEBUG) + + +def retry_decorator(timeout: int = 60, + retry_interval: int = 2, + silent_interval: int = 10, + additional_details: str = "", + retried_exceptions: + typing.Union[ + typing.Type[Exception], + collections.abc.Iterable[ + typing.Type[Exception]]] = Exception): + def wrapper(f: typing.Callable[..., typing.Any]): + @functools.wraps(f) + def inner(*args, **kwargs): + tstart: float = time.time() + elapsed: float = 0 + exc = None + details = additional_details or "%s failed" % f.__qualname__ + + while elapsed < timeout or not timeout: + try: + return f(*args, **kwargs) + except retried_exceptions as ex: + exc = ex + elapsed = time.time() - tstart + if elapsed > silent_interval: + level = logging.WARNING + else: + level = logging.DEBUG + LOG.log(level, + "Exception: %s. Additional details: %s. " + "Time elapsed: %d. Timeout: %d", + ex, details, elapsed, timeout) + + time.sleep(retry_interval) + elapsed = time.time() - tstart + + msg = ( + "Operation timed out. Exception: %s. Additional details: %s. " + "Time elapsed: %d. 
Timeout: %d.") + raise exception.CephTestTimeout( + msg % (exc, details, elapsed, timeout)) + return inner + return wrapper + + +def execute(*args, **kwargs): + LOG.debug("Executing: %s", args) + result = subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + LOG.debug("Command %s returned %d.", args, result.returncode) + if result.returncode: + exc = exception.CommandFailed( + command=args, returncode=result.returncode, + stdout=result.stdout, stderr=result.stderr) + raise exc + return result + + +def ps_execute(*args, **kwargs): + # Disable PS progress bar, causes issues when invoked remotely. + prefix = "$global:ProgressPreference = 'SilentlyContinue' ; " + return execute( + "powershell.exe", "-NonInteractive", + "-Command", prefix, *args, **kwargs) + + +def array_stats(array: list): + mean = sum(array) / len(array) if len(array) else 0 + variance = (sum((i - mean) ** 2 for i in array) / len(array) + if len(array) else 0) + std_dev = math.sqrt(variance) + sorted_array = sorted(array) + + return { + 'min': min(array) if len(array) else 0, + 'max': max(array) if len(array) else 0, + 'sum': sum(array) if len(array) else 0, + 'mean': mean, + 'median': sorted_array[len(array) // 2] if len(array) else 0, + 'max_90': sorted_array[int(len(array) * 0.9)] if len(array) else 0, + 'min_90': sorted_array[int(len(array) * 0.1)] if len(array) else 0, + 'variance': variance, + 'std_dev': std_dev, + 'count': len(array) + } diff --git a/qa/workunits/windows/py_tests/rbd_wnbd/__init__.py b/qa/workunits/windows/py_tests/rbd_wnbd/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/qa/workunits/windows/py_tests/rbd_wnbd/service_restart_test.py b/qa/workunits/windows/py_tests/rbd_wnbd/service_restart_test.py new file mode 100644 index 000000000000..a4c9142f30b9 --- /dev/null +++ b/qa/workunits/windows/py_tests/rbd_wnbd/service_restart_test.py @@ -0,0 +1,232 @@ +# Copyright (C) 2023 Cloudbase Solutions +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation (see LICENSE). + +import argparse +import logging +import typing + +from py_tests.internal import exception +from py_tests.internal import task_group +from py_tests.internal.tracer import Tracer +from py_tests.internal import utils +from py_tests.rbd_wnbd import stress_test + +LOG = logging.getLogger() + +parser = argparse.ArgumentParser(description='rbd-wnbd service restart test') +parser.add_argument('--test-name', + help='The test to be run.', + default="RbdStampTest") +parser.add_argument('--iterations', + help='Total number of test iterations', + default=2, type=int) +parser.add_argument('--image-count', + help='The number of images to use.', + default=8, type=int) +parser.add_argument('--concurrency', + help='The number of workers to use when ' + 'initializing and running the tests.', + default=4, type=int) +parser.add_argument('--fio-iterations', + help='Total number of benchmark iterations per disk.', + default=1, type=int) +parser.add_argument('--fio-workers', + help='Total number of fio workers per disk.', + default=1, type=int) +parser.add_argument('--fio-depth', + help='The number of concurrent asynchronous operations ' + 'executed per disk', + default=64, type=int) +parser.add_argument('--fio-verify', + help='The mechanism used to validate the written ' + 'data. Examples: crc32c, md5, sha1, null, etc. 
' + 'If set to null, the written data will not be ' + 'verified.', + default='crc32c') +parser.add_argument('--bs', + help='Benchmark block size.', + default="2M") +parser.add_argument('--op', + help='Benchmark operation. ' + 'Examples: read, randwrite, rw, etc.', + default="rw") +parser.add_argument('--image-prefix', + help='The image name prefix.', + default="cephTest-") +parser.add_argument('--image-size-mb', + help='The image size in megabytes.', + default=32, type=int) +parser.add_argument('--map-timeout', + help='Image map timeout.', + default=60, type=int) +parser.add_argument('--skip-enabling-disk', action='store_true', + help='If set, the disk will not be turned online and the ' + 'read-only flag will not be removed. Useful when ' + 'the SAN policy is set to "onlineAll".') +parser.add_argument('--verbose', action='store_true', + help='Print info messages.') +parser.add_argument('--debug', action='store_true', + help='Print debug messages.') +parser.add_argument('--stop-on-error', action='store_true', + help='Stop testing when hitting errors.') +parser.add_argument('--skip-cleanup-on-error', action='store_true', + help='Skip cleanup when hitting errors.') + + +class ServiceRestartTestRunner(object): + def __init__(self, + test_cls: typing.Type[stress_test.RbdTest], + test_params: dict = {}, + iterations: int = 1, + image_count: int = 8, + workers: int = 1, + stop_on_error: bool = False, + cleanup_on_error: bool = True): + self.test_cls = test_cls + self.test_params = test_params + self.iterations = iterations + self.image_count = image_count + self.workers = workers + self.errors = 0 + self.stop_on_error = stop_on_error + self.cleanup_on_error = cleanup_on_error + + self.test_instances: list[stress_test.RbdTest] = [] + + @Tracer.trace + def initialize(self): + LOG.info("Initializing mappings") + + tg = task_group.TaskGroup(max_workers=self.workers, + stop_on_error=self.stop_on_error) + + for idx in range(self.image_count): + test = self.test_cls(**self.test_params) + self.test_instances.append(test) + + tg.submit(test.initialize) + + tg.join() + self.errors += tg.errors + + @Tracer.trace + def cleanup(self): + LOG.info("Performing cleanup") + + tg = task_group.TaskGroup(max_workers=self.workers, + stop_on_error=self.stop_on_error) + + for test_instance in self.test_instances: + tg.submit(test_instance.cleanup) + + tg.join() + self.errors += tg.errors + + @Tracer.trace + def run_tests(self): + LOG.info("Running the tests") + + tg = task_group.TaskGroup(max_workers=self.workers, + stop_on_error=self.stop_on_error) + + for test_instance in self.test_instances: + tg.submit(test_instance.run) + + tg.join() + self.errors += tg.errors + + @Tracer.trace + def _restart_service(self): + LOG.info("Restarting ceph-rbd service") + + utils.ps_execute("restart-service", "ceph-rbd") + + @Tracer.trace + def _refresh_test_instances(self): + LOG.info("Refreshing mappings after service restart") + + tg = task_group.TaskGroup(max_workers=self.workers, + stop_on_error=self.stop_on_error) + + for test_instance in self.test_instances: + tg.submit(test_instance.image.refresh_after_remap) + + tg.join() + self.errors += tg.errors + + @Tracer.trace + def run(self): + try: + self.initialize() + + for iteration in range(self.iterations): + self.run_tests() + + self._restart_service() + + self._refresh_test_instances() + except Exception: + LOG.exception("Test failed") + self.errors += 1 + finally: + if not self.errors or self.cleanup_on_error: + self.cleanup() + + +TESTS: typing.Dict[str, 
typing.Type[stress_test.RbdTest]] = { + 'RbdTest': stress_test.RbdTest, + 'RbdFioTest': stress_test.RbdFioTest, + 'RbdStampTest': stress_test.RbdStampTest, + # FS tests + 'RbdFsTest': stress_test.RbdFsTest, + 'RbdFsFioTest': stress_test.RbdFsFioTest, + 'RbdFsStampTest': stress_test.RbdFsStampTest, +} + +if __name__ == '__main__': + args = parser.parse_args() + + log_level = logging.WARNING + if args.verbose: + log_level = logging.INFO + if args.debug: + log_level = logging.DEBUG + utils.setup_logging(log_level) + + test_params = dict( + image_size_mb=args.image_size_mb, + image_prefix=args.image_prefix, + bs=args.bs, + op=args.op, + verify=args.fio_verify, + iodepth=args.fio_depth, + map_timeout=args.map_timeout, + skip_enabling_disk=args.skip_enabling_disk, + ) + + try: + test_cls = TESTS[args.test_name] + except KeyError: + raise exception.CephTestException( + "Unknown test: {}".format(args.test_name)) + + runner = ServiceRestartTestRunner( + test_cls, + test_params=test_params, + iterations=args.iterations, + image_count=args.image_count, + workers=args.concurrency, + stop_on_error=args.stop_on_error, + cleanup_on_error=not args.skip_cleanup_on_error) + runner.run() + + Tracer.print_results() + test_cls.print_results( + description="count: %d, concurrency: %d" % + (args.iterations, args.concurrency)) + + assert runner.errors == 0, f"encountered {runner.errors} error(s)." diff --git a/qa/workunits/windows/py_tests/rbd_wnbd/stress_test.py b/qa/workunits/windows/py_tests/rbd_wnbd/stress_test.py new file mode 100644 index 000000000000..0c50e6afe977 --- /dev/null +++ b/qa/workunits/windows/py_tests/rbd_wnbd/stress_test.py @@ -0,0 +1,538 @@ +# Copyright (C) 2023 Cloudbase Solutions +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation (see LICENSE). + +import argparse +import collections +import json +import logging +import math +import os +import prettytable +import random +import time +import threading +import typing +import uuid +from concurrent import futures + +from py_tests.internal import exception +from py_tests.internal.rbd_image import RbdImage +from py_tests.internal.tracer import Tracer +from py_tests.internal import utils + +LOG = logging.getLogger() + +parser = argparse.ArgumentParser(description='rbd-wnbd stress tests') +parser.add_argument('--test-name', + help='The test to be run.', + default="RbdFioTest") +parser.add_argument('--iterations', + help='Total number of test iterations', + default=1, type=int) +parser.add_argument('--concurrency', + help='The number of tests to run in parallel', + default=4, type=int) +parser.add_argument('--fio-iterations', + help='Total number of benchmark iterations per disk.', + default=1, type=int) +parser.add_argument('--fio-workers', + help='Total number of fio workers per disk.', + default=1, type=int) +parser.add_argument('--fio-depth', + help='The number of concurrent asynchronous operations ' + 'executed per disk', + default=64, type=int) +parser.add_argument('--fio-verify', + help='The mechanism used to validate the written ' + 'data. Examples: crc32c, md5, sha1, null, etc. ' + 'If set to null, the written data will not be ' + 'verified.', + default='crc32c') +parser.add_argument('--bs', + help='Benchmark block size.', + default="2M") +parser.add_argument('--op', + help='Benchmark operation. 
' + 'Examples: read, randwrite, rw, etc.', + default="rw") +parser.add_argument('--image-prefix', + help='The image name prefix.', + default="cephTest-") +parser.add_argument('--image-size-mb', + help='The image size in megabytes.', + default=1024, type=int) +parser.add_argument('--map-timeout', + help='Image map timeout.', + default=60, type=int) +parser.add_argument('--skip-enabling-disk', action='store_true', + help='If set, the disk will not be turned online and the ' + 'read-only flag will not be removed. Useful when ' + 'the SAN policy is set to "onlineAll".') +parser.add_argument('--verbose', action='store_true', + help='Print info messages.') +parser.add_argument('--debug', action='store_true', + help='Print debug messages.') +parser.add_argument('--stop-on-error', action='store_true', + help='Stop testing when hitting errors.') +parser.add_argument('--skip-cleanup-on-error', action='store_true', + help='Skip cleanup when hitting errors.') + + +class RbdTest(object): + image: RbdImage + + requires_disk_online = False + requires_disk_write = False + + def __init__(self, + image_prefix: str = "cephTest-", + image_size_mb: int = 1024, + map_timeout: int = 60, + **kwargs): + self.image_size_mb = image_size_mb + self.image_name = image_prefix + str(uuid.uuid4()) + self.map_timeout = map_timeout + self.skip_enabling_disk = kwargs.get("skip_enabling_disk") + + @Tracer.trace + def initialize(self): + self.image = RbdImage.create( + self.image_name, + self.image_size_mb) + self.image.map(timeout=self.map_timeout) + + if not self.skip_enabling_disk: + if self.requires_disk_write: + self.image.set_writable() + + if self.requires_disk_online: + self.image.set_online() + + def run(self): + pass + + def cleanup(self): + if self.image: + self.image.cleanup() + self.image = None + + @classmethod + def print_results(cls, + title: str = "Test results", + description: str = ''): + pass + + +class RbdFsTestMixin(object): + # Windows disks must be turned online before accessing partitions. 
+ requires_disk_online = True + requires_disk_write = True + + @Tracer.trace + def initialize(self): + super(RbdFsTestMixin, self).initialize() + + self.image.init_fs() + + def get_subpath(self, *args): + drive_path = f"{self.image.drive_letter}:\\" + return os.path.join(drive_path, *args) + + +class RbdFsTest(RbdFsTestMixin, RbdTest): + pass + + +class RbdFioTest(RbdTest): + data: typing.DefaultDict[str, typing.List[typing.Dict[str, str]]] = ( + collections.defaultdict(list)) + lock = threading.Lock() + + def __init__(self, + *args, + fio_size_mb: int = 0, + iterations: int = 1, + workers: int = 1, + bs: str = "2M", + iodepth: int = 64, + op: str = "rw", + verify: str = "crc32c", + **kwargs): + + super(RbdFioTest, self).__init__(*args, **kwargs) + + self.fio_size_mb = fio_size_mb or self.image_size_mb + self.iterations = iterations + self.workers = workers + self.bs = bs + self.iodepth = iodepth + self.op = op + if op not in ("read", "randread"): + self.requires_disk_write = True + self.verify = verify + + def process_result(self, raw_fio_output: str): + result = json.loads(raw_fio_output) + with self.lock: + for job in result["jobs"]: + # Fio doesn't support trim on Windows + for op in ['read', 'write']: + if op in job: + self.data[op].append({ + 'error': job['error'], + 'io_bytes': job[op]['io_bytes'], + 'bw_bytes': job[op]['bw_bytes'], + 'runtime': job[op]['runtime'] / 1000, # seconds + 'total_ios': job[op]['short_ios'], + 'short_ios': job[op]['short_ios'], + 'dropped_ios': job[op]['short_ios'], + 'clat_ns_min': job[op]['clat_ns']['min'], + 'clat_ns_max': job[op]['clat_ns']['max'], + 'clat_ns_mean': job[op]['clat_ns']['mean'], + 'clat_ns_stddev': job[op]['clat_ns']['stddev'], + 'clat_ns_10': job[op].get('clat_ns', {}) + .get('percentile', {}) + .get('10.000000', 0), + 'clat_ns_90': job[op].get('clat_ns', {}) + .get('percentile', {}) + .get('90.000000', 0) + }) + + def _get_fio_path(self): + return self.image.path + + @Tracer.trace + def _run_fio(self, fio_size_mb: int = 0) -> None: + LOG.info("Starting FIO test.") + cmd = [ + "fio", "--thread", "--output-format=json", + "--randrepeat=%d" % self.iterations, + "--direct=1", "--name=test", + "--bs=%s" % self.bs, "--iodepth=%s" % self.iodepth, + "--size=%sM" % (fio_size_mb or self.fio_size_mb), + "--readwrite=%s" % self.op, + "--numjobs=%s" % self.workers, + "--filename=%s" % self._get_fio_path(), + ] + if self.verify: + cmd += ["--verify=%s" % self.verify] + result = utils.execute(*cmd) + LOG.info("Completed FIO test.") + self.process_result(result.stdout) + + @Tracer.trace + def run(self): + self._run_fio() + + @classmethod + def print_results(cls, + title: str = "Benchmark results", + description: str = ''): + if description: + title = "%s (%s)" % (title, description) + + for op in cls.data.keys(): + op_title = "%s op=%s" % (title, op) + + table = prettytable.PrettyTable(title=op_title) + table.field_names = ["stat", "min", "max", "mean", + "median", "std_dev", + "max 90%", "min 90%", "total"] + table.float_format = ".4" + + op_data = cls.data[op] + + s = utils.array_stats( + [float(i["bw_bytes"]) / 1000_000 for i in op_data]) + table.add_row(["bandwidth (MB/s)", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], 'N/A']) + + s = utils.array_stats([float(i["runtime"]) for i in op_data]) + table.add_row(["duration (s)", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = utils.array_stats([i["error"] for i in op_data]) + 
table.add_row(["errors", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = utils.array_stats([i["short_ios"] for i in op_data]) + table.add_row(["incomplete IOs", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = utils.array_stats([i["dropped_ios"] for i in op_data]) + table.add_row(["dropped IOs", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + clat_min = utils.array_stats([i["clat_ns_min"] for i in op_data]) + clat_max = utils.array_stats([i["clat_ns_max"] for i in op_data]) + clat_mean = utils.array_stats([i["clat_ns_mean"] for i in op_data]) + clat_stddev = math.sqrt( + sum([float(i["clat_ns_stddev"]) ** 2 + for i in op_data]) / len(op_data) + if len(op_data) else 0) + clat_10 = utils.array_stats([i["clat_ns_10"] for i in op_data]) + clat_90 = utils.array_stats([i["clat_ns_90"] for i in op_data]) + # For convenience, we'll convert it from ns to seconds. + table.add_row(["completion latency (s)", + clat_min['min'] / 1e+9, + clat_max['max'] / 1e+9, + clat_mean['mean'] / 1e+9, + clat_mean['median'] / 1e+9, + clat_stddev / 1e+9, + clat_10['mean'] / 1e+9, + clat_90['mean'] / 1e+9, + clat_mean['sum'] / 1e+9]) + print(table) + + +class RbdResizeFioTest(RbdFioTest): + """Image resize test. + + This test extends and then shrinks the image, performing FIO tests to + validate the resized image. + """ + + @Tracer.trace + def run(self): + self.image.resize(self.image_size_mb * 2) + self.image.wait_for_disk_resize() + + self._run_fio(fio_size_mb=self.image_size_mb * 2) + + self.image.resize(self.image_size_mb // 2, allow_shrink=True) + self.image.wait_for_disk_resize() + + self._run_fio(fio_size_mb=self.image_size_mb // 2) + + # Just like rbd-nbd, rbd-wnbd is masking out-of-bounds errors. + # For this reason, we don't have a negative test that writes + # passed the disk boundary. + + +class RbdFsFioTest(RbdFsTestMixin, RbdFioTest): + def initialize(self): + super(RbdFsFioTest, self).initialize() + + if not self.fio_size_mb or self.fio_size_mb == self.image_size_mb: + # Out of caution, we'll use up to 80% of the FS by default + self.fio_size_mb = int( + self.image.get_fs_capacity() * 0.8 / (1024 * 1024)) + + @staticmethod + def _fio_escape_path(path): + # FIO allows specifying multiple files separated by colon. + # This means that ":" has to be escaped, so + # F:\filename becomes F\:\filename. + return path.replace(":", "\\:") + + def _get_fio_path(self): + return self._fio_escape_path(self.get_subpath("test-fio")) + + +class RbdStampTest(RbdTest): + requires_disk_write = True + + _write_open_mode = "rb+" + _read_open_mode = "rb" + _expect_path_exists = True + _stamp_size = 512 + + def __init__(self, *args, **kwargs): + super(RbdStampTest, self).__init__(*args, **kwargs) + + # We allow running the test repeatedly, for example after a + # remount operation. 
+ self._previous_stamp = b'\0' * self._stamp_size + + @staticmethod + def _rand_float(min_val: float, max_val: float): + return min_val + (random.random() * max_val - min_val) + + def _get_stamp(self): + buff_str = self.image_name + "-" + str(uuid.uuid4()) + buff = buff_str.encode() + assert len(buff) <= self._stamp_size + + padding = self._stamp_size - len(buff) + buff += b'\0' * padding + return buff + + def _get_stamp_path(self): + return self.image.path + + @Tracer.trace + def _write_stamp(self, stamp): + with open(self._get_stamp_path(), self._write_open_mode) as disk: + disk.write(stamp) + + @Tracer.trace + def _read_stamp(self): + with open(self._get_stamp_path(), self._read_open_mode) as disk: + return disk.read(self._stamp_size) + + @Tracer.trace + def run(self): + if self._expect_path_exists: + # Wait up to 5 seconds and then check the disk, ensuring that + # nobody else wrote to it. This is particularly useful when + # running a high number of tests in parallel, ensuring that + # we aren't writing to the wrong disk. + time.sleep(self._rand_float(0, 5)) + + r_stamp = self._read_stamp() + assert self._previous_stamp == r_stamp + + w_stamp = self._get_stamp() + self._write_stamp(w_stamp) + + r_stamp = self._read_stamp() + assert w_stamp == r_stamp + + self._previous_stamp = w_stamp + + +class RbdFsStampTest(RbdFsTestMixin, RbdStampTest): + _write_open_mode = "wb" + _expect_path_exists = False + + def _get_stamp_path(self): + return self.get_subpath("test-stamp") + + +class StressTestRunner(object): + def __init__(self, + test_cls: typing.Type[RbdTest], + test_params: dict = {}, + iterations: int = 1, + workers: int = 1, + stop_on_error: bool = False, + cleanup_on_error: bool = True): + self.test_cls = test_cls + self.test_params = test_params + self.iterations = iterations + self.workers = workers + self.executor = futures.ThreadPoolExecutor(max_workers=workers) + self.lock = threading.Lock() + self.completed = 0 + self.errors = 0 + self.stopped = False + self.stop_on_error = stop_on_error + self.cleanup_on_error = cleanup_on_error + + @Tracer.trace + def run(self): + tasks = [] + for i in range(self.iterations): + task = self.executor.submit(self.run_single_test) + tasks.append(task) + + LOG.info("Waiting for %d tests to complete.", self.iterations) + for task in tasks: + task.result() + + def run_single_test(self): + failed = False + if self.stopped: + return + + try: + test = self.test_cls(**self.test_params) + test.initialize() + test.run() + except KeyboardInterrupt: + LOG.warning("Received Ctrl-C.") + self.stopped = True + except Exception as ex: + failed = True + if self.stop_on_error: + self.stopped = True + with self.lock: + self.errors += 1 + LOG.exception( + "Test exception: %s. Total exceptions: %d", + ex, self.errors) + finally: + if not failed or self.cleanup_on_error: + try: + test.cleanup() + except KeyboardInterrupt: + LOG.warning("Received Ctrl-C.") + self.stopped = True + # Retry the cleanup + test.cleanup() + except Exception: + LOG.exception("Test cleanup failed.") + + with self.lock: + self.completed += 1 + LOG.info("Completed tests: %d. 
Pending: %d", + self.completed, self.iterations - self.completed) + + +TESTS: typing.Dict[str, typing.Type[RbdTest]] = { + 'RbdTest': RbdTest, + 'RbdFioTest': RbdFioTest, + 'RbdResizeFioTest': RbdResizeFioTest, + 'RbdStampTest': RbdStampTest, + # FS tests + 'RbdFsTest': RbdFsTest, + 'RbdFsFioTest': RbdFsFioTest, + 'RbdFsStampTest': RbdFsStampTest, +} + +if __name__ == '__main__': + args = parser.parse_args() + + log_level = logging.WARNING + if args.verbose: + log_level = logging.INFO + if args.debug: + log_level = logging.DEBUG + utils.setup_logging(log_level) + + test_params = dict( + image_size_mb=args.image_size_mb, + image_prefix=args.image_prefix, + bs=args.bs, + op=args.op, + verify=args.fio_verify, + iodepth=args.fio_depth, + map_timeout=args.map_timeout, + skip_enabling_disk=args.skip_enabling_disk, + ) + + try: + test_cls = TESTS[args.test_name] + except KeyError: + raise exception.CephTestException( + "Unknown test: {}".format(args.test_name)) + + runner = StressTestRunner( + test_cls, + test_params=test_params, + iterations=args.iterations, + workers=args.concurrency, + stop_on_error=args.stop_on_error, + cleanup_on_error=not args.skip_cleanup_on_error) + runner.run() + + Tracer.print_results() + test_cls.print_results( + description="count: %d, concurrency: %d" % + (args.iterations, args.concurrency)) + + assert runner.errors == 0, f"encountered {runner.errors} error(s)." diff --git a/qa/workunits/windows/run-tests.ps1 b/qa/workunits/windows/run-tests.ps1 index 6d818f4267ec..e0ee8de948dd 100644 --- a/qa/workunits/windows/run-tests.ps1 +++ b/qa/workunits/windows/run-tests.ps1 @@ -4,7 +4,7 @@ $ErrorActionPreference = "Stop" $scriptLocation = [System.IO.Path]::GetDirectoryName( $myInvocation.MyCommand.Definition) -$testRbdWnbd = "$scriptLocation/test_rbd_wnbd.py" +$env:PYTHONPATH += ";$scriptLocation" function safe_exec() { # Powershell doesn't check the command exit code, we'll need to @@ -16,14 +16,27 @@ function safe_exec() { } } -safe_exec python.exe $testRbdWnbd --test-name RbdTest --iterations 100 -safe_exec python.exe $testRbdWnbd --test-name RbdFioTest --iterations 100 -safe_exec python.exe $testRbdWnbd --test-name RbdStampTest --iterations 100 +safe_exec python.exe -m py_tests.rbd_wnbd.stress_test --test-name RbdTest --iterations 100 +safe_exec python.exe -m py_tests.rbd_wnbd.stress_test --test-name RbdFioTest --iterations 100 +safe_exec python.exe -m py_tests.rbd_wnbd.stress_test --test-name RbdStampTest --iterations 100 # It can take a while to setup the partition (~10s), we'll use fewer iterations. 
-safe_exec python.exe $testRbdWnbd --test-name RbdFsTest --iterations 4 -safe_exec python.exe $testRbdWnbd --test-name RbdFsFioTest --iterations 4 -safe_exec python.exe $testRbdWnbd --test-name RbdFsStampTest --iterations 4 +safe_exec python.exe -m py_tests.rbd_wnbd.stress_test --test-name RbdFsTest --iterations 4 +safe_exec python.exe -m py_tests.rbd_wnbd.stress_test --test-name RbdFsFioTest --iterations 4 +safe_exec python.exe -m py_tests.rbd_wnbd.stress_test --test-name RbdFsStampTest --iterations 4 -safe_exec python.exe $testRbdWnbd ` - --test-name RbdResizeFioTest --image-size-mb 64 +safe_exec python.exe -m py_tests.rbd_wnbd.stress_test --test-name RbdResizeFioTest --image-size-mb 64 + +safe_exec python.exe -m py_tests.rbd_wnbd.service_restart_test ` + --test-name=RbdTest --iterations=3 --image-count=50 --concurrency=8 +safe_exec python.exe -m py_tests.rbd_wnbd.service_restart_test ` + --test-name=RbdFioTest --iterations=3 --image-count=50 --concurrency=8 +safe_exec python.exe -m py_tests.rbd_wnbd.service_restart_test ` + --test-name=RbdStampTest --iterations=3 --image-count=50 --concurrency=8 + +safe_exec python.exe -m py_tests.rbd_wnbd.service_restart_test ` + --test-name=RbdFsTest --iterations=3 --image-count=8 --concurrency=8 --image-size-mb=64 +safe_exec python.exe -m py_tests.rbd_wnbd.service_restart_test ` + --test-name=RbdFsFioTest --iterations=3 --image-count=8 --concurrency=8 --image-size-mb=64 +safe_exec python.exe -m py_tests.rbd_wnbd.service_restart_test ` + --test-name=RbdFsStampTest --iterations=3 --image-count=8 --concurrency=8 --image-size-mb=64 diff --git a/qa/workunits/windows/test_rbd_wnbd.py b/qa/workunits/windows/test_rbd_wnbd.py deleted file mode 100644 index db14234a2b5f..000000000000 --- a/qa/workunits/windows/test_rbd_wnbd.py +++ /dev/null @@ -1,919 +0,0 @@ -import argparse -import collections -import functools -import json -import logging -import math -import os -import prettytable -import random -import subprocess -import time -import threading -import typing -import uuid -from concurrent import futures - -LOG = logging.getLogger() - -parser = argparse.ArgumentParser(description='rbd-wnbd tests') -parser.add_argument('--test-name', - help='The test to be run.', - default="RbdFioTest") -parser.add_argument('--iterations', - help='Total number of test iterations', - default=1, type=int) -parser.add_argument('--concurrency', - help='The number of tests to run in parallel', - default=4, type=int) -parser.add_argument('--fio-iterations', - help='Total number of benchmark iterations per disk.', - default=1, type=int) -parser.add_argument('--fio-workers', - help='Total number of fio workers per disk.', - default=1, type=int) -parser.add_argument('--fio-depth', - help='The number of concurrent asynchronous operations ' - 'executed per disk', - default=64, type=int) -parser.add_argument('--fio-verify', - help='The mechanism used to validate the written ' - 'data. Examples: crc32c, md5, sha1, null, etc. ' - 'If set to null, the written data will not be ' - 'verified.', - default='crc32c') -parser.add_argument('--bs', - help='Benchmark block size.', - default="2M") -parser.add_argument('--op', - help='Benchmark operation. 
' - 'Examples: read, randwrite, rw, etc.', - default="rw") -parser.add_argument('--image-prefix', - help='The image name prefix.', - default="cephTest-") -parser.add_argument('--image-size-mb', - help='The image size in megabytes.', - default=1024, type=int) -parser.add_argument('--map-timeout', - help='Image map timeout.', - default=60, type=int) -parser.add_argument('--skip-enabling-disk', action='store_true', - help='If set, the disk will not be turned online and the ' - 'read-only flag will not be removed. Useful when ' - 'the SAN policy is set to "onlineAll".') -parser.add_argument('--verbose', action='store_true', - help='Print info messages.') -parser.add_argument('--debug', action='store_true', - help='Print debug messages.') -parser.add_argument('--stop-on-error', action='store_true', - help='Stop testing when hitting errors.') -parser.add_argument('--skip-cleanup-on-error', action='store_true', - help='Skip cleanup when hitting errors.') - - -class CephTestException(Exception): - msg_fmt = "An exception has been encountered." - - def __init__(self, message: str = None, **kwargs): - self.kwargs = kwargs - if not message: - message = self.msg_fmt % kwargs - self.message = message - super(CephTestException, self).__init__(message) - - -class CommandFailed(CephTestException): - msg_fmt = ( - "Command failed: %(command)s. " - "Return code: %(returncode)s. " - "Stdout: %(stdout)s. Stderr: %(stderr)s.") - - -class CephTestTimeout(CephTestException): - msg_fmt = "Operation timeout." - - -def setup_logging(log_level: int = logging.INFO): - handler = logging.StreamHandler() - handler.setLevel(log_level) - - log_fmt = '[%(asctime)s] %(levelname)s - %(message)s' - formatter = logging.Formatter(log_fmt) - handler.setFormatter(formatter) - - LOG.addHandler(handler) - LOG.setLevel(logging.DEBUG) - - -def retry_decorator(timeout: int = 60, - retry_interval: int = 2, - silent_interval: int = 10, - additional_details: str = "", - retried_exceptions: - typing.Union[ - typing.Type[Exception], - collections.abc.Iterable[ - typing.Type[Exception]]] = Exception): - def wrapper(f: typing.Callable[..., typing.Any]): - @functools.wraps(f) - def inner(*args, **kwargs): - tstart: float = time.time() - elapsed: float = 0 - exc = None - details = additional_details or "%s failed" % f.__qualname__ - - while elapsed < timeout or not timeout: - try: - return f(*args, **kwargs) - except retried_exceptions as ex: - exc = ex - elapsed = time.time() - tstart - if elapsed > silent_interval: - level = logging.WARNING - else: - level = logging.DEBUG - LOG.log(level, - "Exception: %s. Additional details: %s. " - "Time elapsed: %d. Timeout: %d", - ex, details, elapsed, timeout) - - time.sleep(retry_interval) - elapsed = time.time() - tstart - - msg = ( - "Operation timed out. Exception: %s. Additional details: %s. " - "Time elapsed: %d. Timeout: %d.") - raise CephTestTimeout( - msg % (exc, details, elapsed, timeout)) - return inner - return wrapper - - -def execute(*args, **kwargs): - LOG.debug("Executing: %s", args) - result = subprocess.run( - args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - **kwargs) - LOG.debug("Command %s returned %d.", args, result.returncode) - if result.returncode: - exc = CommandFailed( - command=args, returncode=result.returncode, - stdout=result.stdout, stderr=result.stderr) - LOG.error(exc) - raise exc - return result - - -def ps_execute(*args, **kwargs): - # Disable PS progress bar, causes issues when invoked remotely. 
- prefix = "$global:ProgressPreference = 'SilentlyContinue' ; " - return execute( - "powershell.exe", "-NonInteractive", - "-Command", prefix, *args, **kwargs) - - -def array_stats(array: list): - mean = sum(array) / len(array) if len(array) else 0 - variance = (sum((i - mean) ** 2 for i in array) / len(array) - if len(array) else 0) - std_dev = math.sqrt(variance) - sorted_array = sorted(array) - - return { - 'min': min(array) if len(array) else 0, - 'max': max(array) if len(array) else 0, - 'sum': sum(array) if len(array) else 0, - 'mean': mean, - 'median': sorted_array[len(array) // 2] if len(array) else 0, - 'max_90': sorted_array[int(len(array) * 0.9)] if len(array) else 0, - 'min_90': sorted_array[int(len(array) * 0.1)] if len(array) else 0, - 'variance': variance, - 'std_dev': std_dev, - 'count': len(array) - } - - -class Tracer: - data: collections.OrderedDict = collections.OrderedDict() - lock = threading.Lock() - - @classmethod - def trace(cls, func): - def wrapper(*args, **kwargs): - tstart = time.time() - exc_str = None - - # Preserve call order - with cls.lock: - if func.__qualname__ not in cls.data: - cls.data[func.__qualname__] = list() - - try: - return func(*args, **kwargs) - except Exception as exc: - exc_str = str(exc) - raise - finally: - tend = time.time() - - with cls.lock: - cls.data[func.__qualname__] += [{ - "duration": tend - tstart, - "error": exc_str, - }] - - return wrapper - - @classmethod - def get_results(cls): - stats = collections.OrderedDict() - for f in cls.data.keys(): - stats[f] = array_stats([i['duration'] for i in cls.data[f]]) - errors = [] - for i in cls.data[f]: - if i['error']: - errors.append(i['error']) - - stats[f]['errors'] = errors - return stats - - @classmethod - def print_results(cls): - r = cls.get_results() - - table = prettytable.PrettyTable(title="Duration (s)") - table.field_names = [ - "function", "min", "max", "total", - "mean", "median", "std_dev", - "max 90%", "min 90%", "count", "errors"] - table.float_format = ".4" - for f, s in r.items(): - table.add_row([f, s['min'], s['max'], s['sum'], - s['mean'], s['median'], s['std_dev'], - s['max_90'], s['min_90'], - s['count'], len(s['errors'])]) - print(table) - - -class RbdImage(object): - def __init__(self, - name: str, - size_mb: int, - is_shared: bool = True, - disk_number: int = -1, - mapped: bool = False): - self.name = name - self.size_mb = size_mb - self.is_shared = is_shared - self.disk_number = disk_number - self.mapped = mapped - self.removed = False - self.drive_letter = "" - - @classmethod - @Tracer.trace - def create(cls, - name: str, - size_mb: int = 1024, - is_shared: bool = True): - LOG.info("Creating image: %s. 
Size: %s.", name, "%sM" % size_mb) - cmd = ["rbd", "create", name, "--size", "%sM" % size_mb] - if is_shared: - cmd += ["--image-shared"] - execute(*cmd) - - return RbdImage(name, size_mb, is_shared) - - @Tracer.trace - def get_disk_number(self, - timeout: int = 60, - retry_interval: int = 2): - @retry_decorator( - retried_exceptions=CephTestException, - timeout=timeout, - retry_interval=retry_interval) - def _get_disk_number(): - LOG.info("Retrieving disk number: %s", self.name) - - result = execute("rbd-wnbd", "show", self.name, "--format=json") - disk_info = json.loads(result.stdout) - disk_number = disk_info["disk_number"] - if disk_number > 0: - LOG.debug("Image %s disk number: %d", self.name, disk_number) - return disk_number - - raise CephTestException( - f"Could not get disk number: {self.name}.") - - return _get_disk_number() - - @Tracer.trace - def _wait_for_disk(self, - timeout: int = 60, - retry_interval: int = 2): - @retry_decorator( - retried_exceptions=(FileNotFoundError, OSError), - additional_details="the mapped disk isn't available yet", - timeout=timeout, - retry_interval=retry_interval) - def wait_for_disk(): - LOG.debug("Waiting for disk to be accessible: %s %s", - self.name, self.path) - - with open(self.path, 'rb'): - pass - - return wait_for_disk() - - @property - def path(self): - return f"\\\\.\\PhysicalDrive{self.disk_number}" - - @Tracer.trace - @retry_decorator(additional_details="couldn't clear disk read-only flag") - def set_writable(self): - ps_execute( - "Set-Disk", "-Number", str(self.disk_number), - "-IsReadOnly", "$false") - - @Tracer.trace - @retry_decorator(additional_details="couldn't bring the disk online") - def set_online(self): - ps_execute( - "Set-Disk", "-Number", str(self.disk_number), - "-IsOffline", "$false") - - @Tracer.trace - def map(self, timeout: int = 60): - LOG.info("Mapping image: %s", self.name) - tstart = time.time() - - execute("rbd-wnbd", "map", self.name) - self.mapped = True - - self.disk_number = self.get_disk_number(timeout=timeout) - - elapsed = time.time() - tstart - self._wait_for_disk(timeout=timeout - elapsed) - - @Tracer.trace - def unmap(self): - if self.mapped: - LOG.info("Unmapping image: %s", self.name) - execute("rbd-wnbd", "unmap", self.name) - self.mapped = False - - @Tracer.trace - def remove(self): - if not self.removed: - LOG.info("Removing image: %s", self.name) - execute("rbd", "rm", self.name) - self.removed = True - - def cleanup(self): - try: - self.unmap() - finally: - self.remove() - - @Tracer.trace - @retry_decorator() - def _init_disk(self): - cmd = f"Get-Disk -Number {self.disk_number} | Initialize-Disk" - ps_execute(cmd) - - @Tracer.trace - @retry_decorator() - def _create_partition(self): - cmd = (f"Get-Disk -Number {self.disk_number} | " - "New-Partition -AssignDriveLetter -UseMaximumSize") - ps_execute(cmd) - - @Tracer.trace - @retry_decorator() - def _format_volume(self): - cmd = ( - f"(Get-Partition -DiskNumber {self.disk_number}" - " | ? { $_.DriveLetter }) | Format-Volume -Force -Confirm:$false") - ps_execute(cmd) - - @Tracer.trace - @retry_decorator() - def _get_drive_letter(self): - cmd = (f"(Get-Partition -DiskNumber {self.disk_number}" - " | ? { $_.DriveLetter }).DriveLetter") - result = ps_execute(cmd) - - # The PowerShell command will place a null character if no drive letter - # is available. For example, we can receive "\x00\r\n". 
- self.drive_letter = result.stdout.decode().strip() - if not self.drive_letter.isalpha() or len(self.drive_letter) != 1: - raise CephTestException( - "Invalid drive letter received: %s" % self.drive_letter) - - @Tracer.trace - def init_fs(self): - if not self.mapped: - raise CephTestException("Unable to create fs, image not mapped.") - - LOG.info("Initializing fs, image: %s.", self.name) - - self._init_disk() - self._create_partition() - self._format_volume() - self._get_drive_letter() - - @Tracer.trace - def get_fs_capacity(self): - if not self.drive_letter: - raise CephTestException("No drive letter available") - - cmd = f"(Get-Volume -DriveLetter {self.drive_letter}).Size" - result = ps_execute(cmd) - - return int(result.stdout.decode().strip()) - - @Tracer.trace - def resize(self, new_size_mb, allow_shrink=False): - LOG.info( - "Resizing image: %s. New size: %s MB, old size: %s MB", - self.name, new_size_mb, self.size_mb) - - cmd = ["rbd", "resize", self.name, - "--size", f"{new_size_mb}M", "--no-progress"] - if allow_shrink: - cmd.append("--allow-shrink") - - execute(*cmd) - - self.size_mb = new_size_mb - - @Tracer.trace - def get_disk_size(self): - """Retrieve the virtual disk size (bytes) reported by Windows.""" - cmd = f"(Get-Disk -Number {self.disk_number}).Size" - result = ps_execute(cmd) - - disk_size = result.stdout.decode().strip() - if not disk_size.isdigit(): - raise CephTestException( - "Invalid disk size received: %s" % disk_size) - - return int(disk_size) - - @Tracer.trace - @retry_decorator(timeout=30) - def wait_for_disk_resize(self): - # After resizing the rbd image, the daemon is expected to receive - # the notification, inform the WNBD driver and then trigger a disk - # rescan (IOCTL_DISK_UPDATE_PROPERTIES). This might take a few seconds, - # so we'll need to do some polling. - disk_size = self.get_disk_size() - disk_size_mb = disk_size // (1 << 20) - - if disk_size_mb != self.size_mb: - raise CephTestException( - "The disk size hasn't been updated yet. Retrieved size: " - f"{disk_size_mb}MB. Expected size: {self.size_mb}MB.") - - -class RbdTest(object): - image: RbdImage - - requires_disk_online = False - requires_disk_write = False - - def __init__(self, - image_prefix: str = "cephTest-", - image_size_mb: int = 1024, - map_timeout: int = 60, - **kwargs): - self.image_size_mb = image_size_mb - self.image_name = image_prefix + str(uuid.uuid4()) - self.map_timeout = map_timeout - self.skip_enabling_disk = kwargs.get("skip_enabling_disk") - - @Tracer.trace - def initialize(self): - self.image = RbdImage.create( - self.image_name, - self.image_size_mb) - self.image.map(timeout=self.map_timeout) - - if not self.skip_enabling_disk: - if self.requires_disk_write: - self.image.set_writable() - - if self.requires_disk_online: - self.image.set_online() - - def run(self): - pass - - def cleanup(self): - if self.image: - self.image.cleanup() - - @classmethod - def print_results(cls, - title: str = "Test results", - description: str = None): - pass - - -class RbdFsTestMixin(object): - # Windows disks must be turned online before accessing partitions. 
- requires_disk_online = True - requires_disk_write = True - - @Tracer.trace - def initialize(self): - super(RbdFsTestMixin, self).initialize() - - self.image.init_fs() - - def get_subpath(self, *args): - drive_path = f"{self.image.drive_letter}:\\" - return os.path.join(drive_path, *args) - - -class RbdFsTest(RbdFsTestMixin, RbdTest): - pass - - -class RbdFioTest(RbdTest): - data: typing.DefaultDict[str, typing.List[typing.Dict[str, str]]] = ( - collections.defaultdict(list)) - lock = threading.Lock() - - def __init__(self, - *args, - fio_size_mb: int = None, - iterations: int = 1, - workers: int = 1, - bs: str = "2M", - iodepth: int = 64, - op: str = "rw", - verify: str = "crc32c", - **kwargs): - - super(RbdFioTest, self).__init__(*args, **kwargs) - - self.fio_size_mb = fio_size_mb or self.image_size_mb - self.iterations = iterations - self.workers = workers - self.bs = bs - self.iodepth = iodepth - self.op = op - if op not in ("read", "randread"): - self.requires_disk_write = True - self.verify = verify - - def process_result(self, raw_fio_output: str): - result = json.loads(raw_fio_output) - with self.lock: - for job in result["jobs"]: - # Fio doesn't support trim on Windows - for op in ['read', 'write']: - if op in job: - self.data[op].append({ - 'error': job['error'], - 'io_bytes': job[op]['io_bytes'], - 'bw_bytes': job[op]['bw_bytes'], - 'runtime': job[op]['runtime'] / 1000, # seconds - 'total_ios': job[op]['short_ios'], - 'short_ios': job[op]['short_ios'], - 'dropped_ios': job[op]['short_ios'], - 'clat_ns_min': job[op]['clat_ns']['min'], - 'clat_ns_max': job[op]['clat_ns']['max'], - 'clat_ns_mean': job[op]['clat_ns']['mean'], - 'clat_ns_stddev': job[op]['clat_ns']['stddev'], - 'clat_ns_10': job[op].get('clat_ns', {}) - .get('percentile', {}) - .get('10.000000', 0), - 'clat_ns_90': job[op].get('clat_ns', {}) - .get('percentile', {}) - .get('90.000000', 0) - }) - - def _get_fio_path(self): - return self.image.path - - @Tracer.trace - def _run_fio(self, fio_size_mb=None): - LOG.info("Starting FIO test.") - cmd = [ - "fio", "--thread", "--output-format=json", - "--randrepeat=%d" % self.iterations, - "--direct=1", "--name=test", - "--bs=%s" % self.bs, "--iodepth=%s" % self.iodepth, - "--size=%sM" % (fio_size_mb or self.fio_size_mb), - "--readwrite=%s" % self.op, - "--numjobs=%s" % self.workers, - "--filename=%s" % self._get_fio_path(), - ] - if self.verify: - cmd += ["--verify=%s" % self.verify] - result = execute(*cmd) - LOG.info("Completed FIO test.") - self.process_result(result.stdout) - - @Tracer.trace - def run(self): - self._run_fio() - - @classmethod - def print_results(cls, - title: str = "Benchmark results", - description: str = None): - if description: - title = "%s (%s)" % (title, description) - - for op in cls.data.keys(): - op_title = "%s op=%s" % (title, op) - - table = prettytable.PrettyTable(title=op_title) - table.field_names = ["stat", "min", "max", "mean", - "median", "std_dev", - "max 90%", "min 90%", "total"] - table.float_format = ".4" - - op_data = cls.data[op] - - s = array_stats([float(i["bw_bytes"]) / 1000_000 for i in op_data]) - table.add_row(["bandwidth (MB/s)", - s['min'], s['max'], s['mean'], - s['median'], s['std_dev'], - s['max_90'], s['min_90'], 'N/A']) - - s = array_stats([float(i["runtime"]) for i in op_data]) - table.add_row(["duration (s)", - s['min'], s['max'], s['mean'], - s['median'], s['std_dev'], - s['max_90'], s['min_90'], s['sum']]) - - s = array_stats([i["error"] for i in op_data]) - table.add_row(["errors", - s['min'], s['max'], s['mean'], 
- s['median'], s['std_dev'], - s['max_90'], s['min_90'], s['sum']]) - - s = array_stats([i["short_ios"] for i in op_data]) - table.add_row(["incomplete IOs", - s['min'], s['max'], s['mean'], - s['median'], s['std_dev'], - s['max_90'], s['min_90'], s['sum']]) - - s = array_stats([i["dropped_ios"] for i in op_data]) - table.add_row(["dropped IOs", - s['min'], s['max'], s['mean'], - s['median'], s['std_dev'], - s['max_90'], s['min_90'], s['sum']]) - - clat_min = array_stats([i["clat_ns_min"] for i in op_data]) - clat_max = array_stats([i["clat_ns_max"] for i in op_data]) - clat_mean = array_stats([i["clat_ns_mean"] for i in op_data]) - clat_stddev = math.sqrt( - sum([float(i["clat_ns_stddev"]) ** 2 for i in op_data]) / len(op_data) - if len(op_data) else 0) - clat_10 = array_stats([i["clat_ns_10"] for i in op_data]) - clat_90 = array_stats([i["clat_ns_90"] for i in op_data]) - # For convenience, we'll convert it from ns to seconds. - table.add_row(["completion latency (s)", - clat_min['min'] / 1e+9, - clat_max['max'] / 1e+9, - clat_mean['mean'] / 1e+9, - clat_mean['median'] / 1e+9, - clat_stddev / 1e+9, - clat_10['mean'] / 1e+9, - clat_90['mean'] / 1e+9, - clat_mean['sum'] / 1e+9]) - print(table) - - -class RbdResizeFioTest(RbdFioTest): - """Image resize test. - - This test extends and then shrinks the image, performing FIO tests to - validate the resized image. - """ - - @Tracer.trace - def run(self): - self.image.resize(self.image_size_mb * 2) - self.image.wait_for_disk_resize() - - self._run_fio(fio_size_mb=self.image_size_mb * 2) - - self.image.resize(self.image_size_mb // 2, allow_shrink=True) - self.image.wait_for_disk_resize() - - self._run_fio(fio_size_mb=self.image_size_mb // 2) - - # Just like rbd-nbd, rbd-wnbd is masking out-of-bounds errors. - # For this reason, we don't have a negative test that writes - # passed the disk boundary. - - -class RbdFsFioTest(RbdFsTestMixin, RbdFioTest): - def initialize(self): - super(RbdFsFioTest, self).initialize() - - if not self.fio_size_mb or self.fio_size_mb == self.image_size_mb: - # Out of caution, we'll use up to 80% of the FS by default - self.fio_size_mb = int( - self.image.get_fs_capacity() * 0.8 / (1024 * 1024)) - - @staticmethod - def _fio_escape_path(path): - # FIO allows specifying multiple files separated by colon. - # This means that ":" has to be escaped, so - # F:\filename becomes F\:\filename. - return path.replace(":", "\\:") - - def _get_fio_path(self): - return self._fio_escape_path(self.get_subpath("test-fio")) - - -class RbdStampTest(RbdTest): - requires_disk_write = True - - _write_open_mode = "rb+" - _read_open_mode = "rb" - _expect_path_exists = True - - @staticmethod - def _rand_float(min_val: float, max_val: float): - return min_val + (random.random() * max_val - min_val) - - def _get_stamp(self): - buff = self.image_name.encode() - padding = 512 - len(buff) - buff += b'\0' * padding - return buff - - def _get_stamp_path(self): - return self.image.path - - @Tracer.trace - def _write_stamp(self): - with open(self._get_stamp_path(), self._write_open_mode) as disk: - stamp = self._get_stamp() - disk.write(stamp) - - @Tracer.trace - def _read_stamp(self): - with open(self._get_stamp_path(), self._read_open_mode) as disk: - return disk.read(len(self._get_stamp())) - - @Tracer.trace - def run(self): - if self._expect_path_exists: - # Wait up to 5 seconds and then check the disk, ensuring that - # nobody else wrote to it. 
This is particularly useful when - # running a high number of tests in parallel, ensuring that - # we aren't writing to the wrong disk. - time.sleep(self._rand_float(0, 5)) - - stamp = self._read_stamp() - assert stamp == b'\0' * len(self._get_stamp()) - - self._write_stamp() - - stamp = self._read_stamp() - assert stamp == self._get_stamp() - - -class RbdFsStampTest(RbdFsTestMixin, RbdStampTest): - _write_open_mode = "wb" - _expect_path_exists = False - - def _get_stamp_path(self): - return self.get_subpath("test-stamp") - - -class TestRunner(object): - def __init__(self, - test_cls: typing.Type[RbdTest], - test_params: dict = {}, - iterations: int = 1, - workers: int = 1, - stop_on_error: bool = False, - cleanup_on_error: bool = True): - self.test_cls = test_cls - self.test_params = test_params - self.iterations = iterations - self.workers = workers - self.executor = futures.ThreadPoolExecutor(max_workers=workers) - self.lock = threading.Lock() - self.completed = 0 - self.errors = 0 - self.stopped = False - self.stop_on_error = stop_on_error - self.cleanup_on_error = cleanup_on_error - - @Tracer.trace - def run(self): - tasks = [] - for i in range(self.iterations): - task = self.executor.submit(self.run_single_test) - tasks.append(task) - - LOG.info("Waiting for %d tests to complete.", self.iterations) - for task in tasks: - task.result() - - def run_single_test(self): - failed = False - if self.stopped: - return - - try: - test = self.test_cls(**self.test_params) - test.initialize() - test.run() - except KeyboardInterrupt: - LOG.warning("Received Ctrl-C.") - self.stopped = True - except Exception as ex: - failed = True - if self.stop_on_error: - self.stopped = True - with self.lock: - self.errors += 1 - LOG.exception( - "Test exception: %s. Total exceptions: %d", - ex, self.errors) - finally: - if not failed or self.cleanup_on_error: - try: - test.cleanup() - except KeyboardInterrupt: - LOG.warning("Received Ctrl-C.") - self.stopped = True - # Retry the cleanup - test.cleanup() - except Exception: - LOG.exception("Test cleanup failed.") - - with self.lock: - self.completed += 1 - LOG.info("Completed tests: %d. Pending: %d", - self.completed, self.iterations - self.completed) - - -TESTS: typing.Dict[str, typing.Type[RbdTest]] = { - 'RbdTest': RbdTest, - 'RbdFioTest': RbdFioTest, - 'RbdResizeFioTest': RbdResizeFioTest, - 'RbdStampTest': RbdStampTest, - # FS tests - 'RbdFsTest': RbdFsTest, - 'RbdFsFioTest': RbdFsFioTest, - 'RbdFsStampTest': RbdFsStampTest, -} - -if __name__ == '__main__': - args = parser.parse_args() - - log_level = logging.WARNING - if args.verbose: - log_level = logging.INFO - if args.debug: - log_level = logging.DEBUG - setup_logging(log_level) - - test_params = dict( - image_size_mb=args.image_size_mb, - image_prefix=args.image_prefix, - bs=args.bs, - op=args.op, - verify=args.fio_verify, - iodepth=args.fio_depth, - map_timeout=args.map_timeout, - skip_enabling_disk=args.skip_enabling_disk, - ) - - try: - test_cls = TESTS[args.test_name] - except KeyError: - raise CephTestException("Unknown test: {}".format(args.test_name)) - - runner = TestRunner( - test_cls, - test_params=test_params, - iterations=args.iterations, - workers=args.concurrency, - stop_on_error=args.stop_on_error, - cleanup_on_error=not args.skip_cleanup_on_error) - runner.run() - - Tracer.print_results() - test_cls.print_results( - description="count: %d, concurrency: %d" % - (args.iterations, args.concurrency)) - - assert runner.errors == 0, f"encountered {runner.errors} error(s)." 
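The deleted qa/workunits/windows/test_rbd_wnbd.py above is superseded by the py_tests.rbd_wnbd modules that run-tests.ps1 now drives via "python.exe -m". As a rough sketch only (assuming the relocated stress_test module exposes the StressTestRunner, RbdFioTest and Tracer names shown in the earlier hunks; the module path is inferred from run-tests.ps1 and is not confirmed by the hunks themselves), a programmatic invocation equivalent to one of the run-tests.ps1 lines might look like:

    # Hypothetical usage sketch; module path inferred from run-tests.ps1,
    # class names taken from the hunks above.
    from py_tests.rbd_wnbd import stress_test

    # Run 4 fio iterations against freshly created 64 MB images,
    # two tests in flight at a time.
    runner = stress_test.StressTestRunner(
        stress_test.RbdFioTest,
        test_params=dict(image_size_mb=64, image_prefix="cephTest-"),
        iterations=4,
        workers=2)
    runner.run()

    # Per-function timing and aggregated fio statistics.
    stress_test.Tracer.print_results()
    stress_test.RbdFioTest.print_results(description="count: 4, concurrency: 2")
    assert runner.errors == 0, f"encountered {runner.errors} error(s)."
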
diff --git a/run-make-check.sh b/run-make-check.sh index 0ebe2b5de6bd..c4190a6426c5 100755 --- a/run-make-check.sh +++ b/run-make-check.sh @@ -22,6 +22,14 @@ source src/script/run-make.sh set -e +function gen_ctest_resource_file() { + local file_name=$(mktemp /tmp/ctest-resource-XXXXXX) + local max_cpuid=$(($(nproc) - 1)) + jq -n '$ARGS.positional | map({id:., slots:1}) | {cpus:.} | {version: {major:1, minor:0}, local:[.]}' \ + --args $(seq 0 $max_cpuid) > $file_name + echo "$file_name" +} + function run() { # to prevent OSD EMFILE death on tests, make sure ulimit >= 1024 $DRY_RUN ulimit -n $(ulimit -Hn) @@ -43,14 +51,16 @@ function run() { fi CHECK_MAKEOPTS=${CHECK_MAKEOPTS:-$DEFAULT_MAKEOPTS} + CTEST_RESOURCE_FILE=$(gen_ctest_resource_file) + CHECK_MAKEOPTS+=" --resource-spec-file ${CTEST_RESOURCE_FILE}" if in_jenkins; then if ! ctest $CHECK_MAKEOPTS --no-compress-output --output-on-failure --test-output-size-failed 1024000 -T Test; then # do not return failure, as the jenkins publisher will take care of this - rm -fr ${TMPDIR:-/tmp}/ceph-asok.* + rm -fr ${TMPDIR:-/tmp}/ceph-asok.* ${CTEST_RESOURCE_FILE} fi else if ! $DRY_RUN ctest $CHECK_MAKEOPTS --output-on-failure; then - rm -fr ${TMPDIR:-/tmp}/ceph-asok.* + rm -fr ${TMPDIR:-/tmp}/ceph-asok.* ${CTEST_RESOURCE_FILE} return 1 fi fi diff --git a/src/BLAKE3 b/src/BLAKE3 new file mode 160000 index 000000000000..92e4cd71be48 --- /dev/null +++ b/src/BLAKE3 @@ -0,0 +1 @@ +Subproject commit 92e4cd71be48fdf9a79e88ef37b8f415ec5ac210 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8aa271a2b5b2..43bab75680d0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -70,6 +70,10 @@ configure_file( ${CMAKE_SOURCE_DIR}/src/ceph_ver.h.in.cmake ${CMAKE_BINARY_DIR}/src/include/ceph_ver.h @ONLY) +configure_file( + ${CMAKE_SOURCE_DIR}/src/ceph_release.h.in.cmake + ${CMAKE_BINARY_DIR}/src/include/ceph_release.h + @ONLY) add_definitions( -DHAVE_CONFIG_H @@ -78,10 +82,8 @@ add_definitions( -D_THREAD_SAFE -D__STDC_FORMAT_MACROS -D_FILE_OFFSET_BITS=64 - -DBOOST_ASIO_DISABLE_THREAD_KEYWORD_EXTENSION) -if(Boost_VERSION VERSION_GREATER_EQUAL 1.74) - add_definitions(-DBOOST_ASIO_USE_TS_EXECUTOR_AS_DEFAULT) -endif() + -DBOOST_ASIO_DISABLE_THREAD_KEYWORD_EXTENSION + -DBOOST_ASIO_NO_TS_EXECUTORS) if(LINUX) add_definitions("-D_GNU_SOURCE") @@ -145,7 +147,7 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL Clang) if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12) # require >= clang-12 message(FATAL_ERROR "C++20 support requires a minimum Clang version of 12.") endif() - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_EXPORTS_C_FLAG}") + string(APPEND CMAKE_EXE_LINKER_FLAGS " ${CMAKE_EXE_EXPORTS_C_FLAG}") string(APPEND CMAKE_LINKER_FLAGS " -rdynamic -export-dynamic ${CMAKE_EXE_EXPORTS_C_FLAG}") string(PREPEND CMAKE_CXX_FLAGS_DEBUG "-g ") add_compile_options($<$:-Wno-inconsistent-missing-override>) @@ -198,6 +200,9 @@ if(HAS_GLIBCXX_ASSERTIONS AND CMAKE_BUILD_TYPE STREQUAL Debug) add_compile_options($<$:-D_GLIBCXX_ASSERTIONS>) endif() +# add BLAKE3 before we clobber CMAKE_ASM_COMPILER +add_subdirectory(BLAKE3/c EXCLUDE_FROM_ALL) + include(SIMDExt) if(HAVE_INTEL) if(APPLE) @@ -265,15 +270,18 @@ if(LINUX OR APPLE) list(APPEND EXTRALIBS ${LIB_RESOLV}) endif() +option(ENABLE_COVERAGE "Coverage is enabled" OFF) if(${ENABLE_COVERAGE}) find_program(HAVE_GCOV gcov) if(NOT HAVE_GCOV) message(FATAL_ERROR "Coverage Enabled but gcov Not Found") endif() add_compile_options( - -fprofile-arcs - -ftest-coverage + --coverage -O0) + add_link_options( + --coverage + ) list(APPEND EXTRALIBS gcov) 
endif(${ENABLE_COVERAGE}) @@ -295,14 +303,25 @@ if(WITH_CEPHFS_JAVA) add_subdirectory(java) endif() -if(WITH_RADOSGW_D4N) - add_subdirectory(cpp_redis) -endif() - if (WITH_BLKIN) add_subdirectory(blkin/blkin-lib) endif(WITH_BLKIN) +if(WITH_JAEGER) + find_package(thrift 0.13.0 REQUIRED) + + if(EXISTS "/etc/redhat-release" OR EXISTS "/etc/fedora-release") + # absl is installed as grpc build dependency on RPM based systems + add_definitions(-DHAVE_ABSEIL) + endif() + + include(BuildOpentelemetry) + build_opentelemetry() + add_library(jaeger_base INTERFACE) + target_link_libraries(jaeger_base INTERFACE opentelemetry::libopentelemetry + thrift::libthrift) +endif() + set(mds_files) list(APPEND mds_files mds/MDSMap.cc @@ -326,19 +345,25 @@ if(NOT TARGET RapidJSON::RapidJSON) endif() option(WITH_FMT_HEADER_ONLY "use header-only version of fmt library" OFF) -set(WITH_FMT_VERSION "8.1.1" CACHE - STRING "build with fmt version") -find_package(fmt ${WITH_FMT_VERSION} QUIET) +option(WITH_SYSTEM_FMT "build against system fmt" OFF) +if(WITH_SYSTEM_FMT) + find_package(fmt 8.1.1...<10.0.0 REQUIRED) +endif() +if (WITH_FMT_HEADER_ONLY) + message(STATUS "Using fmt header-only.") + set(FMT_LIB fmt::fmt-header-only) +else() + message(STATUS "Linking to fmt library.") + set(FMT_LIB fmt::fmt) +endif() if(fmt_FOUND) - include_directories(SYSTEM "${fmt_INCLUDE_DIR}") + message(STATUS "Building with system fmt.") else() - message(STATUS "Could not find fmt, will build it") + message(STATUS "Building fmt as submodule") set(old_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) set(BUILD_SHARED_LIBS FALSE) + set(FMT_INSTALL OFF) add_subdirectory(fmt) - if (WITH_FMT_HEADER_ONLY) - add_library(fmt::fmt ALIAS fmt-header-only) - endif() set(BUILD_SHARED_LIBS ${old_BUILD_SHARED_LIBS}) unset(old_BUILD_SHARED_LIBS) include_directories(SYSTEM "${CMAKE_SOURCE_DIR}/src/fmt/include") @@ -368,7 +393,8 @@ if(WITH_SEASTAR) endmacro () set(Seastar_API_LEVEL "6" CACHE STRING "" FORCE) set(Seastar_HWLOC OFF CACHE BOOL "" FORCE) - set(Seastar_STD_OPTIONAL_VARIANT_STRINGVIEW ON CACHE BOOL "" FORCE) + set(Seastar_IO_URING OFF CACHE BOOL "" FORCE) + set(Seastar_DEPRECATED_OSTREAM_FORMATTERS OFF CACHE BOOL "" FORCE) if(Seastar_DPDK) find_package(dpdk QUIET) if(NOT DPDK_FOUND) @@ -389,6 +415,7 @@ if(WITH_SEASTAR) # create the directory so cmake won't complain when looking at the imported # target: Seastar exports this directory created at build-time file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/seastar/gen/include") + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/seastar/gen/src") add_subdirectory(crimson) endif() @@ -433,17 +460,11 @@ set(libcommon_files set_source_files_properties(ceph_ver.c APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_BINARY_DIR}/src/include/ceph_ver.h) add_library(common-objs OBJECT ${libcommon_files}) +target_link_libraries(common-objs legacy-option-headers) target_compile_definitions(common-objs PRIVATE - $) -add_dependencies(common-objs legacy-option-headers) + $) if(WITH_JAEGER) - find_package(thrift 0.13.0 REQUIRED) - include(BuildOpentelemetry) - build_opentelemetry() - add_library(jaeger_base INTERFACE) - target_link_libraries(jaeger_base INTERFACE opentelemetry::libopentelemetry - thrift::libthrift) add_dependencies(common-objs jaeger_base) target_link_libraries(common-objs jaeger_base) endif() @@ -477,7 +498,7 @@ set(ceph_common_deps Boost::date_time Boost::iostreams StdFilesystem::filesystem - fmt::fmt + ${FMT_LIB} ${BLKID_LIBRARIES} ${Backtrace_LIBRARIES} ${BLKIN_LIBRARIES} @@ -501,10 +522,6 @@ if(NOT WITH_SYSTEM_BOOST) 
list(APPEND ceph_common_deps ${ZLIB_LIBRARIES}) endif() -if(HAVE_QATZIP) - list(APPEND ceph_common_deps ${qatzip_LIBRARIES}) -endif() - if(WITH_DPDK) list(APPEND ceph_common_deps common_async_dpdk) endif() @@ -542,8 +559,9 @@ if(WITH_BLUESTORE_PMEM OR WITH_RBD_RWL) endif() add_library(common STATIC ${ceph_common_objs}) -target_link_libraries(common ${ceph_common_deps}) -add_dependencies(common legacy-option-headers) +target_link_libraries(common + ${ceph_common_deps} + legacy-option-headers) if(WITH_JAEGER) add_dependencies(common jaeger_base) endif() @@ -561,7 +579,7 @@ if(ENABLE_COVERAGE) target_link_libraries(ceph-common gcov) endif(ENABLE_COVERAGE) -add_dependencies(ceph-common legacy-option-headers) +target_link_libraries(ceph-common legacy-option-headers) if(WITH_JAEGER) add_dependencies(ceph-common jaeger_base) @@ -620,6 +638,8 @@ add_subdirectory(osdc) add_subdirectory(perfglue) add_library(rados_snap_set_diff_obj OBJECT librados/snap_set_diff.cc) +target_link_libraries(rados_snap_set_diff_obj + PRIVATE legacy-option-headers) option(WITH_LIBRADOSSTRIPER "build with libradosstriper support" ON) @@ -659,6 +679,7 @@ endif() if(NOT WIN32) add_subdirectory(pybind) add_subdirectory(ceph-volume) +add_subdirectory(ceph-node-proxy) add_subdirectory(python-common) add_subdirectory(cephadm) endif(NOT WIN32) @@ -865,6 +886,112 @@ if(WITH_FUSE) install(PROGRAMS mount.fuse.ceph DESTINATION ${CMAKE_INSTALL_SBINDIR}) endif(WITH_FUSE) +# NVMEOF GATEWAY MONITOR CLIENT +# Supported on RPM-based platforms only, depends on grpc devel libraries/tools +if(EXISTS "/etc/redhat-release" OR EXISTS "/etc/fedora-release") + option(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT "build nvmeof gateway monitor client" ON) +else() + option(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT "build nvmeof gateway monitor client" OFF) +endif() + +if(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT) + + # Find Protobuf installation + # Looks for protobuf-config.cmake file installed by Protobuf's cmake installation. + option(protobuf_MODULE_COMPATIBLE TRUE) + find_package(Protobuf REQUIRED) + + set(_REFLECTION grpc++_reflection) + if(CMAKE_CROSSCOMPILING) + find_program(_PROTOBUF_PROTOC protoc) + else() + set(_PROTOBUF_PROTOC $) + endif() + + # Find gRPC installation + # Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. 
+ find_package(gRPC CONFIG REQUIRED) + message(STATUS "Using gRPC ${gRPC_VERSION}") + set(_GRPC_GRPCPP gRPC::grpc++) + if(CMAKE_CROSSCOMPILING) + find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) + else() + set(_GRPC_CPP_PLUGIN_EXECUTABLE $) + endif() + + # Gateway Proto file + get_filename_component(nvmeof_gateway_proto "nvmeof/gateway/control/proto/gateway.proto" ABSOLUTE) + get_filename_component(nvmeof_gateway_proto_path "${nvmeof_gateway_proto}" PATH) + + # Generated sources + set(nvmeof_gateway_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/gateway.pb.cc") + set(nvmeof_gateway_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/gateway.pb.h") + set(nvmeof_gateway_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/gateway.grpc.pb.cc") + set(nvmeof_gateway_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/gateway.grpc.pb.h") + + add_custom_command( + OUTPUT "${nvmeof_gateway_proto_srcs}" "${nvmeof_gateway_proto_hdrs}" "${nvmeof_gateway_grpc_srcs}" "${nvmeof_gateway_grpc_hdrs}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + -I "${nvmeof_gateway_proto_path}" + --experimental_allow_proto3_optional + --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}" + "${nvmeof_gateway_proto}" + DEPENDS "${nvmeof_gateway_proto}") + + + # Monitor Proto file + get_filename_component(nvmeof_monitor_proto "nvmeof/gateway/control/proto/monitor.proto" ABSOLUTE) + get_filename_component(nvmeof_monitor_proto_path "${nvmeof_monitor_proto}" PATH) + + # Generated sources + set(nvmeof_monitor_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/monitor.pb.cc") + set(nvmeof_monitor_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/monitor.pb.h") + set(nvmeof_monitor_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/monitor.grpc.pb.cc") + set(nvmeof_monitor_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/monitor.grpc.pb.h") + + add_custom_command( + OUTPUT "${nvmeof_monitor_proto_srcs}" "${nvmeof_monitor_proto_hdrs}" "${nvmeof_monitor_grpc_srcs}" "${nvmeof_monitor_grpc_hdrs}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + -I "${nvmeof_monitor_proto_path}" + --experimental_allow_proto3_optional + --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}" + "${nvmeof_monitor_proto}" + DEPENDS "${nvmeof_monitor_proto}") + + # Include generated *.pb.h files + include_directories("${CMAKE_CURRENT_BINARY_DIR}") + + set(ceph_nvmeof_monitor_client_srcs + ${nvmeof_gateway_proto_srcs} + ${nvmeof_gateway_proto_hdrs} + ${nvmeof_gateway_grpc_srcs} + ${nvmeof_gateway_grpc_hdrs} + ${nvmeof_monitor_proto_srcs} + ${nvmeof_monitor_proto_hdrs} + ${nvmeof_monitor_grpc_srcs} + ${nvmeof_monitor_grpc_hdrs} + ceph_nvmeof_monitor_client.cc + nvmeof/NVMeofGwClient.cc + nvmeof/NVMeofGwMonitorGroupClient.cc + nvmeof/NVMeofGwMonitorClient.cc) + add_executable(ceph-nvmeof-monitor-client ${ceph_nvmeof_monitor_client_srcs}) + add_dependencies(ceph-nvmeof-monitor-client ceph-common) + target_link_libraries(ceph-nvmeof-monitor-client + client + mon + global-static + ceph-common + ${_REFLECTION} + ${_GRPC_GRPCPP} + ) + install(TARGETS ceph-nvmeof-monitor-client DESTINATION bin) +endif() +# END OF NVMEOF GATEWAY MONITOR CLIENT + if(WITH_DOKAN) add_subdirectory(dokan) endif(WITH_DOKAN) @@ -875,7 +1002,9 @@ if(WITH_RBD) if(WITH_KRBD) add_library(krbd STATIC krbd.cc $) - target_link_libraries(krbd keyutils::keyutils) + target_link_libraries(krbd + keyutils::keyutils + legacy-option-headers) endif() add_subdirectory(librbd) if(WITH_FUSE) @@ -890,10 +1019,6 @@ if(WITH_RBD) 
add_subdirectory(rbd_replay) endif(WITH_RBD) -set(SPAWN_BUILD_TESTS OFF CACHE INTERNAL "disable building of spawn unit tests") -set(SPAWN_INSTALL OFF CACHE INTERNAL "disable installation of spawn headers") -add_subdirectory(spawn) - # RadosGW if(WITH_KVS) add_subdirectory(key_value_store) @@ -942,44 +1067,31 @@ add_custom_target(vstart-base DEPENDS monmaptool crushtool rados) -if(NOT WIN32) - # WIN32 port does not build python bindings - # TODO: introduce an option for enabling python binding - add_dependencies(vstart-base - cython_rados) -endif() - -if (WITH_MGR) - add_dependencies(vstart-base ceph-mgr) - add_dependencies(vstart-base ceph-exporter) -endif() +foreach(dep + cython_rados + ceph-mgr + ceph-exporter) + if(TARGET ${dep}) + add_dependencies(vstart-base ${dep}) + endif() +endforeach() add_custom_target(vstart DEPENDS vstart-base) -if(WITH_RBD AND NOT WIN32) - add_dependencies(vstart cython_rbd) -endif() -if (WITH_CEPHFS) - add_dependencies(vstart ceph-mds cephfs cython_cephfs) -endif() -if(WITH_RADOSGW) - add_dependencies(vstart radosgw radosgw-admin) -endif() - -if(WITH_LTTNG) - add_dependencies(vstart tracepoint_libraries) -endif(WITH_LTTNG) - -if(WITH_MGR AND WITH_MGR_DASHBOARD_FRONTEND AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64") - add_dependencies(vstart mgr-dashboard-frontend-build) -endif() - -if(WITH_MGR) - add_dependencies(vstart ceph-volume-venv-setup) -endif() - -if(WITH_MGR) - add_dependencies(vstart cephadm) -endif() +foreach(dep + cython_rbd + cephfs + cython_cephfs + ceph-mds + mgr-dashboard-frontend-build + radosgw + radosgw-admin + tracepoint_libraries + ceph-volume-venv-setup + cephadm) + if(TARGET ${dep}) + add_dependencies(vstart ${dep}) + endif() +endforeach() # Everything you need to run CephFS tests add_custom_target(cephfs_testing DEPENDS diff --git a/src/arch/CMakeLists.txt b/src/arch/CMakeLists.txt index e849e4896f4c..e95d9bbb81fa 100644 --- a/src/arch/CMakeLists.txt +++ b/src/arch/CMakeLists.txt @@ -7,6 +7,8 @@ elseif(HAVE_INTEL) list(APPEND arch_srcs intel.c) elseif(HAVE_PPC64LE OR HAVE_PPC64 OR HAVE_PPC) list(APPEND arch_srcs ppc.c) +elseif(HAVE_S390X) + list(APPEND arch_srcs s390x.c) endif() add_library(arch OBJECT ${arch_srcs}) diff --git a/src/arch/probe.cc b/src/arch/probe.cc index 52b913b1b57e..99eab324275f 100644 --- a/src/arch/probe.cc +++ b/src/arch/probe.cc @@ -6,6 +6,7 @@ #include "arch/intel.h" #include "arch/arm.h" #include "arch/ppc.h" +#include "arch/s390x.h" int ceph_arch_probe(void) { @@ -17,6 +18,8 @@ int ceph_arch_probe(void) ceph_arch_arm_probe(); #elif defined(__powerpc__) || defined(__ppc__) ceph_arch_ppc_probe(); +#elif defined(__s390__) + ceph_arch_s390x_probe(); #endif ceph_arch_probed = 1; return 1; diff --git a/src/arch/s390x.c b/src/arch/s390x.c new file mode 100644 index 000000000000..dec654ef7d96 --- /dev/null +++ b/src/arch/s390x.c @@ -0,0 +1,31 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright 2024 IBM Corporation + * + * This is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License version 2.1, as published by + * the Free Software Foundation. See file COPYING. 
+ */ + +#include + +#include "arch/s390x.h" +#include "arch/probe.h" + +/* flags we export */ +int ceph_arch_s390x_crc32 = 0; + +/* Supported starting from the IBM z13 */ +int ceph_arch_s390x_probe(void) +{ + ceph_arch_s390x_crc32 = 0; + + if (getauxval(AT_HWCAP) & HWCAP_S390_VX) { + ceph_arch_s390x_crc32 = 1; + } + + return 0; +} diff --git a/src/arch/s390x.h b/src/arch/s390x.h new file mode 100644 index 000000000000..0eb58b418665 --- /dev/null +++ b/src/arch/s390x.h @@ -0,0 +1,28 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright 2024 IBM Corporation + * + * This is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License version 2.1, as published by + * the Free Software Foundation. See file COPYING. + */ + +#ifndef CEPH_ARCH_S390X_H +#define CEPH_ARCH_S390X_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern int ceph_arch_s390x_crc32; + +extern int ceph_arch_s390x_probe(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/arrow b/src/arrow index 347a88ff9d20..a61f4af724cd 160000 --- a/src/arrow +++ b/src/arrow @@ -1 +1 @@ -Subproject commit 347a88ff9d20e2a4061eec0b455b8ea1aa8335dc +Subproject commit a61f4af724cd06c3a9b4abd20491345997e532c0 diff --git a/src/auth/Auth.h b/src/auth/Auth.h index 5521c8d3fcf0..83e23b34dbe3 100644 --- a/src/auth/Auth.h +++ b/src/auth/Auth.h @@ -16,6 +16,7 @@ #define CEPH_AUTHTYPES_H #include "Crypto.h" +#include "common/ceph_json.h" #include "common/entity_name.h" // The _MAX values are a bit wonky here because we are overloading the first @@ -59,6 +60,14 @@ struct EntityAuth { decode(pending_key, bl); } } + void dump(ceph::Formatter *f) const { + f->dump_object("key", key); + encode_json("caps", caps, f); + f->dump_object("pending_key", pending_key); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new EntityAuth); + } }; WRITE_CLASS_ENCODER(EntityAuth) @@ -95,6 +104,19 @@ struct AuthCapsInfo { allow_all = (bool)a; decode(caps, bl); } + void dump(ceph::Formatter *f) const { + f->dump_bool("allow_all", allow_all); + encode_json("caps", caps, f); + f->dump_unsigned("caps_len", caps.length()); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new AuthCapsInfo); + ls.push_back(new AuthCapsInfo); + ls.back()->allow_all = true; + ls.push_back(new AuthCapsInfo); + ls.back()->caps.append("foo"); + ls.back()->caps.append("bar"); + } }; WRITE_CLASS_ENCODER(AuthCapsInfo) @@ -147,6 +169,25 @@ struct AuthTicket { decode(caps, bl); decode(flags, bl); } + void dump(ceph::Formatter *f) const { + f->dump_object("name", name); + f->dump_unsigned("global_id", global_id); + f->dump_stream("created") << created; + f->dump_stream("renew_after") << renew_after; + f->dump_stream("expires") << expires; + f->dump_object("caps", caps); + f->dump_unsigned("flags", flags); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new AuthTicket); + ls.push_back(new AuthTicket); + ls.back()->name.set_id("client.123"); + ls.back()->global_id = 123; + ls.back()->init_timestamps(utime_t(123, 456), 7); + ls.back()->caps.caps.append("foo"); + ls.back()->caps.caps.append("bar"); + ls.back()->flags = 0x12345678; + } }; WRITE_CLASS_ENCODER(AuthTicket) @@ -231,6 +272,16 @@ struct ExpiringCryptoKey { decode(key, bl); decode(expiration, bl); } + void dump(ceph::Formatter *f) const { + f->dump_object("key", key); + 
f->dump_stream("expiration") << expiration; + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new ExpiringCryptoKey); + ls.push_back(new ExpiringCryptoKey); + ls.back()->key.set_secret( + CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456)); + } }; WRITE_CLASS_ENCODER(ExpiringCryptoKey) @@ -295,6 +346,15 @@ struct RotatingSecrets { } void dump(); + void dump(ceph::Formatter *f) const { + encode_json("secrets", secrets, f); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new RotatingSecrets); + ls.push_back(new RotatingSecrets); + auto eck = new ExpiringCryptoKey; + ls.back()->add(*eck); + } }; WRITE_CLASS_ENCODER(RotatingSecrets) diff --git a/src/auth/CMakeLists.txt b/src/auth/CMakeLists.txt index 1ab294332cb8..014057f49bea 100644 --- a/src/auth/CMakeLists.txt +++ b/src/auth/CMakeLists.txt @@ -22,4 +22,4 @@ endif() add_library(common-auth-objs OBJECT ${auth_srcs}) target_include_directories(common-auth-objs PRIVATE ${OPENSSL_INCLUDE_DIR}) -add_dependencies(common-auth-objs legacy-option-headers) +target_link_libraries(common-auth-objs legacy-option-headers) diff --git a/src/auth/Crypto.cc b/src/auth/Crypto.cc index ce666e8bdc8c..5d68d3470bc0 100644 --- a/src/auth/Crypto.cc +++ b/src/auth/Crypto.cc @@ -511,6 +511,23 @@ void CryptoKey::decode(bufferlist::const_iterator& bl) throw ceph::buffer::malformed_input("malformed secret"); } +void CryptoKey::dump(Formatter *f) const +{ + f->dump_int("type", type); + f->dump_stream("created") << created; + f->dump_int("secret.length", secret.length()); +} + +void CryptoKey::generate_test_instances(std::list& ls) +{ + ls.push_back(new CryptoKey); + ls.push_back(new CryptoKey); + ls.back()->type = CEPH_CRYPTO_AES; + ls.back()->set_secret( + CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456)); + ls.back()->created = utime_t(123, 456); +} + int CryptoKey::set_secret(int type, const bufferptr& s, utime_t c) { int r = _set_secret(type, s); diff --git a/src/auth/Crypto.h b/src/auth/Crypto.h index a29ac1abd811..3ce655a12562 100644 --- a/src/auth/Crypto.h +++ b/src/auth/Crypto.h @@ -111,6 +111,8 @@ class CryptoKey { void encode(ceph::buffer::list& bl) const; void decode(ceph::buffer::list::const_iterator& bl); + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list& ls); void clear() { *this = CryptoKey(); diff --git a/src/auth/cephx/CephxKeyServer.cc b/src/auth/cephx/CephxKeyServer.cc index 236ac451add9..a5ad1f2b7973 100644 --- a/src/auth/cephx/CephxKeyServer.cc +++ b/src/auth/cephx/CephxKeyServer.cc @@ -257,6 +257,16 @@ std::map KeyServer::get_used_pending_keys() return ret; } +void KeyServer::dump(Formatter *f) const +{ + f->dump_object("data", data); +} + +void KeyServer::generate_test_instances(std::list& ls) +{ + ls.push_back(new KeyServer(nullptr, nullptr)); +} + bool KeyServer::generate_secret(CryptoKey& secret) { bufferptr bp; diff --git a/src/auth/cephx/CephxKeyServer.h b/src/auth/cephx/CephxKeyServer.h index 64915c8ce4aa..d147dd441ad2 100644 --- a/src/auth/cephx/CephxKeyServer.h +++ b/src/auth/cephx/CephxKeyServer.h @@ -21,15 +21,16 @@ #include "include/common_fwd.h" struct KeyServerData { - version_t version; + version_t version{0}; /* for each entity */ std::map secrets; - KeyRing *extra_secrets; + KeyRing *extra_secrets = nullptr; /* for each service type */ - version_t rotating_ver; + version_t rotating_ver{0}; std::map rotating_secrets; + KeyServerData() {} explicit KeyServerData(KeyRing *extra) : version(0), @@ -70,7 +71,17 @@ 
struct KeyServerData { decode(rotating_ver, iter); decode(rotating_secrets, iter); } - + void dump(ceph::Formatter *f) const { + f->dump_unsigned("version", version); + f->dump_unsigned("rotating_version", rotating_ver); + encode_json("secrets", secrets, f); + encode_json("rotating_secrets", rotating_secrets, f); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new KeyServerData); + ls.push_back(new KeyServerData); + ls.back()->version = 1; + } bool contains(const EntityName& name) const { return (secrets.find(name) != secrets.end()); } @@ -159,8 +170,21 @@ struct KeyServerData { decode(auth, bl); } } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("op", op); + f->dump_object("name", name); + f->dump_object("auth", auth); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new Incremental); + ls.back()->op = AUTH_INC_DEL; + ls.push_back(new Incremental); + ls.back()->op = AUTH_INC_ADD; + ls.push_back(new Incremental); + ls.back()->op = AUTH_INC_SET_ROTATING; + } }; - + void apply_incremental(Incremental& inc) { switch (inc.op) { case AUTH_INC_ADD: @@ -188,8 +212,6 @@ WRITE_CLASS_ENCODER(KeyServerData) WRITE_CLASS_ENCODER(KeyServerData::Incremental) - - class KeyServer : public KeyStore { CephContext *cct; KeyServerData data; @@ -205,7 +227,9 @@ class KeyServer : public KeyStore { bool _get_service_caps(const EntityName& name, uint32_t service_id, AuthCapsInfo& caps) const; public: + KeyServer() : lock{ceph::make_mutex("KeyServer::lock")} {} KeyServer(CephContext *cct_, KeyRing *extra_secrets); + KeyServer& operator=(const KeyServer&) = delete; bool generate_secret(CryptoKey& secret); bool get_secret(const EntityName& name, CryptoKey& secret) const override; @@ -248,6 +272,8 @@ class KeyServer : public KeyStore { using ceph::decode; decode(data, bl); } + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list& ls); bool contains(const EntityName& name) const; int encode_secrets(ceph::Formatter *f, std::stringstream *ds) const; void encode_formatted(std::string label, ceph::Formatter *f, ceph::buffer::list &bl); diff --git a/src/auth/cephx/CephxProtocol.h b/src/auth/cephx/CephxProtocol.h index aabfaaad10c9..260cb13ff5ab 100644 --- a/src/auth/cephx/CephxProtocol.h +++ b/src/auth/cephx/CephxProtocol.h @@ -55,6 +55,13 @@ struct CephXServerChallenge { decode(struct_v, bl); decode(server_challenge, bl); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("server_challenge", server_challenge); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXServerChallenge); + ls.back()->server_challenge = 1; + } }; WRITE_CLASS_ENCODER(CephXServerChallenge) @@ -72,6 +79,13 @@ struct CephXRequestHeader { using ceph::decode; decode(request_type, bl); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("request_type", request_type); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXRequestHeader); + ls.back()->request_type = 1; + } }; WRITE_CLASS_ENCODER(CephXRequestHeader) @@ -89,6 +103,15 @@ struct CephXResponseHeader { decode(request_type, bl); decode(status, bl); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("request_type", request_type); + f->dump_int("status", status); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXResponseHeader); + ls.back()->request_type = 1; + ls.back()->status = 0; + } }; WRITE_CLASS_ENCODER(CephXResponseHeader) @@ -113,6 +136,17 @@ struct CephXTicketBlob { 
decode(secret_id, bl); decode(blob, bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("secret_id", secret_id); + f->dump_unsigned("blob_len", blob.length()); + } + + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXTicketBlob); + ls.back()->secret_id = 123; + ls.back()->blob.append(std::string_view("this is a blob")); + } }; WRITE_CLASS_ENCODER(CephXTicketBlob) @@ -152,6 +186,25 @@ struct CephXAuthenticate { // old_ticket both on reconnects and renewals old_ticket_may_be_omitted = struct_v < 3; } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("client_challenge", client_challenge); + f->dump_unsigned("key", key); + f->open_object_section("old_ticket"); + old_ticket.dump(f); + f->close_section(); + f->dump_unsigned("other_keys", other_keys); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXAuthenticate); + ls.back()->client_challenge = 0; + ls.back()->key = 0; + ls.push_back(new CephXAuthenticate); + ls.back()->client_challenge = 1; + ls.back()->key = 2; + ls.back()->old_ticket.secret_id = 3; + ls.back()->old_ticket.blob.append(std::string_view("this is a blob")); + ls.back()->other_keys = 4; + } }; WRITE_CLASS_ENCODER(CephXAuthenticate) @@ -168,6 +221,15 @@ struct CephXChallengeBlob { decode(server_challenge, bl); decode(client_challenge, bl); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("server_challenge", server_challenge); + f->dump_unsigned("client_challenge", client_challenge); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXChallengeBlob); + ls.back()->server_challenge = 123; + ls.back()->client_challenge = 456; + } }; WRITE_CLASS_ENCODER(CephXChallengeBlob) @@ -218,6 +280,15 @@ struct CephXServiceTicketRequest { decode(struct_v, bl); decode(keys, bl); } + + void dump(ceph::Formatter *f) const { + f->dump_unsigned("keys", keys); + } + + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXServiceTicketRequest); + ls.back()->keys = 123; + } }; WRITE_CLASS_ENCODER(CephXServiceTicketRequest) @@ -251,6 +322,17 @@ struct CephXAuthorizeReply { decode(connection_secret, bl); } } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("nonce_plus_one", nonce_plus_one); + f->dump_string("connection_secret", connection_secret); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXAuthorizeReply); + ls.back()->nonce_plus_one = 0; + ls.push_back(new CephXAuthorizeReply); + ls.back()->nonce_plus_one = 123; + ls.back()->connection_secret = "secret"; + } }; WRITE_CLASS_ENCODER(CephXAuthorizeReply) @@ -353,6 +435,17 @@ struct CephXServiceTicket { decode(session_key, bl); decode(validity, bl); } + void dump(ceph::Formatter *f) const { + session_key.dump(f); + validity.dump(f); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXServiceTicket); + ls.push_back(new CephXServiceTicket); + ls.back()->session_key.set_secret( + CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456)); + ls.back()->validity = utime_t(123, 456); + } }; WRITE_CLASS_ENCODER(CephXServiceTicket) @@ -375,6 +468,18 @@ struct CephXServiceTicketInfo { decode(ticket, bl); decode(session_key, bl); } + void dump(ceph::Formatter *f) const { + ticket.dump(f); + session_key.dump(f); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXServiceTicketInfo); + ls.push_back(new CephXServiceTicketInfo); + ls.back()->ticket.global_id = 1234; + 
ls.back()->ticket.init_timestamps(utime_t(123, 456), utime_t(123, 456)); + ls.back()->session_key.set_secret( + CEPH_CRYPTO_AES, bufferptr("1234567890123456", 16), utime_t(123, 456)); + } }; WRITE_CLASS_ENCODER(CephXServiceTicketInfo) @@ -392,6 +497,13 @@ struct CephXAuthorizeChallenge : public AuthAuthorizerChallenge { decode(struct_v, bl); decode(server_challenge, bl); } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("server_challenge", server_challenge); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXAuthorizeChallenge); + ls.back()->server_challenge = 1234; + } }; WRITE_CLASS_ENCODER(CephXAuthorizeChallenge) @@ -417,6 +529,18 @@ struct CephXAuthorize { decode(server_challenge_plus_one, bl); } } + void dump(ceph::Formatter *f) const { + f->dump_unsigned("nonce", nonce); + f->dump_unsigned("have_challenge", have_challenge); + f->dump_unsigned("server_challenge_plus_one", server_challenge_plus_one); + } + static void generate_test_instances(std::list& ls) { + ls.push_back(new CephXAuthorize); + ls.push_back(new CephXAuthorize); + ls.back()->nonce = 1234; + ls.back()->have_challenge = true; + ls.back()->server_challenge_plus_one = 1234; + } }; WRITE_CLASS_ENCODER(CephXAuthorize) diff --git a/src/blk/BlockDevice.cc b/src/blk/BlockDevice.cc index fd07e443c136..8c06256d2547 100644 --- a/src/blk/BlockDevice.cc +++ b/src/blk/BlockDevice.cc @@ -31,10 +31,6 @@ #include "pmem/PMEMDevice.h" #endif -#if defined(HAVE_LIBZBD) -#include "zoned/HMSMRDevice.h" -#endif - #include "common/debug.h" #include "common/EventTrace.h" #include "common/errno.h" @@ -46,6 +42,7 @@ #define dout_prefix *_dout << "bdev " using std::string; +using ceph::mono_clock; blk_access_mode_t buffermode(bool buffered) @@ -113,11 +110,6 @@ BlockDevice::detect_device_type(const std::string& path) return block_device_t::pmem; } #endif -#if (defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO)) && defined(HAVE_LIBZBD) - if (HMSMRDevice::support(path)) { - return block_device_t::hm_smr; - } -#endif #if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO) return block_device_t::aio; #else @@ -142,24 +134,19 @@ BlockDevice::device_type_from_name(const std::string& blk_dev_name) if (blk_dev_name == "pmem") { return block_device_t::pmem; } -#endif -#if (defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO)) && defined(HAVE_LIBZBD) - if (blk_dev_name == "hm_smr") { - return block_device_t::hm_smr; - } #endif return block_device_t::unknown; } BlockDevice* BlockDevice::create_with_type(block_device_t device_type, CephContext* cct, const std::string& path, aio_callback_t cb, - void *cbpriv, aio_callback_t d_cb, void *d_cbpriv) + void *cbpriv, aio_callback_t d_cb, void *d_cbpriv, const char* dev_name) { switch (device_type) { #if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO) case block_device_t::aio: - return new KernelDevice(cct, cb, cbpriv, d_cb, d_cbpriv); + return new KernelDevice(cct, cb, cbpriv, d_cb, d_cbpriv, dev_name); #endif #if defined(HAVE_SPDK) case block_device_t::spdk: @@ -168,10 +155,6 @@ BlockDevice* BlockDevice::create_with_type(block_device_t device_type, #if defined(HAVE_BLUESTORE_PMEM) case block_device_t::pmem: return new PMEMDevice(cct, cb, cbpriv); -#endif -#if (defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO)) && defined(HAVE_LIBZBD) - case block_device_t::hm_smr: - return new HMSMRDevice(cct, cb, cbpriv, d_cb, d_cbpriv); #endif default: ceph_abort_msg("unsupported device"); @@ -181,7 +164,7 @@ BlockDevice* BlockDevice::create_with_type(block_device_t device_type, BlockDevice 
*BlockDevice::create( CephContext* cct, const string& path, aio_callback_t cb, - void *cbpriv, aio_callback_t d_cb, void *d_cbpriv) + void *cbpriv, aio_callback_t d_cb, void *d_cbpriv, const char* dev_name) { const string blk_dev_name = cct->_conf.get_val("bdev_type"); block_device_t device_type = block_device_t::unknown; @@ -190,7 +173,7 @@ BlockDevice *BlockDevice::create( } else { device_type = device_type_from_name(blk_dev_name); } - return create_with_type(device_type, cct, path, cb, cbpriv, d_cb, d_cbpriv); + return create_with_type(device_type, cct, path, cb, cbpriv, d_cb, d_cbpriv, dev_name); } bool BlockDevice::is_valid_io(uint64_t off, uint64_t len) const { @@ -209,3 +192,39 @@ bool BlockDevice::is_valid_io(uint64_t off, uint64_t len) const { } return ret; } + +size_t BlockDevice::trim_stalled_read_event_queue(mono_clock::time_point cur_time) { + std::lock_guard lock(stalled_read_event_queue_lock); + auto warn_duration = std::chrono::seconds(cct->_conf->bdev_stalled_read_warn_lifetime); + while (!stalled_read_event_queue.empty() && + ((stalled_read_event_queue.front() < cur_time - warn_duration) || + (stalled_read_event_queue.size() > cct->_conf->bdev_stalled_read_warn_threshold))) { + stalled_read_event_queue.pop(); + } + return stalled_read_event_queue.size(); +} + +void BlockDevice::add_stalled_read_event() { + if (!cct->_conf->bdev_stalled_read_warn_threshold) { + return; + } + auto cur_time = mono_clock::now(); + { + std::lock_guard lock(stalled_read_event_queue_lock); + stalled_read_event_queue.push(cur_time); + } + trim_stalled_read_event_queue(cur_time); +} + +void BlockDevice::collect_alerts(osd_alert_list_t& alerts, const std::string& device_name) { + if (cct->_conf->bdev_stalled_read_warn_threshold) { + size_t qsize = trim_stalled_read_event_queue(mono_clock::now()); + if (qsize >= cct->_conf->bdev_stalled_read_warn_threshold) { + std::ostringstream ss; + ss << "observed stalled read indications in " + << device_name << " device"; + alerts.emplace(device_name + "_DEVICE_STALLED_READ_ALERT", ss.str()); + } + } +} + diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 440faf3d4b4c..cb795eaa5e5b 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -25,11 +25,13 @@ #include #include #include +#include #include "acconfig.h" #include "common/ceph_mutex.h" #include "include/common_fwd.h" #include "extblkdev/ExtBlkDevInterface.h" +#include "osd/osd_types.h" #if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO) #include "aio/aio.h" @@ -148,6 +150,8 @@ class BlockDevice { public: CephContext* cct; typedef void (*aio_callback_t)(void *handle, void *aio); + void collect_alerts(osd_alert_list_t& alerts, const std::string& device_name); + private: ceph::mutex ioc_reap_lock = ceph::make_mutex("BlockDevice::ioc_reap_lock"); std::vector ioc_reap_queue; @@ -156,9 +160,6 @@ class BlockDevice { unknown, #if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO) aio, -#if defined(HAVE_LIBZBD) - hm_smr, -#endif #endif #if defined(HAVE_SPDK) spdk, @@ -167,11 +168,14 @@ class BlockDevice { pmem, #endif }; + std::queue stalled_read_event_queue; + ceph::mutex stalled_read_event_queue_lock = ceph::make_mutex("BlockDevice::stalled_read_event_queue_lock"); + size_t trim_stalled_read_event_queue(mono_clock::time_point cur_time); static block_device_t detect_device_type(const std::string& path); static block_device_t device_type_from_name(const std::string& blk_dev_name); static BlockDevice *create_with_type(block_device_t device_type, CephContext* cct, const std::string& path, 
aio_callback_t cb, - void *cbpriv, aio_callback_t d_cb, void *d_cbpriv); + void *cbpriv, aio_callback_t d_cb, void *d_cbpriv, const char* dev_name); protected: uint64_t size = 0; @@ -190,6 +194,7 @@ class BlockDevice { // of the drive. The zones 524-52155 are sequential zones. uint64_t conventional_region_size = 0; uint64_t zone_size = 0; + void add_stalled_read_event(); public: aio_callback_t aio_callback; @@ -202,7 +207,8 @@ class BlockDevice { virtual ~BlockDevice() = default; static BlockDevice *create( - CephContext* cct, const std::string& path, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv); + CephContext* cct, const std::string& path, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, + void *d_cbpriv, const char* dev_name = ""); virtual bool supported_bdev_label() { return true; } virtual bool is_rotational() { return rotational; } @@ -236,6 +242,7 @@ class BlockDevice { uint64_t get_size() const { return size; } uint64_t get_block_size() const { return block_size; } uint64_t get_optimal_io_size() const { return optimal_io_size; } + bool is_discard_supported() const { return support_discard; } /// hook to provide utilization of thinly-provisioned device virtual int get_ebd_state(ExtBlkDevState &state) const { @@ -289,7 +296,7 @@ class BlockDevice { virtual int flush() = 0; virtual bool try_discard(interval_set &to_release, bool async=true) { return false; } virtual void discard_drain() { return; } - + virtual void swap_discard_queued(interval_set& other) { other.clear(); } // for managing buffered readers/writers virtual int invalidate_cache(uint64_t off, uint64_t len) = 0; virtual int open(const std::string& path) = 0; diff --git a/src/blk/CMakeLists.txt b/src/blk/CMakeLists.txt index 288955dd0547..62c2a5c29f4b 100644 --- a/src/blk/CMakeLists.txt +++ b/src/blk/CMakeLists.txt @@ -20,11 +20,6 @@ if(WITH_SPDK) spdk/NVMEDevice.cc) endif() -if(WITH_ZBD) - list(APPEND libblk_srcs - zoned/HMSMRDevice.cc) -endif() - if(libblk_srcs) add_library(blk STATIC ${libblk_srcs}) target_include_directories(blk PRIVATE "./") @@ -39,10 +34,6 @@ if(WITH_SPDK) PRIVATE spdk::spdk) endif() -if(WITH_ZBD) - target_link_libraries(blk PRIVATE ${ZBD_LIBRARIES}) -endif() - if(WITH_BLUESTORE_PMEM) if(HAVE_LIBDML) target_link_libraries(blk PRIVATE dml::dml dml::dmlhl) @@ -57,11 +48,5 @@ if(WITH_EVENTTRACE) endif() if(WITH_LIBURING) - if(WITH_SYSTEM_LIBURING) - find_package(uring REQUIRED) - else() - include(Builduring) - build_uring() - endif() target_link_libraries(blk PRIVATE uring::uring) endif() diff --git a/src/blk/aio/aio.cc b/src/blk/aio/aio.cc index 00a12bfd16af..1e6b102f3dc3 100644 --- a/src/blk/aio/aio.cc +++ b/src/blk/aio/aio.cc @@ -16,7 +16,7 @@ std::ostream& operator<<(std::ostream& os, const aio_t& aio) } int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, - uint16_t aios_size, void *priv, + void *priv, int *retries) { // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds @@ -25,33 +25,43 @@ int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, int r; aio_iter cur = begin; - struct aio_t *piocb[aios_size]; - int left = 0; - while (cur != end) { - cur->priv = priv; - *(piocb+left) = &(*cur); - ++left; - ++cur; - } - ceph_assert(aios_size >= left); +#if defined(HAVE_LIBAIO) + struct aio_t *piocb[max_iodepth]; +#endif int done = 0; - while (left > 0) { + int pushed = 0; //used for LIBAIO only + int pulled = 0; + while (cur != end || pushed < pulled) { #if defined(HAVE_LIBAIO) - r = io_submit(ctx, std::min(left, max_iodepth), (struct iocb**)(piocb + done)); 
+    while (cur != end && pulled < max_iodepth) {
+      cur->priv = priv;
+      piocb[pulled] = &(*cur);
+      ++pulled;
+      ++cur;
+    }
+    int toSubmit = pulled - pushed;
+    r = io_submit(ctx, toSubmit, (struct iocb**)(piocb + pushed));
+    if (r >= 0 && r < toSubmit) {
+      pushed += r;
+      done += r;
+      r = -EAGAIN;
+    }
 #elif defined(HAVE_POSIXAIO)
-    if (piocb[done]->n_aiocb == 1) {
+    cur->priv = priv;
+    if (cur->n_aiocb == 1) {
       // TODO: consider batching multiple reads together with lio_listio
-      piocb[done]->aio.aiocb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
-      piocb[done]->aio.aiocb.aio_sigevent.sigev_notify_kqueue = ctx;
-      piocb[done]->aio.aiocb.aio_sigevent.sigev_value.sival_ptr = piocb[done];
-      r = aio_read(&piocb[done]->aio.aiocb);
+      cur->aio.aiocb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
+      cur->aio.aiocb.aio_sigevent.sigev_notify_kqueue = ctx;
+      cur->aio.aiocb.aio_sigevent.sigev_value.sival_ptr = &(*cur);
+      r = aio_write(&cur->aio.aiocb);
     } else {
       struct sigevent sev;
       sev.sigev_notify = SIGEV_KEVENT;
       sev.sigev_notify_kqueue = ctx;
-      sev.sigev_value.sival_ptr = piocb[done];
-      r = lio_listio(LIO_NOWAIT, &piocb[done]->aio.aiocbp, piocb[done]->n_aiocb, &sev);
+      sev.sigev_value.sival_ptr = &(*cur);
+      r = lio_listio(LIO_NOWAIT, &cur->aio.aiocbp, cur->n_aiocb, &sev);
     }
+    ++cur;
 #endif
     if (r < 0) {
       if (r == -EAGAIN && attempts-- > 0) {
@@ -64,9 +74,9 @@ int aio_queue_t::submit_batch(aio_iter begin, aio_iter end,
     }
     ceph_assert(r > 0);
     done += r;
-    left -= r;
     attempts = 16;
     delay = 125;
+    pushed = pulled = 0;
   }
   return done;
 }
diff --git a/src/blk/aio/aio.h b/src/blk/aio/aio.h
index 14b89784bc1b..cf21c4167316 100644
--- a/src/blk/aio/aio.h
+++ b/src/blk/aio/aio.h
@@ -100,7 +100,7 @@ struct io_queue_t {
   virtual int init(std::vector<int> &fds) = 0;
   virtual void shutdown() = 0;
-  virtual int submit_batch(aio_iter begin, aio_iter end, uint16_t aios_size,
+  virtual int submit_batch(aio_iter begin, aio_iter end,
                            void *priv, int *retries) = 0;
   virtual int get_next_completed(int timeout_ms, aio_t **paio, int max) = 0;
 };
@@ -153,7 +153,7 @@ struct aio_queue_t final : public io_queue_t {
     }
   }
-  int submit_batch(aio_iter begin, aio_iter end, uint16_t aios_size,
+  int submit_batch(aio_iter begin, aio_iter end,
                    void *priv, int *retries) final;
   int get_next_completed(int timeout_ms, aio_t **paio, int max) final;
 };
diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc
index 754b44d32a69..72921e6d9f08 100644
--- a/src/blk/kernel/KernelDevice.cc
+++ b/src/blk/kernel/KernelDevice.cc
@@ -59,18 +59,16 @@ using ceph::make_timespan;
 using ceph::mono_clock;
 using ceph::operator <<;
-KernelDevice::KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv)
+KernelDevice::KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv, const char* dev_name)
   : BlockDevice(cct, cb, cbpriv),
     aio(false), dio(false),
     discard_callback(d_cb),
     discard_callback_priv(d_cbpriv),
     aio_stop(false),
-    discard_started(false),
-    discard_stop(false),
     aio_thread(this),
-    discard_thread(this),
     injecting_crash(0)
 {
+  cct->_conf.add_observer(this);
   fd_directs.resize(WRITE_LIFE_MAX, -1);
   fd_buffereds.resize(WRITE_LIFE_MAX, -1);
@@ -90,6 +88,26 @@ KernelDevice::KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, ai
     }
     io_queue = std::make_unique<aio_queue_t>(iodepth);
   }
+
+  char name[128];
+  sprintf(name, "blk-kernel-device-%s", dev_name);
+  PerfCountersBuilder b(cct, name,
+                        l_blk_kernel_device_first, l_blk_kernel_device_last);
+  b.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
+  b.add_u64_counter(l_blk_kernel_device_discard_op, "discard_op",
+                    "Number of discard ops issued to kernel device");
+
+  logger.reset(b.create_perf_counters());
+  cct->get_perfcounters_collection()->add(logger.get());
+}
+
+KernelDevice::~KernelDevice()
+{
+  if (logger) {
+    cct->get_perfcounters_collection()->remove(logger.get());
+    logger.reset();
+  }
+  cct->_conf.remove_observer(this);
 }
 int KernelDevice::_lock()
 {
@@ -281,9 +299,8 @@ int KernelDevice::open(const string& p)
   if (r < 0) {
     goto out_fail;
   }
-  if (support_discard && cct->_conf->bdev_enable_discard && cct->_conf->bdev_async_discard) {
-    _discard_start();
-  }
+
+  _discard_update_threads();
   // round size down to an even block
   size &= ~(block_size - 1);
@@ -330,7 +347,7 @@ void KernelDevice::close()
 {
   dout(1) << __func__ << dendl;
   _aio_stop();
-  if (discard_thread.is_started()) {
+  if (_discard_started()) {
     _discard_stop();
   }
   _pre_close();
@@ -338,11 +355,11 @@ void KernelDevice::close()
   extblkdev::release_device(ebd_impl);
   for (int i = 0; i < WRITE_LIFE_MAX; i++) {
-    assert(fd_directs[i] >= 0);
+    ceph_assert(fd_directs[i] >= 0);
     VOID_TEMP_FAILURE_RETRY(::close(fd_directs[i]));
     fd_directs[i] = -1;
-    assert(fd_buffereds[i] >= 0);
+    ceph_assert(fd_buffereds[i] >= 0);
     VOID_TEMP_FAILURE_RETRY(::close(fd_buffereds[i]));
     fd_buffereds[i] = -1;
   }
@@ -530,35 +547,63 @@ void KernelDevice::_aio_stop()
   }
 }
-void KernelDevice::_discard_start()
+void KernelDevice::_discard_update_threads(bool discard_stop)
 {
-  discard_thread.create("bstore_discard");
+  std::unique_lock l(discard_lock);
+
+  uint64_t oldcount = discard_threads.size();
+  uint64_t newcount = cct->_conf.get_val<uint64_t>("bdev_async_discard_threads");
+  if (!cct->_conf.get_val<bool>("bdev_enable_discard") || !support_discard || discard_stop) {
+    newcount = 0;
+  }
+
+  // Increase? Spawn now, it's quick
+  if (newcount > oldcount) {
+    dout(10) << __func__ << " starting " << (newcount - oldcount) << " additional discard threads" << dendl;
+    discard_threads.reserve(newcount);
+    for(uint64_t i = oldcount; i < newcount; i++)
+    {
+      // All threads created with the same name
+      discard_threads.emplace_back(new DiscardThread(this, i));
+      discard_threads.back()->create("bstore_discard");
+    }
+  // Decrease? Signal threads after telling them to stop
+  } else if (newcount < oldcount) {
+    std::vector<std::shared_ptr<DiscardThread>> discard_threads_to_stop;
+    dout(10) << __func__ << " stopping " << (oldcount - newcount) << " existing discard threads" << dendl;
+
+    // Signal the last threads to quit, and stop tracking them
+    for(uint64_t i = oldcount; i > newcount; i--) {
+      discard_threads[i-1]->stop = true;
+      discard_threads_to_stop.push_back(discard_threads[i-1]);
+    }
+    discard_cond.notify_all();
+    discard_threads.resize(newcount);
+    l.unlock();
+    for (auto &t : discard_threads_to_stop) {
+      t->join();
+    }
+  }
 }
 void KernelDevice::_discard_stop()
 {
   dout(10) << __func__ << dendl;
-  {
-    std::unique_lock l(discard_lock);
-    while (!discard_started) {
-      discard_cond.wait(l);
-    }
-    discard_stop = true;
-    discard_cond.notify_all();
-  }
-  discard_thread.join();
-  {
-    std::lock_guard l(discard_lock);
-    discard_stop = false;
-  }
+  _discard_update_threads(true);
   dout(10) << __func__ << " stopped" << dendl;
 }
+bool KernelDevice::_discard_started()
+{
+  std::unique_lock l(discard_lock);
+  return !discard_threads.empty();
+}
+
 void KernelDevice::discard_drain()
 {
   dout(10) << __func__ << dendl;
   std::unique_lock l(discard_lock);
-  while (!discard_queued.empty() || discard_running) {
+  while (!discard_queued.empty() || (discard_running > 0)) {
     discard_cond.wait(l);
   }
 }
@@ -567,7 +612,7 @@ static bool is_expected_ioerr(const int r)
 {
   // https://lxr.missinglinkelectronics.com/linux+v4.15/block/blk-core.c#L135
   return (r == -EOPNOTSUPP || r == -ETIMEDOUT || r == -ENOSPC ||
-          r == -ENOLINK || r == -EREMOTEIO || r == -EAGAIN || r == -EIO ||
+          r == -ENOLINK || r == -EREMOTEIO || r == -EAGAIN || r == -EIO ||
           r == -ENODATA || r == -EILSEQ || r == -ENOMEM ||
 #if defined(__linux__)
           r == -EREMCHG || r == -EBADE
@@ -698,64 +743,98 @@ void KernelDevice::_aio_thread()
   dout(10) << __func__ << " end" << dendl;
 }
-void KernelDevice::_discard_thread()
+void KernelDevice::swap_discard_queued(interval_set<uint64_t>& other)
 {
   std::unique_lock l(discard_lock);
-  ceph_assert(!discard_started);
-  discard_started = true;
+  discard_queued.swap(other);
+}
+
+void KernelDevice::_discard_thread(uint64_t tid)
+{
+  dout(10) << __func__ << " thread " << tid << " start" << dendl;
+
+  // Thread-local list of processing discards
+  interval_set<uint64_t> discard_processing;
+
+  std::unique_lock l(discard_lock);
   discard_cond.notify_all();
+
+  // Keeps the shared pointer around until erased from the vector
+  // and until we leave this function
+  auto thr = discard_threads[tid];
+
   while (true) {
-    ceph_assert(discard_finishing.empty());
+    ceph_assert(discard_processing.empty());
     if (discard_queued.empty()) {
-      if (discard_stop)
+      if (thr->stop)
        break;
       dout(20) << __func__ << " sleep" << dendl;
       discard_cond.notify_all(); // for the thread trying to drain...
       discard_cond.wait(l);
       dout(20) << __func__ << " wake" << dendl;
     } else {
-      discard_finishing.swap(discard_queued);
-      discard_running = true;
+      // If there are non-stopped discard threads and we have been requested
+      // to stop, do so now. Otherwise, we need to proceed because
+      // discard_queued is non-empty and at least one thread is needed to
+      // drain it.
+      if (thr->stop && !discard_threads.empty())
+        break;
+
+      // Limit local processing to MAX_LOCAL_DISCARD items.
+      // This will allow threads to work in parallel
+      // instead of a single thread taking over the whole discard_queued.
+      // It will also allow threads to finish in a timely manner.
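+      // (Illustrative usage sketch added by the editor, not part of the
+      // original patch.) The number of threads pulling from discard_queued
+      // is driven by the new bdev_async_discard_threads option and can be
+      // changed on a running cluster, e.g.:
+      //   ceph config set osd bdev_async_discard_threads 4
+      // handle_conf_change() below then calls _discard_update_threads() to
+      // spawn or retire threads so the pool matches the configured count.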
+ constexpr unsigned MAX_LOCAL_DISCARD = 32; + unsigned count = 0; + for (auto p = discard_queued.begin(); + p != discard_queued.end() && count < MAX_LOCAL_DISCARD; + ++p, ++count) { + discard_processing.insert(p.get_start(), p.get_len()); + discard_queued.erase(p); + } + + // there are multiple active threads -> must use a counter instead of a flag + discard_running ++; l.unlock(); dout(20) << __func__ << " finishing" << dendl; - for (auto p = discard_finishing.begin();p != discard_finishing.end(); ++p) { - _discard(p.get_start(), p.get_len()); + logger->inc(l_blk_kernel_device_discard_op, discard_processing.size()); + for (auto p = discard_processing.begin(); p != discard_processing.end(); ++p) { + _discard(p.get_start(), p.get_len()); } - discard_callback(discard_callback_priv, static_cast(&discard_finishing)); - discard_finishing.clear(); + discard_callback(discard_callback_priv, static_cast(&discard_processing)); + discard_processing.clear(); l.lock(); - discard_running = false; + discard_running --; + ceph_assert(discard_running >= 0); } } - dout(10) << __func__ << " finish" << dendl; - discard_started = false; + + dout(10) << __func__ << " thread " << tid << " finish" << dendl; } -int KernelDevice::_queue_discard(interval_set &to_release) +// this is private and is expected that the caller checks that discard +// threads are running via _discard_started() +void KernelDevice::_queue_discard(interval_set &to_release) { - // if bdev_async_discard enabled on the fly, discard_thread is not started here, fallback to sync discard - if (!discard_thread.is_started()) - return -1; - if (to_release.empty()) - return 0; + return; std::lock_guard l(discard_lock); discard_queued.insert(to_release); - discard_cond.notify_all(); - return 0; + discard_cond.notify_one(); } -// return true only if _queue_discard succeeded, so caller won't have to do alloc->release -// otherwise false +// return true only if discard was queued, so caller won't have to do +// alloc->release, otherwise return false bool KernelDevice::try_discard(interval_set &to_release, bool async) { if (!support_discard || !cct->_conf->bdev_enable_discard) return false; - if (async && discard_thread.is_started()) { - return 0 == _queue_discard(to_release); + if (async && _discard_started()) { + _queue_discard(to_release); + return true; } else { for (auto p = to_release.begin(); p != to_release.end(); ++p) { _discard(p.get_start(), p.get_len()); @@ -866,10 +945,8 @@ void KernelDevice::aio_submit(IOContext *ioc) void *priv = static_cast(ioc); int r, retries = 0; - // num of pending aios should not overflow when passed to submit_batch() - assert(pending <= std::numeric_limits::max()); r = io_queue->submit_batch(ioc->running_aios.begin(), e, - pending, priv, &retries); + priv, &retries); if (retries) derr << __func__ << " retries " << retries << dendl; @@ -1072,8 +1149,8 @@ int KernelDevice::_discard(uint64_t offset, uint64_t len) return 0; } dout(10) << __func__ - << " 0x" << std::hex << offset << "~" << len << std::dec - << dendl; + << " 0x" << std::hex << offset << "~" << len << std::dec + << dendl; r = BlkDev{fd_directs[WRITE_LIFE_NOT_SET]}.discard((int64_t)offset, (int64_t)len); return r; } @@ -1266,6 +1343,7 @@ int KernelDevice::read(uint64_t off, uint64_t len, bufferlist *pbl, << " since " << start1 << ", timeout is " << age << "s" << dendl; + add_stalled_read_event(); } if (r < 0) { if (ioc->allow_eio && is_expected_ioerr(-errno)) { @@ -1339,6 +1417,7 @@ int KernelDevice::direct_read_unaligned(uint64_t off, uint64_t len, char 
*buf) << " since " << start1 << ", timeout is " << age << "s" << dendl; + add_stalled_read_event(); } if (r < 0) { @@ -1402,6 +1481,7 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf, << " (buffered) since " << start1 << ", timeout is " << age << "s" << dendl; + add_stalled_read_event(); } } else { //direct and aligned read @@ -1412,6 +1492,7 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf, << " (direct) since " << start1 << ", timeout is " << age << "s" << dendl; + add_stalled_read_event(); } if (r < 0) { r = -errno; @@ -1447,3 +1528,21 @@ int KernelDevice::invalidate_cache(uint64_t off, uint64_t len) } return r; } + +const char** KernelDevice::get_tracked_conf_keys() const +{ + static const char* KEYS[] = { + "bdev_async_discard_threads", + "bdev_enable_discard", + NULL + }; + return KEYS; +} + +void KernelDevice::handle_conf_change(const ConfigProxy& conf, + const std::set &changed) +{ + if (changed.count("bdev_async_discard_threads") || changed.count("bdev_enable_discard")) { + _discard_update_threads(); + } +} diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h index e00e31f10b17..ac555cdd3daf 100644 --- a/src/blk/kernel/KernelDevice.h +++ b/src/blk/kernel/KernelDevice.h @@ -19,6 +19,7 @@ #include "include/types.h" #include "include/interval_set.h" +#include "common/config_obs.h" #include "common/Thread.h" #include "include/utime.h" @@ -28,7 +29,14 @@ #define RW_IO_MAX (INT_MAX & CEPH_PAGE_MASK) -class KernelDevice : public BlockDevice { +enum { + l_blk_kernel_device_first = 1000, + l_blk_kernel_device_discard_op, + l_blk_kernel_device_last, +}; + +class KernelDevice : public BlockDevice, + public md_config_obs_t { protected: std::string path; private: @@ -50,14 +58,12 @@ class KernelDevice : public BlockDevice { aio_callback_t discard_callback; void *discard_callback_priv; bool aio_stop; - bool discard_started; - bool discard_stop; + std::unique_ptr logger; ceph::mutex discard_lock = ceph::make_mutex("KernelDevice::discard_lock"); ceph::condition_variable discard_cond; - bool discard_running = false; + int discard_running = 0; interval_set discard_queued; - interval_set discard_finishing; struct AioCompletionThread : public Thread { KernelDevice *bdev; @@ -70,12 +76,15 @@ class KernelDevice : public BlockDevice { struct DiscardThread : public Thread { KernelDevice *bdev; - explicit DiscardThread(KernelDevice *b) : bdev(b) {} + const uint64_t id; + bool stop = false; + explicit DiscardThread(KernelDevice *b, uint64_t id) : bdev(b), id(id) {} void *entry() override { - bdev->_discard_thread(); + bdev->_discard_thread(id); return NULL; } - } discard_thread; + }; + std::vector> discard_threads; std::atomic_int injecting_crash; @@ -83,15 +92,16 @@ class KernelDevice : public BlockDevice { virtual void _pre_close() { } // hook for child implementations void _aio_thread(); - void _discard_thread(); - int _queue_discard(interval_set &to_release); + void _discard_thread(uint64_t tid); + void _queue_discard(interval_set &to_release); bool try_discard(interval_set &to_release, bool async = true) override; int _aio_start(); void _aio_stop(); - void _discard_start(); + void _discard_update_threads(bool discard_stop = false); void _discard_stop(); + bool _discard_started(); void _aio_log_start(IOContext *ioc, uint64_t offset, uint64_t length); void _aio_log_finish(IOContext *ioc, uint64_t offset, uint64_t length); @@ -115,11 +125,13 @@ class KernelDevice : public BlockDevice { ceph::unique_leakable_ptr create_custom_aligned(size_t 
len, IOContext* ioc) const; public: - KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv); + KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, + void *d_cbpriv, const char* dev_name = ""); + ~KernelDevice(); void aio_submit(IOContext *ioc) override; void discard_drain() override; - + void swap_discard_queued(interval_set& other) override; int collect_metadata(const std::string& prefix, std::map *pm) const override; int get_devname(std::string *s) const override { if (devname.empty()) { @@ -151,6 +163,11 @@ class KernelDevice : public BlockDevice { int invalidate_cache(uint64_t off, uint64_t len) override; int open(const std::string& path) override; void close() override; + + // config observer bits + const char** get_tracked_conf_keys() const override; + void handle_conf_change(const ConfigProxy& conf, + const std::set &changed) override; }; #endif diff --git a/src/blk/kernel/io_uring.cc b/src/blk/kernel/io_uring.cc index 5e7fd1227045..be63d63aaf26 100644 --- a/src/blk/kernel/io_uring.cc +++ b/src/blk/kernel/io_uring.cc @@ -176,10 +176,9 @@ void ioring_queue_t::shutdown() } int ioring_queue_t::submit_batch(aio_iter beg, aio_iter end, - uint16_t aios_size, void *priv, + void *priv, int *retries) { - (void)aios_size; (void)retries; pthread_mutex_lock(&d->sq_mutex); @@ -245,7 +244,7 @@ void ioring_queue_t::shutdown() } int ioring_queue_t::submit_batch(aio_iter beg, aio_iter end, - uint16_t aios_size, void *priv, + void *priv, int *retries) { ceph_assert(0); diff --git a/src/blk/kernel/io_uring.h b/src/blk/kernel/io_uring.h index e7d0acde0134..dd8f874728d7 100644 --- a/src/blk/kernel/io_uring.h +++ b/src/blk/kernel/io_uring.h @@ -27,7 +27,7 @@ struct ioring_queue_t final : public io_queue_t { int init(std::vector &fds) final; void shutdown() final; - int submit_batch(aio_iter begin, aio_iter end, uint16_t aios_size, + int submit_batch(aio_iter begin, aio_iter end, void *priv, int *retries) final; int get_next_completed(int timeout_ms, aio_t **paio, int max) final; }; diff --git a/src/blk/zoned/HMSMRDevice.cc b/src/blk/zoned/HMSMRDevice.cc deleted file mode 100644 index 416eae4e49fc..000000000000 --- a/src/blk/zoned/HMSMRDevice.cc +++ /dev/null @@ -1,131 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2014 Red Hat - * Copyright (C) 2020 Abutalib Aghayev - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include "HMSMRDevice.h" -extern "C" { -#include -} -#include "common/debug.h" -#include "common/errno.h" - -#define dout_context cct -#define dout_subsys ceph_subsys_bdev -#undef dout_prefix -#define dout_prefix *_dout << "smrbdev(" << this << " " << path << ") " - -using namespace std; - -HMSMRDevice::HMSMRDevice(CephContext* cct, - aio_callback_t cb, - void *cbpriv, - aio_callback_t d_cb, - void *d_cbpriv) - : KernelDevice(cct, cb, cbpriv, d_cb, d_cbpriv) -{ -} - -bool HMSMRDevice::support(const std::string& path) -{ - return zbd_device_is_zoned(path.c_str()) == 1; -} - -int HMSMRDevice::_post_open() -{ - dout(10) << __func__ << dendl; - - zbd_fd = zbd_open(path.c_str(), O_RDWR | O_DIRECT | O_LARGEFILE, nullptr); - int r; - if (zbd_fd < 0) { - r = errno; - derr << __func__ << " zbd_open failed on " << path << ": " - << cpp_strerror(r) << dendl; - return -r; - } - - unsigned int nr_zones = 0; - std::vector zones; - if (zbd_report_nr_zones(zbd_fd, 0, 0, ZBD_RO_NOT_WP, &nr_zones) != 0) { - r = -errno; - derr << __func__ << " zbd_report_nr_zones failed on " << path << ": " - << cpp_strerror(r) << dendl; - goto fail; - } - - zones.resize(nr_zones); - if (zbd_report_zones(zbd_fd, 0, 0, ZBD_RO_NOT_WP, zones.data(), &nr_zones) != 0) { - r = -errno; - derr << __func__ << " zbd_report_zones failed on " << path << dendl; - goto fail; - } - - zone_size = zbd_zone_len(&zones[0]); - conventional_region_size = nr_zones * zone_size; - - dout(10) << __func__ << " setting zone size to " << zone_size - << " and conventional region size to " << conventional_region_size - << dendl; - - return 0; - -fail: - zbd_close(zbd_fd); - zbd_fd = -1; - return r; -} - - -void HMSMRDevice::_pre_close() -{ - if (zbd_fd >= 0) { - zbd_close(zbd_fd); - zbd_fd = -1; - } -} - -void HMSMRDevice::reset_all_zones() -{ - dout(10) << __func__ << dendl; - zbd_reset_zones(zbd_fd, conventional_region_size, 0); -} - -void HMSMRDevice::reset_zone(uint64_t zone) -{ - dout(10) << __func__ << " zone 0x" << std::hex << zone << std::dec << dendl; - if (zbd_reset_zones(zbd_fd, zone * zone_size, zone_size) != 0) { - derr << __func__ << " resetting zone failed for zone 0x" << std::hex - << zone << std::dec << dendl; - ceph_abort("zbd_reset_zones failed"); - } -} - -std::vector HMSMRDevice::get_zones() -{ - std::vector zones; - unsigned int num_zones = size / zone_size; - zones.resize(num_zones); - - int r = zbd_report_zones(zbd_fd, 0, 0, ZBD_RO_ALL, zones.data(), &num_zones); - if (r != 0) { - derr << __func__ << " zbd_report_zones failed on " << path << ": " - << cpp_strerror(errno) << dendl; - ceph_abort("zbd_report_zones failed"); - } - - std::vector wp(num_zones); - for (unsigned i = 0; i < num_zones; ++i) { - wp[i] = zones[i].wp; - } - return wp; -} diff --git a/src/blk/zoned/HMSMRDevice.h b/src/blk/zoned/HMSMRDevice.h deleted file mode 100644 index edf18b5f0ba3..000000000000 --- a/src/blk/zoned/HMSMRDevice.h +++ /dev/null @@ -1,52 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2014 Red Hat - * Copyright (C) 2020 Abutalib Aghayev - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#ifndef CEPH_BLK_HMSMRDEVICE_H -#define CEPH_BLK_HMSMRDEVICE_H - -#include - -#include "include/types.h" -#include "include/interval_set.h" -#include "common/Thread.h" -#include "include/utime.h" - -#include "aio/aio.h" -#include "BlockDevice.h" -#include "../kernel/KernelDevice.h" - - -class HMSMRDevice final : public KernelDevice { - int zbd_fd = -1; ///< fd for the zoned block device - -public: - HMSMRDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, - aio_callback_t d_cb, void *d_cbpriv); - - static bool support(const std::string& path); - - // open/close hooks for libzbd - int _post_open() override; - void _pre_close() override; - - // smr-specific methods - bool is_smr() const final { return true; } - void reset_all_zones() override; - void reset_zone(uint64_t zone) override; - std::vector get_zones() override; - -}; - -#endif //CEPH_BLK_HMSMRDEVICE_H diff --git a/src/btrfs_ioc_test.c b/src/btrfs_ioc_test.c deleted file mode 100644 index e12bad14d1b5..000000000000 --- a/src/btrfs_ioc_test.c +++ /dev/null @@ -1,171 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/safe_io.h" -#include "os/btrfs_ioctl.h" - -void do_open_wr(const char *fname, int *fd) -{ - *fd = open(fname, O_WRONLY | O_CREAT, 0644); - if (*fd < 0) { - perror("open"); - exit(1); - } -} - -void do_open_rd(const char *fname, int *fd) -{ - *fd = open(fname, O_RDONLY); - if (*fd < 0) { - perror("open"); - exit(1); - } -} - -void do_lseek(int fd, int ofs) -{ - int rc = lseek(fd, ofs, SEEK_SET); - if (rc < 0) { - perror("lseek"); - exit(1); - } -} - -void do_write(int fd, int len) -{ - char *buf = malloc(len); - int rc; - if (!buf) { - printf("not enough memory\n"); - exit(1); - } - - memset(buf, 0, len); - rc = safe_write(fd, buf, len); - if (rc) { - fprintf(stderr, "safe_write failed with error %d (%s)\n", - rc, strerror(rc)); - exit(1); - } - - if (rc != len) { - printf("invalid number of bytes written\n"); - exit(1); - } - - free(buf); -} - -void do_link(const char *old, const char *new) -{ - int rc = link(old, new); - if (rc < 0) { - perror("link"); - exit(1); - } -} - -void do_clone_range(int from, int to, int off, int len) -{ - struct btrfs_ioctl_clone_range_args a; - int r; - - a.src_fd = from; - a.src_offset = off; - a.src_length = len; - a.dest_offset = off; - r = ioctl(to, BTRFS_IOC_CLONE_RANGE, &a); - if (r < 0) { - perror("ioctl"); - exit(1); - } -} - -void do_snap_async(int fd, const char *name, unsigned long long *transid) -{ - struct btrfs_ioctl_async_vol_args async_args; - struct btrfs_ioctl_vol_args volargs; - int r; - - strcpy(volargs.name, name); - volargs.fd = fd; - - async_args.args = &volargs; - async_args.transid = transid; - - r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_ASYNC, &async_args); - - if (r < 0) { - perror("ioctl"); - exit(1); - } -} - -void do_snap_destroy(int fd, const char *name) -{ - struct btrfs_ioctl_vol_args volargs; - int r; - - strcpy(volargs.name, name); - volargs.fd = 0; - - r = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &volargs); - - if (r < 0) { - perror("snap_destroy: ioctl"); - exit(1); - } -} - -void do_snap_wait(int fd, unsigned long long transid) -{ - int r = ioctl(fd, BTRFS_IOC_WAIT_SYNC, &transid); - if (r < 0) { - perror("do_snap_wait: ioctl"); - exit(1); - } -} - -void usage_exit(char *arg) -{ - printf("usage: %s \n", arg); - exit(1); -} - -#define TEMP_FILENAME "temp" -#define DEST_FILENAME "dest" -#define SRC_FILENAME "src" - -int main(int argc, char *argv[]) -{ - const char *base_dir; - const char 
*snap_name; - - int fd; - int i; - unsigned long long transid; - - if (argc < 3) - usage_exit(argv[0]); - - base_dir = argv[1]; - snap_name = argv[2]; - - for (i=0; i<10; i++) { - printf("%d\n", i); - do_open_rd(base_dir, &fd); - do_snap_async(fd, snap_name, &transid); - sleep(2); - //do_snap_wait(fd, transid); - do_snap_destroy(fd, snap_name); - close(fd); - } - - return 0; -} diff --git a/src/ceph-node-proxy/CMakeLists.txt b/src/ceph-node-proxy/CMakeLists.txt new file mode 100644 index 000000000000..0f83b0b6caa2 --- /dev/null +++ b/src/ceph-node-proxy/CMakeLists.txt @@ -0,0 +1,25 @@ + +include(Distutils) + +distutils_install_module(ceph_node_proxy + INSTALL_SCRIPT ${CMAKE_INSTALL_FULL_SBINDIR}) + +# Required for running ceph-node-proxy in a vstart environment +set(CEPH_NODE_PROXY_VIRTUALENV ${CEPH_BUILD_VIRTUALENV}/ceph-node-proxy-virtualenv) + +add_custom_command( + OUTPUT ${CEPH_NODE_PROXY_VIRTUALENV}/bin/python + COMMAND ${CMAKE_SOURCE_DIR}/src/tools/setup-virtualenv.sh --python=${Python3_EXECUTABLE} ${CEPH_NODE_PROXY_VIRTUALENV} + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/ceph-node-proxy + COMMENT "ceph-node-proxy venv is being created") + +add_custom_command( + OUTPUT ${CEPH_NODE_PROXY_VIRTUALENV}/bin/ceph-node-proxy + DEPENDS ${CEPH_NODE_PROXY_VIRTUALENV}/bin/python + COMMAND . ${CEPH_NODE_PROXY_VIRTUALENV}/bin/activate && ${CEPH_NODE_PROXY_VIRTUALENV}/bin/python setup.py develop && deactivate + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/ceph-node-proxy + COMMENT "${CMAKE_SOURCE_DIR}/src/ceph-node-proxy") + +add_custom_target(ceph-node-proxy-venv-setup + DEPENDS ${CEPH_NODE_PROXY_VIRTUALENV}/bin/ceph-node-proxy) + diff --git a/src/ceph-node-proxy/MANIFEST.in b/src/ceph-node-proxy/MANIFEST.in new file mode 100644 index 000000000000..3e6850fe101a --- /dev/null +++ b/src/ceph-node-proxy/MANIFEST.in @@ -0,0 +1,2 @@ +include bin/ceph-node-proxy + diff --git a/src/ceph-node-proxy/ceph_node_proxy/__init__.py b/src/ceph-node-proxy/ceph_node_proxy/__init__.py new file mode 100644 index 000000000000..20403aa92bbf --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/__init__.py @@ -0,0 +1,2 @@ +__version__ = '1.0.0' +__release__ = 'squid' diff --git a/src/ceph-node-proxy/ceph_node_proxy/api.py b/src/ceph-node-proxy/ceph_node_proxy/api.py new file mode 100644 index 000000000000..25ae03e51952 --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/api.py @@ -0,0 +1,285 @@ +import cherrypy # type: ignore +from urllib.error import HTTPError +from cherrypy._cpserver import Server # type: ignore +from threading import Thread, Event +from typing import Dict, Any, List +from ceph_node_proxy.util import Config, get_logger, write_tmp_file +from ceph_node_proxy.basesystem import BaseSystem +from ceph_node_proxy.reporter import Reporter +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from ceph_node_proxy.main import NodeProxyManager + + +@cherrypy.tools.auth_basic(on=True) +@cherrypy.tools.allow(methods=['PUT']) +@cherrypy.tools.json_out() +class Admin(): + def __init__(self, api: 'API') -> None: + self.api = api + + @cherrypy.expose + def start(self) -> Dict[str, str]: + self.api.backend.start() + self.api.reporter.run() + return {'ok': 'node-proxy daemon started'} + + @cherrypy.expose + def reload(self) -> Dict[str, str]: + self.api.config.reload() + return {'ok': 'node-proxy config reloaded'} + + def _stop(self) -> None: + self.api.backend.shutdown() + self.api.reporter.shutdown() + + @cherrypy.expose + def stop(self) -> Dict[str, str]: + self._stop() + return {'ok': 'node-proxy 
daemon stopped'} + + @cherrypy.expose + def shutdown(self) -> Dict[str, str]: + self._stop() + cherrypy.engine.exit() + return {'ok': 'Server shutdown.'} + + @cherrypy.expose + def flush(self) -> Dict[str, str]: + self.api.backend.flush() + return {'ok': 'node-proxy data flushed'} + + +class API(Server): + def __init__(self, + backend: 'BaseSystem', + reporter: 'Reporter', + config: 'Config', + addr: str = '0.0.0.0', + port: int = 0) -> None: + super().__init__() + self.log = get_logger(__name__) + self.backend = backend + self.reporter = reporter + self.config = config + self.socket_port = self.config.__dict__['api']['port'] if not port else port + self.socket_host = addr + self.subscribe() + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def memory(self) -> Dict[str, Any]: + return {'memory': self.backend.get_memory()} + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def network(self) -> Dict[str, Any]: + return {'network': self.backend.get_network()} + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def processors(self) -> Dict[str, Any]: + return {'processors': self.backend.get_processors()} + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def storage(self) -> Dict[str, Any]: + return {'storage': self.backend.get_storage()} + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def power(self) -> Dict[str, Any]: + return {'power': self.backend.get_power()} + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def fans(self) -> Dict[str, Any]: + return {'fans': self.backend.get_fans()} + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def firmwares(self) -> Dict[str, Any]: + return {'firmwares': self.backend.get_firmwares()} + + def _cp_dispatch(self, vpath: List[str]) -> 'API': + if vpath[0] == 'led' and len(vpath) > 1: # /led/{type}/{id} + _type = vpath[1] + cherrypy.request.params['type'] = _type + vpath.pop(1) # /led/{id} or # /led + if _type == 'drive' and len(vpath) > 1: # /led/{id} + _id = vpath[1] + vpath.pop(1) # /led + cherrypy.request.params['id'] = _id + vpath[0] = '_led' + # / + return self + + @cherrypy.expose + @cherrypy.tools.allow(methods=['POST']) + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + @cherrypy.tools.auth_basic(on=True) + def shutdown(self, **kw: Any) -> int: + data: Dict[str, bool] = cherrypy.request.json + + if 'force' not in data.keys(): + msg = "The key 'force' wasn't passed." 
+ self.log.debug(msg) + raise cherrypy.HTTPError(400, msg) + try: + result: int = self.backend.shutdown_host(force=data['force']) + except HTTPError as e: + raise cherrypy.HTTPError(e.code, e.reason) + return result + + @cherrypy.expose + @cherrypy.tools.allow(methods=['POST']) + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + @cherrypy.tools.auth_basic(on=True) + def powercycle(self, **kw: Any) -> int: + try: + result: int = self.backend.powercycle() + except HTTPError as e: + raise cherrypy.HTTPError(e.code, e.reason) + return result + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET', 'PATCH']) + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + @cherrypy.tools.auth_basic(on=True) + def _led(self, **kw: Any) -> Dict[str, Any]: + method: str = cherrypy.request.method + led_type: Optional[str] = kw.get('type') + id_drive: Optional[str] = kw.get('id') + result: Dict[str, Any] = dict() + + if not led_type: + msg = "the led type must be provided (either 'chassis' or 'drive')." + self.log.debug(msg) + raise cherrypy.HTTPError(400, msg) + + if led_type == 'drive': + id_drive_required = not id_drive + if id_drive_required or id_drive not in self.backend.get_storage(): + msg = 'A valid device ID must be provided.' + self.log.debug(msg) + raise cherrypy.HTTPError(400, msg) + + try: + if method == 'PATCH': + data: Dict[str, Any] = cherrypy.request.json + + if 'state' not in data or data['state'] not in ['on', 'off']: + msg = "Invalid data. 'state' must be provided and have a valid value (on|off)." + self.log.error(msg) + raise cherrypy.HTTPError(400, msg) + + func: Any = (self.backend.device_led_on if led_type == 'drive' and data['state'] == 'on' else + self.backend.device_led_off if led_type == 'drive' and data['state'] == 'off' else + self.backend.chassis_led_on if led_type != 'drive' and data['state'] == 'on' else + self.backend.chassis_led_off if led_type != 'drive' and data['state'] == 'off' else None) + + else: + func = self.backend.get_device_led if led_type == 'drive' else self.backend.get_chassis_led + + result = func(id_drive) if led_type == 'drive' else func() + + except HTTPError as e: + raise cherrypy.HTTPError(e.code, e.reason) + return result + + @cherrypy.expose + @cherrypy.tools.allow(methods=['GET']) + @cherrypy.tools.json_out() + def get_led(self, **kw: Dict[str, Any]) -> Dict[str, Any]: + return self.backend.get_led() + + @cherrypy.expose + @cherrypy.tools.allow(methods=['PATCH']) + @cherrypy.tools.json_in() + @cherrypy.tools.json_out() + @cherrypy.tools.auth_basic(on=True) + def set_led(self, **kw: Dict[str, Any]) -> Dict[str, Any]: + data = cherrypy.request.json + rc = self.backend.set_led(data) + + if rc != 200: + cherrypy.response.status = rc + result = {'state': 'error: please, verify the data you sent.'} + else: + result = {'state': data['state'].lower()} + return result + + def stop(self) -> None: + self.unsubscribe() + super().stop() + + +class NodeProxyApi(Thread): + def __init__(self, node_proxy_mgr: 'NodeProxyManager') -> None: + super().__init__() + self.log = get_logger(__name__) + self.cp_shutdown_event = Event() + self.node_proxy_mgr = node_proxy_mgr + self.username = self.node_proxy_mgr.username + self.password = self.node_proxy_mgr.password + self.ssl_crt = self.node_proxy_mgr.api_ssl_crt + self.ssl_key = self.node_proxy_mgr.api_ssl_key + self.system = self.node_proxy_mgr.system + self.reporter_agent = self.node_proxy_mgr.reporter_agent + self.config = self.node_proxy_mgr.config + self.api = API(self.system, self.reporter_agent, 
self.config) + + def check_auth(self, realm: str, username: str, password: str) -> bool: + return self.username == username and \ + self.password == password + + def shutdown(self) -> None: + self.log.info('Stopping node-proxy API...') + self.cp_shutdown_event.set() + + def run(self) -> None: + self.log.info('node-proxy API configuration...') + cherrypy.config.update({ + 'environment': 'production', + 'engine.autoreload.on': False, + 'log.screen': True, + }) + config = {'/': { + 'request.methods_with_bodies': ('POST', 'PUT', 'PATCH'), + 'tools.trailing_slash.on': False, + 'tools.auth_basic.realm': 'localhost', + 'tools.auth_basic.checkpassword': self.check_auth + }} + cherrypy.tree.mount(self.api, '/', config=config) + # cherrypy.tree.mount(admin, '/admin', config=config) + + ssl_crt = write_tmp_file(self.ssl_crt, + prefix_name='listener-crt-') + ssl_key = write_tmp_file(self.ssl_key, + prefix_name='listener-key-') + + self.api.ssl_certificate = ssl_crt.name + self.api.ssl_private_key = ssl_key.name + + cherrypy.server.unsubscribe() + try: + cherrypy.engine.start() + self.log.info('node-proxy API started.') + self.cp_shutdown_event.wait() + self.cp_shutdown_event.clear() + cherrypy.engine.exit() + cherrypy.server.httpserver = None + self.log.info('node-proxy API shutdown.') + except Exception as e: + self.log.error(f'node-proxy API error: {e}') diff --git a/src/ceph-node-proxy/ceph_node_proxy/baseclient.py b/src/ceph-node-proxy/ceph_node_proxy/baseclient.py new file mode 100644 index 000000000000..6b46561486d5 --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/baseclient.py @@ -0,0 +1,20 @@ +from typing import Dict, Any + + +class BaseClient: + def __init__(self, + host: str, + username: str, + password: str) -> None: + self.host = host + self.username = username + self.password = password + + def login(self) -> None: + raise NotImplementedError() + + def logout(self) -> Dict[str, Any]: + raise NotImplementedError() + + def get_path(self, path: str) -> Dict: + raise NotImplementedError() diff --git a/src/ceph-node-proxy/ceph_node_proxy/baseredfishsystem.py b/src/ceph-node-proxy/ceph_node_proxy/baseredfishsystem.py new file mode 100644 index 000000000000..cc1a56055b9f --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/baseredfishsystem.py @@ -0,0 +1,396 @@ +import concurrent.futures +import json +from ceph_node_proxy.basesystem import BaseSystem +from ceph_node_proxy.redfish_client import RedFishClient +from time import sleep +from ceph_node_proxy.util import get_logger, to_snake_case +from typing import Dict, Any, List, Callable, Union +from urllib.error import HTTPError, URLError + + +class EndpointMgr: + NAME: str = 'EndpointMgr' + + def __init__(self, + client: RedFishClient, + prefix: str = RedFishClient.PREFIX) -> None: + self.log = get_logger(f'{__name__}:{EndpointMgr.NAME}') + self.prefix: str = prefix + self.client: RedFishClient = client + + def __getitem__(self, index: str) -> Any: + if index in self.__dict__: + return self.__dict__[index] + else: + raise RuntimeError(f'{index} is not a valid endpoint.') + + def init(self) -> None: + _error_msg: str = "Can't discover entrypoint(s)" + try: + _, _data, _ = self.client.query(endpoint=self.prefix) + json_data: Dict[str, Any] = json.loads(_data) + for k, v in json_data.items(): + if '@odata.id' in v: + self.log.debug(f'entrypoint found: {to_snake_case(k)} = {v["@odata.id"]}') + _name: str = to_snake_case(k) + _url: str = v['@odata.id'] + e = Endpoint(_url, self.client) + setattr(self, _name, e) + setattr(self, 'session', 
json_data['Links']['Sessions']['@odata.id']) # TODO(guits): needs to be fixed + except (URLError, KeyError) as e: + msg = f'{_error_msg}: {e}' + self.log.error(msg) + raise RuntimeError + + +class Endpoint: + NAME: str = 'Endpoint' + + def __init__(self, url: str, client: RedFishClient) -> None: + self.log = get_logger(f'{__name__}:{Endpoint.NAME}') + self.url: str = url + self.client: RedFishClient = client + self.data: Dict[str, Any] = self.get_data() + self.id: str = '' + self.members_names: List[str] = [] + + if self.has_members: + self.members_names = self.get_members_names() + + if self.data: + try: + self.id = self.data['Id'] + except KeyError: + self.id = self.data['@odata.id'].split('/')[-1:] + else: + self.log.warning(f'No data could be loaded for {self.url}') + + def __getitem__(self, index: str) -> Any: + if not getattr(self, index, False): + _url: str = f'{self.url}/{index}' + setattr(self, index, Endpoint(_url, self.client)) + return self.__dict__[index] + + def query(self, url: str) -> Dict[str, Any]: + data: Dict[str, Any] = {} + try: + self.log.debug(f'Querying {url}') + _, _data, _ = self.client.query(endpoint=url) + data = json.loads(_data) + except KeyError as e: + self.log.error(f'Error while querying {self.url}: {e}') + return data + + def get_data(self) -> Dict[str, Any]: + return self.query(self.url) + + def get_members_names(self) -> List[str]: + result: List[str] = [] + if self.has_members: + for member in self.data['Members']: + name: str = member['@odata.id'].split('/')[-1:][0] + result.append(name) + return result + + def get_name(self, endpoint: str) -> str: + return endpoint.split('/')[-1:][0] + + def get_members_endpoints(self) -> Dict[str, str]: + members: Dict[str, str] = {} + name: str = '' + if self.has_members: + for member in self.data['Members']: + name = self.get_name(member['@odata.id']) + members[name] = member['@odata.id'] + else: + name = self.get_name(self.data['@odata.id']) + members[name] = self.data['@odata.id'] + + return members + + def get_members_data(self) -> Dict[str, Any]: + result: Dict[str, Any] = {} + if self.has_members: + for member, endpoint in self.get_members_endpoints().items(): + result[member] = self.query(endpoint) + return result + + @property + def has_members(self) -> bool: + return 'Members' in self.data.keys() + + +class BaseRedfishSystem(BaseSystem): + def __init__(self, **kw: Any) -> None: + super().__init__(**kw) + self.log = get_logger(__name__) + self.host: str = kw['host'] + self.port: str = kw['port'] + self.username: str = kw['username'] + self.password: str = kw['password'] + # move the following line (class attribute?) 
+ self.client: RedFishClient = RedFishClient(host=self.host, port=self.port, username=self.username, password=self.password) + self.endpoints: EndpointMgr = EndpointMgr(self.client) + self.log.info(f'redfish system initialization, host: {self.host}, user: {self.username}') + self.data_ready: bool = False + self.previous_data: Dict = {} + self.data: Dict[str, Dict[str, Any]] = {} + self._system: Dict[str, Dict[str, Any]] = {} + self._sys: Dict[str, Any] = {} + self.job_service_endpoint: str = '' + self.create_reboot_job_endpoint: str = '' + self.setup_job_queue_endpoint: str = '' + self.component_list: List[str] = kw.get('component_list', ['memory', + 'power', + 'fans', + 'network', + 'processors', + 'storage', + 'firmwares']) + self.update_funcs: List[Callable] = [] + for component in self.component_list: + self.log.debug(f'adding: {component} to hw component gathered list.') + func = f'_update_{component}' + if hasattr(self, func): + f = getattr(self, func) + self.update_funcs.append(f) + + def main(self) -> None: + self.stop = False + self.client.login() + self.endpoints.init() + + while not self.stop: + self.log.debug('waiting for a lock in the update loop.') + with self.lock: + if not self.pending_shutdown: + self.log.debug('lock acquired in the update loop.') + try: + self._update_system() + self._update_sn() + + with concurrent.futures.ThreadPoolExecutor() as executor: + executor.map(lambda f: f(), self.update_funcs) + + self.data_ready = True + except RuntimeError as e: + self.stop = True + self.log.error(f'Error detected, trying to gracefully log out from redfish api.\n{e}') + self.client.logout() + raise + sleep(5) + self.log.debug('lock released in the update loop.') + self.log.debug('exiting update loop.') + raise SystemExit(0) + + def flush(self) -> None: + self.log.debug('Acquiring lock to flush data.') + self.lock.acquire() + self.log.debug('Lock acquired, flushing data.') + self._system = {} + self.previous_data = {} + self.log.info('Data flushed.') + self.data_ready = False + self.log.debug('Data marked as not ready.') + self.lock.release() + self.log.debug('Released the lock after flushing data.') + + # @retry(retries=10, delay=2) + def _get_path(self, path: str) -> Dict: + result: Dict[str, Any] = {} + try: + if not self.pending_shutdown: + self.log.debug(f'Getting path: {path}') + result = self.client.get_path(path) + else: + self.log.debug(f'Pending shutdown, aborting query to {path}') + except RuntimeError: + raise + if result is None: + self.log.error(f'The client reported an error when getting path: {path}') + raise RuntimeError(f'Could not get path: {path}') + return result + + def get_members(self, data: Dict[str, Any], path: str) -> List: + return [self._get_path(member['@odata.id']) for member in data['Members']] + + def get_system(self) -> Dict[str, Any]: + result = { + 'host': self.get_host(), + 'sn': self.get_sn(), + 'status': { + 'storage': self.get_storage(), + 'processors': self.get_processors(), + 'network': self.get_network(), + 'memory': self.get_memory(), + 'power': self.get_power(), + 'fans': self.get_fans() + }, + 'firmwares': self.get_firmwares(), + } + return result + + def _update_system(self) -> None: + system_members: Dict[str, Any] = self.endpoints['systems'].get_members_data() + update_service_members: Endpoint = self.endpoints['update_service'] + + for member, data in system_members.items(): + self._system[member] = data + self._sys[member] = dict() + + self._system[update_service_members.id] = update_service_members.data + + def 
_update_sn(self) -> None: + raise NotImplementedError() + + def _update_memory(self) -> None: + raise NotImplementedError() + + def _update_power(self) -> None: + raise NotImplementedError() + + def _update_fans(self) -> None: + raise NotImplementedError() + + def _update_network(self) -> None: + raise NotImplementedError() + + def _update_processors(self) -> None: + raise NotImplementedError() + + def _update_storage(self) -> None: + raise NotImplementedError() + + def _update_firmwares(self) -> None: + raise NotImplementedError() + + def device_led_on(self, device: str) -> int: + data: Dict[str, bool] = {'LocationIndicatorActive': True} + try: + result = self.set_device_led(device, data) + except (HTTPError, KeyError): + return 0 + return result + + def device_led_off(self, device: str) -> int: + data: Dict[str, bool] = {'LocationIndicatorActive': False} + try: + result = self.set_device_led(device, data) + except (HTTPError, KeyError): + return 0 + return result + + def chassis_led_on(self) -> int: + data: Dict[str, str] = {'IndicatorLED': 'Blinking'} + result = self.set_chassis_led(data) + return result + + def chassis_led_off(self) -> int: + data: Dict[str, str] = {'IndicatorLED': 'Lit'} + result = self.set_chassis_led(data) + return result + + def get_device_led(self, device: str) -> Dict[str, Any]: + endpoint = self._sys['storage'][device]['redfish_endpoint'] + try: + result = self.client.query(method='GET', + endpoint=endpoint, + timeout=10) + except HTTPError as e: + self.log.error(f"Couldn't get the ident device LED status for device '{device}': {e}") + raise + response_json = json.loads(result[1]) + _result: Dict[str, Any] = {'http_code': result[2]} + if result[2] == 200: + _result['LocationIndicatorActive'] = response_json['LocationIndicatorActive'] + else: + _result['LocationIndicatorActive'] = None + return _result + + def set_device_led(self, device: str, data: Dict[str, bool]) -> int: + try: + _, _, status = self.client.query( + data=json.dumps(data), + method='PATCH', + endpoint=self._sys['storage'][device]['redfish_endpoint'] + ) + except (HTTPError, KeyError) as e: + self.log.error(f"Couldn't set the ident device LED for device '{device}': {e}") + raise + return status + + def get_chassis_led(self) -> Dict[str, Any]: + endpoint = list(self.endpoints['chassis'].get_members_endpoints().values())[0] + try: + result = self.client.query(method='GET', + endpoint=endpoint, + timeout=10) + except HTTPError as e: + self.log.error(f"Couldn't get the ident chassis LED status: {e}") + raise + response_json = json.loads(result[1]) + _result: Dict[str, Any] = {'http_code': result[2]} + if result[2] == 200: + _result['LocationIndicatorActive'] = response_json['LocationIndicatorActive'] + else: + _result['LocationIndicatorActive'] = None + return _result + + def set_chassis_led(self, data: Dict[str, str]) -> int: + # '{"IndicatorLED": "Lit"}' -> LocationIndicatorActive = false + # '{"IndicatorLED": "Blinking"}' -> LocationIndicatorActive = true + try: + _, _, status = self.client.query( + data=json.dumps(data), + method='PATCH', + endpoint=list(self.endpoints['chassis'].get_members_endpoints().values())[0] + ) + except HTTPError as e: + self.log.error(f"Couldn't set the ident chassis LED: {e}") + raise + return status + + def shutdown_host(self, force: bool = False) -> int: + reboot_type: str = 'GracefulRebootWithForcedShutdown' if force else 'GracefulRebootWithoutForcedShutdown' + + try: + job_id: str = self.create_reboot_job(reboot_type) + status = self.schedule_reboot_job(job_id) + 
except (HTTPError, KeyError) as e: + self.log.error(f"Couldn't create the reboot job: {e}") + raise + return status + + def powercycle(self) -> int: + try: + job_id: str = self.create_reboot_job('PowerCycle') + status = self.schedule_reboot_job(job_id) + except (HTTPError, URLError) as e: + self.log.error(f"Couldn't perform power cycle: {e}") + raise + return status + + def create_reboot_job(self, reboot_type: str) -> str: + data: Dict[str, str] = dict(RebootJobType=reboot_type) + try: + headers, _, _ = self.client.query( + data=json.dumps(data), + endpoint=self.create_reboot_job_endpoint + ) + job_id: str = headers['Location'].split('/')[-1] + except (HTTPError, URLError) as e: + self.log.error(f"Couldn't create the reboot job: {e}") + raise + return job_id + + def schedule_reboot_job(self, job_id: str) -> int: + data: Dict[str, Union[List[str], str]] = dict(JobArray=[job_id], StartTimeInterval='TIME_NOW') + try: + _, _, status = self.client.query( + data=json.dumps(data), + endpoint=self.setup_job_queue_endpoint + ) + except (HTTPError, KeyError) as e: + self.log.error(f"Couldn't schedule the reboot job: {e}") + raise + return status diff --git a/src/ceph-node-proxy/ceph_node_proxy/basesystem.py b/src/ceph-node-proxy/ceph_node_proxy/basesystem.py new file mode 100644 index 000000000000..65eca55af1f0 --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/basesystem.py @@ -0,0 +1,96 @@ +import socket +from threading import Lock +from ceph_node_proxy.util import Config, get_logger, BaseThread +from typing import Dict, Any +from ceph_node_proxy.baseclient import BaseClient + + +class BaseSystem(BaseThread): + def __init__(self, **kw: Any) -> None: + super().__init__() + self.lock: Lock = Lock() + self._system: Dict = {} + self.config: Config = kw.get('config', {}) + self.client: BaseClient + self.log = get_logger(__name__) + + def main(self) -> None: + raise NotImplementedError() + + def get_system(self) -> Dict[str, Any]: + raise NotImplementedError() + + def get_status(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_metadata(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_processors(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_memory(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_fans(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_power(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_network(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_storage(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_firmwares(self) -> Dict[str, Dict[str, Dict]]: + raise NotImplementedError() + + def get_sn(self) -> str: + raise NotImplementedError() + + def get_led(self) -> Dict[str, Any]: + raise NotImplementedError() + + def set_led(self, data: Dict[str, str]) -> int: + raise NotImplementedError() + + def get_chassis_led(self) -> Dict[str, Any]: + raise NotImplementedError() + + def set_chassis_led(self, data: Dict[str, str]) -> int: + raise NotImplementedError() + + def device_led_on(self, device: str) -> int: + raise NotImplementedError() + + def device_led_off(self, device: str) -> int: + raise NotImplementedError() + + def get_device_led(self, device: str) -> Dict[str, Any]: + raise NotImplementedError() + + def set_device_led(self, device: str, data: Dict[str, bool]) -> int: + raise NotImplementedError() + + def chassis_led_on(self) -> int: + raise 
NotImplementedError() + + def chassis_led_off(self) -> int: + raise NotImplementedError() + + def get_host(self) -> str: + return socket.gethostname() + + def stop_update_loop(self) -> None: + raise NotImplementedError() + + def flush(self) -> None: + raise NotImplementedError() + + def shutdown_host(self, force: bool = False) -> int: + raise NotImplementedError() + + def powercycle(self) -> int: + raise NotImplementedError() diff --git a/src/ceph-node-proxy/ceph_node_proxy/main.py b/src/ceph-node-proxy/ceph_node_proxy/main.py new file mode 100644 index 000000000000..9a449ecf8845 --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/main.py @@ -0,0 +1,199 @@ +from ceph_node_proxy.redfishdellsystem import RedfishDellSystem +from ceph_node_proxy.api import NodeProxyApi +from ceph_node_proxy.reporter import Reporter +from ceph_node_proxy.util import Config, get_logger, http_req, write_tmp_file, CONFIG +from urllib.error import HTTPError +from typing import Dict, Any, Optional + +import argparse +import os +import ssl +import json +import time +import signal + + +class NodeProxyManager: + def __init__(self, **kw: Any) -> None: + self.exc: Optional[Exception] = None + self.log = get_logger(__name__) + self.mgr_host: str = kw['mgr_host'] + self.cephx_name: str = kw['cephx_name'] + self.cephx_secret: str = kw['cephx_secret'] + self.ca_path: str = kw['ca_path'] + self.api_ssl_crt: str = kw['api_ssl_crt'] + self.api_ssl_key: str = kw['api_ssl_key'] + self.mgr_agent_port: str = str(kw['mgr_agent_port']) + self.stop: bool = False + self.ssl_ctx = ssl.create_default_context() + self.ssl_ctx.check_hostname = True + self.ssl_ctx.verify_mode = ssl.CERT_REQUIRED + self.ssl_ctx.load_verify_locations(self.ca_path) + self.reporter_scheme: str = kw.get('reporter_scheme', 'https') + self.reporter_endpoint: str = kw.get('reporter_endpoint', '/node-proxy/data') + self.cephx = {'cephx': {'name': self.cephx_name, + 'secret': self.cephx_secret}} + self.config = Config('/etc/ceph/node-proxy.yml', config=CONFIG) + self.username: str = '' + self.password: str = '' + + def run(self) -> None: + self.init() + self.loop() + + def init(self) -> None: + self.init_system() + self.init_reporter() + self.init_api() + + def fetch_oob_details(self) -> Dict[str, str]: + try: + headers, result, status = http_req(hostname=self.mgr_host, + port=self.mgr_agent_port, + data=json.dumps(self.cephx), + endpoint='/node-proxy/oob', + ssl_ctx=self.ssl_ctx) + except HTTPError as e: + msg = f'No out of band tool details could be loaded: {e.code}, {e.reason}' + self.log.debug(msg) + raise + + result_json = json.loads(result) + oob_details: Dict[str, str] = { + 'host': result_json['result']['addr'], + 'username': result_json['result']['username'], + 'password': result_json['result']['password'], + 'port': result_json['result'].get('port', '443') + } + return oob_details + + def init_system(self) -> None: + try: + oob_details = self.fetch_oob_details() + self.username = oob_details['username'] + self.password = oob_details['password'] + except HTTPError: + self.log.warning('No oob details could be loaded, exiting...') + raise SystemExit(1) + try: + self.system = RedfishDellSystem(host=oob_details['host'], + port=oob_details['port'], + username=oob_details['username'], + password=oob_details['password'], + config=self.config) + self.system.start() + except RuntimeError: + self.log.error("Can't initialize the redfish system.") + raise + + def init_reporter(self) -> None: + try: + self.reporter_agent = Reporter(self.system, + self.cephx, + 
reporter_scheme=self.reporter_scheme, + reporter_hostname=self.mgr_host, + reporter_port=self.mgr_agent_port, + reporter_endpoint=self.reporter_endpoint) + self.reporter_agent.start() + except RuntimeError: + self.log.error("Can't initialize the reporter.") + raise + + def init_api(self) -> None: + try: + self.log.info('Starting node-proxy API...') + self.api = NodeProxyApi(self) + self.api.start() + except Exception as e: + self.log.error(f"Can't start node-proxy API: {e}") + raise + + def loop(self) -> None: + while not self.stop: + for thread in [self.system, self.reporter_agent]: + try: + status = thread.check_status() + label = 'Ok' if status else 'Critical' + self.log.debug(f'{thread} status: {label}') + except Exception as e: + self.log.error(f'{thread} not running: {e.__class__.__name__}: {e}') + thread.shutdown() + self.init_system() + self.init_reporter() + self.log.debug('All threads are alive, next check in 20sec.') + time.sleep(20) + + def shutdown(self) -> None: + self.stop = True + # if `self.system.shutdown()` is called before self.start(), it will fail. + if hasattr(self, 'api'): + self.api.shutdown() + if hasattr(self, 'reporter_agent'): + self.reporter_agent.shutdown() + if hasattr(self, 'system'): + self.system.shutdown() + + +def handler(signum: Any, frame: Any, t_mgr: 'NodeProxyManager') -> None: + t_mgr.system.pending_shutdown = True + t_mgr.log.info('SIGTERM caught, shutting down threads...') + t_mgr.shutdown() + t_mgr.log.info('Logging out from RedFish API') + t_mgr.system.client.logout() + raise SystemExit(0) + + +def main() -> None: + parser = argparse.ArgumentParser( + description='Ceph Node-Proxy for HW Monitoring', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--config', + help='path of config file in json format', + required=True + ) + parser.add_argument( + '--debug', + help='increase logging verbosity (debug level)', + action='store_true', + ) + + args = parser.parse_args() + if args.debug: + CONFIG['logging']['level'] = 10 + + if not os.path.exists(args.config): + raise Exception(f'No config file found at provided config path: {args.config}') + + with open(args.config, 'r') as f: + try: + config_json = f.read() + config = json.loads(config_json) + except Exception as e: + raise Exception(f'Failed to load json config: {str(e)}') + + target_ip = config['target_ip'] + target_port = config['target_port'] + keyring = config['keyring'] + root_cert = config['root_cert.pem'] + listener_cert = config['listener.crt'] + listener_key = config['listener.key'] + name = config['name'] + + ca_file = write_tmp_file(root_cert, + prefix_name='cephadm-endpoint-root-cert') + + node_proxy_mgr = NodeProxyManager(mgr_host=target_ip, + cephx_name=name, + cephx_secret=keyring, + mgr_agent_port=target_port, + ca_path=ca_file.name, + api_ssl_crt=listener_cert, + api_ssl_key=listener_key) + signal.signal(signal.SIGTERM, + lambda signum, frame: handler(signum, frame, node_proxy_mgr)) + node_proxy_mgr.run() + + +if __name__ == '__main__': + main() diff --git a/src/ceph-node-proxy/ceph_node_proxy/redfish_client.py b/src/ceph-node-proxy/ceph_node_proxy/redfish_client.py new file mode 100644 index 000000000000..d75d9a3cc8c8 --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/redfish_client.py @@ -0,0 +1,138 @@ +import json +from urllib.error import HTTPError, URLError +from ceph_node_proxy.baseclient import BaseClient +from ceph_node_proxy.util import get_logger, http_req +from typing import Dict, Any, Tuple, Optional +from http.client import 
HTTPMessage + + +class RedFishClient(BaseClient): + PREFIX = '/redfish/v1/' + + def __init__(self, + host: str = '', + port: str = '443', + username: str = '', + password: str = ''): + super().__init__(host, username, password) + self.log = get_logger(__name__) + self.log.info(f'Initializing redfish client {__name__}') + self.host: str = host + self.port: str = port + self.url: str = f'https://{self.host}:{self.port}' + self.token: str = '' + self.location: str = '' + self.session_service: str = '' + + def sessionservice_discover(self) -> None: + _error_msg: str = "Can't discover SessionService url" + try: + _headers, _data, _status_code = self.query(endpoint=RedFishClient.PREFIX) + json_data: Dict[str, Any] = json.loads(_data) + self.session_service = json_data['Links']['Sessions']['@odata.id'] + except (URLError, KeyError) as e: + msg = f'{_error_msg}: {e}' + self.log.error(msg) + raise RuntimeError + + def login(self) -> None: + if not self.is_logged_in(): + self.log.debug('Discovering SessionService url...') + self.sessionservice_discover() + self.log.debug(f'SessionService url is {self.session_service}') + self.log.info('Logging in to ' + f"{self.url} as '{self.username}'") + oob_credentials = json.dumps({'UserName': self.username, + 'Password': self.password}) + headers = {'Content-Type': 'application/json'} + location_endpoint: str = '' + + try: + _headers, _data, _status_code = self.query(data=oob_credentials, + headers=headers, + endpoint=self.session_service) + if _status_code != 201: + self.log.error(f"Can't log in to {self.url} as '{self.username}': {_status_code}") + raise RuntimeError + except URLError as e: + msg = f"Can't log in to {self.url} as '{self.username}': {e}" + self.log.error(msg) + raise RuntimeError + self.token = _headers['X-Auth-Token'] + if _headers['Location'].startswith('http'): + # We assume the value has the following format: + # scheme://address:port/redfish/v1/SessionService/Session + location_endpoint = f"/{_headers['Location'].split('/', 3)[-1:][0]}" + else: + location_endpoint = _headers['Location'] + self.location = location_endpoint + self.log.info(f'Logged in to {self.url}, Received header "Location": {self.location}') + + def is_logged_in(self) -> bool: + self.log.debug(f'Checking token validity for {self.url}') + if not self.location or not self.token: + self.log.debug(f'No token found for {self.url}.') + return False + headers = {'X-Auth-Token': self.token} + try: + _headers, _data, _status_code = self.query(headers=headers, + endpoint=self.location) + except URLError as e: + self.log.error("Can't check token " + f'validity for {self.url}: {e}') + raise + return _status_code == 200 + + def logout(self) -> Dict[str, Any]: + result: Dict[str, Any] = {} + try: + if self.is_logged_in(): + _, _data, _status_code = self.query(method='DELETE', + headers={'X-Auth-Token': self.token}, + endpoint=self.location) + result = json.loads(_data) + except URLError: + self.log.error(f"Can't log out from {self.url}") + + self.location = '' + self.token = '' + + return result + + def get_path(self, path: str) -> Dict[str, Any]: + if self.PREFIX not in path: + path = f'{self.PREFIX}{path}' + try: + _, result, _status_code = self.query(endpoint=path) + result_json = json.loads(result) + return result_json + except URLError as e: + self.log.error(f"Can't get path {path}:\n{e}") + raise RuntimeError + + def query(self, + data: Optional[str] = None, + headers: Dict[str, str] = {}, + method: Optional[str] = None, + endpoint: str = '', + timeout: int = 10) -> 
Tuple[HTTPMessage, str, int]: + _headers = headers.copy() if headers else {} + if self.token: + _headers['X-Auth-Token'] = self.token + if not _headers.get('Content-Type') and method in ['POST', 'PUT', 'PATCH']: + _headers['Content-Type'] = 'application/json' + try: + (response_headers, + response_str, + response_status) = http_req(hostname=self.host, + port=self.port, + endpoint=endpoint, + headers=_headers, + method=method, + data=data, + timeout=timeout) + + return response_headers, response_str, response_status + except (HTTPError, URLError) as e: + self.log.debug(f'endpoint={endpoint} err={e}') + raise diff --git a/src/ceph-node-proxy/ceph_node_proxy/redfishdellsystem.py b/src/ceph-node-proxy/ceph_node_proxy/redfishdellsystem.py new file mode 100644 index 000000000000..8a478fe32f63 --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/redfishdellsystem.py @@ -0,0 +1,194 @@ +from ceph_node_proxy.baseredfishsystem import BaseRedfishSystem, Endpoint +from ceph_node_proxy.util import get_logger, normalize_dict, to_snake_case +from typing import Dict, Any, List, Optional +from urllib.error import HTTPError + + +class RedfishDellSystem(BaseRedfishSystem): + def __init__(self, **kw: Any) -> None: + super().__init__(**kw) + self.log = get_logger(__name__) + self.job_service_endpoint: str = '/redfish/v1/Managers/iDRAC.Embedded.1/Oem/Dell/DellJobService' + self.create_reboot_job_endpoint: str = f'{self.job_service_endpoint}/Actions/DellJobService.CreateRebootJob' + self.setup_job_queue_endpoint: str = f'{self.job_service_endpoint}/Actions/DellJobService.SetupJobQueue' + + def build_data(self, + data: Dict[str, Any], + fields: List[str], + attribute: Optional[str] = None) -> Dict[str, Dict[str, Dict]]: + result: Dict[str, Dict[str, Optional[Dict]]] = dict() + member_id: str = '' + + def process_data(m_id: str, fields: List[str], data: Dict[str, Any]) -> Dict[str, Any]: + result: Dict[str, Any] = {} + for field in fields: + try: + result[to_snake_case(field)] = data[field] + except KeyError: + self.log.warning(f'Could not find field: {field} in data: {data}') + result[to_snake_case(field)] = None + return result + + try: + if attribute is not None: + data_items = data[attribute] + else: + # The following is a hack to re-inject the key to the dict + # as we have the following structure when `attribute` is passed: + # "PowerSupplies": [ {"MemberId": "0", ...}, {"MemberId": "1", ...} ] + # vs. this structure in the opposite case: + # { "CPU.Socket.2": { "Id": "CPU.Socket.2", "Manufacturer": "Intel" }, "CPU.Socket.1": {} } + # With the first case, we clearly use the field "MemberId". + # With the second case, we use the key of the dict. + # This is mostly for avoiding code duplication. 
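+                # Illustrative sketch (hypothetical values): the second shape, e.g.
+                #   {'CPU.Socket.1': {'Model': 'Some CPU'}}
+                # is rewritten below as
+                #   [{'MemberId': 'CPU.Socket.1', 'Model': 'Some CPU'}]
+                # so process_data() can rely on 'MemberId' in both cases.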
+ data_items = [{'MemberId': k, **v} for k, v in data.items()] + for d in data_items: + member_id = d.get('MemberId') + result[member_id] = {} + result[member_id] = process_data(member_id, fields, d) + + except Exception as e: + self.log.error(f"Can't build data: {e}") + return normalize_dict(result) + + def get_sn(self) -> str: + return self._sys.get('SKU', '') + + def get_status(self) -> Dict[str, Dict[str, Dict]]: + return self._sys.get('status', {}) + + def get_memory(self) -> Dict[str, Dict[str, Dict]]: + return self._sys.get('memory', {}) + + def get_processors(self) -> Dict[str, Dict[str, Dict]]: + return self._sys.get('processors', {}) + + def get_network(self) -> Dict[str, Dict[str, Dict]]: + return self._sys.get('network', {}) + + def get_storage(self) -> Dict[str, Dict[str, Dict]]: + return self._sys.get('storage', {}) + + def get_firmwares(self) -> Dict[str, Dict[str, Dict]]: + return self._sys.get('firmwares', {}) + + def get_power(self) -> Dict[str, Dict[str, Dict]]: + return self._sys.get('power', {}) + + def get_fans(self) -> Dict[str, Dict[str, Dict]]: + return self._sys.get('fans', {}) + + def _update_network(self) -> None: + fields = ['Description', 'Name', 'SpeedMbps', 'Status'] + self.log.debug('Updating network') + self.update('systems', 'network', 'EthernetInterfaces', fields) + + def update(self, + collection: str, + component: str, + path: str, + fields: List[str], + attribute: Optional[str] = None) -> None: + members: List[str] = self.endpoints[collection].get_members_names() + result: Dict[str, Any] = {} + data: Dict[str, Any] = {} + data_built: Dict[str, Any] = {} + if not members: + data = self.endpoints[collection][path].get_members_data() + data_built = self.build_data(data=data, fields=fields, attribute=attribute) + result = data_built + else: + for member in members: + data_built = {} + try: + if attribute is None: + data = self.endpoints[collection][member][path].get_members_data() + else: + data = self.endpoints[collection][member][path].data + except HTTPError as e: + self.log.debug(f'Error while updating {component}: {e}') + else: + data_built = self.build_data(data=data, fields=fields, attribute=attribute) + result[member] = data_built + self._sys[component] = result + + def _update_processors(self) -> None: + fields = ['Description', + 'TotalCores', + 'TotalThreads', + 'ProcessorType', + 'Model', + 'Status', + 'Manufacturer'] + self.log.debug('Updating processors') + self.update('systems', 'processors', 'Processors', fields) + + def _update_storage(self) -> None: + fields = ['Description', + 'CapacityBytes', + 'Model', 'Protocol', + 'LocationIndicatorActive', + 'SerialNumber', 'Status', + 'PhysicalLocation'] + result: Dict[str, Dict[str, Dict]] = dict() + self.log.debug('Updating storage') + for member in self.endpoints['systems'].get_members_names(): + result[member] = {} + members_data = self.endpoints['systems'][member]['Storage'].get_members_data() + for entity in members_data: + for drive in members_data[entity]['Drives']: + data: Dict[str, Any] = Endpoint(drive['@odata.id'], self.endpoints.client).data + drive_id = data['Id'] + result[member][drive_id] = dict() + result[member][drive_id]['redfish_endpoint'] = data['@odata.id'] + for field in fields: + result[member][drive_id][to_snake_case(field)] = data[field] + result[member][drive_id]['entity'] = entity + self._sys['storage'] = normalize_dict(result) + + def _update_sn(self) -> None: + serials: List[str] = [] + self.log.debug('Updating serial number') + data: Dict[str, Any] = 
self.endpoints['systems'].get_members_data()
+        for sys in data.keys():
+            serials.append(data[sys]['SKU'])
+        self._sys['SKU'] = ','.join(serials)
+
+    def _update_memory(self) -> None:
+        fields = ['Description',
+                  'MemoryDeviceType',
+                  'CapacityMiB',
+                  'Status']
+        self.log.debug('Updating memory')
+        self.update('systems', 'memory', 'Memory', fields)
+
+    def _update_power(self) -> None:
+        fields = [
+            'Name',
+            'Model',
+            'Manufacturer',
+            'Status'
+        ]
+        self.log.debug('Updating powersupplies')
+        self.update('chassis', 'power', 'Power', fields, attribute='PowerSupplies')
+
+    def _update_fans(self) -> None:
+        fields = [
+            'Name',
+            'PhysicalContext',
+            'Status'
+        ]
+        self.log.debug('Updating fans')
+        self.update('chassis', 'fans', 'Thermal', fields, attribute='Fans')
+
+    def _update_firmwares(self) -> None:
+        fields = [
+            'Name',
+            'Description',
+            'ReleaseDate',
+            'Version',
+            'Updateable',
+            'Status',
+        ]
+        self.log.debug('Updating firmwares')
+        self.update('update_service', 'firmwares', 'FirmwareInventory', fields)
diff --git a/src/ceph-node-proxy/ceph_node_proxy/reporter.py b/src/ceph-node-proxy/ceph_node_proxy/reporter.py
new file mode 100644
index 000000000000..20d43b59d332
--- /dev/null
+++ b/src/ceph-node-proxy/ceph_node_proxy/reporter.py
@@ -0,0 +1,69 @@
+import time
+import json
+from ceph_node_proxy.util import get_logger, http_req, BaseThread
+from urllib.error import HTTPError, URLError
+from typing import Dict, Any
+
+
+class Reporter(BaseThread):
+    def __init__(self,
+                 system: Any,
+                 cephx: Dict[str, Any],
+                 reporter_scheme: str = 'https',
+                 reporter_hostname: str = '',
+                 reporter_port: str = '443',
+                 reporter_endpoint: str = '/node-proxy/data') -> None:
+        super().__init__()
+        self.system = system
+        self.data: Dict[str, Any] = {}
+        self.stop: bool = False
+        self.cephx = cephx
+        self.data['cephx'] = self.cephx['cephx']
+        self.reporter_scheme: str = reporter_scheme
+        self.reporter_hostname: str = reporter_hostname
+        self.reporter_port: str = reporter_port
+        self.reporter_endpoint: str = reporter_endpoint
+        self.log = get_logger(__name__)
+        self.reporter_url: str = (f'{reporter_scheme}://{reporter_hostname}:'
+                                  f'{reporter_port}{reporter_endpoint}')
+        self.log.info(f'Reporter url set to {self.reporter_url}')
+
+    def main(self) -> None:
+        while not self.stop:
+            # Any logic to avoid sending all the system
+            # information on every loop can go here.
In a real + # scenario probably we should just send the sub-parts + # that have changed to minimize the traffic in + # dense clusters + self.log.debug('waiting for a lock in reporter loop.') + with self.system.lock: + if not self.system.pending_shutdown: + self.log.debug('lock acquired in reporter loop.') + if self.system.data_ready: + self.log.debug('data ready to be sent to the mgr.') + if not self.system.get_system() == self.system.previous_data: + self.log.info('data has changed since last iteration.') + self.data['patch'] = self.system.get_system() + try: + # TODO: add a timeout parameter to the reporter in the config file + self.log.info(f'sending data to {self.reporter_url}') + http_req(hostname=self.reporter_hostname, + port=self.reporter_port, + method='POST', + headers={'Content-Type': 'application/json'}, + endpoint=self.reporter_endpoint, + scheme=self.reporter_scheme, + data=json.dumps(self.data)) + except (HTTPError, URLError) as e: + self.log.error(f"The reporter couldn't send data to the mgr: {e}") + raise + # Need to add a new parameter 'max_retries' to the reporter if it can't + # send the data for more than x times, maybe the daemon should stop altogether + else: + self.system.previous_data = self.system.get_system() + else: + self.log.debug('no diff, not sending data to the mgr.') + self.log.debug('lock released in reporter loop.') + time.sleep(5) + self.log.debug('exiting reporter loop.') + raise SystemExit(0) diff --git a/src/ceph-node-proxy/ceph_node_proxy/util.py b/src/ceph-node-proxy/ceph_node_proxy/util.py new file mode 100644 index 000000000000..c6af0304b923 --- /dev/null +++ b/src/ceph-node-proxy/ceph_node_proxy/util.py @@ -0,0 +1,199 @@ +import logging +import yaml +import os +import time +import re +import ssl +import traceback +import threading +from tempfile import NamedTemporaryFile, _TemporaryFileWrapper +from urllib.error import HTTPError, URLError +from urllib.request import urlopen, Request +from typing import Dict, Callable, Any, Optional, MutableMapping, Tuple, Union + + +CONFIG: Dict[str, Any] = { + 'reporter': { + 'check_interval': 5, + 'push_data_max_retries': 30, + 'endpoint': 'https://%(mgr_host):%(mgr_port)/node-proxy/data', + }, + 'system': { + 'refresh_interval': 5 + }, + 'api': { + 'port': 9456, + }, + 'logging': { + 'level': logging.INFO, + } +} + + +def get_logger(name: str, level: Union[int, str] = logging.NOTSET) -> logging.Logger: + log_level: Union[int, str] = level + if log_level == logging.NOTSET: + log_level = CONFIG['logging']['level'] + logger = logging.getLogger(name) + logger.setLevel(log_level) + handler = logging.StreamHandler() + handler.setLevel(log_level) + fmt = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + handler.setFormatter(fmt) + logger.handlers.clear() + logger.addHandler(handler) + logger.propagate = False + + return logger + + +logger = get_logger(__name__) + + +class Config: + def __init__(self, + config_file: str = '/etc/ceph/node-proxy.yaml', + config: Dict[str, Any] = {}) -> None: + self.config_file = config_file + self.config = config + + self.load_config() + + def load_config(self) -> None: + if os.path.exists(self.config_file): + with open(self.config_file, 'r') as f: + self.config = yaml.safe_load(f) + else: + self.config = self.config + + for k, v in self.config.items(): + if k not in self.config.keys(): + self.config[k] = v + + for k, v in self.config.items(): + setattr(self, k, v) + + def reload(self, config_file: str = '') -> None: + if config_file != '': + self.config_file 
= config_file + self.load_config() + + +class BaseThread(threading.Thread): + def __init__(self) -> None: + super().__init__() + self.exc: Optional[Exception] = None + self.stop: bool = False + self.daemon = True + self.name = self.__class__.__name__ + self.log: logging.Logger = get_logger(__name__) + self.pending_shutdown: bool = False + + def run(self) -> None: + logger.info(f'Starting {self.name}') + try: + self.main() + except Exception as e: + self.exc = e + return + + def shutdown(self) -> None: + self.stop = True + self.pending_shutdown = True + + def check_status(self) -> bool: + logger.debug(f'Checking status of {self.name}') + if self.exc: + traceback.print_tb(self.exc.__traceback__) + logger.error(f'Caught exception: {self.exc.__class__.__name__}') + raise self.exc + if not self.is_alive(): + logger.info(f'{self.name} not alive') + self.start() + return True + + def main(self) -> None: + raise NotImplementedError() + + +def to_snake_case(name: str) -> str: + name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() + + +def normalize_dict(test_dict: Dict) -> Dict: + res = dict() + for key in test_dict.keys(): + if isinstance(test_dict[key], dict): + res[key.lower()] = normalize_dict(test_dict[key]) + else: + if test_dict[key] is None: + test_dict[key] = 'unknown' + res[key.lower()] = test_dict[key] + return res + + +def retry(exceptions: Any = Exception, retries: int = 20, delay: int = 1) -> Callable: + def decorator(f: Callable) -> Callable: + def _retry(*args: str, **kwargs: Any) -> Callable: + _tries = retries + while _tries > 1: + try: + logger.debug('{} {} attempt(s) left.'.format(f, _tries - 1)) + return f(*args, **kwargs) + except exceptions: + time.sleep(delay) + _tries -= 1 + logger.warn('{} has failed after {} tries'.format(f, retries)) + return f(*args, **kwargs) + return _retry + return decorator + + +def http_req(hostname: str = '', + port: str = '443', + method: Optional[str] = None, + headers: MutableMapping[str, str] = {}, + data: Optional[str] = None, + endpoint: str = '/', + scheme: str = 'https', + ssl_verify: bool = False, + timeout: Optional[int] = None, + ssl_ctx: Optional[Any] = None) -> Tuple[Any, Any, Any]: + + if not ssl_ctx: + ssl_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + if not ssl_verify: + ssl_ctx.check_hostname = False + ssl_ctx.verify_mode = ssl.CERT_NONE + else: + ssl_ctx.verify_mode = ssl.CERT_REQUIRED + + url: str = f'{scheme}://{hostname}:{port}{endpoint}' + _data = bytes(data, 'ascii') if data else None + _headers = headers + if data and not method: + method = 'POST' + if not _headers.get('Content-Type') and method in ['POST', 'PATCH']: + _headers['Content-Type'] = 'application/json' + try: + req = Request(url, _data, _headers, method=method) + with urlopen(req, context=ssl_ctx, timeout=timeout) as response: + response_str = response.read() + response_headers = response.headers + response_code = response.code + return response_headers, response_str.decode(), response_code + except (HTTPError, URLError) as e: + # Log level is debug only. 
+        # We let whatever calls `http_req()` catch and print the error
+        logger.debug(f'url={url} err={e}')
+        # handle error here if needed
+        raise
+
+
+def write_tmp_file(data: str, prefix_name: str = 'node-proxy-') -> _TemporaryFileWrapper:
+    f = NamedTemporaryFile(prefix=prefix_name)
+    os.fchmod(f.fileno(), 0o600)
+    f.write(data.encode('utf-8'))
+    f.flush()
+    return f
diff --git a/src/ceph-node-proxy/setup.py b/src/ceph-node-proxy/setup.py
new file mode 100644
index 000000000000..7dcc7cdf5bf8
--- /dev/null
+++ b/src/ceph-node-proxy/setup.py
@@ -0,0 +1,39 @@
+from setuptools import setup, find_packages
+import os
+
+
+setup(
+    name='ceph-node-proxy',
+    version='1.0.0',
+    packages=find_packages(),
+
+    author='',
+    author_email='gabrioux@ibm.com',
+    description='node-proxy agent to inventory and report hardware statuses.',
+    license='LGPLv2+',
+    keywords='ceph hardware inventory monitoring',
+    url='https://github.com/ceph/ceph',
+    zip_safe=False,
+    install_requires='ceph',
+    dependency_links=[''.join(['file://', os.path.join(os.getcwd(), '../',
+                                                       'python-common#egg=ceph-1.0.0')])],
+    tests_require=[
+        'pytest >=2.1.3',
+        'tox',
+        'ceph',
+    ],
+    entry_points=dict(
+        console_scripts=[
+            'ceph-node-proxy = ceph_node_proxy.main:main',
+        ],
+    ),
+    classifiers=[
+        'Environment :: Console',
+        'Intended Audience :: Information Technology',
+        'Intended Audience :: System Administrators',
+        'Operating System :: POSIX :: Linux',
+        'License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3.9',
+    ]
+)
diff --git a/src/ceph-run b/src/ceph-run
index 764101c564f8..c28244d90f83 100755
--- a/src/ceph-run
+++ b/src/ceph-run
@@ -3,7 +3,7 @@
 sleep=5
 no_restart=0
-if [ $1 == "--no-restart" ]; then
+if [ "$1" = "--no-restart" ]; then
     no_restart=1
     shift
 fi
diff --git a/src/ceph-volume/ceph_volume/__init__.py b/src/ceph-volume/ceph_volume/__init__.py
index dad83c95bb7a..814619cfdddb 100644
--- a/src/ceph-volume/ceph_volume/__init__.py
+++ b/src/ceph-volume/ceph_volume/__init__.py
@@ -1,8 +1,34 @@
+import os
+import logging
 from collections import namedtuple
 sys_info = namedtuple('sys_info', ['devices'])
 sys_info.devices = dict()
+logger = logging.getLogger(__name__)
+BEING_REPLACED_HEADER: str = 'CEPH_DEVICE_BEING_REPLACED'
+
+
+class AllowLoopDevices:
+    allow = False
+    warned = False
+
+    @classmethod
+    def __call__(cls) -> bool:
+        val = os.environ.get("CEPH_VOLUME_ALLOW_LOOP_DEVICES", "false").lower()
+        if val not in ("false", 'no', '0'):
+            cls.allow = True
+            if not cls.warned:
+                logger.warning(
+                    "CEPH_VOLUME_ALLOW_LOOP_DEVICES is set in your "
+                    "environment, so we will allow the use of unattached loop"
+                    " devices as disks. This feature is intended for "
+                    "development purposes only and will never be supported in"
+                    " production. Issues filed based on this behavior will "
+                    "likely be ignored."
+ ) + cls.warned = True + return cls.allow class UnloadedConfig(object): @@ -14,9 +40,12 @@ class UnloadedConfig(object): def __getattr__(self, *a): raise RuntimeError("No valid ceph configuration file was loaded.") -conf = namedtuple('config', ['ceph', 'cluster', 'verbosity', 'path', 'log_path']) + +allow_loop_devices = AllowLoopDevices() +conf = namedtuple('config', ['ceph', 'cluster', 'verbosity', 'path', 'log_path', 'dmcrypt_no_workqueue']) conf.ceph = UnloadedConfig() +conf.dmcrypt_no_workqueue = None __version__ = "1.0.0" -__release__ = "reef" +__release__ = "squid" diff --git a/src/ceph-volume/ceph_volume/activate/main.py b/src/ceph-volume/ceph_volume/activate/main.py index 1cef038b62fe..76fba733f0fb 100644 --- a/src/ceph-volume/ceph_volume/activate/main.py +++ b/src/ceph-volume/ceph_volume/activate/main.py @@ -3,8 +3,8 @@ import argparse from ceph_volume import terminal -from ceph_volume.devices.lvm.activate import Activate as LVMActivate -from ceph_volume.devices.raw.activate import Activate as RAWActivate +from ceph_volume.objectstore.lvmbluestore import LvmBlueStore as LVMActivate +from ceph_volume.objectstore.rawbluestore import RawBlueStore as RAWActivate from ceph_volume.devices.simple.activate import Activate as SimpleActivate @@ -27,7 +27,8 @@ def main(self): ) parser.add_argument( '--osd-uuid', - help='OSD UUID to activate' + help='OSD UUID to activate', + dest='osd_fsid' ) parser.add_argument( '--no-systemd', @@ -44,27 +45,21 @@ def main(self): # first try raw try: - RAWActivate([]).activate( - devs=None, - start_osd_id=self.args.osd_id, - start_osd_uuid=self.args.osd_uuid, - tmpfs=not self.args.no_tmpfs, - systemd=not self.args.no_systemd, - ) + raw_activate = RAWActivate(self.args) + raw_activate.activate() return except Exception as e: terminal.info(f'Failed to activate via raw: {e}') # then try lvm try: - LVMActivate([]).activate( - argparse.Namespace( - osd_id=self.args.osd_id, - osd_fsid=self.args.osd_uuid, - no_tmpfs=self.args.no_tmpfs, - no_systemd=self.args.no_systemd, - ) - ) + lvm_activate = LVMActivate(argparse.Namespace( + no_tmpfs=self.args.no_tmpfs, + no_systemd=self.args.no_systemd, + osd_fsid=self.args.osd_fsid)) + lvm_activate.activate(None, + self.args.osd_id, + self.args.osd_fsid) return except Exception as e: terminal.info(f'Failed to activate via LVM: {e}') @@ -74,7 +69,7 @@ def main(self): SimpleActivate([]).activate( argparse.Namespace( osd_id=self.args.osd_id, - osd_fsid=self.args.osd_uuid, + osd_fsid=self.args.osd_fsid, no_systemd=self.args.no_systemd, ) ) diff --git a/src/ceph-volume/ceph_volume/api/lvm.py b/src/ceph-volume/ceph_volume/api/lvm.py index dcc4f1862721..fc376f891fd2 100644 --- a/src/ceph-volume/ceph_volume/api/lvm.py +++ b/src/ceph-volume/ceph_volume/api/lvm.py @@ -6,11 +6,12 @@ import logging import os import uuid -import re from itertools import repeat from math import floor from ceph_volume import process, util, conf from ceph_volume.exceptions import SizeAllocationError +from typing import Any, Dict + logger = logging.getLogger(__name__) @@ -808,13 +809,16 @@ def get_all_devices_vgs(name_prefix=''): '--units=b', '--nosuffix'] -class Volume(object): +class Volume: """ Represents a Logical Volume from LVM, with some top-level attributes like ``lv_name`` and parsed tags as a dictionary of key/value pairs. 
""" - def __init__(self, **kw): + def __init__(self, **kw: str) -> None: + self.lv_path: str = '' + self.lv_name: str = '' + self.lv_uuid: str = '' for k, v in kw.items(): setattr(self, k, v) self.lv_api = kw @@ -825,13 +829,13 @@ def __init__(self, **kw): self.encrypted = self.tags.get('ceph.encrypted', '0') == '1' self.used_by_ceph = 'ceph.osd_id' in self.tags - def __str__(self): + def __str__(self) -> str: return '<%s>' % self.lv_api['lv_path'] - def __repr__(self): + def __repr__(self) -> str: return self.__str__() - def as_dict(self): + def as_dict(self) -> Dict[str, Any]: obj = {} obj.update(self.lv_api) obj['tags'] = self.tags @@ -840,7 +844,7 @@ def as_dict(self): obj['path'] = self.lv_path return obj - def report(self): + def report(self) -> Dict[str, Any]: if not self.used_by_ceph: return { 'name': self.lv_name, @@ -1210,39 +1214,3 @@ def get_lv_by_fullname(full_name): except ValueError: res_lv = None return res_lv - -def get_lv_path_from_mapper(mapper): - """ - This functions translates a given mapper device under the format: - /dev/mapper/LV to the format /dev/VG/LV. - eg: - from: - /dev/mapper/ceph--c1a97e46--234c--46aa--a549--3ca1d1f356a9-osd--block--32e8e896--172e--4a38--a06a--3702598510ec - to: - /dev/ceph-c1a97e46-234c-46aa-a549-3ca1d1f356a9/osd-block-32e8e896-172e-4a38-a06a-3702598510ec - """ - results = re.split(r'^\/dev\/mapper\/(.+\w)-(\w.+)', mapper) - results = list(filter(None, results)) - - if len(results) != 2: - return None - - return f"/dev/{results[0].replace('--', '-')}/{results[1].replace('--', '-')}" - -def get_mapper_from_lv_path(lv_path): - """ - This functions translates a given lv path under the format: - /dev/VG/LV to the format /dev/mapper/LV. - eg: - from: - /dev/ceph-c1a97e46-234c-46aa-a549-3ca1d1f356a9/osd-block-32e8e896-172e-4a38-a06a-3702598510ec - to: - /dev/mapper/ceph--c1a97e46--234c--46aa--a549--3ca1d1f356a9-osd--block--32e8e896--172e--4a38--a06a--3702598510ec - """ - results = re.split(r'^\/dev\/(.+\w)-(\w.+)', lv_path) - results = list(filter(None, results)) - - if len(results) != 2: - return None - - return f"/dev/mapper/{results[0].replace('-', '--')}/{results[1].replace('-', '--')}" diff --git a/src/ceph-volume/ceph_volume/devices/lvm/activate.py b/src/ceph-volume/ceph_volume/devices/lvm/activate.py index feb91053b447..7b4d57c95091 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/activate.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/activate.py @@ -1,214 +1,20 @@ from __future__ import print_function import argparse import logging -import os from textwrap import dedent -from ceph_volume import process, conf, decorators, terminal, configuration -from ceph_volume.util import system, disk -from ceph_volume.util import prepare as prepare_utils -from ceph_volume.util import encryption as encryption_utils -from ceph_volume.systemd import systemctl -from ceph_volume.api import lvm as api -from .listing import direct_report +from ceph_volume import objectstore logger = logging.getLogger(__name__) - -def get_osd_device_path(osd_lvs, device_type, dmcrypt_secret=None): - """ - ``device_type`` can be one of ``db``, ``wal`` or ``block`` so that we can - query LVs on system and fallback to querying the uuid if that is not - present. - - Return a path if possible, failing to do that a ``None``, since some of - these devices are optional. 
- """ - osd_block_lv = None - for lv in osd_lvs: - if lv.tags.get('ceph.type') == 'block': - osd_block_lv = lv - break - if osd_block_lv: - is_encrypted = osd_block_lv.tags.get('ceph.encrypted', '0') == '1' - logger.debug('Found block device (%s) with encryption: %s', osd_block_lv.name, is_encrypted) - uuid_tag = 'ceph.%s_uuid' % device_type - device_uuid = osd_block_lv.tags.get(uuid_tag) - if not device_uuid: - return None - - device_lv = None - for lv in osd_lvs: - if lv.tags.get('ceph.type') == device_type: - device_lv = lv - break - if device_lv: - if is_encrypted: - encryption_utils.luks_open(dmcrypt_secret, device_lv.lv_path, device_uuid) - return '/dev/mapper/%s' % device_uuid - return device_lv.lv_path - - # this could be a regular device, so query it with blkid - physical_device = disk.get_device_from_partuuid(device_uuid) - if physical_device: - if is_encrypted: - encryption_utils.luks_open(dmcrypt_secret, physical_device, device_uuid) - return '/dev/mapper/%s' % device_uuid - return physical_device - - raise RuntimeError('could not find %s with uuid %s' % (device_type, device_uuid)) - - -def activate_bluestore(osd_lvs, no_systemd=False, no_tmpfs=False): - for lv in osd_lvs: - if lv.tags.get('ceph.type') == 'block': - osd_block_lv = lv - break - else: - raise RuntimeError('could not find a bluestore OSD to activate') - - is_encrypted = osd_block_lv.tags.get('ceph.encrypted', '0') == '1' - dmcrypt_secret = None - osd_id = osd_block_lv.tags['ceph.osd_id'] - conf.cluster = osd_block_lv.tags['ceph.cluster_name'] - osd_fsid = osd_block_lv.tags['ceph.osd_fsid'] - configuration.load_ceph_conf_path(osd_block_lv.tags['ceph.cluster_name']) - configuration.load() - - # mount on tmpfs the osd directory - osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id) - if not system.path_is_mounted(osd_path): - # mkdir -p and mount as tmpfs - prepare_utils.create_osd_path(osd_id, tmpfs=not no_tmpfs) - # XXX This needs to be removed once ceph-bluestore-tool can deal with - # symlinks that exist in the osd dir - for link_name in ['block', 'block.db', 'block.wal']: - link_path = os.path.join(osd_path, link_name) - if os.path.exists(link_path): - os.unlink(os.path.join(osd_path, link_name)) - # encryption is handled here, before priming the OSD dir - if is_encrypted: - osd_lv_path = '/dev/mapper/%s' % osd_block_lv.lv_uuid - lockbox_secret = osd_block_lv.tags['ceph.cephx_lockbox_secret'] - encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret) - dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid) - encryption_utils.luks_open(dmcrypt_secret, osd_block_lv.lv_path, osd_block_lv.lv_uuid) - else: - osd_lv_path = osd_block_lv.lv_path - - db_device_path = get_osd_device_path(osd_lvs, 'db', dmcrypt_secret=dmcrypt_secret) - wal_device_path = get_osd_device_path(osd_lvs, 'wal', dmcrypt_secret=dmcrypt_secret) - - # Once symlinks are removed, the osd dir can be 'primed again. 
chown first, - # regardless of what currently exists so that ``prime-osd-dir`` can succeed - # even if permissions are somehow messed up - system.chown(osd_path) - prime_command = [ - 'ceph-bluestore-tool', '--cluster=%s' % conf.cluster, - 'prime-osd-dir', '--dev', osd_lv_path, - '--path', osd_path, '--no-mon-config'] - - process.run(prime_command) - # always re-do the symlink regardless if it exists, so that the block, - # block.wal, and block.db devices that may have changed can be mapped - # correctly every time - process.run(['ln', '-snf', osd_lv_path, os.path.join(osd_path, 'block')]) - system.chown(os.path.join(osd_path, 'block')) - system.chown(osd_path) - if db_device_path: - destination = os.path.join(osd_path, 'block.db') - process.run(['ln', '-snf', db_device_path, destination]) - system.chown(db_device_path) - system.chown(destination) - if wal_device_path: - destination = os.path.join(osd_path, 'block.wal') - process.run(['ln', '-snf', wal_device_path, destination]) - system.chown(wal_device_path) - system.chown(destination) - - if no_systemd is False: - # enable the ceph-volume unit for this OSD - systemctl.enable_volume(osd_id, osd_fsid, 'lvm') - - # enable the OSD - systemctl.enable_osd(osd_id) - - # start the OSD - systemctl.start_osd(osd_id) - terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id) - - class Activate(object): - help = 'Discover and mount the LVM device associated with an OSD ID and start the Ceph OSD' - def __init__(self, argv): + def __init__(self, argv, args=None): + self.objectstore = None self.argv = argv - - @decorators.needs_root - def activate_all(self, args): - listed_osds = direct_report() - osds = {} - for osd_id, devices in listed_osds.items(): - # the metadata for all devices in each OSD will contain - # the FSID which is required for activation - for device in devices: - fsid = device.get('tags', {}).get('ceph.osd_fsid') - if fsid: - osds[fsid] = osd_id - break - if not osds: - terminal.warning('Was unable to find any OSDs to activate') - terminal.warning('Verify OSDs are present with "ceph-volume lvm list"') - return - for osd_fsid, osd_id in osds.items(): - if not args.no_systemd and systemctl.osd_is_active(osd_id): - terminal.warning( - 'OSD ID %s FSID %s process is active. 
Skipping activation' % (osd_id, osd_fsid) - ) - else: - terminal.info('Activating OSD ID %s FSID %s' % (osd_id, osd_fsid)) - self.activate(args, osd_id=osd_id, osd_fsid=osd_fsid) - - @decorators.needs_root - def activate(self, args, osd_id=None, osd_fsid=None): - """ - :param args: The parsed arguments coming from the CLI - :param osd_id: When activating all, this gets populated with an - existing OSD ID - :param osd_fsid: When activating all, this gets populated with an - existing OSD FSID - """ - osd_id = osd_id if osd_id else args.osd_id - osd_fsid = osd_fsid if osd_fsid else args.osd_fsid - - if osd_id and osd_fsid: - tags = {'ceph.osd_id': osd_id, 'ceph.osd_fsid': osd_fsid} - elif not osd_id and osd_fsid: - tags = {'ceph.osd_fsid': osd_fsid} - elif osd_id and not osd_fsid: - raise RuntimeError('could not activate osd.{}, please provide the ' - 'osd_fsid too'.format(osd_id)) - else: - raise RuntimeError('Please provide both osd_id and osd_fsid') - lvs = api.get_lvs(tags=tags) - if not lvs: - raise RuntimeError('could not find osd.%s with osd_fsid %s' % - (osd_id, osd_fsid)) - - # This argument is only available when passed in directly or via - # systemd, not when ``create`` is being used - # placeholder when a new objectstore support will be added - if getattr(args, 'auto_detect_objectstore', False): - logger.info('auto detecting objectstore') - return activate_bluestore(lvs, args.no_systemd) - - # explicit 'objectstore' flags take precedence - if getattr(args, 'bluestore', False): - activate_bluestore(lvs, args.no_systemd, getattr(args, 'no_tmpfs', False)) - elif any('ceph.block_device' in lv.tags for lv in lvs): - activate_bluestore(lvs, args.no_systemd, getattr(args, 'no_tmpfs', False)) + self.args = args def main(self): sub_command_help = dedent(""" @@ -254,6 +60,14 @@ def main(self): action='store_true', help='force bluestore objectstore activation', ) + parser.add_argument( + '--objectstore', + dest='objectstore', + help='The OSD objectstore.', + default='bluestore', + choices=['bluestore', 'seastore'], + type=str, + ) parser.add_argument( '--all', dest='activate_all', @@ -271,11 +85,15 @@ def main(self): action='store_true', help='Do not use a tmpfs mount for OSD data dir' ) - if len(self.argv) == 0: + if len(self.argv) == 0 and self.args is None: print(sub_command_help) return - args = parser.parse_args(self.argv) - if args.activate_all: - self.activate_all(args) + if self.args is None: + self.args = parser.parse_args(self.argv) + if self.args.bluestore: + self.args.objectstore = 'bluestore' + self.objectstore = objectstore.mapping['LVM'][self.args.objectstore](args=self.args) + if self.args.activate_all: + self.objectstore.activate_all() else: - self.activate(args) + self.objectstore.activate() diff --git a/src/ceph-volume/ceph_volume/devices/lvm/batch.py b/src/ceph-volume/ceph_volume/devices/lvm/batch.py index 69a3f672b482..c1549d8414be 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/batch.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/batch.py @@ -224,7 +224,6 @@ def __init__(self, argv): action='store_true', help=('deploy multi-device OSDs if rotational and non-rotational drives ' 'are passed in DEVICES'), - default=True ) parser.add_argument( '--no-auto', @@ -233,10 +232,18 @@ def __init__(self, argv): help=('deploy standalone OSDs if rotational and non-rotational drives ' 'are passed in DEVICES'), ) + parser.add_argument( + '--objectstore', + dest='objectstore', + help='The OSD objectstore.', + default='bluestore', + choices=['bluestore', 'seastore'], + type=str, + 
) parser.add_argument( '--bluestore', action='store_true', - help='bluestore objectstore (default)', + help='bluestore objectstore (default). (DEPRECATED: use --objectstore instead)', ) parser.add_argument( '--report', @@ -256,9 +263,15 @@ def __init__(self, argv): ) parser.add_argument( '--dmcrypt', - action='store_true', + action=arg_validators.DmcryptAction, help='Enable device encryption via dm-crypt', ) + parser.add_argument( + '--with-tpm', + dest='with_tpm', + help='Whether encrypted OSDs should be enrolled with TPM.', + action='store_true' + ) parser.add_argument( '--crush-device-class', dest='crush_device_class', @@ -323,6 +336,8 @@ def __init__(self, argv): type=arg_validators.valid_osd_id ) self.args = parser.parse_args(argv) + if self.args.bluestore: + self.args.objectstore = 'bluestore' self.parser = parser for dev_list in ['', 'db_', 'wal_']: setattr(self, '{}usable'.format(dev_list), []) @@ -367,7 +382,6 @@ def _sort_rotational_disks(self): ''' mlogger.warning('DEPRECATION NOTICE') mlogger.warning('You are using the legacy automatic disk sorting behavior') - mlogger.warning('The Pacific release will change the default to --no-auto') rotating = [] ssd = [] for d in self.args.devices: @@ -383,11 +397,6 @@ def main(self): if not self.args.devices: return self.parser.print_help() - # Default to bluestore here since defaulting it in add_argument may - # cause both to be True - if not self.args.bluestore: - self.args.bluestore = True - if (self.args.auto and not self.args.db_devices and not self.args.wal_devices): self._sort_rotational_disks() @@ -398,7 +407,7 @@ def main(self): self.args.db_devices, self.args.wal_devices) - plan = self.get_plan(self.args) + plan = self.get_deployment_layout() if self.args.report: self.report(plan) @@ -418,6 +427,7 @@ def _execute(self, plan): global_args = [ 'bluestore', 'dmcrypt', + 'with_tpm', 'crush_device_class', 'no_systemd', ] @@ -425,43 +435,38 @@ def _execute(self, plan): for osd in plan: args = osd.get_args(defaults) if self.args.prepare: - p = Prepare([]) - p.safe_prepare(argparse.Namespace(**args)) + p = Prepare([], args=argparse.Namespace(**args)) + p.main() else: - c = Create([]) - c.create(argparse.Namespace(**args)) - - - def get_plan(self, args): - if args.bluestore: - plan = self.get_deployment_layout(args, args.devices, args.db_devices, - args.wal_devices) - return plan + c = Create([], args=argparse.Namespace(**args)) + c.create() - def get_deployment_layout(self, args, devices, fast_devices=[], - very_fast_devices=[]): + def get_deployment_layout(self): ''' The methods here are mostly just organization, error reporting and setting up of (default) args. The heavy lifting code for the deployment layout can be found in the static get_*_osds and get_*_fast_allocs functions. 
''' + devices = self.args.devices + fast_devices = self.args.db_devices + very_fast_devices = self.args.wal_devices plan = [] phys_devs, lvm_devs = separate_devices_from_lvs(devices) mlogger.debug(('passed data devices: {} physical,' ' {} LVM').format(len(phys_devs), len(lvm_devs))) - plan.extend(get_physical_osds(phys_devs, args)) + plan.extend(get_physical_osds(phys_devs, self.args)) - plan.extend(get_lvm_osds(lvm_devs, args)) + plan.extend(get_lvm_osds(lvm_devs, self.args)) num_osds = len(plan) if num_osds == 0: mlogger.info('All data devices are unavailable') return plan - requested_osds = args.osds_per_device * len(phys_devs) + len(lvm_devs) + requested_osds = self.args.osds_per_device * len(phys_devs) + len(lvm_devs) - if args.bluestore: + if self.args.objectstore == 'bluestore': fast_type = 'block_db' fast_allocations = self.fast_allocations(fast_devices, requested_osds, @@ -491,7 +496,7 @@ def get_deployment_layout(self, args, devices, fast_devices=[], if fast_devices: osd.add_fast_device(*fast_allocations.pop(), type_=fast_type) - if very_fast_devices and args.bluestore: + if very_fast_devices and self.args.objectstore == 'bluestore': osd.add_very_fast_device(*very_fast_allocations.pop()) return plan diff --git a/src/ceph-volume/ceph_volume/devices/lvm/common.py b/src/ceph-volume/ceph_volume/devices/lvm/common.py index 35e53181aff0..e18d98bb6fbd 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/common.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/common.py @@ -36,6 +36,13 @@ def rollback_osd(args, osd_id=None): common_args = { + '--objectstore': { + 'dest': 'objectstore', + 'help': 'The OSD objectstore.', + 'default': 'bluestore', + 'choices': ['bluestore', 'seastore'], + 'type': str, + }, '--data': { 'help': 'OSD data path. A physical device or logical volume', 'required': True, @@ -73,9 +80,14 @@ def rollback_osd(args, osd_id=None): 'default': "", }, '--dmcrypt': { - 'action': 'store_true', + 'action': arg_validators.DmcryptAction, 'help': 'Enable device encryption via dm-crypt', }, + '--with-tpm': { + 'dest': 'with_tpm', + 'help': 'Whether encrypted OSDs should be enrolled with TPM.', + 'action': 'store_true' + }, '--no-systemd': { 'dest': 'no_systemd', 'action': 'store_true', @@ -86,7 +98,7 @@ def rollback_osd(args, osd_id=None): bluestore_args = { '--bluestore': { 'action': 'store_true', - 'help': 'Use the bluestore objectstore', + 'help': 'Use the bluestore objectstore. 
(DEPRECATED: use --objectstore instead)', }, '--block.db': { 'dest': 'block_db', diff --git a/src/ceph-volume/ceph_volume/devices/lvm/create.py b/src/ceph-volume/ceph_volume/devices/lvm/create.py index 631a21b239d2..6a4d11b99bf5 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/create.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/create.py @@ -3,10 +3,8 @@ import logging from ceph_volume.util import system from ceph_volume.util.arg_validators import exclude_group_options -from ceph_volume import decorators, terminal +from ceph_volume import decorators, terminal, objectstore from .common import create_parser, rollback_osd -from .prepare import Prepare -from .activate import Activate logger = logging.getLogger(__name__) @@ -15,27 +13,29 @@ class Create(object): help = 'Create a new OSD from an LVM device' - def __init__(self, argv): + def __init__(self, argv, args=None): + self.objectstore = None self.argv = argv + self.args = args @decorators.needs_root - def create(self, args): - if not args.osd_fsid: - args.osd_fsid = system.generate_uuid() - prepare_step = Prepare([]) - prepare_step.safe_prepare(args) - osd_id = prepare_step.osd_id + def create(self): + if not self.args.osd_fsid: + self.args.osd_fsid = system.generate_uuid() + self.objectstore = objectstore.mapping['LVM'][self.args.objectstore](args=self.args) + self.objectstore.safe_prepare() + osd_id = self.objectstore.osd_id try: # we try this for activate only when 'creating' an OSD, because a rollback should not # happen when doing normal activation. For example when starting an OSD, systemd will call # activate, which would never need to be rolled back. - Activate([]).activate(args) + self.objectstore.activate() except Exception: logger.exception('lvm activate was unable to complete, while creating the OSD') logger.info('will rollback OSD ID creation') - rollback_osd(args, osd_id) + rollback_osd(self.args, osd_id) raise - terminal.success("ceph-volume lvm create successful for: %s" % args.data) + terminal.success("ceph-volume lvm create successful for: %s" % self.args.data) def main(self): sub_command_help = dedent(""" @@ -69,9 +69,9 @@ def main(self): print(sub_command_help) return exclude_group_options(parser, groups=['bluestore'], argv=self.argv) - args = parser.parse_args(self.argv) - # Default to bluestore here since defaulting it in add_argument may - # cause both to be True - if not args.bluestore: - args.bluestore = True - self.create(args) + if self.args is None: + self.args = parser.parse_args(self.argv) + if self.args.bluestore: + self.args.objectstore = 'bluestore' + self.objectstore = objectstore.mapping['LVM'][self.args.objectstore] + self.create() diff --git a/src/ceph-volume/ceph_volume/devices/lvm/listing.py b/src/ceph-volume/ceph_volume/devices/lvm/listing.py index c16afdaa7672..8fb9d8ddcf87 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/listing.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/listing.py @@ -153,7 +153,9 @@ def single_report(self, arg): elif arg[0] == '/': lv = api.get_lvs_from_path(arg) else: - lv = [api.get_single_lv(filters={'lv_name': arg.split('/')[1]})] + vg_name, lv_name = arg.split('/') + lv = [api.get_single_lv(filters={'lv_name': lv_name, + 'vg_name': vg_name})] report = self.create_report(lv) diff --git a/src/ceph-volume/ceph_volume/devices/lvm/migrate.py b/src/ceph-volume/ceph_volume/devices/lvm/migrate.py index 64589a2d6284..83ed16845e77 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/migrate.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/migrate.py @@ -10,7 +10,7 
@@ from ceph_volume import decorators, terminal, process from ceph_volume.api import lvm as api from ceph_volume.systemd import systemctl - +from ceph_volume.devices.lvm import zap logger = logging.getLogger(__name__) mlogger = terminal.MultiLogger(__name__) @@ -167,9 +167,14 @@ def update_tags_when_lv_create(self, create_type): aux_dev.lv_api.set_tags(tags) def remove_lvs(self, source_devices, target_type): - remaining_devices = [self.data_device, self.db_device, self.wal_device] + remaining_devices = [self.data_device] + if self.db_device: + remaining_devices.append(self.db_device) + if self.wal_device: + remaining_devices.append(self.wal_device) outdated_tags = [] + removed_devices = [] for device, type in source_devices: if type == "block" or type == target_type: continue @@ -178,10 +183,13 @@ def remove_lvs(self, source_devices, target_type): outdated_tags.append("ceph.{}_uuid".format(type)) outdated_tags.append("ceph.{}_device".format(type)) device.lv_api.clear_tags() + removed_devices.append(device) + if len(outdated_tags) > 0: for d in remaining_devices: if d and d.is_lv: d.lv_api.clear_tags(outdated_tags) + return removed_devices def replace_lvs(self, source_devices, target_type): remaining_devices = [self.data_device] @@ -191,6 +199,7 @@ def replace_lvs(self, source_devices, target_type): remaining_devices.append(self.wal_device) outdated_tags = [] + removed_devices = [] for device, type in source_devices: if type == "block": continue @@ -199,6 +208,7 @@ def replace_lvs(self, source_devices, target_type): outdated_tags.append("ceph.{}_uuid".format(type)) outdated_tags.append("ceph.{}_device".format(type)) device.lv_api.clear_tags() + removed_devices.append(device) new_tags = {} new_tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid @@ -220,6 +230,7 @@ def replace_lvs(self, source_devices, target_type): tags["ceph.{}_uuid".format(target_type)] = self.target_lv.lv_uuid tags["ceph.{}_device".format(target_type)] = self.target_lv.lv_path self.target_lv.set_tags(tags) + return removed_devices def undo(self): mlogger.info( @@ -335,7 +346,7 @@ def migrate_to_new(self, osd_id, osd_fsid, devices, target_lv): # ceph-bluestore-tool removes source volume(s) other than block one # and attaches target one after successful migration - tag_tracker.replace_lvs(source_devices, target_type) + removed_devices = tag_tracker.replace_lvs(source_devices, target_type) osd_path = get_osd_path(osd_id, osd_fsid) source_args = self.get_source_args(osd_path, source_devices) @@ -360,6 +371,9 @@ def migrate_to_new(self, osd_id, osd_fsid, devices, target_lv): target_type))) if tag_tracker.data_device.lv_api.encrypted: self.close_encrypted(source_devices) + for d in removed_devices: + if d and d.is_lv: + zap.Zap([d.lv_api.lv_path]).main() terminal.success('Migration successful.') except: @@ -391,7 +405,7 @@ def migrate_to_existing(self, osd_id, osd_fsid, devices, target_lv): try: # ceph-bluestore-tool removes source volume(s) other than # block and target ones after successful migration - tag_tracker.remove_lvs(source_devices, target_type) + removed_devices = tag_tracker.remove_lvs(source_devices, target_type) source_args = self.get_source_args(osd_path, source_devices) mlogger.info("Migrate to existing, Source: {} Target: {}".format( source_args, target_path)) @@ -411,6 +425,9 @@ def migrate_to_existing(self, osd_id, osd_fsid, devices, target_lv): 'Failed to migrate to : {}'.format(self.args.target)) if tag_tracker.data_device.lv_api.encrypted: self.close_encrypted(source_devices) + for d in 
removed_devices: + if d and d.is_lv: + zap.Zap([d.lv_api.lv_path]).main() terminal.success('Migration successful.') except: tag_tracker.undo() diff --git a/src/ceph-volume/ceph_volume/devices/lvm/prepare.py b/src/ceph-volume/ceph_volume/devices/lvm/prepare.py index 85c8a1467712..18fc1df03d8d 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/prepare.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/prepare.py @@ -1,290 +1,23 @@ from __future__ import print_function -import json import logging from textwrap import dedent -from ceph_volume.util import prepare as prepare_utils -from ceph_volume.util import encryption as encryption_utils -from ceph_volume.util import system, disk -from ceph_volume.util.arg_validators import exclude_group_options -from ceph_volume import conf, decorators, terminal -from ceph_volume.api import lvm as api -from .common import prepare_parser, rollback_osd +from ceph_volume import objectstore +from .common import prepare_parser logger = logging.getLogger(__name__) -def prepare_dmcrypt(key, device, device_type, tags): - """ - Helper for devices that are encrypted. The operations needed for - block, db, wal devices are all the same - """ - if not device: - return '' - tag_name = 'ceph.%s_uuid' % device_type - uuid = tags[tag_name] - return encryption_utils.prepare_dmcrypt(key, device, uuid) - -def prepare_bluestore(block, wal, db, secrets, tags, osd_id, fsid): - """ - :param block: The name of the logical volume for the bluestore data - :param wal: a regular/plain disk or logical volume, to be used for block.wal - :param db: a regular/plain disk or logical volume, to be used for block.db - :param secrets: A dict with the secrets needed to create the osd (e.g. cephx) - :param id_: The OSD id - :param fsid: The OSD fsid, also known as the OSD UUID - """ - cephx_secret = secrets.get('cephx_secret', prepare_utils.create_key()) - # encryption-only operations - if secrets.get('dmcrypt_key'): - # If encrypted, there is no need to create the lockbox keyring file because - # bluestore re-creates the files and does not have support for other files - # like the custom lockbox one. This will need to be done on activation. 
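Note: with the migrate.py hunks above, remove_lvs() and replace_lvs() now return the source devices whose tags were cleared, and the migrate paths zap those LVs only after ceph-bluestore-tool has finished successfully. A minimal, self-contained sketch of that contract; the classes below are stand-ins, not the real ceph_volume objects:

    from types import SimpleNamespace

    def zap_freed_sources(removed_devices, zap_cls):
        # mirrors the loop added to migrate_to_new()/migrate_to_existing():
        # only LV-backed sources are zapped, and only after a successful migration
        for d in removed_devices:
            if d and d.is_lv:
                zap_cls([d.lv_api.lv_path]).main()

    class FakeZap:
        # stand-in for ceph_volume.devices.lvm.zap.Zap, which takes an argv-style list
        def __init__(self, argv):
            self.argv = argv
        def main(self):
            print('would zap', self.argv)

    old_db = SimpleNamespace(is_lv=True,
                             lv_api=SimpleNamespace(lv_path='/dev/vg_osd/db-old'))
    zap_freed_sources([old_db, None], FakeZap)   # prints: would zap ['/dev/vg_osd/db-old']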
- # format and open ('decrypt' devices) and re-assign the device and journal - # variables so that the rest of the process can use the mapper paths - key = secrets['dmcrypt_key'] - block = prepare_dmcrypt(key, block, 'block', tags) - wal = prepare_dmcrypt(key, wal, 'wal', tags) - db = prepare_dmcrypt(key, db, 'db', tags) - - # create the directory - prepare_utils.create_osd_path(osd_id, tmpfs=True) - # symlink the block - prepare_utils.link_block(block, osd_id) - # get the latest monmap - prepare_utils.get_monmap(osd_id) - # write the OSD keyring if it doesn't exist already - prepare_utils.write_keyring(osd_id, cephx_secret) - # prepare the osd filesystem - prepare_utils.osd_mkfs_bluestore( - osd_id, fsid, - keyring=cephx_secret, - wal=wal, - db=db - ) - - class Prepare(object): help = 'Format an LVM device and associate it with an OSD' - def __init__(self, argv): + def __init__(self, argv, args=None): + self.objectstore = None self.argv = argv + self.args = args self.osd_id = None - def get_ptuuid(self, argument): - uuid = disk.get_partuuid(argument) - if not uuid: - terminal.error('blkid could not detect a PARTUUID for device: %s' % argument) - raise RuntimeError('unable to use device') - return uuid - - def setup_device(self, device_type, device_name, tags, size, slots): - """ - Check if ``device`` is an lv, if so, set the tags, making sure to - update the tags with the lv_uuid and lv_path which the incoming tags - will not have. - - If the device is not a logical volume, then retrieve the partition UUID - by querying ``blkid`` - """ - if device_name is None: - return '', '', tags - tags['ceph.type'] = device_type - tags['ceph.vdo'] = api.is_vdo(device_name) - - try: - vg_name, lv_name = device_name.split('/') - lv = api.get_single_lv(filters={'lv_name': lv_name, - 'vg_name': vg_name}) - except ValueError: - lv = None - - if lv: - lv_uuid = lv.lv_uuid - path = lv.lv_path - tags['ceph.%s_uuid' % device_type] = lv_uuid - tags['ceph.%s_device' % device_type] = path - lv.set_tags(tags) - elif disk.is_device(device_name): - # We got a disk, create an lv - lv_type = "osd-{}".format(device_type) - name_uuid = system.generate_uuid() - kwargs = { - 'device': device_name, - 'tags': tags, - 'slots': slots - } - #TODO use get_block_db_size and co here to get configured size in - #conf file - if size != 0: - kwargs['size'] = size - lv = api.create_lv( - lv_type, - name_uuid, - **kwargs) - path = lv.lv_path - tags['ceph.{}_device'.format(device_type)] = path - tags['ceph.{}_uuid'.format(device_type)] = lv.lv_uuid - lv_uuid = lv.lv_uuid - lv.set_tags(tags) - else: - # otherwise assume this is a regular disk partition - name_uuid = self.get_ptuuid(device_name) - path = device_name - tags['ceph.%s_uuid' % device_type] = name_uuid - tags['ceph.%s_device' % device_type] = path - lv_uuid = name_uuid - return path, lv_uuid, tags - - def prepare_data_device(self, device_type, osd_uuid): - """ - Check if ``arg`` is a device or partition to create an LV out of it - with a distinct volume group name, assigning LV tags on it and - ultimately, returning the logical volume object. Failing to detect - a device or partition will result in error. 
- - :param arg: The value of ``--data`` when parsing args - :param device_type: Usually ``block`` - :param osd_uuid: The OSD uuid - """ - device = self.args.data - if disk.is_partition(device) or disk.is_device(device): - # we must create a vg, and then a single lv - lv_name_prefix = "osd-{}".format(device_type) - kwargs = {'device': device, - 'tags': {'ceph.type': device_type}, - 'slots': self.args.data_slots, - } - logger.debug('data device size: {}'.format(self.args.data_size)) - if self.args.data_size != 0: - kwargs['size'] = self.args.data_size - return api.create_lv( - lv_name_prefix, - osd_uuid, - **kwargs) - else: - error = [ - 'Cannot use device ({}).'.format(device), - 'A vg/lv path or an existing device is needed'] - raise RuntimeError(' '.join(error)) - - raise RuntimeError('no data logical volume found with: {}'.format(device)) - - def safe_prepare(self, args=None): - """ - An intermediate step between `main()` and `prepare()` so that we can - capture the `self.osd_id` in case we need to rollback - - :param args: Injected args, usually from `lvm create` which compounds - both `prepare` and `create` - """ - if args is not None: - self.args = args - - try: - vgname, lvname = self.args.data.split('/') - lv = api.get_single_lv(filters={'lv_name': lvname, - 'vg_name': vgname}) - except ValueError: - lv = None - - if api.is_ceph_device(lv): - logger.info("device {} is already used".format(self.args.data)) - raise RuntimeError("skipping {}, it is already prepared".format(self.args.data)) - try: - self.prepare() - except Exception: - logger.exception('lvm prepare was unable to complete') - logger.info('will rollback OSD ID creation') - rollback_osd(self.args, self.osd_id) - raise - terminal.success("ceph-volume lvm prepare successful for: %s" % self.args.data) - - def get_cluster_fsid(self): - """ - Allows using --cluster-fsid as an argument, but can fallback to reading - from ceph.conf if that is unset (the default behavior). - """ - if self.args.cluster_fsid: - return self.args.cluster_fsid - else: - return conf.ceph.get('global', 'fsid') - - @decorators.needs_root - def prepare(self): - # FIXME we don't allow re-using a keyring, we always generate one for the - # OSD, this needs to be fixed. This could either be a file (!) or a string - # (!!) or some flags that we would need to compound into a dict so that we - # can convert to JSON (!!!) 
- secrets = {'cephx_secret': prepare_utils.create_key()} - cephx_lockbox_secret = '' - encrypted = 1 if self.args.dmcrypt else 0 - cephx_lockbox_secret = '' if not encrypted else prepare_utils.create_key() - - if encrypted: - secrets['dmcrypt_key'] = encryption_utils.create_dmcrypt_key() - secrets['cephx_lockbox_secret'] = cephx_lockbox_secret - - cluster_fsid = self.get_cluster_fsid() - - osd_fsid = self.args.osd_fsid or system.generate_uuid() - crush_device_class = self.args.crush_device_class - if crush_device_class: - secrets['crush_device_class'] = crush_device_class - # reuse a given ID if it exists, otherwise create a new ID - self.osd_id = prepare_utils.create_id(osd_fsid, json.dumps(secrets), osd_id=self.args.osd_id) - tags = { - 'ceph.osd_fsid': osd_fsid, - 'ceph.osd_id': self.osd_id, - 'ceph.cluster_fsid': cluster_fsid, - 'ceph.cluster_name': conf.cluster, - 'ceph.crush_device_class': crush_device_class, - 'ceph.osdspec_affinity': prepare_utils.get_osdspec_affinity() - } - if self.args.bluestore: - try: - vg_name, lv_name = self.args.data.split('/') - block_lv = api.get_single_lv(filters={'lv_name': lv_name, - 'vg_name': vg_name}) - except ValueError: - block_lv = None - - if not block_lv: - block_lv = self.prepare_data_device('block', osd_fsid) - - tags['ceph.block_device'] = block_lv.lv_path - tags['ceph.block_uuid'] = block_lv.lv_uuid - tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret - tags['ceph.encrypted'] = encrypted - tags['ceph.vdo'] = api.is_vdo(block_lv.lv_path) - - wal_device, wal_uuid, tags = self.setup_device( - 'wal', - self.args.block_wal, - tags, - self.args.block_wal_size, - self.args.block_wal_slots) - db_device, db_uuid, tags = self.setup_device( - 'db', - self.args.block_db, - tags, - self.args.block_db_size, - self.args.block_db_slots) - - tags['ceph.type'] = 'block' - block_lv.set_tags(tags) - - prepare_bluestore( - block_lv.lv_path, - wal_device, - db_device, - secrets, - tags, - self.osd_id, - osd_fsid, - ) - def main(self): sub_command_help = dedent(""" Prepare an OSD by assigning an ID and FSID, registering them with the @@ -315,13 +48,12 @@ def main(self): prog='ceph-volume lvm prepare', description=sub_command_help, ) - if len(self.argv) == 0: + if len(self.argv) == 0 and self.args is None: print(sub_command_help) return - exclude_group_options(parser, argv=self.argv, groups=['bluestore']) - self.args = parser.parse_args(self.argv) - # Default to bluestore here since defaulting it in add_argument may - # cause both to be True - if not self.args.bluestore: - self.args.bluestore = True - self.safe_prepare() + if self.args is None: + self.args = parser.parse_args(self.argv) + if self.args.bluestore: + self.args.objectstore = 'bluestore' + self.objectstore = objectstore.mapping['LVM'][self.args.objectstore](args=self.args) + self.objectstore.safe_prepare() diff --git a/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/src/ceph-volume/ceph_volume/devices/lvm/zap.py index d4d78ad01810..c278de43eb0a 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/zap.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/zap.py @@ -5,16 +5,43 @@ from textwrap import dedent -from ceph_volume import decorators, terminal, process +from ceph_volume import decorators, terminal, process, BEING_REPLACED_HEADER from ceph_volume.api import lvm as api from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int, merge_dict from ceph_volume.util.device import Device from ceph_volume.systemd import systemctl +from ceph_volume.devices.raw.list import 
direct_report +from typing import Any, Dict, List, Set logger = logging.getLogger(__name__) mlogger = terminal.MultiLogger(__name__) +def zap_device(path: str) -> None: + """Remove any existing filesystem signatures. + + Args: + path (str): The path to the device to zap. + """ + zap_bluestore(path) + wipefs(path) + zap_data(path) + +def zap_bluestore(path: str) -> None: + """Remove all BlueStore signature on a device. + + Args: + path (str): The path to the device to remove BlueStore signatures from. + """ + terminal.info(f'Removing all BlueStore signature on {path} if any...') + process.run([ + 'ceph-bluestore-tool', + 'zap-device', + '--dev', + path, + '--yes-i-really-really-mean-it' + ]) + def wipefs(path): """ Removes the filesystem from an lv or partition. @@ -70,84 +97,127 @@ def zap_data(path): ]) -def find_associated_devices(osd_id=None, osd_fsid=None): - """ - From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the - system that match those tag values, further detect if any partitions are - part of the OSD, and then return the set of LVs and partitions (if any). - """ - lv_tags = {} - if osd_id: - lv_tags['ceph.osd_id'] = osd_id - if osd_fsid: - lv_tags['ceph.osd_fsid'] = osd_fsid - - lvs = api.get_lvs(tags=lv_tags) - if not lvs: - raise RuntimeError('Unable to find any LV for zapping OSD: ' - '%s' % osd_id or osd_fsid) +class Zap: + help = 'Removes all data and filesystems from a logical volume or partition.' - devices_to_zap = ensure_associated_lvs(lvs, lv_tags) - return [Device(path) for path in set(devices_to_zap) if path] + def __init__(self, argv: List[str]) -> None: + self.argv = argv + self.osd_ids_to_zap: List[str] = [] + def ensure_associated_raw(self, raw_report: Dict[str, Any]) -> List[str]: + osd_id: str = self.args.osd_id + osd_uuid: str = self.args.osd_fsid + raw_devices: Set[str] = set() -def ensure_associated_lvs(lvs, lv_tags={}): - """ - Go through each LV and ensure if backing devices (journal, wal, block) - are LVs or partitions, so that they can be accurately reported. - """ - # look for many LVs for each backing type, because it is possible to - # receive a filtering for osd.1, and have multiple failed deployments - # leaving many journals with osd.1 - usually, only a single LV will be - # returned - - db_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'db'})) - wal_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'wal'})) - backing_devices = [(db_lvs, 'db'), - (wal_lvs, 'wal')] - - verified_devices = [] - - for lv in lvs: - # go through each lv and append it, otherwise query `blkid` to find - # a physical device. 
Do this for each type (journal,db,wal) regardless - # if they have been processed in the previous LV, so that bad devices - # with the same ID can be caught - for ceph_lvs, _type in backing_devices: - if ceph_lvs: - verified_devices.extend([l.lv_path for l in ceph_lvs]) - continue - - # must be a disk partition, by querying blkid by the uuid we are - # ensuring that the device path is always correct - try: - device_uuid = lv.tags['ceph.%s_uuid' % _type] - except KeyError: - # Bluestore will not have ceph.journal_uuid, and Filestore - # will not not have ceph.db_uuid - continue + if len([details.get('osd_id') for _, details in raw_report.items() if details.get('osd_id') == osd_id]) > 1: + if not osd_uuid: + raise RuntimeError(f'Multiple OSDs found with id {osd_id}, pass --osd-fsid') - osd_device = disk.get_device_from_partuuid(device_uuid) - if not osd_device: - # if the osd_device is not found by the partuuid, then it is - # not possible to ensure this device exists anymore, so skip it - continue - verified_devices.append(osd_device) + if not osd_uuid: + for _, details in raw_report.items(): + if details.get('osd_id') == int(osd_id): + osd_uuid = details.get('osd_uuid') + break - verified_devices.append(lv.lv_path) + for osd_uuid, details in raw_report.items(): + device: str = details.get('device') + if details.get('osd_uuid') == osd_uuid: + raw_devices.add(device) - # reduce the list from all the duplicates that were added - return list(set(verified_devices)) + return list(raw_devices) + + def find_associated_devices(self) -> List[api.Volume]: + """From an ``osd_id`` and/or an ``osd_fsid``, filter out all the Logical Volumes (LVs) in the + system that match those tag values, further detect if any partitions are + part of the OSD, and then return the set of LVs and partitions (if any). -class Zap(object): + The function first queries the LVM-based OSDs using the provided `osd_id` or `osd_fsid`. + If no matches are found, it then searches the system for RAW-based OSDs. - help = 'Removes all data and filesystems from a logical volume or partition.' + Raises: + SystemExit: If no OSDs are found, the function raises a `SystemExit` with an appropriate message. - def __init__(self, argv): - self.argv = argv + Returns: + List[api.Volume]: A list of `api.Volume` objects corresponding to the OSD's Logical Volumes (LVs) + or partitions that are associated with the given `osd_id` or `osd_fsid`. - def unmount_lv(self, lv): + Notes: + - If neither `osd_id` nor `osd_fsid` are provided, the function will not be able to find OSDs. + - The search proceeds from LVM-based OSDs to RAW-based OSDs if no Logical Volumes are found. 
+ """ + lv_tags = {} + lv_tags = {key: value for key, value in { + 'ceph.osd_id': self.args.osd_id, + 'ceph.osd_fsid': self.args.osd_fsid + }.items() if value} + devices_to_zap: List[str] = [] + lvs = api.get_lvs(tags=lv_tags) + + if lvs: + devices_to_zap = self.ensure_associated_lvs(lvs, lv_tags) + else: + mlogger.debug(f'No OSD identified by "{self.args.osd_id or self.args.osd_fsid}" was found among LVM-based OSDs.') + mlogger.debug('Proceeding to check RAW-based OSDs.') + raw_osds: Dict[str, Any] = direct_report() + if raw_osds: + devices_to_zap = self.ensure_associated_raw(raw_osds) + if not devices_to_zap: + raise SystemExit('No OSD were found.') + + return [Device(path) for path in set(devices_to_zap) if path] + + def ensure_associated_lvs(self, + lvs: List[api.Volume], + lv_tags: Dict[str, Any] = {}) -> List[str]: + """ + Go through each LV and ensure if backing devices (journal, wal, block) + are LVs or partitions, so that they can be accurately reported. + """ + # look for many LVs for each backing type, because it is possible to + # receive a filtering for osd.1, and have multiple failed deployments + # leaving many journals with osd.1 - usually, only a single LV will be + # returned + + db_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'db'})) + wal_lvs = api.get_lvs(tags=merge_dict(lv_tags, {'ceph.type': 'wal'})) + backing_devices = [(db_lvs, 'db'), + (wal_lvs, 'wal')] + + verified_devices = [] + + for lv in lvs: + # go through each lv and append it, otherwise query `blkid` to find + # a physical device. Do this for each type (journal,db,wal) regardless + # if they have been processed in the previous LV, so that bad devices + # with the same ID can be caught + for ceph_lvs, _type in backing_devices: + if ceph_lvs: + verified_devices.extend([l.lv_path for l in ceph_lvs]) + continue + + # must be a disk partition, by querying blkid by the uuid we are + # ensuring that the device path is always correct + try: + device_uuid = lv.tags['ceph.%s_uuid' % _type] + except KeyError: + # Bluestore will not have ceph.journal_uuid, and Filestore + # will not not have ceph.db_uuid + continue + + osd_device = disk.get_device_from_partuuid(device_uuid) + if not osd_device: + # if the osd_device is not found by the partuuid, then it is + # not possible to ensure this device exists anymore, so skip it + continue + verified_devices.append(osd_device) + + verified_devices.append(lv.lv_path) + + # reduce the list from all the duplicates that were added + return list(set(verified_devices)) + + def unmount_lv(self, lv: api.Volume) -> None: if lv.tags.get('ceph.cluster_name') and lv.tags.get('ceph.osd_id'): lv_path = "/var/lib/ceph/osd/{}-{}".format(lv.tags['ceph.cluster_name'], lv.tags['ceph.osd_id']) else: @@ -160,49 +230,106 @@ def unmount_lv(self, lv): if dmcrypt and dmcrypt_uuid: self.dmcrypt_close(dmcrypt_uuid) - def zap_lv(self, device): + def _write_replacement_header(self, device: str) -> None: + """Write a replacement header to a device. + + This method writes the string defined in `BEING_REPLACED_HEADER` + to the specified device. This header indicates that the device + is in the process of being replaced. + + Args: + device (str): The path to the device on which the replacement + header will be written. + """ + disk._dd_write(device, + BEING_REPLACED_HEADER) + + def clear_replace_header(self) -> bool: + """Safely erase the replacement header on a device if it is marked as being replaced. 
+ + This method checks whether the given device is marked as being replaced + (`device.is_being_replaced`). If true, it proceeds to erase the replacement header + from the device using the `_erase_replacement_header` method. The method returns + a boolean indicating whether any action was taken. + + Args: + device (Device): The device object, which includes information about the device's + path and status (such as whether it is currently being replaced). + + Returns: + bool: True if the replacement header was successfully erased, False if the + device was not marked as being replaced or no action was necessary. + """ + result: bool = False + device: Device = self.args.clear_replace_header + if device.is_being_replaced: + self._erase_replacement_header(device.path) + result = True + return result + + def _erase_replacement_header(self, device: str) -> None: + """Erase the replacement header on a device. + + This method writes a sequence of null bytes (`0x00`) over the area of the device + where the replacement header is stored, effectively erasing it. + + Args: + device (str): The path to the device from which the replacement header will be erased. + """ + disk._dd_write(device, + b'\x00' * len(BEING_REPLACED_HEADER)) + + def zap_lv(self, device: Device) -> None: """ Device examples: vg-name/lv-name, /dev/vg-name/lv-name Requirements: Must be a logical volume (LV) """ - lv = api.get_single_lv(filters={'lv_name': device.lv_name, 'vg_name': - device.vg_name}) + lv: api.Volume = device.lv_api self.unmount_lv(lv) - - wipefs(device.path) - zap_data(device.path) + self.parent_device: str = disk.get_parent_device_from_mapper(lv.lv_path) + zap_device(device.path) if self.args.destroy: lvs = api.get_lvs(filters={'vg_name': device.vg_name}) - if lvs == []: - mlogger.info('No LVs left, exiting', device.vg_name) - return - elif len(lvs) <= 1: + if len(lvs) <= 1: mlogger.info('Only 1 LV left in VG, will proceed to destroy ' 'volume group %s', device.vg_name) pvs = api.get_pvs(filters={'lv_uuid': lv.lv_uuid}) api.remove_vg(device.vg_name) for pv in pvs: api.remove_pv(pv.pv_name) + replacement_args: Dict[str, bool] = { + 'block': self.args.replace_block, + 'db': self.args.replace_db, + 'wal': self.args.replace_wal + } + if replacement_args.get(lv.tags.get('ceph.type'), False): + mlogger.info(f'Marking {self.parent_device} as being replaced') + self._write_replacement_header(self.parent_device) else: mlogger.info('More than 1 LV left in VG, will proceed to ' 'destroy LV only') mlogger.info('Removing LV because --destroy was given: %s', device.path) + if self.args.replace_block: + mlogger.info(f'--replace-block passed but the device still has {str(len(lvs))} LV(s)') api.remove_lv(device.path) elif lv: # just remove all lvm metadata, leaving the LV around lv.clear_tags() - def zap_partition(self, device): + def zap_partition(self, device: Device) -> None: """ Device example: /dev/sda1 Requirements: Must be a partition """ if device.is_encrypted: # find the holder - holders = [ - '/dev/%s' % holder for holder in device.sys_api.get('holders', []) + pname = device.sys_api.get('parent') + devname = device.sys_api.get('devname') + parent_device = Device(f'/dev/{pname}') + holders: List[str] = [ + f'/dev/{holder}' for holder in parent_device.sys_api['partitions'][devname]['holders'] ] for mapper_uuid in os.listdir('/dev/mapper'): mapper_path = os.path.join('/dev/mapper', mapper_uuid) @@ -213,14 +340,13 @@ def zap_partition(self, device): mlogger.info("Unmounting %s", device.path) system.unmount(device.path) - 
wipefs(device.path) - zap_data(device.path) + zap_device(device.path) if self.args.destroy: mlogger.info("Destroying partition since --destroy was used: %s" % device.path) disk.remove_partition(device) - def zap_lvm_member(self, device): + def zap_lvm_member(self, device: Device) -> None: """ An LVM member may have more than one LV and or VG, for example if it is a raw device with multiple partitions each belonging to a different LV @@ -240,7 +366,7 @@ def zap_lvm_member(self, device): - def zap_raw_device(self, device): + def zap_raw_device(self, device: Device) -> None: """ Any whole (raw) device passed in as input will be processed here, checking for LVM membership and partitions (if any). @@ -259,13 +385,20 @@ def zap_raw_device(self, device): for part_name in device.sys_api.get('partitions', {}).keys(): self.zap_partition(Device('/dev/%s' % part_name)) - wipefs(device.path) - zap_data(device.path) + zap_device(device.path) + # TODO(guits): I leave this commented out, this should be part of a separate patch in order to + # support device replacement with raw-based OSDs + # if self.args.replace_block: + # disk._dd_write(device.path, 'CEPH_DEVICE_BEING_REPLACED') @decorators.needs_root - def zap(self, devices=None): - devices = devices or self.args.devices + def zap(self) -> None: + """Zap a device. + Raises: + SystemExit: When the device is a mapper and not a mpath device. + """ + devices = self.args.devices for device in devices: mlogger.info("Zapping: %s", device.path) if device.is_mapper and not device.is_mpath: @@ -291,21 +424,21 @@ def zap(self, devices=None): ) @decorators.needs_root - def zap_osd(self): + def zap_osd(self) -> None: if self.args.osd_id and not self.args.no_systemd: osd_is_running = systemctl.osd_is_active(self.args.osd_id) if osd_is_running: mlogger.error("OSD ID %s is running, stop it with:" % self.args.osd_id) mlogger.error("systemctl stop ceph-osd@%s" % self.args.osd_id) raise SystemExit("Unable to zap devices associated with OSD ID: %s" % self.args.osd_id) - devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid) - self.zap(devices) + self.args.devices = self.find_associated_devices() + self.zap() - def dmcrypt_close(self, dmcrypt_uuid): + def dmcrypt_close(self, dmcrypt_uuid: str) -> None: mlogger.info("Closing encrypted volume %s", dmcrypt_uuid) encryption.dmcrypt_close(mapping=dmcrypt_uuid, skip_path_check=True) - def main(self): + def main(self) -> None: sub_command_help = dedent(""" Zaps the given logical volume(s), raw device(s) or partition(s) for reuse by ceph-volume. If given a path to a logical volume it must be in the format of vg/lv. Any @@ -393,12 +526,56 @@ def main(self): help='Skip systemd unit checks', ) + parser.add_argument( + '--replace-block', + dest='replace_block', + action='store_true', + help='Mark the block device as unavailable.' + ) + + parser.add_argument( + '--replace-db', + dest='replace_db', + action='store_true', + help='Mark the db device as unavailable.' + ) + + parser.add_argument( + '--replace-wal', + dest='replace_wal', + action='store_true', + help='Mark the wal device as unavailable.' + ) + + parser.add_argument( + '--clear-replace-header', + dest='clear_replace_header', + type=arg_validators.ValidClearReplaceHeaderDevice(), + help='clear the replace header on devices.' 
+ ) + if len(self.argv) == 0: print(sub_command_help) return self.args = parser.parse_args(self.argv) + if self.args.clear_replace_header: + rc: bool = False + try: + rc = self.clear_replace_header() + except Exception as e: + raise SystemExit(e) + if rc: + mlogger.info(f'Replacement header cleared on {self.args.clear_replace_header}') + else: + mlogger.info(f'No replacement header detected on {self.args.clear_replace_header}, nothing to do.') + raise SystemExit(not rc) + + if self.args.replace_block or self.args.replace_db or self.args.replace_wal: + self.args.destroy = True + mlogger.info('--replace-block|db|wal passed, enforcing --destroy.') + if self.args.osd_id or self.args.osd_fsid: self.zap_osd() else: diff --git a/src/ceph-volume/ceph_volume/devices/raw/activate.py b/src/ceph-volume/ceph_volume/devices/raw/activate.py index 17be57dfeaa8..0cec810728ba 100644 --- a/src/ceph-volume/ceph_volume/devices/raw/activate.py +++ b/src/ceph-volume/ceph_volume/devices/raw/activate.py @@ -1,95 +1,20 @@ from __future__ import print_function import argparse import logging -import os from textwrap import dedent -from ceph_volume import process, conf, decorators, terminal -from ceph_volume.util import system -from ceph_volume.util import prepare as prepare_utils -from .list import direct_report +from ceph_volume import objectstore logger = logging.getLogger(__name__) -def activate_bluestore(meta, tmpfs, systemd): - # find the osd - osd_id = meta['osd_id'] - osd_uuid = meta['osd_uuid'] - - # mount on tmpfs the osd directory - osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id) - if not system.path_is_mounted(osd_path): - # mkdir -p and mount as tmpfs - prepare_utils.create_osd_path(osd_id, tmpfs=tmpfs) - - # XXX This needs to be removed once ceph-bluestore-tool can deal with - # symlinks that exist in the osd dir - for link_name in ['block', 'block.db', 'block.wal']: - link_path = os.path.join(osd_path, link_name) - if os.path.exists(link_path): - os.unlink(os.path.join(osd_path, link_name)) - - # Once symlinks are removed, the osd dir can be 'primed again. 
chown first, - # regardless of what currently exists so that ``prime-osd-dir`` can succeed - # even if permissions are somehow messed up - system.chown(osd_path) - prime_command = [ - 'ceph-bluestore-tool', - 'prime-osd-dir', - '--path', osd_path, - '--no-mon-config', - '--dev', meta['device'], - ] - process.run(prime_command) - - # always re-do the symlink regardless if it exists, so that the block, - # block.wal, and block.db devices that may have changed can be mapped - # correctly every time - prepare_utils.link_block(meta['device'], osd_id) - - if 'device_db' in meta: - prepare_utils.link_db(meta['device_db'], osd_id, osd_uuid) - - if 'device_wal' in meta: - prepare_utils.link_wal(meta['device_wal'], osd_id, osd_uuid) - - system.chown(osd_path) - terminal.success("ceph-volume raw activate successful for osd ID: %s" % osd_id) - - class Activate(object): help = 'Discover and prepare a data directory for a (BlueStore) OSD on a raw device' - def __init__(self, argv): + def __init__(self, argv, args=None): + self.objectstore = None self.argv = argv - self.args = None - - @decorators.needs_root - def activate(self, devs, start_osd_id, start_osd_uuid, - tmpfs, systemd): - """ - :param args: The parsed arguments coming from the CLI - """ - assert devs or start_osd_id or start_osd_uuid - found = direct_report(devs) - - activated_any = False - for osd_uuid, meta in found.items(): - osd_id = meta['osd_id'] - if start_osd_id is not None and str(osd_id) != str(start_osd_id): - continue - if start_osd_uuid is not None and osd_uuid != start_osd_uuid: - continue - logger.info('Activating osd.%s uuid %s cluster %s' % ( - osd_id, osd_uuid, meta['ceph_fsid'])) - activate_bluestore(meta, - tmpfs=tmpfs, - systemd=systemd) - activated_any = True - - if not activated_any: - raise RuntimeError('did not find any matching OSD to activate') + self.args = args def main(self): sub_command_help = dedent(""" @@ -114,19 +39,34 @@ def main(self): '--device', help='The device for the OSD to start' ) + parser.add_argument( + '--devices', + help='The device for the OSD to start', + nargs='*', + default=[] + ) parser.add_argument( '--osd-id', help='OSD ID to activate' ) parser.add_argument( '--osd-uuid', + dest='osd_fsid', help='OSD UUID to active' ) parser.add_argument( '--no-systemd', dest='no_systemd', action='store_true', - help='Skip creating and enabling systemd units and starting OSD services' + help='This argument has no effect, this is here for backward compatibility.' 
+ ) + parser.add_argument( + '--objectstore', + dest='objectstore', + help='The OSD objectstore.', + default='bluestore', + choices=['bluestore', 'seastore'], + type=str, ) parser.add_argument( '--block.db', @@ -147,20 +87,13 @@ def main(self): if not self.argv: print(sub_command_help) return - args = parser.parse_args(self.argv) - self.args = args - if not args.no_systemd: - terminal.error('systemd support not yet implemented') - raise SystemExit(1) + self.args = parser.parse_args(self.argv) - devs = [args.device] - if args.block_wal: - devs.append(args.block_wal) - if args.block_db: - devs.append(args.block_db) + if self.args.device: + if self.args.devices is None: + self.args.devices = [self.args.device] + else: + self.args.devices.append(self.args.device) - self.activate(devs=devs, - start_osd_id=args.osd_id, - start_osd_uuid=args.osd_uuid, - tmpfs=not args.no_tmpfs, - systemd=not self.args.no_systemd) + self.objectstore = objectstore.mapping['RAW'][self.args.objectstore](args=self.args) + self.objectstore.activate() diff --git a/src/ceph-volume/ceph_volume/devices/raw/common.py b/src/ceph-volume/ceph_volume/devices/raw/common.py index 89ee285be5b4..77db2f7daf98 100644 --- a/src/ceph-volume/ceph_volume/devices/raw/common.py +++ b/src/ceph-volume/ceph_volume/devices/raw/common.py @@ -1,7 +1,7 @@ import argparse from ceph_volume.util import arg_validators -def create_parser(prog, description): +def create_parser(prog: str, description: str) -> argparse.ArgumentParser: """ Both prepare and create share the same parser, those are defined here to avoid duplication @@ -11,6 +11,14 @@ def create_parser(prog, description): formatter_class=argparse.RawDescriptionHelpFormatter, description=description, ) + parser.add_argument( + '--objectstore', + dest='objectstore', + help='The OSD objectstore.', + default='bluestore', + choices=['bluestore', 'seastore'], + type=str, + ), parser.add_argument( '--data', required=True, @@ -20,7 +28,8 @@ def create_parser(prog, description): parser.add_argument( '--bluestore', action='store_true', - help='Use BlueStore backend') + help='Use BlueStore backend. (DEPRECATED: use --objectstore instead)' + ) parser.add_argument( '--crush-device-class', dest='crush_device_class', @@ -46,9 +55,15 @@ def create_parser(prog, description): ) parser.add_argument( '--dmcrypt', - action='store_true', + action=arg_validators.DmcryptAction, help='Enable device encryption via dm-crypt', ) + parser.add_argument( + '--with-tpm', + dest='with_tpm', + help='Whether encrypted OSDs should be enrolled with TPM.', + action='store_true' + ), parser.add_argument( '--osd-id', help='Reuse an existing OSD id', diff --git a/src/ceph-volume/ceph_volume/devices/raw/list.py b/src/ceph-volume/ceph_volume/devices/raw/list.py index 0f801701b806..68923216a411 100644 --- a/src/ceph-volume/ceph_volume/devices/raw/list.py +++ b/src/ceph-volume/ceph_volume/devices/raw/list.py @@ -5,12 +5,14 @@ from textwrap import dedent from ceph_volume import decorators, process from ceph_volume.util import disk - +from ceph_volume.util.device import Device +from typing import Any, Dict, Optional, List as _List +from concurrent.futures import ThreadPoolExecutor logger = logging.getLogger(__name__) -def direct_report(devices): +def direct_report(devices: Optional[_List[str]] = None) -> Dict[str, Any]: """ Other non-cli consumers of listing information will want to consume the report without the need to parse arguments or other flags. 
This helper @@ -20,65 +22,76 @@ def direct_report(devices): _list = List([]) return _list.generate(devices) -def _get_bluestore_info(dev): - out, err, rc = process.call([ - 'ceph-bluestore-tool', 'show-label', - '--dev', dev], verbose_on_failure=False) +def _get_bluestore_info(devices: _List[str]) -> Dict[str, Any]: + result: Dict[str, Any] = {} + command: _List[str] = ['ceph-bluestore-tool', + 'show-label', '--bdev_aio_poll_ms=1'] + for device in devices: + command.extend(['--dev', device]) + out, err, rc = process.call(command, verbose_on_failure=False) if rc: - # ceph-bluestore-tool returns an error (below) if device is not bluestore OSD - # > unable to read label for : (2) No such file or directory - # but it's possible the error could be for a different reason (like if the disk fails) - logger.debug('assuming device {} is not BlueStore; ceph-bluestore-tool failed to get info from device: {}\n{}'.format(dev, out, err)) - return None - oj = json.loads(''.join(out)) - if dev not in oj: - # should be impossible, so warn - logger.warning('skipping device {} because it is not reported in ceph-bluestore-tool output: {}'.format(dev, out)) - return None - try: - r = { - 'osd_uuid': oj[dev]['osd_uuid'], - } - if oj[dev]['description'] == 'main': - whoami = oj[dev]['whoami'] - r.update({ - 'type': 'bluestore', - 'osd_id': int(whoami), - 'ceph_fsid': oj[dev]['ceph_fsid'], - 'device': dev, - }) - elif oj[dev]['description'] == 'bluefs db': - r['device_db'] = dev - elif oj[dev]['description'] == 'bluefs wal': - r['device_wal'] = dev - return r - except KeyError as e: - # this will appear for devices that have a bluestore header but aren't valid OSDs - # for example, due to incomplete rollback of OSDs: https://tracker.ceph.com/issues/51869 - logger.error('device {} does not have all BlueStore data needed to be a valid OSD: {}\n{}'.format(dev, out, e)) - return None + logger.debug(f"ceph-bluestore-tool couldn't detect any BlueStore device.\n{out}\n{err}") + else: + oj = json.loads(''.join(out)) + for device in devices: + if device not in oj: + # should be impossible, so warn + logger.warning(f'skipping device {device} because it is not reported in ceph-bluestore-tool output: {out}') + if oj.get(device): + try: + osd_uuid = oj[device]['osd_uuid'] + result[osd_uuid] = disk.bluestore_info(device, oj) + except KeyError as e: + # this will appear for devices that have a bluestore header but aren't valid OSDs + # for example, due to incomplete rollback of OSDs: https://tracker.ceph.com/issues/51869 + logger.error(f'device {device} does not have all BlueStore data needed to be a valid OSD: {out}\n{e}') + return result class List(object): help = 'list BlueStore OSDs on raw devices' - def __init__(self, argv): + def __init__(self, argv: _List[str]) -> None: self.argv = argv + self.info_devices: _List[Dict[str, str]] = [] + self.devices_to_scan: _List[str] = [] + + def exclude_atari_partitions(self) -> None: + result: _List[str] = [] + for info_device in self.info_devices: + path = info_device['NAME'] + parent_device = info_device.get('PKNAME') + if parent_device: + try: + if disk.has_bluestore_label(parent_device): + logger.warning(('ignoring child device {} whose parent {} is a BlueStore OSD.'.format(path, parent_device), + 'device is likely a phantom Atari partition. 
device info: {}'.format(info_device))) + continue + except OSError as e: + logger.error(('ignoring child device {} to avoid reporting invalid BlueStore data from phantom Atari partitions.'.format(path), + 'failed to determine if parent device {} is BlueStore. err: {}'.format(parent_device, e))) + continue + result.append(path) + self.devices_to_scan = result + + def exclude_lvm_osd_devices(self) -> None: + with ThreadPoolExecutor() as pool: + filtered_devices_to_scan = pool.map(self.filter_lvm_osd_devices, self.devices_to_scan) + self.devices_to_scan = [device for device in filtered_devices_to_scan if device is not None] + + def filter_lvm_osd_devices(self, device: str) -> Optional[str]: + d = Device(device) + return d.path if not d.ceph_device_lvm else None - def generate(self, devs=None): + def generate(self, devices: Optional[_List[str]] = None) -> Dict[str, Any]: logger.debug('Listing block devices via lsblk...') - info_devices = disk.lsblk_all(abspath=True) - if devs is None or devs == []: + if not devices or not any(devices): # If no devs are given initially, we want to list ALL devices including children and # parents. Parent disks with child partitions may be the appropriate device to return if # the parent disk has a bluestore header, but children may be the most appropriate # devices to return if the parent disk does not have a bluestore header. - devs = [device['NAME'] for device in info_devices if device.get('NAME',)] - - result = {} - logger.debug('inspecting devices: {}'.format(devs)) - for dev in devs: + self.info_devices = disk.lsblk_all(abspath=True) # Linux kernels built with CONFIG_ATARI_PARTITION enabled can falsely interpret # bluestore's on-disk format as an Atari partition table. These false Atari partitions # can be interpreted as real OSDs if a bluestore OSD was previously created on the false @@ -88,37 +101,20 @@ def generate(self, devs=None): # parent isn't bluestore, then the child could be a valid bluestore OSD. If we fail to # determine whether a parent is bluestore, we should err on the side of not reporting # the child so as not to give a false negative. - info_device = [info for info in info_devices if info['NAME'] == dev][0] - if info_device['TYPE'] == 'lvm': - # lvm devices are not raw devices - continue - if 'PKNAME' in info_device and info_device['PKNAME'] != "": - parent = info_device['PKNAME'] - try: - if disk.has_bluestore_label(parent): - logger.warning(('ignoring child device {} whose parent {} is a BlueStore OSD.'.format(dev, parent), - 'device is likely a phantom Atari partition. device info: {}'.format(info_device))) - continue - except OSError as e: - logger.error(('ignoring child device {} to avoid reporting invalid BlueStore data from phantom Atari partitions.'.format(dev), - 'failed to determine if parent device {} is BlueStore. 
err: {}'.format(parent, e))) - continue + self.exclude_atari_partitions() + self.exclude_lvm_osd_devices() + + else: + self.devices_to_scan = devices - bs_info = _get_bluestore_info(dev) - if bs_info is None: - # None is also returned in the rare event that there is an issue reading info from - # a BlueStore disk, so be sure to log our assumption that it isn't bluestore - logger.info('device {} does not have BlueStore information'.format(dev)) - continue - uuid = bs_info['osd_uuid'] - if uuid not in result: - result[uuid] = {} - result[uuid].update(bs_info) + result: Dict[str, Any] = {} + logger.debug('inspecting devices: {}'.format(self.devices_to_scan)) + result = _get_bluestore_info(self.devices_to_scan) return result @decorators.needs_root - def list(self, args): + def list(self, args: argparse.Namespace) -> None: report = self.generate(args.device) if args.format == 'json': print(json.dumps(report, indent=4, sort_keys=True)) @@ -127,7 +123,7 @@ def list(self, args): raise SystemExit('No valid Ceph devices found') raise RuntimeError('not implemented yet') - def main(self): + def main(self) -> None: sub_command_help = dedent(""" List OSDs on raw devices with raw device labels (usually the first block of the device). diff --git a/src/ceph-volume/ceph_volume/devices/raw/prepare.py b/src/ceph-volume/ceph_volume/devices/raw/prepare.py index b3201a89dafc..99dd6a69e981 100644 --- a/src/ceph-volume/ceph_volume/devices/raw/prepare.py +++ b/src/ceph-volume/ceph_volume/devices/raw/prepare.py @@ -1,62 +1,12 @@ from __future__ import print_function -import json import logging import os from textwrap import dedent -from ceph_volume.util import prepare as prepare_utils -from ceph_volume.util import encryption as encryption_utils -from ceph_volume.util import disk -from ceph_volume.util import system -from ceph_volume import decorators, terminal -from ceph_volume.devices.lvm.common import rollback_osd +from ceph_volume import terminal, objectstore from .common import create_parser logger = logging.getLogger(__name__) -def prepare_dmcrypt(key, device, device_type, fsid): - """ - Helper for devices that are encrypted. The operations needed for - block, db, wal, devices are all the same - """ - if not device: - return '' - kname = disk.lsblk(device)['KNAME'] - mapping = 'ceph-{}-{}-{}-dmcrypt'.format(fsid, kname, device_type) - return encryption_utils.prepare_dmcrypt(key, device, mapping) - -def prepare_bluestore(block, wal, db, secrets, osd_id, fsid, tmpfs): - """ - :param block: The name of the logical volume for the bluestore data - :param wal: a regular/plain disk or logical volume, to be used for block.wal - :param db: a regular/plain disk or logical volume, to be used for block.db - :param secrets: A dict with the secrets needed to create the osd (e.g. 
cephx) - :param id_: The OSD id - :param fsid: The OSD fsid, also known as the OSD UUID - """ - cephx_secret = secrets.get('cephx_secret', prepare_utils.create_key()) - - if secrets.get('dmcrypt_key'): - key = secrets['dmcrypt_key'] - block = prepare_dmcrypt(key, block, 'block', fsid) - wal = prepare_dmcrypt(key, wal, 'wal', fsid) - db = prepare_dmcrypt(key, db, 'db', fsid) - - # create the directory - prepare_utils.create_osd_path(osd_id, tmpfs=tmpfs) - # symlink the block - prepare_utils.link_block(block, osd_id) - # get the latest monmap - prepare_utils.get_monmap(osd_id) - # write the OSD keyring if it doesn't exist already - prepare_utils.write_keyring(osd_id, cephx_secret) - # prepare the osd filesystem - prepare_utils.osd_mkfs_bluestore( - osd_id, fsid, - keyring=cephx_secret, - wal=wal, - db=db - ) - class Prepare(object): @@ -65,65 +15,7 @@ class Prepare(object): def __init__(self, argv): self.argv = argv self.osd_id = None - - def safe_prepare(self, args=None): - """ - An intermediate step between `main()` and `prepare()` so that we can - capture the `self.osd_id` in case we need to rollback - - :param args: Injected args, usually from `raw create` which compounds - both `prepare` and `create` - """ - if args is not None: - self.args = args - try: - self.prepare() - except Exception: - logger.exception('raw prepare was unable to complete') - logger.info('will rollback OSD ID creation') - rollback_osd(self.args, self.osd_id) - raise - dmcrypt_log = 'dmcrypt' if args.dmcrypt else 'clear' - terminal.success("ceph-volume raw {} prepare successful for: {}".format(dmcrypt_log, self.args.data)) - - - @decorators.needs_root - def prepare(self): - secrets = {'cephx_secret': prepare_utils.create_key()} - encrypted = 1 if self.args.dmcrypt else 0 - cephx_lockbox_secret = '' if not encrypted else prepare_utils.create_key() - - if encrypted: - secrets['dmcrypt_key'] = os.getenv('CEPH_VOLUME_DMCRYPT_SECRET') - secrets['cephx_lockbox_secret'] = cephx_lockbox_secret # dummy value to make `ceph osd new` not complaining - - osd_fsid = system.generate_uuid() - crush_device_class = self.args.crush_device_class - if crush_device_class: - secrets['crush_device_class'] = crush_device_class - tmpfs = not self.args.no_tmpfs - wal = "" - db = "" - if self.args.block_wal: - wal = self.args.block_wal - if self.args.block_db: - db = self.args.block_db - - # reuse a given ID if it exists, otherwise create a new ID - self.osd_id = prepare_utils.create_id( - osd_fsid, - json.dumps(secrets), - osd_id=self.args.osd_id) - - prepare_bluestore( - self.args.data, - wal, - db, - secrets, - self.osd_id, - osd_fsid, - tmpfs, - ) + self.objectstore = None def main(self): sub_command_help = dedent(""" @@ -148,13 +40,15 @@ def main(self): print(sub_command_help) return self.args = parser.parse_args(self.argv) - if not self.args.bluestore: - terminal.error('must specify --bluestore (currently the only supported backend)') - raise SystemExit(1) - if self.args.dmcrypt and not os.getenv('CEPH_VOLUME_DMCRYPT_SECRET'): - terminal.error('encryption was requested (--dmcrypt) but environment variable ' \ - 'CEPH_VOLUME_DMCRYPT_SECRET is not set, you must set ' \ - 'this variable to provide a dmcrypt secret.') - raise SystemExit(1) - - self.safe_prepare(self.args) + if self.args.bluestore: + self.args.objectstore = 'bluestore' + if self.args.dmcrypt: + if not self.args.with_tpm and not os.getenv('CEPH_VOLUME_DMCRYPT_SECRET'): + terminal.error('encryption was requested (--dmcrypt) but environment variable ' \ + 
'CEPH_VOLUME_DMCRYPT_SECRET is not set, you must set ' \ + 'this variable to provide a dmcrypt secret or use --with-tpm ' \ + 'in order to enroll a tpm2 token.') + raise SystemExit(1) + + self.objectstore = objectstore.mapping['RAW'][self.args.objectstore](args=self.args) + self.objectstore.safe_prepare(self.args) diff --git a/src/ceph-volume/ceph_volume/main.py b/src/ceph-volume/ceph_volume/main.py index 7868665cecbf..f8eca65ec497 100644 --- a/src/ceph-volume/ceph_volume/main.py +++ b/src/ceph-volume/ceph_volume/main.py @@ -1,10 +1,23 @@ from __future__ import print_function import argparse import os -import pkg_resources import sys import logging + +# `iter_entry_points` from `pkg_resources` takes one argument whereas +# `entry_points` from `importlib.metadata` does not. +try: + from importlib.metadata import entry_points + + def get_entry_points(group: str): # type: ignore + return entry_points().get(group, []) # type: ignore +except ImportError: + from pkg_resources import iter_entry_points as entry_points # type: ignore + + def get_entry_points(group: str): # type: ignore + return entry_points(group=group) # type: ignore + from ceph_volume.decorators import catches from ceph_volume import log, devices, configuration, conf, exceptions, terminal, inventory, drive_group, activate @@ -170,9 +183,9 @@ def _load_library_extensions(): """ logger = logging.getLogger('ceph_volume.plugins') group = 'ceph_volume_handlers' - entry_points = pkg_resources.iter_entry_points(group=group) + plugins = [] - for ep in entry_points: + for ep in get_entry_points(group=group): try: logger.debug('loading %s' % ep.name) plugin = ep.load() diff --git a/src/ceph-volume/ceph_volume/objectstore/__init__.py b/src/ceph-volume/ceph_volume/objectstore/__init__.py new file mode 100644 index 000000000000..faef2ae6231b --- /dev/null +++ b/src/ceph-volume/ceph_volume/objectstore/__init__.py @@ -0,0 +1,13 @@ +from . import lvmbluestore +from . import rawbluestore +from typing import Any, Dict + + +mapping: Dict[str, Any] = { + 'LVM': { + 'bluestore': lvmbluestore.LvmBlueStore + }, + 'RAW': { + 'bluestore': rawbluestore.RawBlueStore + } +} diff --git a/src/ceph-volume/ceph_volume/objectstore/baseobjectstore.py b/src/ceph-volume/ceph_volume/objectstore/baseobjectstore.py new file mode 100644 index 000000000000..6ac4cbd9f2b7 --- /dev/null +++ b/src/ceph-volume/ceph_volume/objectstore/baseobjectstore.py @@ -0,0 +1,178 @@ +import logging +import os +import errno +import time +import tempfile +from ceph_volume import conf, terminal, process +from ceph_volume.util import prepare as prepare_utils +from ceph_volume.util import system, disk +from ceph_volume.util import encryption as encryption_utils +from typing import Dict, Any, List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + import argparse + from ceph_volume.api.lvm import Volume + + +logger = logging.getLogger(__name__) + + +class BaseObjectStore: + def __init__(self, args: "argparse.Namespace") -> None: + self.args: "argparse.Namespace" = args + # FIXME we don't allow re-using a keyring, we always generate one + # for the OSD, this needs to be fixed. This could either be a file (!) + # or a string (!!) or some flags that we would need to compound + # into a dict so that we can convert to JSON (!!!) 
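Note: objectstore.mapping is the dispatch table that the reworked lvm/raw prepare and create commands use in place of the removed Prepare/Activate helpers. A hedged sketch of how a caller selects a backend; the namespace fields are illustrative only (real callers pass the namespace produced by the prepare/create parsers), and this only does anything useful on a node with ceph-volume and its LVM tooling available:

    import argparse
    from ceph_volume import objectstore

    args = argparse.Namespace(objectstore='bluestore', data='vg_osd/lv_osd',
                              dmcrypt=False, with_tpm=False, osd_id=None, osd_fsid=None,
                              cluster_fsid=None, crush_device_class=None)

    # 'LVM' or 'RAW' picks the deployment flavour, the objectstore name picks the class
    osd = objectstore.mapping['LVM'][args.objectstore](args=args)
    osd.safe_prepare()   # `lvm create` additionally calls osd.activate() afterwards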
+ self.secrets: Dict[str, str] = {'cephx_secret': prepare_utils.create_key()} + self.cephx_secret: str = self.secrets.get('cephx_secret', + prepare_utils.create_key()) + self.encrypted: int = 0 + self.tags: Dict[str, Any] = {} + self.osd_id: str = '' + self.osd_fsid: str = '' + self.cephx_lockbox_secret: str = '' + self.objectstore: str = '' + self.osd_mkfs_cmd: List[str] = [] + self.block_device_path: str = '' + self.dmcrypt_key: str = encryption_utils.create_dmcrypt_key() + self.with_tpm: int = int(getattr(self.args, 'with_tpm', False)) + self.method: str = '' + if getattr(self.args, 'dmcrypt', False): + self.encrypted = 1 + if not self.with_tpm: + self.cephx_lockbox_secret = prepare_utils.create_key() + self.secrets['cephx_lockbox_secret'] = \ + self.cephx_lockbox_secret + + def get_ptuuid(self, argument: str) -> str: + uuid = disk.get_partuuid(argument) + if not uuid: + terminal.error('blkid could not detect a PARTUUID for device: %s' % + argument) + raise RuntimeError('unable to use device') + return uuid + + def get_osdspec_affinity(self) -> str: + return os.environ.get('CEPH_VOLUME_OSDSPEC_AFFINITY', '') + + def pre_prepare(self) -> None: + raise NotImplementedError() + + def prepare_data_device(self, + device_type: str, + osd_uuid: str) -> Optional["Volume"]: + raise NotImplementedError() + + def safe_prepare(self, args: "argparse.Namespace") -> None: + raise NotImplementedError() + + def add_objectstore_opts(self) -> None: + raise NotImplementedError() + + def prepare_osd_req(self, tmpfs: bool = True) -> None: + # create the directory + prepare_utils.create_osd_path(self.osd_id, tmpfs=tmpfs) + # symlink the block + prepare_utils.link_block(self.block_device_path, self.osd_id) + # get the latest monmap + prepare_utils.get_monmap(self.osd_id) + # write the OSD keyring if it doesn't exist already + prepare_utils.write_keyring(self.osd_id, self.cephx_secret) + + def prepare(self) -> None: + raise NotImplementedError() + + def prepare_dmcrypt(self) -> None: + raise NotImplementedError() + + def get_cluster_fsid(self) -> str: + """ + Allows using --cluster-fsid as an argument, but can fallback to reading + from ceph.conf if that is unset (the default behavior). + """ + if self.args.cluster_fsid: + return self.args.cluster_fsid + else: + return conf.ceph.get('global', 'fsid') + + def get_osd_path(self) -> str: + return '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, self.osd_id) + + def build_osd_mkfs_cmd(self) -> List[str]: + self.supplementary_command = [ + '--osd-data', self.osd_path, + '--osd-uuid', self.osd_fsid, + '--setuser', 'ceph', + '--setgroup', 'ceph' + ] + self.osd_mkfs_cmd = [ + 'ceph-osd', + '--cluster', conf.cluster, + '--osd-objectstore', self.objectstore, + '--mkfs', + '-i', self.osd_id, + '--monmap', self.monmap, + ] + if self.cephx_secret is not None: + self.osd_mkfs_cmd.extend(['--keyfile', '-']) + try: + self.add_objectstore_opts() + except NotImplementedError: + logger.info("No specific objectstore options to add.") + + self.osd_mkfs_cmd.extend(self.supplementary_command) + return self.osd_mkfs_cmd + + def osd_mkfs(self) -> None: + self.osd_path = self.get_osd_path() + self.monmap = os.path.join(self.osd_path, 'activate.monmap') + cmd = self.build_osd_mkfs_cmd() + + system.chown(self.osd_path) + """ + When running in containers the --mkfs on raw device sometimes fails + to acquire a lock through flock() on the device because systemd-udevd holds one temporarily. + See KernelDevice.cc and _lock() to understand how ceph-osd acquires the lock. 
+ Because this is really transient, we retry up to 5 times and wait for 1 sec in-between + """ + for retry in range(5): + _, _, returncode = process.call(cmd, + stdin=self.cephx_secret, + terminal_verbose=True, + show_command=True) + if returncode == 0: + break + else: + if returncode == errno.EWOULDBLOCK: + time.sleep(1) + logger.info('disk is held by another process, ' + 'trying to mkfs again... (%s/5 attempt)' % + retry) + continue + else: + raise RuntimeError('Command failed with exit code %s: %s' % + (returncode, ' '.join(cmd))) + + def activate(self) -> None: + raise NotImplementedError() + + def enroll_tpm2(self, device: str) -> None: + """ + Enrolls a device with TPM2 (Trusted Platform Module 2.0) using systemd-cryptenroll. + This method creates a temporary file to store the dmcrypt key and uses it to enroll the device. + + Args: + device (str): The device path to be enrolled with TPM2. + """ + + if self.with_tpm: + tmp_dir: str = '/rootfs/tmp' if os.environ.get('I_AM_IN_A_CONTAINER', False) else '/tmp' + with tempfile.NamedTemporaryFile(mode='w', delete=True, dir=tmp_dir) as temp_file: + temp_file.write(self.dmcrypt_key) + temp_file.flush() + temp_file_name: str = temp_file.name.replace('/rootfs', '', 1) + cmd: List[str] = ['systemd-cryptenroll', '--tpm2-device=auto', + device, '--unlock-key-file', temp_file_name, + '--tpm2-pcrs', '9+12', '--wipe-slot', 'tpm2'] + process.call(cmd, run_on_host=True, show_command=True) diff --git a/src/ceph-volume/ceph_volume/objectstore/bluestore.py b/src/ceph-volume/ceph_volume/objectstore/bluestore.py new file mode 100644 index 000000000000..ceaa199a18b7 --- /dev/null +++ b/src/ceph-volume/ceph_volume/objectstore/bluestore.py @@ -0,0 +1,106 @@ +import logging +import os +from .baseobjectstore import BaseObjectStore +from ceph_volume.util import system +from ceph_volume.util.encryption import CephLuks2 +from ceph_volume import process +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + import argparse + from ceph_volume.api.lvm import Volume + +logger = logging.getLogger(__name__) + + +class BlueStore(BaseObjectStore): + def __init__(self, args: "argparse.Namespace") -> None: + super().__init__(args) + self.args: "argparse.Namespace" = args + self.objectstore = 'bluestore' + self.osd_id: str = '' + self.osd_fsid: str = '' + self.osd_path: str = '' + self.key: Optional[str] = None + self.block_device_path: str = '' + self.wal_device_path: str = '' + self.db_device_path: str = '' + self.block_lv: Volume + + def add_objectstore_opts(self) -> None: + """ + Create the files for the OSD to function. 
A normal call will look like: + + ceph-osd --cluster ceph --mkfs --mkkey -i 0 \ + --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \ + --osd-data /var/lib/ceph/osd/ceph-0 \ + --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \ + --keyring /var/lib/ceph/osd/ceph-0/keyring \ + --setuser ceph --setgroup ceph + + In some cases it is required to use the keyring, when it is passed + in as a keyword argument it is used as part of the ceph-osd command + """ + + if self.wal_device_path: + self.osd_mkfs_cmd.extend( + ['--bluestore-block-wal-path', self.wal_device_path] + ) + system.chown(self.wal_device_path) + + if self.db_device_path: + self.osd_mkfs_cmd.extend( + ['--bluestore-block-db-path', self.db_device_path] + ) + system.chown(self.db_device_path) + + if self.get_osdspec_affinity(): + self.osd_mkfs_cmd.extend(['--osdspec-affinity', + self.get_osdspec_affinity()]) + + def unlink_bs_symlinks(self) -> None: + for link_name in ['block', 'block.db', 'block.wal']: + link_path = os.path.join(self.osd_path, link_name) + if os.path.exists(link_path): + os.unlink(os.path.join(self.osd_path, link_name)) + + + def add_label(self, key: str, + value: str, + device: str) -> None: + """Add a label to a BlueStore device. + Args: + key (str): The name of the label being added. + value (str): Value of the label being added. + device (str): The path of the BlueStore device. + Raises: + RuntimeError: If `ceph-bluestore-tool` command doesn't success. + """ + + command: List[str] = ['ceph-bluestore-tool', + 'set-label-key', + '-k', + key, + '-v', + value, + '--dev', + device] + + _, err, rc = process.call(command, + terminal_verbose=True, + show_command=True) + if rc: + raise RuntimeError(f"Can't add BlueStore label '{key}' to device {device}: {err}") + + def osd_mkfs(self) -> None: + super().osd_mkfs() + mapping: Dict[str, Any] = {'raw': ['data', 'block_db', 'block_wal'], + 'lvm': ['ceph.block_device', 'ceph.db_device', 'ceph.wal_device']} + if self.args.dmcrypt: + for dev_type in mapping[self.method]: + if self.method == 'raw': + path = self.args.__dict__.get(dev_type, None) + else: + path = self.block_lv.tags.get(dev_type, None) + if path is not None: + CephLuks2(path).config_luks2({'subsystem': f'ceph_fsid={self.osd_fsid}'}) diff --git a/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py b/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py new file mode 100644 index 000000000000..aa11d5537230 --- /dev/null +++ b/src/ceph-volume/ceph_volume/objectstore/lvmbluestore.py @@ -0,0 +1,501 @@ +import json +import logging +import os +from ceph_volume import conf, terminal, decorators, configuration, process +from ceph_volume.api import lvm as api +from ceph_volume.util import prepare as prepare_utils +from ceph_volume.util import encryption as encryption_utils +from ceph_volume.util import system, disk +from ceph_volume.systemd import systemctl +from ceph_volume.devices.lvm.common import rollback_osd +from ceph_volume.devices.lvm.listing import direct_report +from .bluestore import BlueStore +from typing import Dict, Any, Optional, List, Tuple, TYPE_CHECKING + +if TYPE_CHECKING: + import argparse + from ceph_volume.api.lvm import Volume + +logger = logging.getLogger(__name__) + + +class LvmBlueStore(BlueStore): + def __init__(self, args: "argparse.Namespace") -> None: + super().__init__(args) + self.method = 'lvm' + self.tags: Dict[str, Any] = {} + + def pre_prepare(self) -> None: + if self.encrypted and not self.with_tpm: + self.secrets['dmcrypt_key'] = self.dmcrypt_key + + cluster_fsid = 
self.get_cluster_fsid() + + self.osd_fsid = self.args.osd_fsid or system.generate_uuid() + crush_device_class = self.args.crush_device_class + if crush_device_class: + self.secrets['crush_device_class'] = crush_device_class + # reuse a given ID if it exists, otherwise create a new ID + self.osd_id = prepare_utils.create_id(self.osd_fsid, + json.dumps(self.secrets), + osd_id=self.args.osd_id) + self.tags = { + 'ceph.osd_fsid': self.osd_fsid, + 'ceph.osd_id': self.osd_id, + 'ceph.cluster_fsid': cluster_fsid, + 'ceph.cluster_name': conf.cluster, + 'ceph.crush_device_class': crush_device_class, + 'ceph.osdspec_affinity': self.get_osdspec_affinity() + } + + try: + vg_name, lv_name = self.args.data.split('/') + self.block_lv = api.get_single_lv(filters={'lv_name': lv_name, + 'vg_name': vg_name}) + except ValueError: + self.block_lv = None + + if not self.block_lv: + self.block_lv = self.prepare_data_device('block', self.osd_fsid) + self.block_device_path = self.block_lv.__dict__['lv_path'] + + self.tags['ceph.block_device'] = self.block_lv.__dict__['lv_path'] + self.tags['ceph.block_uuid'] = self.block_lv.__dict__['lv_uuid'] + self.tags['ceph.cephx_lockbox_secret'] = self.cephx_lockbox_secret + self.tags['ceph.encrypted'] = self.encrypted + self.tags['ceph.with_tpm'] = 1 if self.with_tpm else 0 + self.tags['ceph.vdo'] = api.is_vdo(self.block_lv.__dict__['lv_path']) + + def prepare_data_device(self, + device_type: str, + osd_uuid: str) -> Optional["Volume"]: + """ + Check if ``arg`` is a device or partition to create an LV out of it + with a distinct volume group name, assigning LV tags on it and + ultimately, returning the logical volume object. Failing to detect + a device or partition will result in error. + + :param arg: The value of ``--data`` when parsing args + :param device_type: Usually ``block`` + :param osd_uuid: The OSD uuid + """ + + device = self.args.data + if disk.is_partition(device) or disk.is_device(device): + # we must create a vg, and then a single lv + lv_name_prefix = "osd-{}".format(device_type) + kwargs = { + 'device': device, + 'tags': {'ceph.type': device_type}, + 'slots': self.args.data_slots, + } + logger.debug('data device size: {}'.format(self.args.data_size)) + if self.args.data_size != 0: + kwargs['size'] = self.args.data_size + return api.create_lv( + lv_name_prefix, + osd_uuid, + **kwargs) + else: + error = [ + 'Cannot use device ({}).'.format(device), + 'A vg/lv path or an existing device is needed'] + raise RuntimeError(' '.join(error)) + + def safe_prepare(self, + args: Optional["argparse.Namespace"] = None) -> None: + """ + An intermediate step between `main()` and `prepare()` so that we can + capture the `self.osd_id` in case we need to rollback + + :param args: Injected args, usually from `lvm create` which compounds + both `prepare` and `create` + """ + if args is not None: + self.args = args + + try: + vgname, lvname = self.args.data.split('/') + lv = api.get_single_lv(filters={'lv_name': lvname, + 'vg_name': vgname}) + except ValueError: + lv = None + + if api.is_ceph_device(lv): + logger.info("device {} is already used".format(self.args.data)) + raise RuntimeError("skipping {}, it is already prepared".format( + self.args.data)) + try: + self.prepare() + except Exception: + logger.exception('lvm prepare was unable to complete') + logger.info('will rollback OSD ID creation') + rollback_osd(self.args, self.osd_id) + raise + terminal.success("ceph-volume lvm prepare successful for: %s" % + self.args.data) + + @decorators.needs_root + def prepare(self) -> 
None: + # 1/ + # Need to be reworked (move it to the parent class + call super()? ) + self.pre_prepare() + + # 2/ + self.wal_device_path, wal_uuid, tags = self.setup_device( + 'wal', + self.args.block_wal, + self.tags, + self.args.block_wal_size, + self.args.block_wal_slots) + self.db_device_path, db_uuid, tags = self.setup_device( + 'db', + self.args.block_db, + self.tags, + self.args.block_db_size, + self.args.block_db_slots) + + self.tags['ceph.type'] = 'block' + self.block_lv.set_tags(self.tags) # type: ignore + + # 3/ encryption-only operations + if self.encrypted: + self.prepare_dmcrypt() + + # 4/ osd_prepare req + self.prepare_osd_req() + + # 5/ bluestore mkfs + # prepare the osd filesystem + self.osd_mkfs() + + def prepare_dmcrypt(self) -> None: + # If encrypted, there is no need to create the lockbox keyring file + # because bluestore re-creates the files and does not have support + # for other files like the custom lockbox one. This will need to be + # done on activation. Format and open ('decrypt' devices) and + # re-assign the device and journal variables so that the rest of the + # process can use the mapper paths + + device_types = ('block', 'db', 'wal') + + for device_type in device_types: + attr_name: str = f'{device_type}_device_path' + path: str = self.__dict__[attr_name] + if path: + self.__dict__[attr_name] = self.luks_format_and_open(path, + device_type, + self.tags) + + def luks_format_and_open(self, + device: str, + device_type: str, + tags: Dict[str, Any]) -> str: + """ + Helper for devices that are encrypted. The operations needed for + block, db, wal devices are all the same + """ + if not device: + return '' + tag_name = 'ceph.%s_uuid' % device_type + uuid = tags[tag_name] + # format data device + encryption_utils.luks_format( + self.dmcrypt_key, + device + ) + + if self.with_tpm: + self.enroll_tpm2(device) + + encryption_utils.luks_open( + self.dmcrypt_key, + device, + uuid, + self.with_tpm) + + return '/dev/mapper/%s' % uuid + + def setup_device(self, + device_type: str, + device_name: str, + tags: Dict[str, Any], + size: int, + slots: int) -> Tuple[str, str, Dict[str, Any]]: + """ + Check if ``device`` is an lv, if so, set the tags, making sure to + update the tags with the lv_uuid and lv_path which the incoming tags + will not have. 
+ + If the device is not a logical volume, then retrieve the partition UUID + by querying ``blkid`` + """ + if device_name is None: + return '', '', tags + tags['ceph.type'] = device_type + tags['ceph.vdo'] = api.is_vdo(device_name) + + try: + vg_name, lv_name = device_name.split('/') + lv = api.get_single_lv(filters={'lv_name': lv_name, + 'vg_name': vg_name}) + except ValueError: + lv = None + + if lv: + lv_uuid = lv.lv_uuid + path = lv.lv_path + tags['ceph.%s_uuid' % device_type] = lv_uuid + tags['ceph.%s_device' % device_type] = path + lv.set_tags(tags) + elif disk.is_partition(device_name) or disk.is_device(device_name): + # We got a disk or a partition, create an lv + lv_type = "osd-{}".format(device_type) + name_uuid = system.generate_uuid() + kwargs = { + 'device': device_name, + 'tags': tags, + 'slots': slots + } + # TODO use get_block_db_size and co here to get configured size in + # conf file + if size != 0: + kwargs['size'] = size + lv = api.create_lv( + lv_type, + name_uuid, + **kwargs) + path = lv.lv_path + tags['ceph.{}_device'.format(device_type)] = path + tags['ceph.{}_uuid'.format(device_type)] = lv.lv_uuid + lv_uuid = lv.lv_uuid + lv.set_tags(tags) + else: + # otherwise assume this is a regular disk partition + name_uuid = self.get_ptuuid(device_name) + path = device_name + tags['ceph.%s_uuid' % device_type] = name_uuid + tags['ceph.%s_device' % device_type] = path + lv_uuid = name_uuid + return path, lv_uuid, tags + + def get_osd_device_path(self, + osd_lvs: List["Volume"], + device_type: str, + dmcrypt_secret: Optional[str] = + None) -> Optional[str]: + """ + ``device_type`` can be one of ``db``, ``wal`` or ``block`` so that we + can query LVs on system and fallback to querying the uuid if that is + not present. + + Return a path if possible, failing to do that a ``None``, since some of + these devices are optional. 
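+
+        For an encrypted OSD the returned path is the dm-crypt mapper
+        (``/dev/mapper/<device uuid>``) rather than the underlying LV or
+        partition path.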
+ """ + # TODO(guits): this should be moved in a new function get_device_uuid_from_lv() + osd_block_lv = None + for lv in osd_lvs: + if lv.tags.get('ceph.type') == 'block': + osd_block_lv = lv + break + if osd_block_lv: + is_encrypted = osd_block_lv.tags.get('ceph.encrypted', '0') == '1' + logger.debug('Found block device (%s) with encryption: %s', + osd_block_lv.name, is_encrypted) + uuid_tag = 'ceph.%s_uuid' % device_type + device_uuid = osd_block_lv.tags.get(uuid_tag) + if not device_uuid: + return None + + device_lv: Optional["Volume"] = None + for lv in osd_lvs: + if lv.tags.get('ceph.type') == device_type: + device_lv = lv + break + if device_lv: + if is_encrypted: + encryption_utils.luks_open(dmcrypt_secret, + device_lv.__dict__['lv_path'], + device_uuid) + return '/dev/mapper/%s' % device_uuid + return device_lv.__dict__['lv_path'] + + # this could be a regular device, so query it with blkid + physical_device = disk.get_device_from_partuuid(device_uuid) + if physical_device: + if is_encrypted: + encryption_utils.luks_open(dmcrypt_secret, + physical_device, + device_uuid) + return '/dev/mapper/%s' % device_uuid + return physical_device + + raise RuntimeError('could not find %s with uuid %s' % (device_type, + device_uuid)) + + def _activate(self, + osd_lvs: List["Volume"], + no_systemd: bool = False, + no_tmpfs: bool = False) -> None: + for lv in osd_lvs: + if lv.tags.get('ceph.type') == 'block': + osd_block_lv = lv + break + else: + raise RuntimeError('could not find a bluestore OSD to activate') + + is_encrypted = osd_block_lv.tags.get('ceph.encrypted', '0') == '1' + dmcrypt_secret = '' + osd_id = osd_block_lv.tags['ceph.osd_id'] + conf.cluster = osd_block_lv.tags['ceph.cluster_name'] + osd_fsid = osd_block_lv.tags['ceph.osd_fsid'] + configuration.load_ceph_conf_path( + osd_block_lv.tags['ceph.cluster_name']) + configuration.load() + + # mount on tmpfs the osd directory + self.osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id) + if not system.path_is_mounted(self.osd_path): + # mkdir -p and mount as tmpfs + prepare_utils.create_osd_path(osd_id, tmpfs=not no_tmpfs) + + # XXX This needs to be removed once ceph-bluestore-tool can deal with + # symlinks that exist in the osd dir + self.unlink_bs_symlinks() + + # encryption is handled here, before priming the OSD dir + if is_encrypted: + osd_lv_path = '/dev/mapper/%s' % osd_block_lv.__dict__['lv_uuid'] + lockbox_secret = osd_block_lv.tags['ceph.cephx_lockbox_secret'] + self.with_tpm = osd_block_lv.tags.get('ceph.with_tpm') == '1' + if not self.with_tpm: + encryption_utils.write_lockbox_keyring(osd_id, + osd_fsid, + lockbox_secret) + dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid) + lv_path: str = osd_block_lv.__dict__['lv_path'] + if disk.has_holders(lv_path): + real_path_device = os.path.realpath(lv_path) + holders = disk.get_block_device_holders() + + if real_path_device in holders.keys() and real_path_device in holders.values(): + osd_lv_path = disk.get_lvm_mapper_path_from_dm(next(k for k, v in holders.items() if v == real_path_device)) + else: + encryption_utils.luks_open(dmcrypt_secret, + osd_block_lv.__dict__['lv_path'], + osd_block_lv.__dict__['lv_uuid'], + with_tpm=self.with_tpm) + else: + osd_lv_path = osd_block_lv.__dict__['lv_path'] + + db_device_path = \ + self.get_osd_device_path(osd_lvs, 'db', + dmcrypt_secret=dmcrypt_secret) + wal_device_path = \ + self.get_osd_device_path(osd_lvs, + 'wal', + dmcrypt_secret=dmcrypt_secret) + + # Once symlinks are removed, the osd dir can be 'primed again. 
+ # chown first, regardless of what currently exists so that + # ``prime-osd-dir`` can succeed even if permissions are + # somehow messed up. + system.chown(self.osd_path) + prime_command = [ + 'ceph-bluestore-tool', '--cluster=%s' % conf.cluster, + 'prime-osd-dir', '--dev', osd_lv_path, + '--path', self.osd_path, '--no-mon-config'] + + process.run(prime_command) + # always re-do the symlink regardless if it exists, so that the block, + # block.wal, and block.db devices that may have changed can be mapped + # correctly every time + process.run(['ln', + '-snf', + osd_lv_path, + os.path.join(self.osd_path, 'block')]) + system.chown(os.path.join(self.osd_path, 'block')) + system.chown(self.osd_path) + if db_device_path: + destination = os.path.join(self.osd_path, 'block.db') + process.run(['ln', '-snf', db_device_path, destination]) + system.chown(db_device_path) + system.chown(destination) + if wal_device_path: + destination = os.path.join(self.osd_path, 'block.wal') + process.run(['ln', '-snf', wal_device_path, destination]) + system.chown(wal_device_path) + system.chown(destination) + + if no_systemd is False: + # enable the ceph-volume unit for this OSD + systemctl.enable_volume(osd_id, osd_fsid, 'lvm') + + # enable the OSD + systemctl.enable_osd(osd_id) + + # start the OSD + systemctl.start_osd(osd_id) + terminal.success("ceph-volume lvm activate successful for osd ID: %s" % + osd_id) + + @decorators.needs_root + def activate_all(self) -> None: + listed_osds = direct_report() + osds = {} + for osd_id, devices in listed_osds.items(): + # the metadata for all devices in each OSD will contain + # the FSID which is required for activation + for device in devices: + fsid = device.get('tags', {}).get('ceph.osd_fsid') + if fsid: + osds[fsid] = osd_id + break + if not osds: + terminal.warning('Was unable to find any OSDs to activate') + terminal.warning('Verify OSDs are present with ' + '"ceph-volume lvm list"') + return + for osd_fsid, osd_id in osds.items(): + if not self.args.no_systemd and systemctl.osd_is_active(osd_id): + terminal.warning( + 'OSD ID %s FSID %s process is active. 
' + 'Skipping activation' % (osd_id, osd_fsid) + ) + else: + terminal.info('Activating OSD ID %s FSID %s' % (osd_id, + osd_fsid)) + self.activate(self.args, osd_id=osd_id, osd_fsid=osd_fsid) + + @decorators.needs_root + def activate(self, + args: Optional["argparse.Namespace"] = None, + osd_id: Optional[str] = None, + osd_fsid: Optional[str] = None) -> None: + """ + :param args: The parsed arguments coming from the CLI + :param osd_id: When activating all, this gets populated with an + existing OSD ID + :param osd_fsid: When activating all, this gets populated with an + existing OSD FSID + """ + osd_id = osd_id if osd_id else self.args.osd_id + osd_fsid = osd_fsid if osd_fsid else self.args.osd_fsid + + if osd_id and osd_fsid: + tags = {'ceph.osd_id': osd_id, 'ceph.osd_fsid': osd_fsid} + elif not osd_id and osd_fsid: + tags = {'ceph.osd_fsid': osd_fsid} + elif osd_id and not osd_fsid: + raise RuntimeError('could not activate osd.{}, please provide the ' + 'osd_fsid too'.format(osd_id)) + else: + raise RuntimeError('Please provide both osd_id and osd_fsid') + lvs = api.get_lvs(tags=tags) + if not lvs: + raise RuntimeError('could not find osd.%s with osd_fsid %s' % + (osd_id, osd_fsid)) + + self._activate(lvs, self.args.no_systemd, getattr(self.args, + 'no_tmpfs', + False)) diff --git a/src/ceph-volume/ceph_volume/objectstore/rawbluestore.py b/src/ceph-volume/ceph_volume/objectstore/rawbluestore.py new file mode 100644 index 000000000000..2a4b8261ece1 --- /dev/null +++ b/src/ceph-volume/ceph_volume/objectstore/rawbluestore.py @@ -0,0 +1,232 @@ +import logging +import json +import os +from .bluestore import BlueStore +from ceph_volume import terminal, decorators, conf, process +from ceph_volume.util import system, disk +from ceph_volume.util import prepare as prepare_utils +from ceph_volume.util import encryption as encryption_utils +from ceph_volume.util.device import Device +from ceph_volume.devices.lvm.common import rollback_osd +from ceph_volume.devices.raw.list import direct_report +from typing import Any, Dict, List, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + import argparse + +logger = logging.getLogger(__name__) + + +class RawBlueStore(BlueStore): + def __init__(self, args: "argparse.Namespace") -> None: + super().__init__(args) + self.method = 'raw' + self.devices: List[str] = getattr(args, 'devices', []) + self.osd_id = getattr(self.args, 'osd_id', None) + self.osd_fsid = getattr(self.args, 'osd_fsid', '') + self.block_device_path = getattr(self.args, 'data', '') + self.db_device_path = getattr(self.args, 'block_db', '') + self.wal_device_path = getattr(self.args, 'block_wal', '') + + def prepare_dmcrypt(self) -> None: + """ + Helper for devices that are encrypted. The operations needed for + block, db, wal, devices are all the same + """ + + for device, device_type in [(self.block_device_path, 'block'), + (self.db_device_path, 'db'), + (self.wal_device_path, 'wal')]: + + if device: + kname = disk.lsblk(device)['KNAME'] + mapping = 'ceph-{}-{}-{}-dmcrypt'.format(self.osd_fsid, + kname, + device_type) + # format data device + encryption_utils.luks_format( + self.dmcrypt_key, + device + ) + if self.with_tpm: + self.enroll_tpm2(device) + encryption_utils.luks_open( + self.dmcrypt_key, + device, + mapping, + self.with_tpm + ) + self.__dict__[f'{device_type}_device_path'] = \ + '/dev/mapper/{}'.format(mapping) # TODO(guits): need to preserve path or find a way to get the parent device from the mapper ? 
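+                # From here on the corresponding <device_type>_device_path
+                # attribute points at a mapper path of the form
+                # /dev/mapper/ceph-<osd_fsid>-<kname>-<device_type>-dmcrypt
+                # instead of the original block device path.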
+ + def safe_prepare(self, + args: Optional["argparse.Namespace"] = None) -> None: + """ + An intermediate step between `main()` and `prepare()` so that we can + capture the `self.osd_id` in case we need to rollback + + :param args: Injected args, usually from `raw create` which compounds + both `prepare` and `create` + """ + if args is not None: + self.args = args # This should be moved (to __init__ ?) + try: + self.prepare() + except Exception: + logger.exception('raw prepare was unable to complete') + logger.info('will rollback OSD ID creation') + rollback_osd(self.args, self.osd_id) + raise + dmcrypt_log = 'dmcrypt' if hasattr(args, 'dmcrypt') else 'clear' + terminal.success("ceph-volume raw {} prepare " + "successful for: {}".format(dmcrypt_log, + self.args.data)) + + @decorators.needs_root + def prepare(self) -> None: + self.osd_fsid = system.generate_uuid() + crush_device_class = self.args.crush_device_class + if self.encrypted and not self.with_tpm: + self.dmcrypt_key = os.getenv('CEPH_VOLUME_DMCRYPT_SECRET', '') + self.secrets['dmcrypt_key'] = self.dmcrypt_key + if crush_device_class: + self.secrets['crush_device_class'] = crush_device_class + + tmpfs = not self.args.no_tmpfs + + # reuse a given ID if it exists, otherwise create a new ID + self.osd_id = prepare_utils.create_id( + self.osd_fsid, json.dumps(self.secrets), self.osd_id) + + if self.encrypted: + self.prepare_dmcrypt() + + self.prepare_osd_req(tmpfs=tmpfs) + + # prepare the osd filesystem + self.osd_mkfs() + + def _activate(self, osd_id: str, osd_fsid: str) -> None: + # mount on tmpfs the osd directory + self.osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id) + if not system.path_is_mounted(self.osd_path): + # mkdir -p and mount as tmpfs + prepare_utils.create_osd_path(osd_id, tmpfs=not self.args.no_tmpfs) + + # XXX This needs to be removed once ceph-bluestore-tool can deal with + # symlinks that exist in the osd dir + + self.unlink_bs_symlinks() + + # Once symlinks are removed, the osd dir can be 'primed again. chown + # first, regardless of what currently exists so that ``prime-osd-dir`` + # can succeed even if permissions are somehow messed up + system.chown(self.osd_path) + prime_command = [ + 'ceph-bluestore-tool', + 'prime-osd-dir', + '--path', self.osd_path, + '--no-mon-config', + '--dev', self.block_device_path, + ] + process.run(prime_command) + + # always re-do the symlink regardless if it exists, so that the block, + # block.wal, and block.db devices that may have changed can be mapped + # correctly every time + prepare_utils.link_block(self.block_device_path, osd_id) + + if self.db_device_path: + prepare_utils.link_db(self.db_device_path, osd_id, osd_fsid) + + if self.wal_device_path: + prepare_utils.link_wal(self.wal_device_path, osd_id, osd_fsid) + + system.chown(self.osd_path) + terminal.success("ceph-volume raw activate " + "successful for osd ID: %s" % osd_id) + + @decorators.needs_root + def activate(self) -> None: + """Activate Ceph OSDs on the system. + + This function activates Ceph Object Storage Daemons (OSDs) on the system. + It iterates over all block devices, checking if they have a LUKS2 signature and + are encrypted for Ceph. If a device's OSD fsid matches and it is enrolled with TPM2, + the function pre-activates it. After collecting the relevant devices, it attempts to + activate any OSDs found. + + Raises: + RuntimeError: If no matching OSDs are found to activate. 
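+
+        Note:
+            At least one of ``self.devices``, ``self.osd_id`` or
+            ``self.osd_fsid`` must be set (enforced by the assert below).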
+ """ + assert self.devices or self.osd_id or self.osd_fsid + + activated_any: bool = False + + for d in disk.lsblk_all(abspath=True): + device: str = d.get('NAME') + luks2 = encryption_utils.CephLuks2(device) + if luks2.is_ceph_encrypted: + if luks2.is_tpm2_enrolled and self.osd_fsid == luks2.osd_fsid: + self.pre_activate_tpm2(device) + found = direct_report(self.devices) + + holders = disk.get_block_device_holders() + for osd_uuid, meta in found.items(): + realpath_device = os.path.realpath(meta['device']) + parent_device = holders.get(realpath_device) + if parent_device and any('ceph.cluster_fsid' in lv.lv_tags for lv in Device(parent_device).lvs): + continue + osd_id = meta['osd_id'] + if self.osd_id is not None and str(osd_id) != str(self.osd_id): + continue + if self.osd_fsid is not None and osd_uuid != self.osd_fsid: + continue + self.block_device_path = meta.get('device') + self.db_device_path = meta.get('device_db', '') + self.wal_device_path = meta.get('device_wal', '') + logger.info(f'Activating osd.{osd_id} uuid {osd_uuid} cluster {meta["ceph_fsid"]}') + self._activate(osd_id, osd_uuid) + activated_any = True + + if not activated_any: + raise RuntimeError('did not find any matching OSD to activate') + + def pre_activate_tpm2(self, device: str) -> None: + """Pre-activate a TPM2-encrypted device for Ceph. + + This function pre-activates a TPM2-encrypted device for Ceph by opening the + LUKS encryption, checking the BlueStore header, and renaming the device + mapper according to the BlueStore mapping type. + + Args: + device (str): The path to the device to be pre-activated. + + Raises: + RuntimeError: If the device does not have a BlueStore signature. + """ + bs_mapping_type: Dict[str, str] = {'bluefs db': 'db', + 'bluefs wal': 'wal', + 'main': 'block'} + self.with_tpm = 1 + self.temp_mapper: str = f'activating-{os.path.basename(device)}' + self.temp_mapper_path: str = f'/dev/mapper/{self.temp_mapper}' + if not disk.BlockSysFs(device).has_active_dmcrypt_mapper: + encryption_utils.luks_open( + '', + device, + self.temp_mapper, + self.with_tpm + ) + bluestore_header: Dict[str, Any] = disk.get_bluestore_header(self.temp_mapper_path) + if not bluestore_header: + raise RuntimeError(f"{device} doesn't have BlueStore signature.") + + kname: str = disk.get_parent_device_from_mapper(self.temp_mapper_path, abspath=False) + device_type = bs_mapping_type[bluestore_header[self.temp_mapper_path]['description']] + new_mapper: str = f'ceph-{self.osd_fsid}-{kname}-{device_type}-dmcrypt' + self.block_device_path = f'/dev/mapper/{new_mapper}' + self.devices.append(self.block_device_path) + # An option could be to simply rename the mapper but the uuid remains unchanged in sysfs + encryption_utils.luks_close(self.temp_mapper) + encryption_utils.luks_open('', device, new_mapper, self.with_tpm) diff --git a/src/ceph-volume/ceph_volume/tests/api/test_lvm.py b/src/ceph-volume/ceph_volume/tests/api/test_lvm.py index 139328b4a0d5..6a5eee0e1b8d 100644 --- a/src/ceph-volume/ceph_volume/tests/api/test_lvm.py +++ b/src/ceph-volume/ceph_volume/tests/api/test_lvm.py @@ -782,7 +782,7 @@ def test_get_lvs_empty(self, monkeypatch): class TestGetSinglePV(object): - @patch('ceph_volume.devices.lvm.prepare.api.get_pvs') + @patch('ceph_volume.api.lvm.get_pvs') def test_get_single_pv_multiple_matches_raises_runtimeerror(self, m_get_pvs): fake_pvs = [] fake_pvs.append(api.PVolume(pv_name='/dev/sda', pv_tags={})) @@ -794,14 +794,14 @@ def test_get_single_pv_multiple_matches_raises_runtimeerror(self, m_get_pvs): 
api.get_single_pv() assert "matched more than 1 PV present on this host." in str(e.value) - @patch('ceph_volume.devices.lvm.prepare.api.get_pvs') + @patch('ceph_volume.api.lvm.get_pvs') def test_get_single_pv_no_match_returns_none(self, m_get_pvs): m_get_pvs.return_value = [] pv = api.get_single_pv() assert pv == None - @patch('ceph_volume.devices.lvm.prepare.api.get_pvs') + @patch('ceph_volume.api.lvm.get_pvs') def test_get_single_pv_one_match(self, m_get_pvs): fake_pvs = [] fake_pvs.append(api.PVolume(pv_name='/dev/sda', pv_tags={})) @@ -815,7 +815,7 @@ def test_get_single_pv_one_match(self, m_get_pvs): class TestGetSingleVG(object): - @patch('ceph_volume.devices.lvm.prepare.api.get_vgs') + @patch('ceph_volume.api.lvm.get_vgs') def test_get_single_vg_multiple_matches_raises_runtimeerror(self, m_get_vgs): fake_vgs = [] fake_vgs.append(api.VolumeGroup(vg_name='vg1')) @@ -827,14 +827,14 @@ def test_get_single_vg_multiple_matches_raises_runtimeerror(self, m_get_vgs): api.get_single_vg() assert "matched more than 1 VG present on this host." in str(e.value) - @patch('ceph_volume.devices.lvm.prepare.api.get_vgs') + @patch('ceph_volume.api.lvm.get_vgs') def test_get_single_vg_no_match_returns_none(self, m_get_vgs): m_get_vgs.return_value = [] vg = api.get_single_vg() assert vg == None - @patch('ceph_volume.devices.lvm.prepare.api.get_vgs') + @patch('ceph_volume.api.lvm.get_vgs') def test_get_single_vg_one_match(self, m_get_vgs): fake_vgs = [] fake_vgs.append(api.VolumeGroup(vg_name='vg1')) @@ -847,7 +847,7 @@ def test_get_single_vg_one_match(self, m_get_vgs): class TestGetSingleLV(object): - @patch('ceph_volume.devices.lvm.prepare.api.get_lvs') + @patch('ceph_volume.api.lvm.get_lvs') def test_get_single_lv_multiple_matches_raises_runtimeerror(self, m_get_lvs): fake_lvs = [] fake_lvs.append(api.Volume(lv_name='lv1', @@ -866,14 +866,14 @@ def test_get_single_lv_multiple_matches_raises_runtimeerror(self, m_get_lvs): api.get_single_lv() assert "matched more than 1 LV present on this host" in str(e.value) - @patch('ceph_volume.devices.lvm.prepare.api.get_lvs') + @patch('ceph_volume.api.lvm.get_lvs') def test_get_single_lv_no_match_returns_none(self, m_get_lvs): m_get_lvs.return_value = [] lv = api.get_single_lv() assert lv == None - @patch('ceph_volume.devices.lvm.prepare.api.get_lvs') + @patch('ceph_volume.api.lvm.get_lvs') def test_get_single_lv_one_match(self, m_get_lvs): fake_lvs = [] fake_lvs.append(api.Volume(lv_name='lv1', lv_path='/dev/vg1/lv1', vg_name='vg1', lv_tags='', lv_uuid='fake-uuid')) @@ -883,15 +883,3 @@ def test_get_single_lv_one_match(self, m_get_lvs): assert isinstance(lv_, api.Volume) assert lv_.name == 'lv1' - - -class TestHelpers: - def test_get_lv_path_from_mapper(self): - mapper = '/dev/mapper/ceph--c1a97e46--234c--46aa--a549--3ca1d1f356a9-osd--block--32e8e896--172e--4a38--a06a--3702598510ec' - lv_path = api.get_lv_path_from_mapper(mapper) - assert lv_path == '/dev/ceph-c1a97e46-234c-46aa-a549-3ca1d1f356a9/osd-block-32e8e896-172e-4a38-a06a-3702598510ec' - - def test_get_mapper_from_lv_path(self): - lv_path = '/dev/ceph-c1a97e46-234c-46aa-a549-3ca1d1f356a9/osd-block-32e8e896-172e-4a38-a06a-3702598510ec' - mapper = api.get_mapper_from_lv_path(lv_path) - assert mapper == '/dev/mapper/ceph--c1a97e46--234c--46aa--a549--3ca1d1f356a9/osd--block--32e8e896--172e--4a38--a06a/3702598510ec' diff --git a/src/ceph-volume/ceph_volume/tests/conftest.py b/src/ceph-volume/ceph_volume/tests/conftest.py index 7a7c57d9721d..e6bf31737b69 100644 --- a/src/ceph-volume/ceph_volume/tests/conftest.py 
+++ b/src/ceph-volume/ceph_volume/tests/conftest.py @@ -1,11 +1,13 @@ import os import pytest -from mock.mock import patch, PropertyMock, create_autospec +from mock.mock import patch, PropertyMock, create_autospec, Mock from ceph_volume.api import lvm from ceph_volume.util import disk from ceph_volume.util import device from ceph_volume.util.constants import ceph_disk_guids -from ceph_volume import conf, configuration +from ceph_volume import conf, configuration, objectstore +from ceph_volume.objectstore.rawbluestore import RawBlueStore +from typing import Any, Dict, List, Optional, Callable class Capture(object): @@ -36,6 +38,16 @@ def __init__(self, **kw): def factory(): return Factory +def objectstore_bluestore_factory(**kw): + o = objectstore.bluestore.BlueStore([]) + for k, v in kw.items(): + setattr(o, k, v) + return o + +@pytest.fixture +def objectstore_bluestore(): + return objectstore_bluestore_factory + @pytest.fixture def capture(): @@ -58,30 +70,78 @@ def mock_lv(): return dev return mock_lv -def mock_device(): +def mock_device(name='foo', + vg_name='vg_foo', + vg_size=None, + lv_name='lv_foo', + lv_size=None, + path='foo', + lv_path='', + number_lvs=0): dev = create_autospec(device.Device) - dev.path = '/dev/foo' - dev.vg_name = 'vg_foo' - dev.lv_name = 'lv_foo' + if vg_size is None: + dev.vg_size = [21474836480] + if lv_size is None: + lv_size = dev.vg_size + dev.lv_size = lv_size + dev.path = f'/dev/{path}' + dev.vg_name = f'{vg_name}' + dev.lv_name = f'{lv_name}' + dev.lv_path = lv_path if lv_path else f'/dev/{dev.vg_name}/{dev.lv_name}' dev.symlink = None dev.vgs = [lvm.VolumeGroup(vg_name=dev.vg_name, lv_name=dev.lv_name)] dev.available_lvm = True - dev.vg_size = [21474836480] dev.vg_free = dev.vg_size dev.lvs = [] + for n in range(0, number_lvs): + dev.lvs.append(lvm.Volume(vg_name=f'{dev.vg_name}{n}', + lv_name=f'{dev.lv_name}-{n}', + lv_path=f'{dev.lv_path}-{n}', + lv_size=dev.lv_size, + lv_tags='')) + dev.is_device = True return dev @pytest.fixture(params=range(1,4)) def mock_devices_available(request): ret = [] - for n in range(request.param): - dev = mock_device() - # after v15.2.8, a single VG is created for each PV - dev.vg_name = f'vg_foo_{n}' + for n in range(1, request.param+1): + # dev = mock_device(suffix=str(n), vg_name=f'vg_foo_{n}', lv_name='') + dev = mock_device(vg_name=f'vg_foo_{n}', lv_name='') dev.vgs = [lvm.VolumeGroup(vg_name=dev.vg_name, lv_name=dev.lv_name)] ret.append(dev) return ret +@pytest.fixture(params=range(2,5)) +def mock_devices_available_multi_pvs_per_vg(request): + ret = [] + number_lvs = 1 + # for n in range(0, 2): + for n in range(0, request.param): + if n == request.param - 1: + number_lvs = 2 + dev = mock_device(path=f'foo{str(n)}', + vg_name='vg_foo', + lv_name=f'lv_foo{str(n)}', + lv_size=[21474836480], + number_lvs=number_lvs) + # after v15.2.8, a single VG is created for each PV + dev.vgs = [lvm.VolumeGroup(vg_name=dev.vg_name, + pv_name=dev.path, + pv_count=request.param)] + ret.append(dev) + return ret + +# @pytest.fixture(params=range(1,4)) +# def mock_devices_available_multi_pvs_per_vg(request): +# ret = [] +# for n in range(1, request.param+1): +# dev = mock_device(suffix=str(n), vg_name=f'vg_foo', lv_name='') +# # after v15.2.8, a single VG is created for each PV +# dev.vgs = [lvm.VolumeGroup(vg_name=dev.vg_name, lv_name=dev.lv_name)] +# ret.append(dev) +# return ret + @pytest.fixture def mock_device_generator(): return mock_device @@ -198,6 +258,13 @@ def is_root(monkeypatch): """ monkeypatch.setattr('os.getuid', lambda: 
0) +@pytest.fixture +def is_non_root(monkeypatch): + """ + Patch ``os.getuid()`` so that ceph-volume's decorators that ensure a user + is not root. + """ + monkeypatch.setattr('os.getuid', lambda: 100) @pytest.fixture def tmpfile(tmpdir): @@ -293,7 +360,7 @@ def apply(devices=None, lsblk=None, lv=None, blkid=None, udevadm=None, has_bluestore_label=False): if devices: for dev in devices.keys(): - devices[dev]['device_nodes'] = os.path.basename(dev) + devices[dev]['device_nodes'] = [os.path.basename(dev)] else: devices = {} lsblk = lsblk if lsblk else {} @@ -322,4 +389,145 @@ def fake_filesystem(fs): fs.create_dir('/sys/block/sda/slaves') fs.create_dir('/sys/block/sda/queue') fs.create_dir('/sys/block/rbd0') + fs.create_dir('/var/log/ceph') + fs.create_dir('/tmp/osdpath') yield fs + +@pytest.fixture +def key_size(monkeypatch): + monkeypatch.setattr("ceph_volume.util.encryption.get_key_size_from_conf", lambda: 512) + +lvm_direct_report_data = { + '1': [{ + 'lv_tags': 'ceph.block_device=/dev/ceph-40bc7bd7-4aee-483e-ba95-89a64bc8a4fd/osd-block-824f7edf-371f-4b75-9231-4ab62a32d5c0,ceph.block_uuid=kS7zXI-bpmu-3ciB-0rVY-d08b-gWDf-Y9oums,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=7dccab18-14cf-11ee-837b-5254008f8ca5,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-73d6d4db-6528-48f2-a4e2-1c82bc87a9ac/osd-db-b82d920d-be3c-4e4d-ba64-18f7e8445892,ceph.db_uuid=Kuvi0U-05vW-sETB-QiNW-lpaK-XBfD-82eQWw,ceph.encrypted=0,ceph.osd_fsid=824f7edf-371f-4b75-9231-4ab62a32d5c0,ceph.osd_id=1,ceph.osdspec_affinity=,ceph.type=block,ceph.vdo=0', + 'lv_path': '/dev/ceph-40bc7bd7-4aee-483e-ba95-89a64bc8a4fd/osd-block-824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'lv_name': 'osd-block-824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'vg_name': 'ceph-40bc7bd7-4aee-483e-ba95-89a64bc8a4fd', + 'lv_uuid': 'kS7zXI-bpmu-3ciB-0rVY-d08b-gWDf-Y9oums', + 'lv_size': '214744170496', + 'tags': { + 'ceph.block_device': '/dev/ceph-40bc7bd7-4aee-483e-ba95-89a64bc8a4fd/osd-block-824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'ceph.block_uuid': 'kS7zXI-bpmu-3ciB-0rVY-d08b-gWDf-Y9oums', + 'ceph.cephx_lockbox_secret': '', + 'ceph.cluster_fsid': '7dccab18-14cf-11ee-837b-5254008f8ca5', + 'ceph.cluster_name': 'ceph', + 'ceph.crush_device_class': '', + 'ceph.db_device': '/dev/ceph-73d6d4db-6528-48f2-a4e2-1c82bc87a9ac/osd-db-b82d920d-be3c-4e4d-ba64-18f7e8445892', + 'ceph.db_uuid': 'Kuvi0U-05vW-sETB-QiNW-lpaK-XBfD-82eQWw', + 'ceph.encrypted': '0', + 'ceph.osd_fsid': '824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'ceph.osd_id': '1', + 'ceph.osdspec_affinity': '', + 'ceph.type': 'block', + 'ceph.vdo': '0' + }, + 'name': 'osd-block-824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'type': 'block', + 'path': '/dev/ceph-40bc7bd7-4aee-483e-ba95-89a64bc8a4fd/osd-block-824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'devices': ['/dev/vdc'] + }, { + 'lv_tags': 'ceph.block_device=/dev/ceph-40bc7bd7-4aee-483e-ba95-89a64bc8a4fd/osd-block-824f7edf-371f-4b75-9231-4ab62a32d5c0,ceph.block_uuid=kS7zXI-bpmu-3ciB-0rVY-d08b-gWDf-Y9oums,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=7dccab18-14cf-11ee-837b-5254008f8ca5,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-73d6d4db-6528-48f2-a4e2-1c82bc87a9ac/osd-db-b82d920d-be3c-4e4d-ba64-18f7e8445892,ceph.db_uuid=Kuvi0U-05vW-sETB-QiNW-lpaK-XBfD-82eQWw,ceph.encrypted=0,ceph.osd_fsid=824f7edf-371f-4b75-9231-4ab62a32d5c0,ceph.osd_id=1,ceph.osdspec_affinity=,ceph.type=db,ceph.vdo=0', + 'lv_path': '/dev/ceph-73d6d4db-6528-48f2-a4e2-1c82bc87a9ac/osd-db-b82d920d-be3c-4e4d-ba64-18f7e8445892', + 'lv_name': 
'osd-db-b82d920d-be3c-4e4d-ba64-18f7e8445892', + 'vg_name': 'ceph-73d6d4db-6528-48f2-a4e2-1c82bc87a9ac', + 'lv_uuid': 'Kuvi0U-05vW-sETB-QiNW-lpaK-XBfD-82eQWw', + 'lv_size': '214744170496', + 'tags': { + 'ceph.block_device': '/dev/ceph-40bc7bd7-4aee-483e-ba95-89a64bc8a4fd/osd-block-824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'ceph.block_uuid': 'kS7zXI-bpmu-3ciB-0rVY-d08b-gWDf-Y9oums', + 'ceph.cephx_lockbox_secret': '', + 'ceph.cluster_fsid': '7dccab18-14cf-11ee-837b-5254008f8ca5', + 'ceph.cluster_name': 'ceph', + 'ceph.crush_device_class': '', + 'ceph.db_device': '/dev/ceph-73d6d4db-6528-48f2-a4e2-1c82bc87a9ac/osd-db-b82d920d-be3c-4e4d-ba64-18f7e8445892', + 'ceph.db_uuid': 'Kuvi0U-05vW-sETB-QiNW-lpaK-XBfD-82eQWw', + 'ceph.encrypted': '0', + 'ceph.osd_fsid': '824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'ceph.osd_id': '1', + 'ceph.osdspec_affinity': '', + 'ceph.type': 'db', + 'ceph.vdo': '0' + }, + 'name': 'osd-db-b82d920d-be3c-4e4d-ba64-18f7e8445892', + 'type': 'db', + 'path': '/dev/ceph-73d6d4db-6528-48f2-a4e2-1c82bc87a9ac/osd-db-b82d920d-be3c-4e4d-ba64-18f7e8445892', + 'devices': ['/dev/vdd'] + }], + '0': [{ + 'lv_tags': 'ceph.block_device=/dev/ceph-e34cc3f5-a70d-49df-82b3-46bcbd63d4b0/osd-block-a0e07c5b-bee1-4ea2-ae07-cb89deda9b27,ceph.block_uuid=cYBGv9-s2cn-FfEy-dGQh-VHci-5jj9-9l5kvH,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=7dccab18-14cf-11ee-837b-5254008f8ca5,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=a0e07c5b-bee1-4ea2-ae07-cb89deda9b27,ceph.osd_id=0,ceph.osdspec_affinity=,ceph.type=block,ceph.vdo=0', + 'lv_path': '/dev/ceph-e34cc3f5-a70d-49df-82b3-46bcbd63d4b0/osd-block-a0e07c5b-bee1-4ea2-ae07-cb89deda9b27', + 'lv_name': 'osd-block-a0e07c5b-bee1-4ea2-ae07-cb89deda9b27', + 'vg_name': 'ceph-e34cc3f5-a70d-49df-82b3-46bcbd63d4b0', + 'lv_uuid': 'cYBGv9-s2cn-FfEy-dGQh-VHci-5jj9-9l5kvH', + 'lv_size': '214744170496', + 'tags': { + 'ceph.block_device': '/dev/ceph-e34cc3f5-a70d-49df-82b3-46bcbd63d4b0/osd-block-a0e07c5b-bee1-4ea2-ae07-cb89deda9b27', + 'ceph.block_uuid': 'cYBGv9-s2cn-FfEy-dGQh-VHci-5jj9-9l5kvH', + 'ceph.cephx_lockbox_secret': '', + 'ceph.cluster_fsid': '7dccab18-14cf-11ee-837b-5254008f8ca5', + 'ceph.cluster_name': 'ceph', + 'ceph.crush_device_class': '', + 'ceph.encrypted': '0', + 'ceph.osd_fsid': 'a0e07c5b-bee1-4ea2-ae07-cb89deda9b27', + 'ceph.osd_id': '0', + 'ceph.osdspec_affinity': '', + 'ceph.type': 'block', + 'ceph.vdo': '0' + }, + 'name': 'osd-block-a0e07c5b-bee1-4ea2-ae07-cb89deda9b27', + 'type': 'block', + 'path': '/dev/ceph-e34cc3f5-a70d-49df-82b3-46bcbd63d4b0/osd-block-a0e07c5b-bee1-4ea2-ae07-cb89deda9b27', + 'devices': ['/dev/vdb1'] + }] + } + +raw_direct_report_data = { + "824f7edf-371f-4b75-9231-4ab62a32d5c0": { + "ceph_fsid": "7dccab18-14cf-11ee-837b-5254008f8ca5", + "device": "/dev/mapper/ceph--40bc7bd7--4aee--483e--ba95--89a64bc8a4fd-osd--block--824f7edf--371f--4b75--9231--4ab62a32d5c0", + "device_db": "/dev/mapper/ceph--73d6d4db--6528--48f2--a4e2--1c82bc87a9ac-osd--db--b82d920d--be3c--4e4d--ba64--18f7e8445892", + "osd_id": 8, + "osd_uuid": "824f7edf-371f-4b75-9231-4ab62a32d5c0", + "type": "bluestore" + }, + "a0e07c5b-bee1-4ea2-ae07-cb89deda9b27": { + "ceph_fsid": "7dccab18-14cf-11ee-837b-5254008f8ca5", + "device": "/dev/mapper/ceph--e34cc3f5--a70d--49df--82b3--46bcbd63d4b0-osd--block--a0e07c5b--bee1--4ea2--ae07--cb89deda9b27", + "osd_id": 9, + "osd_uuid": "a0e07c5b-bee1-4ea2-ae07-cb89deda9b27", + "type": "bluestore" + }, + "db32a338-b640-4cbc-af17-f63808b1c36e": { + "ceph_fsid": "c301d0aa-288d-11ef-b535-c84bd6975560", + "device": 
"/dev/mapper/ceph-db32a338-b640-4cbc-af17-f63808b1c36e-sdb-block-dmcrypt", + "device_db": "/dev/mapper/ceph-db32a338-b640-4cbc-af17-f63808b1c36e-sdc-db-dmcrypt", + "osd_id": 0, + "osd_uuid": "db32a338-b640-4cbc-af17-f63808b1c36e", + "type": "bluestore" + } +} + +@pytest.fixture +def mock_lvm_direct_report(monkeypatch): + monkeypatch.setattr('ceph_volume.objectstore.lvmbluestore.direct_report', lambda: lvm_direct_report_data) + +@pytest.fixture +def mock_raw_direct_report(monkeypatch): + monkeypatch.setattr('ceph_volume.objectstore.rawbluestore.direct_report', lambda x: raw_direct_report_data) + +@pytest.fixture +def fake_lsblk_all(monkeypatch: Any) -> Callable: + def apply(data: Optional[List[Dict[str, Any]]] = None) -> None: + if data is None: + devices = [] + else: + devices = data + monkeypatch.setattr("ceph_volume.util.device.disk.lsblk_all", lambda *a, **kw: devices) + return apply + +@pytest.fixture +def rawbluestore(factory: type[Factory]) -> RawBlueStore: + args = factory(devices=['/dev/foo']) + with patch('ceph_volume.objectstore.rawbluestore.prepare_utils.create_key', Mock(return_value=['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='])): + r = RawBlueStore(args) # type: ignore + return r diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/data_zap.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/data_zap.py new file mode 100644 index 000000000000..cca64e83ab0f --- /dev/null +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/data_zap.py @@ -0,0 +1,81 @@ +ceph_bluestore_tool_output = ''' +{ + "/dev/sdb": { + "osd_uuid": "d5a496bc-dcb9-4ad0-a12c-393d3200d2b6", + "size": 1099511627776, + "btime": "2021-07-23T16:02:22.809186+0000", + "description": "main", + "bfm_blocks": "268435456", + "bfm_blocks_per_key": "128", + "bfm_bytes_per_block": "4096", + "bfm_size": "1099511627776", + "bluefs": "1", + "ceph_fsid": "sdb-fsid", + "ceph_version_when_created": "ceph version 19.3.0-5537-gb9ba4e48 (b9ba4e48633d6d90d5927a4e66b9ecbb4d7e6e73) squid (dev)", + "kv_backend": "rocksdb", + "magic": "ceph osd volume v026", + "mkfs_done": "yes", + "osd_key": "AQAO6PpgK+y4CBAAixq/X7OVimbaezvwD/cDmg==", + "ready": "ready", + "require_osd_release": "16", + "type": "bluestore", + "whoami": "0" + }, + "/dev/vdx": { + "osd_uuid": "d5a496bc-dcb9-4ad0-a12c-393d3200d2b6", + "size": 214748364800, + "btime": "2024-10-16T10:51:05.955279+0000", + "description": "main", + "bfm_blocks": "52428800", + "bfm_blocks_per_key": "128", + "bfm_bytes_per_block": "4096", + "bfm_size": "214748364800", + "bluefs": "1", + "ceph_fsid": "2d20bc8c-8a0c-11ef-aaba-525400e54507", + "ceph_version_when_created": "ceph version 19.3.0-5537-gb9ba4e48 (b9ba4e48633d6d90d5927a4e66b9ecbb4d7e6e73) squid (dev)", + "created_at": "2024-10-16T10:51:09.121455Z", + "elastic_shared_blobs": "1", + "epoch": "16", + "kv_backend": "rocksdb", + "magic": "ceph osd volume v026", + "multi": "yes", + "osd_key": "AQCZmg9nxOKTCBAA6EQftuqMuKMHqypSAfqBsQ==", + "ready": "ready", + "type": "bluestore", + "whoami": "5" + }, + "/dev/vdy": { + "osd_uuid": "d5a496bc-dcb9-4ad0-a12c-393d3200d2b6", + "size": 214748364800, + "btime": "2024-10-16T10:51:05.961279+0000", + "description": "bluefs db" + }, + "/dev/vdz": { + "osd_uuid": "d5a496bc-dcb9-4ad0-a12c-393d3200d2b6", + "size": 214748364800, + "btime": "2024-10-16T10:51:05.961279+0000", + "description": "bluefs wal" + } +} +'''.split('\n') + +lsblk_all = ['NAME="/dev/sdb" KNAME="/dev/sdb" PKNAME="" PARTLABEL=""', + 'NAME="/dev/sdx" KNAME="/dev/sdx" PKNAME="" PARTLABEL=""', + 'NAME="/dev/sdy" KNAME="/dev/sdy" PKNAME="" 
PARTLABEL=""', + 'NAME="/dev/sdz" KNAME="/dev/sdz" PKNAME="" PARTLABEL=""'] + +blkid_output = ['/dev/ceph-1172bba3-3e0e-45e5-ace6-31ae8401221f/osd-block-5050a85c-d1a7-4d66-b4ba-2e9b1a2970ae: TYPE="ceph_bluestore" USAGE="other"'] + +udevadm_property = '''DEVNAME=/dev/sdb +DEVTYPE=disk +ID_ATA=1 +ID_BUS=ata +ID_MODEL=SK_hynix_SC311_SATA_512GB +ID_PART_TABLE_TYPE=gpt +ID_PART_TABLE_UUID=c8f91d57-b26c-4de1-8884-0c9541da288c +ID_PATH=pci-0000:00:17.0-ata-3 +ID_PATH_TAG=pci-0000_00_17_0-ata-3 +ID_REVISION=70000P10 +ID_SERIAL=SK_hynix_SC311_SATA_512GB_MS83N71801150416A +TAGS=:systemd: +USEC_INITIALIZED=16117769'''.split('\n') \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_activate.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_activate.py index 5d48a0ef4044..b44071026ad3 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_activate.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_activate.py @@ -3,7 +3,10 @@ from ceph_volume.devices.lvm import activate from ceph_volume.api import lvm as api from ceph_volume.tests.conftest import Capture - +from ceph_volume import objectstore +#from ceph_volume.util.prepare import create_key +from mock import patch, call +from argparse import Namespace class Args(object): @@ -16,44 +19,59 @@ def __init__(self, **kw): setattr(self, k, v) +@patch('ceph_volume.util.prepare.create_key', return_value='fake-secret') class TestActivate(object): # these tests are very functional, hence the heavy patching, it is hard to # test the negative side effect with an actual functional run, so we must # setup a perfect scenario for this test to check it can really work # with/without osd_id - def test_no_osd_id_matches_fsid_bluestore(self, is_root, monkeypatch, capture): - FooVolume = api.Volume(lv_name='foo', lv_path='/dev/vg/foo', - lv_tags="ceph.osd_fsid=1234") + def test_no_osd_id_matches_fsid_bluestore(self, + m_create_key, + is_root, + monkeypatch, + capture): + FooVolume = api.Volume(lv_name='foo', + lv_path='/dev/vg/foo', + lv_tags="ceph.osd_fsid=1234") volumes = [] volumes.append(FooVolume) monkeypatch.setattr(api, 'get_lvs', lambda **kwargs: volumes) - monkeypatch.setattr(activate, 'activate_bluestore', capture) + monkeypatch.setattr(objectstore.lvmbluestore.LvmBlueStore, + '_activate', + capture) + args = Args(osd_id=None, osd_fsid='1234', bluestore=True) - activate.Activate([]).activate(args) + a = activate.Activate([]) + a.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=args) + a.objectstore.activate() assert capture.calls[0]['args'][0] == [FooVolume] - def test_osd_id_no_osd_fsid(self, is_root): + def test_osd_id_no_osd_fsid(self, m_create_key, is_root): args = Args(osd_id=42, osd_fsid=None) + a = activate.Activate([]) + a.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=args) with pytest.raises(RuntimeError) as result: - activate.Activate([]).activate(args) + a.objectstore.activate() assert result.value.args[0] == 'could not activate osd.42, please provide the osd_fsid too' - def test_no_osd_id_no_osd_fsid(self, is_root): + def test_no_osd_id_no_osd_fsid(self, m_create_key, is_root): args = Args(osd_id=None, osd_fsid=None) + a = activate.Activate([]) + a.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=args) with pytest.raises(RuntimeError) as result: - activate.Activate([]).activate(args) + a.objectstore.activate() assert result.value.args[0] == 'Please provide both osd_id and osd_fsid' - def test_bluestore_no_systemd(self, is_root, monkeypatch, capture): + def 
test_bluestore_no_systemd(self, m_create_key, is_root, monkeypatch, capture): monkeypatch.setattr('ceph_volume.configuration.load', lambda: None) fake_enable = Capture() fake_start_osd = Capture() monkeypatch.setattr('ceph_volume.util.system.path_is_mounted', lambda *a, **kw: True) monkeypatch.setattr('ceph_volume.util.system.chown', lambda *a, **kw: True) monkeypatch.setattr('ceph_volume.process.run', lambda *a, **kw: True) - monkeypatch.setattr(activate.systemctl, 'enable_volume', fake_enable) - monkeypatch.setattr(activate.systemctl, 'start_osd', fake_start_osd) + monkeypatch.setattr(objectstore.lvmbluestore.systemctl, 'enable_volume', fake_enable) + monkeypatch.setattr(objectstore.lvmbluestore.systemctl, 'start_osd', fake_start_osd) DataVolume = api.Volume( lv_name='data', lv_path='/dev/vg/data', @@ -64,19 +82,21 @@ def test_bluestore_no_systemd(self, is_root, monkeypatch, capture): monkeypatch.setattr(api, 'get_lvs', lambda **kwargs: deepcopy(volumes)) args = Args(osd_id=None, osd_fsid='1234', no_systemd=True, bluestore=True) - activate.Activate([]).activate(args) + a = activate.Activate([]) + a.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=args) + a.objectstore.activate() assert fake_enable.calls == [] assert fake_start_osd.calls == [] - def test_bluestore_systemd(self, is_root, monkeypatch, capture): + def test_bluestore_systemd(self, m_create_key, is_root, monkeypatch, capture): monkeypatch.setattr('ceph_volume.configuration.load', lambda: None) fake_enable = Capture() fake_start_osd = Capture() monkeypatch.setattr('ceph_volume.util.system.path_is_mounted', lambda *a, **kw: True) monkeypatch.setattr('ceph_volume.util.system.chown', lambda *a, **kw: True) monkeypatch.setattr('ceph_volume.process.run', lambda *a, **kw: True) - monkeypatch.setattr(activate.systemctl, 'enable_volume', fake_enable) - monkeypatch.setattr(activate.systemctl, 'start_osd', fake_start_osd) + monkeypatch.setattr(objectstore.lvmbluestore.systemctl, 'enable_volume', fake_enable) + monkeypatch.setattr(objectstore.lvmbluestore.systemctl, 'start_osd', fake_start_osd) DataVolume = api.Volume( lv_name='data', lv_path='/dev/vg/data', @@ -88,19 +108,21 @@ def test_bluestore_systemd(self, is_root, monkeypatch, capture): args = Args(osd_id=None, osd_fsid='1234', no_systemd=False, bluestore=True) - activate.Activate([]).activate(args) + a = activate.Activate([]) + a.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=args) + a.objectstore.activate() assert fake_enable.calls != [] assert fake_start_osd.calls != [] - def test_bluestore_no_systemd_autodetect(self, is_root, monkeypatch, capture): + def test_bluestore_no_systemd_autodetect(self, m_create_key, is_root, monkeypatch, capture): monkeypatch.setattr('ceph_volume.configuration.load', lambda: None) fake_enable = Capture() fake_start_osd = Capture() monkeypatch.setattr('ceph_volume.util.system.path_is_mounted', lambda *a, **kw: True) monkeypatch.setattr('ceph_volume.util.system.chown', lambda *a, **kw: True) monkeypatch.setattr('ceph_volume.process.run', lambda *a, **kw: True) - monkeypatch.setattr(activate.systemctl, 'enable_volume', fake_enable) - monkeypatch.setattr(activate.systemctl, 'start_osd', fake_start_osd) + monkeypatch.setattr(objectstore.lvmbluestore.systemctl, 'enable_volume', fake_enable) + monkeypatch.setattr(objectstore.lvmbluestore.systemctl, 'start_osd', fake_start_osd) DataVolume = api.Volume( lv_name='data', lv_path='/dev/vg/data', @@ -112,11 +134,13 @@ def test_bluestore_no_systemd_autodetect(self, is_root, monkeypatch, capture): 
args = Args(osd_id=None, osd_fsid='1234', no_systemd=True, bluestore=True, auto_detect_objectstore=True) - activate.Activate([]).activate(args) + a = activate.Activate([]) + a.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=args) + a.objectstore.activate() assert fake_enable.calls == [] assert fake_start_osd.calls == [] - def test_bluestore_systemd_autodetect(self, is_root, monkeypatch, capture): + def test_bluestore_systemd_autodetect(self, m_create_key, is_root, monkeypatch, capture): monkeypatch.setattr('ceph_volume.configuration.load', lambda: None) fake_enable = Capture() fake_start_osd = Capture() @@ -125,8 +149,8 @@ def test_bluestore_systemd_autodetect(self, is_root, monkeypatch, capture): monkeypatch.setattr('ceph_volume.util.system.chown', lambda *a, **kw: True) monkeypatch.setattr('ceph_volume.process.run', lambda *a, **kw: True) - monkeypatch.setattr(activate.systemctl, 'enable_volume', fake_enable) - monkeypatch.setattr(activate.systemctl, 'start_osd', fake_start_osd) + monkeypatch.setattr(objectstore.lvmbluestore.systemctl, 'enable_volume', fake_enable) + monkeypatch.setattr(objectstore.lvmbluestore.systemctl, 'start_osd', fake_start_osd) DataVolume = api.Volume( lv_name='data', lv_path='/dev/vg/data', @@ -138,33 +162,37 @@ def test_bluestore_systemd_autodetect(self, is_root, monkeypatch, capture): args = Args(osd_id=None, osd_fsid='1234', no_systemd=False, bluestore=True, auto_detect_objectstore=False) - activate.Activate([]).activate(args) + a = activate.Activate([]) + a.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=args) + a.objectstore.activate() assert fake_enable.calls != [] assert fake_start_osd.calls != [] + +@patch('ceph_volume.util.prepare.create_key', return_value='fake-secret') +@patch('ceph_volume.objectstore.lvmbluestore.LvmBlueStore.activate_all') +@patch('ceph_volume.objectstore.lvmbluestore.LvmBlueStore.activate') class TestActivateFlags(object): - def test_default_objectstore(self, capture): + def test_default_objectstore(self, m_activate, m_activate_all, m_create_key, capture): args = ['0', 'asdf-ljh-asdf'] - activation = activate.Activate(args) - activation.activate = capture - activation.main() - parsed_args = capture.calls[0]['args'][0] - assert parsed_args.bluestore is False - def test_uses_bluestore(self, capture): + a = activate.Activate(args) + a.main() + assert a.args.objectstore == 'bluestore' + + def test_bluestore_backward_compatibility(self, m_activate, m_activate_all, m_create_key, capture): args = ['--bluestore', '0', 'asdf-ljh-asdf'] - activation = activate.Activate(args) - activation.activate = capture - activation.main() - parsed_args = capture.calls[0]['args'][0] - assert parsed_args.bluestore is True + a = activate.Activate(args) + a.main() + assert a.args.objectstore == 'bluestore' +@patch('ceph_volume.util.prepare.create_key', return_value='fake-secret') class TestActivateAll(object): - def test_does_not_detect_osds(self, capsys, is_root, capture, monkeypatch): - monkeypatch.setattr('ceph_volume.devices.lvm.activate.direct_report', lambda: {}) + def test_does_not_detect_osds(self, m_create_key, capsys, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.objectstore.lvmbluestore.direct_report', lambda: {}) args = ['--all'] activation = activate.Activate(args) activation.main() @@ -172,9 +200,9 @@ def test_does_not_detect_osds(self, capsys, is_root, capture, monkeypatch): assert 'Was unable to find any OSDs to activate' in err assert 'Verify OSDs are present with ' in err - def test_detects_running_osds(self, 
capsys, is_root, capture, monkeypatch): - monkeypatch.setattr('ceph_volume.devices.lvm.activate.direct_report', lambda: direct_report) - monkeypatch.setattr('ceph_volume.devices.lvm.activate.systemctl.osd_is_active', lambda x: True) + def test_detects_running_osds(self, m_create_key, capsys, is_root, capture, monkeypatch): + monkeypatch.setattr('ceph_volume.objectstore.lvmbluestore.direct_report', lambda: direct_report) + monkeypatch.setattr('ceph_volume.objectstore.lvmbluestore.systemctl.osd_is_active', lambda x: True) args = ['--all'] activation = activate.Activate(args) activation.main() @@ -182,30 +210,66 @@ def test_detects_running_osds(self, capsys, is_root, capture, monkeypatch): assert 'a8789a96ce8b process is active. Skipping activation' in err assert 'b8218eaa1634 process is active. Skipping activation' in err - def test_detects_osds_to_activate_systemd(self, is_root, capture, monkeypatch): - monkeypatch.setattr('ceph_volume.devices.lvm.activate.direct_report', lambda: direct_report) - monkeypatch.setattr('ceph_volume.devices.lvm.activate.systemctl.osd_is_active', lambda x: False) - args = ['--all'] - activation = activate.Activate(args) - activation.activate = capture - activation.main() - calls = sorted(capture.calls, key=lambda x: x['kwargs']['osd_id']) - assert calls[0]['kwargs']['osd_id'] == '0' - assert calls[0]['kwargs']['osd_fsid'] == '957d22b7-24ce-466a-9883-b8218eaa1634' - assert calls[1]['kwargs']['osd_id'] == '1' - assert calls[1]['kwargs']['osd_fsid'] == 'd0f3e4ad-e52a-4520-afc0-a8789a96ce8b' + @patch('ceph_volume.objectstore.lvmbluestore.LvmBlueStore.activate') + def test_detects_osds_to_activate_systemd(self, m_activate, m_create_key, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.objectstore.lvmbluestore.direct_report', lambda: direct_report) + monkeypatch.setattr('ceph_volume.objectstore.lvmbluestore.systemctl.osd_is_active', lambda x: False) + args = ['--all', '--bluestore'] + a = activate.Activate(args) + a.main() + calls = [ + call(Namespace(activate_all=True, + auto_detect_objectstore=False, + bluestore=True, + no_systemd=False, + no_tmpfs=False, + objectstore='bluestore', + osd_fsid=None, + osd_id=None), + osd_id='0', + osd_fsid='957d22b7-24ce-466a-9883-b8218eaa1634'), + call(Namespace(activate_all=True, + auto_detect_objectstore=False, + bluestore=True, + no_systemd=False, + no_tmpfs=False, + objectstore='bluestore', + osd_fsid=None, + osd_id=None), + osd_id='1', + osd_fsid='d0f3e4ad-e52a-4520-afc0-a8789a96ce8b') + ] + m_activate.assert_has_calls(calls) - def test_detects_osds_to_activate_no_systemd(self, is_root, capture, monkeypatch): - monkeypatch.setattr('ceph_volume.devices.lvm.activate.direct_report', lambda: direct_report) - args = ['--all', '--no-systemd'] - activation = activate.Activate(args) - activation.activate = capture - activation.main() - calls = sorted(capture.calls, key=lambda x: x['kwargs']['osd_id']) - assert calls[0]['kwargs']['osd_id'] == '0' - assert calls[0]['kwargs']['osd_fsid'] == '957d22b7-24ce-466a-9883-b8218eaa1634' - assert calls[1]['kwargs']['osd_id'] == '1' - assert calls[1]['kwargs']['osd_fsid'] == 'd0f3e4ad-e52a-4520-afc0-a8789a96ce8b' + @patch('ceph_volume.objectstore.lvmbluestore.LvmBlueStore.activate') + def test_detects_osds_to_activate_no_systemd(self, m_activate, m_create_key, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.objectstore.lvmbluestore.direct_report', lambda: direct_report) + args = ['--all', '--no-systemd', '--bluestore'] + a = activate.Activate(args) + a.main() + calls = [ + 
call(Namespace(activate_all=True, + auto_detect_objectstore=False, + bluestore=True, + no_systemd=True, + no_tmpfs=False, + objectstore='bluestore', + osd_fsid=None, + osd_id=None), + osd_id='0', + osd_fsid='957d22b7-24ce-466a-9883-b8218eaa1634'), + call(Namespace(activate_all=True, + auto_detect_objectstore=False, + bluestore=True, + no_systemd=True, + no_tmpfs=False, + objectstore='bluestore', + osd_fsid=None, + osd_id=None), + osd_id='1', + osd_fsid='d0f3e4ad-e52a-4520-afc0-a8789a96ce8b') + ] + m_activate.assert_has_calls(calls) # # Activate All fixture diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py index 75073c51aca2..e26a733b09cd 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py @@ -5,7 +5,6 @@ from argparse import ArgumentError from mock import MagicMock, patch -from ceph_volume.api import lvm from ceph_volume.devices.lvm import batch from ceph_volume.util import arg_validators @@ -54,14 +53,14 @@ def test_report(self, format_, factory, conf_ceph_stub, mock_device_generator): devices=devs, db_devices=[], wal_devices=[], - bluestore=True, + objectstore='bluestore', block_db_size="1G", dmcrypt=True, data_allocate_fraction=1.0, ) b = batch.Batch([]) - plan = b.get_plan(args) b.args = args + plan = b.get_deployment_layout() b.report(plan) @pytest.mark.parametrize('format_', ['json', 'json-pretty']) @@ -77,14 +76,14 @@ def test_json_report_valid_empty(self, format_, factory, conf_ceph_stub, mock_de devices=devs, db_devices=[], wal_devices=[], - bluestore=True, + objectstore='bluestore', block_db_size="1G", dmcrypt=True, data_allocate_fraction=1.0, ) b = batch.Batch([]) - plan = b.get_plan(args) b.args = args + plan = b.get_deployment_layout() report = b._create_report(plan) json.loads(report) @@ -103,14 +102,15 @@ def test_json_report_valid_empty_unavailable_fast(self, format_, factory, conf_c devices=devs, db_devices=fast_devs, wal_devices=[], - bluestore=True, + objectstore='bluestore', block_db_size="1G", + block_db_slots=1.0, dmcrypt=True, data_allocate_fraction=1.0, ) b = batch.Batch([]) - plan = b.get_plan(args) b.args = args + plan = b.get_deployment_layout() report = b._create_report(plan) json.loads(report) @@ -121,6 +121,7 @@ def test_json_report_valid_empty_unavailable_very_fast(self, format_, factory, c conf_ceph_stub('[global]\nfsid=asdf-lkjh') devs = [mock_device_generator() for _ in range(5)] fast_devs = [mock_device_generator()] + fast_devs[0].available_lvm = False very_fast_devs = [mock_device_generator()] very_fast_devs[0].available_lvm = False args = factory(data_slots=1, @@ -131,14 +132,15 @@ def test_json_report_valid_empty_unavailable_very_fast(self, format_, factory, c devices=devs, db_devices=fast_devs, wal_devices=very_fast_devs, - bluestore=True, + objectstore='bluestore', block_db_size="1G", + block_db_slots=5, dmcrypt=True, data_allocate_fraction=1.0, ) b = batch.Batch([]) - plan = b.get_plan(args) b.args = args + plan = b.get_deployment_layout() report = b._create_report(plan) json.loads(report) @@ -250,35 +252,50 @@ def test_get_physical_fast_allocs_abs_size_unused_devs(self, factory, for (_, _, slot_size, _) in fasts: assert slot_size == expected_slot_size - def test_get_physical_fast_allocs_abs_size_multi_pvs_per_vg(self, factory, - conf_ceph_stub, - mock_devices_available): + def test_get_physical_fast_allocs_abs_size_multi_pvs_per_vg(self, + factory, + conf_ceph_stub, + 
mock_device_generator, + mock_devices_available_multi_pvs_per_vg): conf_ceph_stub('[global]\nfsid=asdf-lkjh') - args = factory(block_db_slots=None, get_block_db_size=None) - dev_size = 21474836480 - num_devices = len(mock_devices_available) + data_devices = [] + # existing_osds = sum([len(dev.lvs) for dev in mock_devices_available_multi_pvs_per_vg]) + for i in range(len(mock_devices_available_multi_pvs_per_vg)+2): + data_devices.append(mock_device_generator(name='data', + vg_name=f'vg_foo_data{str(i)}', + lv_name=f'lv_foo_data{str(i)}')) + args = factory(block_db_slots=None, + block_db_size=None, + devices=[dev.lv_path for dev in data_devices]) + dev_size = 53687091200 + num_devices = len(mock_devices_available_multi_pvs_per_vg) vg_size = dev_size * num_devices - vg_name = 'vg_foo' - for dev in mock_devices_available: - dev.vg_name = vg_name - dev.vg_size = [vg_size] - dev.vg_free = dev.vg_size - dev.vgs = [lvm.VolumeGroup(vg_name=dev.vg_name, lv_name=dev.lv_name)] - slots_per_device = 2 - slots_per_vg = slots_per_device * num_devices - fasts = batch.get_physical_fast_allocs(mock_devices_available, - 'block_db', slots_per_device, 2, args) - expected_slot_size = int(vg_size / slots_per_vg) + vg_free = vg_size + for dev in mock_devices_available_multi_pvs_per_vg: + for lv in dev.lvs: + vg_free -= lv.lv_size[0] + dev.vg_size = [vg_size] # override the `vg_size` set in mock_device() since it's 1VG that has multiple PVs + for dev in mock_devices_available_multi_pvs_per_vg: + dev.vg_free = [vg_free] # override the `vg_free` set in mock_device() since it's 1VG that has multiple PVs + b = batch.Batch([]) + b.args = args + new_osds = len(data_devices) - len(mock_devices_available_multi_pvs_per_vg) + fasts = b.fast_allocations(mock_devices_available_multi_pvs_per_vg, + len(data_devices), + new_osds, + 'block_db') + expected_slot_size = int(vg_size / len(data_devices)) for (_, _, slot_size, _) in fasts: assert slot_size == expected_slot_size - def test_batch_fast_allocations_one_block_db_length(self, factory, conf_ceph_stub, - mock_lv_device_generator): + def test_batch_fast_allocations_one_block_db_length(self, + factory, conf_ceph_stub, + mock_device_generator): conf_ceph_stub('[global]\nfsid=asdf-lkjh') b = batch.Batch([]) - db_lv_devices = [mock_lv_device_generator()] - fast = b.fast_allocations(db_lv_devices, 1, 0, 'block_db') + db_device = [mock_device_generator()] + fast = b.fast_allocations(db_device, 1, 1, 'block_db') assert len(fast) == 1 @pytest.mark.parametrize('occupied_prior', range(7)) @@ -293,22 +310,24 @@ def test_get_physical_fast_allocs_length_existing(self, mock_device_generator): conf_ceph_stub('[global]\nfsid=asdf-lkjh') occupied_prior = min(occupied_prior, slots) - devs = [mock_device_generator() for _ in range(num_devs)] + devs = [mock_device_generator(lv_name=f'foo{n}') for n in range(slots)] + dev_paths = [dev.path for dev in devs] + fast_devs = [mock_device_generator(lv_name=f'ssd{n}') for n in range(num_devs)] already_assigned = 0 while already_assigned < occupied_prior: dev_i = random.randint(0, num_devs - 1) - dev = devs[dev_i] + dev = fast_devs[dev_i] if len(dev.lvs) < occupied_prior: dev.lvs.append('foo') dev.path = '/dev/bar' - already_assigned = sum([len(d.lvs) for d in devs]) - args = factory(block_db_slots=None, get_block_db_size=None) - expected_num_osds = max(len(devs) * slots - occupied_prior, 0) - fast = batch.get_physical_fast_allocs(devs, + already_assigned = sum([len(dev.lvs) for dev in fast_devs]) + args = factory(block_db_slots=None, 
get_block_db_size=None, devices=dev_paths) + expected_num_osds = max(len(fast_devs) * slots - occupied_prior, 0) + fast = batch.get_physical_fast_allocs(fast_devs, 'block_db', slots, expected_num_osds, args) assert len(fast) == expected_num_osds - expected_assignment_on_used_devices = sum([slots - len(d.lvs) for d in devs if len(d.lvs) > 0]) + expected_assignment_on_used_devices = sum([slots - len(d.lvs) for d in fast_devs if len(d.lvs) > 0]) assert len([f for f in fast if f[0] == '/dev/bar']) == expected_assignment_on_used_devices assert len([f for f in fast if f[0] != '/dev/bar']) == expected_num_osds - expected_assignment_on_used_devices diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py index 7e4d963c8b45..062ea511a8ec 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_listing.py @@ -1,6 +1,7 @@ import pytest from ceph_volume.devices import lvm from ceph_volume.api import lvm as api +from mock import patch, Mock # TODO: add tests for following commands - # ceph-volume list @@ -68,6 +69,7 @@ def test_empty_full_json_zero_exit_status(self, fake_call, is_root, factory, cap stdout, stderr = capsys.readouterr() assert stdout == '{}\n' + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_empty_device_json_zero_exit_status(self, is_root,factory,capsys): args = factory(format='json', device='/dev/sda1') lvm.listing.List([]).list(args) @@ -79,6 +81,7 @@ def test_empty_full_zero_exit_status(self, fake_call, is_root, factory): with pytest.raises(SystemExit): lvm.listing.List([]).list(args) + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_empty_device_zero_exit_status(self, is_root, factory): args = factory(format='pretty', device='/dev/sda1') with pytest.raises(SystemExit): @@ -86,6 +89,7 @@ def test_empty_device_zero_exit_status(self, is_root, factory): class TestFullReport(object): + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_no_ceph_lvs(self, monkeypatch): # ceph lvs are detected by looking into its tags osd = api.Volume(lv_name='volume1', lv_path='/dev/VolGroup/lv', @@ -98,6 +102,7 @@ def test_no_ceph_lvs(self, monkeypatch): result = lvm.listing.List([]).full_report() assert result == {} + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_ceph_data_lv_reported(self, monkeypatch): tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data' pv = api.PVolume(pv_name='/dev/sda1', pv_tags={}, pv_uuid="0000", @@ -113,6 +118,7 @@ def test_ceph_data_lv_reported(self, monkeypatch): result = lvm.listing.List([]).full_report() assert result['0'][0]['name'] == 'volume1' + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_ceph_journal_lv_reported(self, monkeypatch): tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data' journal_tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=journal' @@ -134,6 +140,7 @@ def test_ceph_journal_lv_reported(self, monkeypatch): assert result['0'][0]['name'] == 'volume1' assert result['0'][1]['name'] == 'journal' + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_ceph_wal_lv_reported(self, monkeypatch): tags = 'ceph.osd_id=0,ceph.wal_uuid=x,ceph.type=data' wal_tags = 'ceph.osd_id=0,ceph.wal_uuid=x,ceph.type=wal' @@ -151,6 +158,7 @@ def test_ceph_wal_lv_reported(self, monkeypatch): assert 
result['0'][0]['name'] == 'volume1' assert result['0'][1]['name'] == 'wal' + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) @pytest.mark.parametrize('type_', ['journal', 'db', 'wal']) def test_physical_2nd_device_gets_reported(self, type_, monkeypatch): tags = ('ceph.osd_id=0,ceph.{t}_uuid=x,ceph.type=data,' @@ -168,6 +176,7 @@ def test_physical_2nd_device_gets_reported(self, type_, monkeypatch): class TestSingleReport(object): + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_not_a_ceph_lv(self, monkeypatch): # ceph lvs are detected by looking into its tags lv = api.Volume(lv_name='lv', lv_tags={}, lv_path='/dev/VolGroup/lv', @@ -178,6 +187,7 @@ def test_not_a_ceph_lv(self, monkeypatch): result = lvm.listing.List([]).single_report('VolGroup/lv') assert result == {} + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_report_a_ceph_lv(self, monkeypatch): # ceph lvs are detected by looking into its tags tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data' @@ -194,6 +204,7 @@ def test_report_a_ceph_lv(self, monkeypatch): assert result['0'][0]['path'] == '/dev/VolGroup/lv' assert result['0'][0]['devices'] == [] + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_report_a_ceph_journal_device(self, monkeypatch): # ceph lvs are detected by looking into its tags tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data,' + \ @@ -242,6 +253,7 @@ def test_report_a_ceph_lv_with_devices(self, monkeypatch): assert result['0'][0]['path'] == '/dev/VolGroup/lv' assert result['0'][0]['devices'] == ['/dev/sda1', '/dev/sdb1'] + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_report_by_osd_id_for_just_block_dev(self, monkeypatch): tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=block' lvs = [ api.Volume(lv_name='lv1', lv_tags=tags, lv_path='/dev/vg/lv1', @@ -256,6 +268,7 @@ def test_report_by_osd_id_for_just_block_dev(self, monkeypatch): assert result['0'][0]['lv_path'] == '/dev/vg/lv1' assert result['0'][0]['vg_name'] == 'vg' + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_report_by_osd_id_for_just_data_dev(self, monkeypatch): tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data' lvs = [ api.Volume(lv_name='lv1', lv_tags=tags, lv_path='/dev/vg/lv1', @@ -270,6 +283,7 @@ def test_report_by_osd_id_for_just_data_dev(self, monkeypatch): assert result['0'][0]['lv_path'] == '/dev/vg/lv1' assert result['0'][0]['vg_name'] == 'vg' + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_report_by_osd_id_for_just_block_wal_and_db_dev(self, monkeypatch): tags1 = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=block' tags2 = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=wal' @@ -298,7 +312,7 @@ def test_report_by_osd_id_for_just_block_wal_and_db_dev(self, monkeypatch): assert result['0'][2]['lv_path'] == '/dev/vg/lv3' assert result['0'][2]['vg_name'] == 'vg' - + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_report_by_osd_id_for_data_and_journal_dev(self, monkeypatch): tags1 = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data' tags2 = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=journal' @@ -320,6 +334,7 @@ def test_report_by_osd_id_for_data_and_journal_dev(self, monkeypatch): assert result['0'][1]['lv_path'] == '/dev/vg/lv2' assert result['0'][1]['vg_name'] == 'vg' + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def 
test_report_by_nonexistent_osd_id(self, monkeypatch): lv = api.Volume(lv_name='lv', lv_tags={}, lv_path='/dev/VolGroup/lv', vg_name='VolGroup') @@ -329,6 +344,7 @@ def test_report_by_nonexistent_osd_id(self, monkeypatch): result = lvm.listing.List([]).single_report('1') assert result == {} + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_report_a_ceph_lv_with_no_matching_devices(self, monkeypatch): tags = 'ceph.osd_id=0,ceph.type=data' lv = api.Volume(lv_name='lv', vg_name='VolGroup', lv_uuid='aaaa', diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_migrate.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_migrate.py index 7e516f3d23bd..b032dab4eea4 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_migrate.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_migrate.py @@ -1,11 +1,13 @@ import pytest -from mock.mock import patch +from mock.mock import patch, Mock from ceph_volume import process from ceph_volume.api import lvm as api from ceph_volume.devices.lvm import migrate from ceph_volume.util.device import Device from ceph_volume.util import system from ceph_volume.util import encryption as encryption_utils +from ceph_volume.devices.lvm.zap import Zap + class TestGetClusterName(object): @@ -170,6 +172,7 @@ def mock_process(self, *args, **kwargs): return ('', '', 0) def test_init(self, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data,ceph.osd_fsid=1234' source_db_tags = 'ceph.osd_id=0,journal_uuid=x,ceph.type=db, osd_fsid=1234' source_wal_tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=wal' @@ -217,6 +220,7 @@ def test_init(self, monkeypatch): assert 'wal' == t.old_wal_tags['ceph.type'] def test_update_tags_when_lv_create(self, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = \ 'ceph.osd_id=0,ceph.journal_uuid=x,' \ 'ceph.type=data,ceph.osd_fsid=1234' @@ -275,6 +279,7 @@ def test_update_tags_when_lv_create(self, monkeypatch): '/dev/VolGroup/lv2'] == self.mock_process_input[2] def test_remove_lvs(self, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = \ 'ceph.osd_id=0,ceph.journal_uuid=x,' \ 'ceph.type=data,ceph.osd_fsid=1234,ceph.wal_uuid=aaaaa' @@ -334,6 +339,7 @@ def test_remove_lvs(self, monkeypatch): '/dev/VolGroup/lv2'] == self.mock_process_input[2] def test_replace_lvs(self, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = \ 'ceph.osd_id=0,ceph.type=data,ceph.osd_fsid=1234,'\ 'ceph.wal_uuid=wal_uuid,ceph.db_device=/dbdevice' @@ -410,6 +416,7 @@ def test_replace_lvs(self, monkeypatch): '/dev/VolGroup/lv_target'].sort() def test_undo(self, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data,ceph.osd_fsid=1234' source_db_tags = 'ceph.osd_id=0,journal_uuid=x,ceph.type=db, osd_fsid=1234' source_wal_tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=wal' @@ -524,7 +531,7 @@ def mock_get_lvs(self, *args, **kwargs): def mock_prepare_dmcrypt(self, *args, **kwargs): return '/dev/mapper/' + kwargs['mapping'] - def test_newdb_non_root(self): + def test_newdb_non_root(self, is_non_root): with pytest.raises(Exception) as error: migrate.NewDB(argv=[ '--osd-id', '1', @@ -533,9 +540,8 @@ def test_newdb_non_root(self): expected = 'This command needs to 
be executed with sudo or as root' assert expected in str(error.value) - @patch('os.getuid') - def test_newdb_not_target_lvm(self, m_getuid, capsys): - m_getuid.return_value = 0 + @patch('ceph_volume.api.lvm.get_lv_by_fullname', Mock(return_value=None)) + def test_newdb_not_target_lvm(self, is_root, capsys): with pytest.raises(SystemExit) as error: migrate.NewDB(argv=[ '--osd-id', '1', @@ -548,10 +554,7 @@ def test_newdb_not_target_lvm(self, m_getuid, capsys): assert expected in stderr - @patch('os.getuid') - def test_newdb_already_in_use(self, m_getuid, monkeypatch, capsys): - m_getuid.return_value = 0 - + def test_newdb_already_in_use(self, is_root, monkeypatch, capsys): self.mock_volume = api.Volume(lv_name='volume1', lv_uuid='y', vg_name='vg', @@ -570,10 +573,8 @@ def test_newdb_already_in_use(self, m_getuid, monkeypatch, capsys): expected = 'Target Logical Volume is already used by ceph: vgname/new_db' assert expected in stderr - @patch('os.getuid') - def test_newdb(self, m_getuid, monkeypatch, capsys): - m_getuid.return_value = 0 - + def test_newdb(self, is_root, monkeypatch, capsys): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = \ 'ceph.osd_id=0,ceph.type=data,ceph.osd_fsid=1234,'\ 'ceph.wal_uuid=wal_uuid,ceph.db_device=/dbdevice' @@ -730,6 +731,7 @@ def test_newdb_active_systemd(self, is_root, monkeypatch, capsys): assert not stdout def test_newdb_no_systemd(self, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = \ 'ceph.osd_id=0,ceph.type=data,ceph.osd_fsid=1234,'\ 'ceph.wal_uuid=wal_uuid,ceph.db_device=/dbdevice' @@ -818,10 +820,8 @@ def test_newdb_no_systemd(self, is_root, monkeypatch): '--dev-target', '/dev/VolGroup/target_volume', '--command', 'bluefs-bdev-new-db'] - @patch('os.getuid') - def test_newwal(self, m_getuid, monkeypatch, capsys): - m_getuid.return_value = 0 - + def test_newwal(self, is_root, monkeypatch, capsys): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = \ 'ceph.osd_id=0,ceph.type=data,ceph.osd_fsid=1234' @@ -933,6 +933,7 @@ def test_newwal_active_systemd(self, is_root, monkeypatch, capsys): assert not stdout def test_newwal_no_systemd(self, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = \ 'ceph.osd_id=0,ceph.type=data,ceph.osd_fsid=1234' @@ -996,6 +997,7 @@ def test_newwal_no_systemd(self, is_root, monkeypatch): @patch('os.getuid') def test_newwal_encrypted(self, m_getuid, monkeypatch, capsys): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) m_getuid.return_value = 0 source_tags = \ @@ -1225,9 +1227,9 @@ def test_migrate_without_args(self, capsys): assert not stderr - @patch('os.getuid') - def test_migrate_data_db_to_new_db(self, m_getuid, monkeypatch): - m_getuid.return_value = 0 + @patch.object(Zap, 'main') + def test_migrate_data_db_to_new_db(self, m_zap, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev' @@ -1325,9 +1327,12 @@ def test_migrate_data_db_to_new_db(self, m_getuid, monkeypatch): '--command', 'bluefs-bdev-migrate', '--devs-source', '/var/lib/ceph/osd/ceph-2/block', '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db'] + m_zap.assert_called_once() + @patch.object(Zap, 'main') @patch('os.getuid') - def 
test_migrate_data_db_to_new_db_encrypted(self, m_getuid, monkeypatch): + def test_migrate_data_db_to_new_db_encrypted(self, m_getuid, m_zap, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) m_getuid.return_value = 0 source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ @@ -1440,6 +1445,8 @@ def test_migrate_data_db_to_new_db_encrypted(self, m_getuid, monkeypatch): '--devs-source', '/var/lib/ceph/osd/ceph-2/block', '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db'] + m_zap.assert_called_once() + def test_migrate_data_db_to_new_db_active_systemd(self, is_root, monkeypatch, capsys): source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev' @@ -1503,7 +1510,9 @@ def test_migrate_data_db_to_new_db_active_systemd(self, is_root, monkeypatch, ca assert '--> OSD is running, stop it with: systemctl stop ceph-osd@2' == stderr.rstrip() assert not stdout - def test_migrate_data_db_to_new_db_no_systemd(self, is_root, monkeypatch): + @patch.object(Zap, 'main') + def test_migrate_data_db_to_new_db_no_systemd(self, m_zap, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev' source_db_tags = 'ceph.osd_id=2,ceph.type=db,ceph.osd_fsid=1234,' \ @@ -1599,10 +1608,11 @@ def test_migrate_data_db_to_new_db_no_systemd(self, is_root, monkeypatch): '--devs-source', '/var/lib/ceph/osd/ceph-2/block', '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db'] - @patch('os.getuid') - def test_migrate_data_db_to_new_db_skip_wal(self, m_getuid, monkeypatch): - m_getuid.return_value = 0 + m_zap.assert_called_once() + @patch.object(Zap, 'main') + def test_migrate_data_db_to_new_db_skip_wal(self, m_zap, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev' source_db_tags = 'ceph.osd_id=2,ceph.type=db,ceph.osd_fsid=1234,' \ @@ -1721,10 +1731,11 @@ def test_migrate_data_db_to_new_db_skip_wal(self, m_getuid, monkeypatch): '--devs-source', '/var/lib/ceph/osd/ceph-2/block', '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db'] - @patch('os.getuid') - def test_migrate_data_db_wal_to_new_db(self, m_getuid, monkeypatch): - m_getuid.return_value = 0 + m_zap.assert_called_once() + @patch.object(Zap, 'main') + def test_migrate_data_db_wal_to_new_db(self, m_zap, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \ 'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev' @@ -1848,8 +1859,12 @@ def test_migrate_data_db_wal_to_new_db(self, m_getuid, monkeypatch): '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db', '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal'] + assert len(m_zap.mock_calls) == 2 + + @patch.object(Zap, 'main') @patch('os.getuid') - def test_migrate_data_db_wal_to_new_db_encrypted(self, m_getuid, monkeypatch): + def test_migrate_data_db_wal_to_new_db_encrypted(self, m_getuid, m_zap, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) m_getuid.return_value = 0 source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ @@ 
-1989,13 +2004,14 @@ def test_migrate_data_db_wal_to_new_db_encrypted(self, m_getuid, monkeypatch): '--devs-source', '/var/lib/ceph/osd/ceph-2/block.db', '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal'] + assert len(m_zap.mock_calls) == 2 + @patch('os.getuid') def test_dont_migrate_data_db_wal_to_new_data(self, m_getuid, monkeypatch, capsys): m_getuid.return_value = 0 - source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev' source_db_tags = 'ceph.osd_id=2,ceph.type=db,ceph.osd_fsid=1234,' \ @@ -2057,13 +2073,10 @@ def test_dont_migrate_data_db_wal_to_new_data(self, ' please use new-db or new-wal command before.' assert expected in stderr - @patch('os.getuid') def test_dont_migrate_db_to_wal(self, - m_getuid, + is_root, monkeypatch, capsys): - m_getuid.return_value = 0 - source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \ 'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev' @@ -2133,13 +2146,11 @@ def test_dont_migrate_db_to_wal(self, expected = 'Migrate to WAL is not supported' assert expected in stderr - @patch('os.getuid') def test_migrate_data_db_to_db(self, - m_getuid, + is_root, monkeypatch, capsys): - m_getuid.return_value = 0 - + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \ 'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev' @@ -2287,6 +2298,7 @@ def test_migrate_data_db_to_db_active_systemd(self, is_root, monkeypatch, capsys assert not stdout def test_migrate_data_db_to_db_no_systemd(self, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \ 'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev' @@ -2360,13 +2372,13 @@ def test_migrate_data_db_to_db_no_systemd(self, is_root, monkeypatch): '--command', 'bluefs-bdev-migrate', '--devs-source', '/var/lib/ceph/osd/ceph-2/block'] - @patch('os.getuid') + @patch.object(Zap, 'main') def test_migrate_data_wal_to_db(self, - m_getuid, + m_zap, + is_root, monkeypatch, capsys): - m_getuid.return_value = 0 - + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \ 'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev' @@ -2465,11 +2477,108 @@ def test_migrate_data_wal_to_db(self, '--devs-source', '/var/lib/ceph/osd/ceph-2/block', '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal'] + m_zap.assert_called_once() + + @patch.object(Zap, 'main') + @patch('os.getuid') + def test_migrate_wal_to_db(self, + m_getuid, + m_zap, + monkeypatch, + capsys): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) + m_getuid.return_value = 0 + + source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ + 'ceph.cluster_name=ceph,' \ + 'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev' + source_wal_tags = 'ceph.osd_id=2,ceph.type=wal,ceph.osd_fsid=1234,' \ + 'ceph.cluster_name=ceph,' \ + 'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev' + + data_vol = api.Volume(lv_name='volume1', + lv_uuid='datauuid', + vg_name='vg', + lv_path='/dev/VolGroup/lv1', + lv_tags=source_tags) + + wal_vol = 
api.Volume(lv_name='volume3', + lv_uuid='waluuid', + vg_name='vg', + lv_path='/dev/VolGroup/lv3', + lv_tags=source_wal_tags) + + self.mock_single_volumes = { + '/dev/VolGroup/lv1': data_vol, + '/dev/VolGroup/lv3': wal_vol, + } + monkeypatch.setattr(migrate.api, 'get_single_lv', + self.mock_get_single_lv) + + self.mock_volume = data_vol + monkeypatch.setattr(api, 'get_lv_by_fullname', + self.mock_get_lv_by_fullname) + + self.mock_process_input = [] + monkeypatch.setattr(process, 'call', self.mock_process) + + devices = [] + devices.append([Device('/dev/VolGroup/lv1'), 'block']) + devices.append([Device('/dev/VolGroup/lv3'), 'wal']) + + monkeypatch.setattr(migrate, 'find_associated_devices', + lambda osd_id, osd_fsid: devices) + + monkeypatch.setattr("ceph_volume.systemd.systemctl.osd_is_active", + lambda id: False) + + monkeypatch.setattr(migrate, 'get_cluster_name', + lambda osd_id, osd_fsid: 'ceph') + monkeypatch.setattr(system, 'chown', lambda path: 0) + m = migrate.Migrate(argv=[ + '--osd-id', '2', + '--osd-fsid', '1234', + '--from', 'wal', + '--target', 'vgname/data']) + + m.main() + + n = len(self.mock_process_input) + assert n >= 1 + for s in self.mock_process_input: + print(s) + + assert self. mock_process_input[n-3] == [ + 'lvchange', + '--deltag', 'ceph.osd_id=2', + '--deltag', 'ceph.type=wal', + '--deltag', 'ceph.osd_fsid=1234', + '--deltag', 'ceph.cluster_name=ceph', + '--deltag', 'ceph.wal_uuid=waluuid', + '--deltag', 'ceph.wal_device=wal_dev', + '/dev/VolGroup/lv3'] + assert self. mock_process_input[n-2] == [ + 'lvchange', + '--deltag', 'ceph.wal_uuid=waluuid', + '--deltag', 'ceph.wal_device=wal_dev', + '/dev/VolGroup/lv1'] + assert self. mock_process_input[n-1] == [ + 'ceph-bluestore-tool', + '--path', '/var/lib/ceph/osd/ceph-2', + '--dev-target', '/var/lib/ceph/osd/ceph-2/block', + '--command', 'bluefs-bdev-migrate', + '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal'] + + m_zap.assert_called_once() + + @patch.object(Zap, 'main') @patch('os.getuid') def test_migrate_data_wal_to_db_encrypted(self, m_getuid, + m_zap, monkeypatch, capsys): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) m_getuid.return_value = 0 source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ @@ -2579,6 +2688,8 @@ def test_migrate_data_wal_to_db_encrypted(self, '--devs-source', '/var/lib/ceph/osd/ceph-2/block', '--devs-source', '/var/lib/ceph/osd/ceph-2/block.wal'] + m_zap.assert_called_once() + def test_migrate_data_wal_to_db_active_systemd(self, is_root, monkeypatch, capsys): source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \ @@ -2651,7 +2762,9 @@ def test_migrate_data_wal_to_db_active_systemd(self, is_root, monkeypatch, capsy assert '--> OSD is running, stop it with: systemctl stop ceph-osd@2' == stderr.rstrip() assert not stdout - def test_migrate_data_wal_to_db_no_systemd(self, is_root, monkeypatch): + @patch.object(Zap, 'main') + def test_migrate_data_wal_to_db_no_systemd(self, m_zap, is_root, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) source_tags = 'ceph.osd_id=2,ceph.type=data,ceph.osd_fsid=1234,' \ 'ceph.cluster_name=ceph,ceph.db_uuid=dbuuid,ceph.db_device=db_dev,' \ 'ceph.wal_uuid=waluuid,ceph.wal_device=wal_dev' @@ -2747,3 +2860,5 @@ def test_migrate_data_wal_to_db_no_systemd(self, is_root, monkeypatch): '--command', 'bluefs-bdev-migrate', '--devs-source', '/var/lib/ceph/osd/ceph-2/block', '--devs-source', 
'/var/lib/ceph/osd/ceph-2/block.wal'] + + m_zap.assert_called_once() \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py index 0a356988eebc..c2e909d0146a 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_prepare.py @@ -2,6 +2,7 @@ from ceph_volume.devices import lvm from ceph_volume.api import lvm as api from mock.mock import patch, Mock +from ceph_volume import objectstore class TestLVM(object): @@ -24,102 +25,117 @@ def test_main_shows_prepare_subcommands(self, capsys): assert 'Format an LVM device' in stdout +@patch('ceph_volume.util.prepare.create_key', return_value='fake-secret') class TestPrepareDevice(object): - def test_cannot_use_device(self, factory): + def test_cannot_use_device(self, m_create_key, factory): args = factory(data='/dev/var/foo') with pytest.raises(RuntimeError) as error: p = lvm.prepare.Prepare([]) - p.args = args - p.prepare_data_device( 'data', '0') + p.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=args) + p.objectstore.prepare_data_device( 'data', '0') assert 'Cannot use device (/dev/var/foo)' in str(error.value) assert 'A vg/lv path or an existing device is needed' in str(error.value) - +@patch('ceph_volume.util.prepare.create_key', return_value='fake-secret') class TestGetClusterFsid(object): + def setup_method(self): + self.p = lvm.prepare.Prepare([]) - def test_fsid_is_passed_in(self, factory): + def test_fsid_is_passed_in(self, m_create_key, factory): args = factory(cluster_fsid='aaaa-1111') - prepare_obj = lvm.prepare.Prepare([]) - prepare_obj.args = args - assert prepare_obj.get_cluster_fsid() == 'aaaa-1111' + self.p.objectstore = objectstore.lvmbluestore.LvmBlueStore(args) + assert self.p.objectstore.get_cluster_fsid() == 'aaaa-1111' - def test_fsid_is_read_from_ceph_conf(self, factory, conf_ceph_stub): + def test_fsid_is_read_from_ceph_conf(self, m_create_key, factory, conf_ceph_stub): conf_ceph_stub('[global]\nfsid = bbbb-2222') - prepare_obj = lvm.prepare.Prepare([]) - prepare_obj.args = factory(cluster_fsid=None) - assert prepare_obj.get_cluster_fsid() == 'bbbb-2222' + args = factory(cluster_fsid='') + self.p.objectstore = objectstore.lvmbluestore.LvmBlueStore(args) + assert self.p.objectstore.get_cluster_fsid() == 'bbbb-2222' +@patch('ceph_volume.util.prepare.create_key', return_value='fake-secret') class TestPrepare(object): - def test_main_spits_help_with_no_arguments(self, capsys): + def setup_method(self): + self.p = lvm.prepare.Prepare([]) + + def test_main_spits_help_with_no_arguments(self, m_create_key, capsys): lvm.prepare.Prepare([]).main() stdout, stderr = capsys.readouterr() assert 'Prepare an OSD by assigning an ID and FSID' in stdout - def test_main_shows_full_help(self, capsys): + def test_main_shows_full_help(self, m_create_key, capsys): with pytest.raises(SystemExit): lvm.prepare.Prepare(argv=['--help']).main() stdout, stderr = capsys.readouterr() assert 'Use the bluestore objectstore' in stdout assert 'A physical device or logical' in stdout - @patch('ceph_volume.devices.lvm.prepare.api.is_ceph_device') - def test_safe_prepare_osd_already_created(self, m_is_ceph_device): + @patch('ceph_volume.api.lvm.is_ceph_device') + def test_safe_prepare_osd_already_created(self, m_create_key, m_is_ceph_device): m_is_ceph_device.return_value = True with pytest.raises(RuntimeError) as error: - prepare = lvm.prepare.Prepare(argv=[]) - 
prepare.args = Mock() - prepare.args.data = '/dev/sdfoo' - prepare.get_lv = Mock() - prepare.safe_prepare() + self.p.args = Mock() + self.p.args.data = '/dev/sdfoo' + self.p.args.with_tpm = '0' + self.p.get_lv = Mock() + self.p.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=self.p.args) + self.p.objectstore.safe_prepare() expected = 'skipping {}, it is already prepared'.format('/dev/sdfoo') assert expected in str(error.value) - def test_setup_device_device_name_is_none(self): - result = lvm.prepare.Prepare([]).setup_device(device_type='data', device_name=None, tags={'ceph.type': 'data'}, size=0, slots=None) + def test_setup_device_device_name_is_none(self, m_create_key): + self.p.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=[]) + result = self.p.objectstore.setup_device(device_type='data', + device_name=None, + tags={'ceph.type': 'data'}, + size=0, + slots=None) assert result == ('', '', {'ceph.type': 'data'}) @patch('ceph_volume.api.lvm.Volume.set_tags') - @patch('ceph_volume.devices.lvm.prepare.api.get_single_lv') - def test_setup_device_lv_passed(self, m_get_single_lv, m_set_tags): + @patch('ceph_volume.api.lvm.get_single_lv') + def test_setup_device_lv_passed(self, m_get_single_lv, m_set_tags, m_create_key): fake_volume = api.Volume(lv_name='lv_foo', lv_path='/fake-path', vg_name='vg_foo', lv_tags='', lv_uuid='fake-uuid') m_get_single_lv.return_value = fake_volume - result = lvm.prepare.Prepare([]).setup_device(device_type='data', device_name='vg_foo/lv_foo', tags={'ceph.type': 'data'}, size=0, slots=None) + self.p.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=[]) + result = self.p.objectstore.setup_device(device_type='data', device_name='vg_foo/lv_foo', tags={'ceph.type': 'data'}, size=0, slots=None) assert result == ('/fake-path', 'fake-uuid', {'ceph.type': 'data', 'ceph.vdo': '0', 'ceph.data_uuid': 'fake-uuid', 'ceph.data_device': '/fake-path'}) - @patch('ceph_volume.devices.lvm.prepare.api.create_lv') + @patch('ceph_volume.api.lvm.create_lv') @patch('ceph_volume.api.lvm.Volume.set_tags') @patch('ceph_volume.util.disk.is_device') - def test_setup_device_device_passed(self, m_is_device, m_set_tags, m_create_lv): + def test_setup_device_device_passed(self, m_is_device, m_set_tags, m_create_lv, m_create_key): fake_volume = api.Volume(lv_name='lv_foo', lv_path='/fake-path', vg_name='vg_foo', lv_tags='', lv_uuid='fake-uuid') m_is_device.return_value = True m_create_lv.return_value = fake_volume - result = lvm.prepare.Prepare([]).setup_device(device_type='data', device_name='/dev/sdx', tags={'ceph.type': 'data'}, size=0, slots=None) + self.p.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=[]) + result = self.p.objectstore.setup_device(device_type='data', device_name='/dev/sdx', tags={'ceph.type': 'data'}, size=0, slots=None) assert result == ('/fake-path', 'fake-uuid', {'ceph.type': 'data', 'ceph.vdo': '0', 'ceph.data_uuid': 'fake-uuid', 'ceph.data_device': '/fake-path'}) - @patch('ceph_volume.devices.lvm.prepare.Prepare.get_ptuuid') - @patch('ceph_volume.devices.lvm.prepare.api.get_single_lv') - def test_setup_device_partition_passed(self, m_get_single_lv, m_get_ptuuid): + @patch('ceph_volume.objectstore.baseobjectstore.BaseObjectStore.get_ptuuid') + @patch('ceph_volume.api.lvm.get_single_lv') + def test_setup_device_partition_passed(self, m_get_single_lv, m_get_ptuuid, m_create_key): m_get_single_lv.side_effect = ValueError() m_get_ptuuid.return_value = 'fake-uuid' - result = lvm.prepare.Prepare([]).setup_device(device_type='data', 
device_name='/dev/sdx', tags={'ceph.type': 'data'}, size=0, slots=None) + self.p.objectstore = objectstore.lvmbluestore.LvmBlueStore(args=[]) + result = self.p.objectstore.setup_device(device_type='data', device_name='/dev/sdx', tags={'ceph.type': 'data'}, size=0, slots=None) assert result == ('/dev/sdx', 'fake-uuid', {'ceph.type': 'data', 'ceph.vdo': '0', 'ceph.data_uuid': 'fake-uuid', 'ceph.data_device': '/dev/sdx'}) - def test_invalid_osd_id_passed(self): + def test_invalid_osd_id_passed(self, m_create_key): with pytest.raises(SystemExit): lvm.prepare.Prepare(argv=['--osd-id', 'foo']).main() diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py index 2446c5ed6651..d9b3bdfd2391 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py @@ -1,20 +1,59 @@ +# type: ignore import os import pytest from copy import deepcopy -from mock.mock import patch, call +from mock.mock import patch, call, Mock from ceph_volume import process from ceph_volume.api import lvm as api from ceph_volume.devices.lvm import zap - - -class TestZap(object): - def test_invalid_osd_id_passed(self): +from . import data_zap +from typing import Tuple, List + + +def process_call(command, **kw): + result: Tuple[List[str], List[str], int] = '' + if 'udevadm' in command: + result = data_zap.udevadm_property, [], 0 + if 'ceph-bluestore-tool' in command: + result = data_zap.ceph_bluestore_tool_output, [], 0 + if 'is-active' in command: + result = [], [], 1 + if 'lsblk' in command: + result = data_zap.lsblk_all, [], 0 + if 'blkid' in command: + result = data_zap.blkid_output, [], 0 + if 'pvs' in command: + result = [], [], 0 + return result + + +class TestZap: + def test_invalid_osd_id_passed(self) -> None: with pytest.raises(SystemExit): zap.Zap(argv=['--osd-id', 'foo']).main() -class TestFindAssociatedDevices(object): - - def test_no_lvs_found_that_match_id(self, monkeypatch, device_info): + @patch('ceph_volume.util.disk._dd_write', Mock()) + @patch('ceph_volume.util.arg_validators.Device') + def test_clear_replace_header_is_being_replaced(self, m_device: Mock) -> None: + m_dev = m_device.return_value + m_dev.is_being_replaced = True + with pytest.raises(SystemExit) as e: + zap.Zap(argv=['--clear', '/dev/foo']).main() + assert e.value.code == 0 + + @patch('ceph_volume.util.disk._dd_write', Mock()) + @patch('ceph_volume.util.arg_validators.Device') + def test_clear_replace_header_is_not_being_replaced(self, m_device: Mock) -> None: + m_dev = m_device.return_value + m_dev.is_being_replaced = False + with pytest.raises(SystemExit) as e: + zap.Zap(argv=['--clear', '/dev/foo']).main() + assert e.value.code == 1 + + @patch('ceph_volume.devices.lvm.zap.direct_report', Mock(return_value={})) + @patch('ceph_volume.devices.raw.list.List.filter_lvm_osd_devices', Mock(return_value='/dev/sdb')) + @patch('ceph_volume.process.call', Mock(side_effect=process_call)) + def test_no_lvs_and_raw_found_that_match_id(self, is_root, monkeypatch, device_info): tags = 'ceph.osd_id=9,ceph.journal_uuid=x,ceph.type=data' osd = api.Volume(lv_name='volume1', lv_uuid='y', vg_name='vg', lv_tags=tags, lv_path='/dev/VolGroup/lv') @@ -22,10 +61,15 @@ def test_no_lvs_found_that_match_id(self, monkeypatch, device_info): volumes.append(osd) monkeypatch.setattr(zap.api, 'get_lvs', lambda **kwargs: {}) - with pytest.raises(RuntimeError): - zap.find_associated_devices(osd_id=10) + z = zap.Zap(['--osd-id', '10']) - 
def test_no_lvs_found_that_match_fsid(self, monkeypatch, device_info): + with pytest.raises(SystemExit): + z.main() + + @patch('ceph_volume.devices.lvm.zap.direct_report', Mock(return_value={})) + @patch('ceph_volume.devices.raw.list.List.filter_lvm_osd_devices', Mock(return_value='/dev/sdb')) + @patch('ceph_volume.process.call', Mock(side_effect=process_call)) + def test_no_lvs_and_raw_found_that_match_fsid(self, is_root, monkeypatch): tags = 'ceph.osd_id=9,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,'+\ 'ceph.type=data' osd = api.Volume(lv_name='volume1', lv_uuid='y', lv_tags=tags, @@ -34,10 +78,15 @@ def test_no_lvs_found_that_match_fsid(self, monkeypatch, device_info): volumes.append(osd) monkeypatch.setattr(zap.api, 'get_lvs', lambda **kwargs: {}) - with pytest.raises(RuntimeError): - zap.find_associated_devices(osd_fsid='aaaa-lkjh') + z = zap.Zap(['--osd-fsid', 'aaaa-lkjh']) - def test_no_lvs_found_that_match_id_fsid(self, monkeypatch, device_info): + with pytest.raises(SystemExit): + z.main() + + @patch('ceph_volume.devices.lvm.zap.direct_report', Mock(return_value={})) + @patch('ceph_volume.devices.raw.list.List.filter_lvm_osd_devices', Mock(return_value='/dev/sdb')) + @patch('ceph_volume.process.call', Mock(side_effect=process_call)) + def test_no_lvs_and_raw_found_that_match_id_fsid(self, is_root, monkeypatch): tags = 'ceph.osd_id=9,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,'+\ 'ceph.type=data' osd = api.Volume(lv_name='volume1', lv_uuid='y', vg_name='vg', @@ -46,45 +95,82 @@ def test_no_lvs_found_that_match_id_fsid(self, monkeypatch, device_info): volumes.append(osd) monkeypatch.setattr(zap.api, 'get_lvs', lambda **kwargs: {}) - with pytest.raises(RuntimeError): - zap.find_associated_devices(osd_id='9', osd_fsid='aaaa-lkjh') + z = zap.Zap(['--osd-id', '9', '--osd-fsid', 'aaaa-lkjh']) + + with pytest.raises(SystemExit): + z.main() - def test_no_ceph_lvs_found(self, monkeypatch): + @patch('ceph_volume.devices.lvm.zap.direct_report', Mock(return_value={})) + def test_no_ceph_lvs_and_no_ceph_raw_found(self, is_root, monkeypatch): osd = api.Volume(lv_name='volume1', lv_uuid='y', lv_tags='', lv_path='/dev/VolGroup/lv') volumes = [] volumes.append(osd) monkeypatch.setattr(zap.api, 'get_lvs', lambda **kwargs: {}) - with pytest.raises(RuntimeError): - zap.find_associated_devices(osd_id=100) + z = zap.Zap(['--osd-id', '100']) + + with pytest.raises(SystemExit): + z.main() - def test_lv_is_matched_id(self, monkeypatch): + @patch('ceph_volume.devices.lvm.zap.Zap.zap') + @patch('ceph_volume.process.call', Mock(side_effect=process_call)) + def test_lv_is_matched_id(self, mock_zap, monkeypatch, is_root): tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data' osd = api.Volume(lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv', lv_tags=tags) + volumes = [osd] + monkeypatch.setattr(zap.api, 'get_lvs', lambda **kw: volumes) + + z = zap.Zap(['--osd-id', '0']) + z.main() + assert z.args.devices[0].path == '/dev/VolGroup/lv' + mock_zap.assert_called_once() + + # @patch('ceph_volume.devices.lvm.zap.disk.has_bluestore_label', Mock(return_value=True)) + @patch('ceph_volume.devices.lvm.zap.Zap.zap') + @patch('ceph_volume.devices.raw.list.List.filter_lvm_osd_devices', Mock(return_value='/dev/sdb')) + @patch('ceph_volume.process.call', Mock(side_effect=process_call)) + def test_raw_is_matched_id(self, mock_zap, monkeypatch, is_root): volumes = [] - volumes.append(osd) monkeypatch.setattr(zap.api, 'get_lvs', lambda **kw: volumes) - monkeypatch.setattr(process, 'call', lambda x, 
**kw: ('', '', 0)) - result = zap.find_associated_devices(osd_id='0') - assert result[0].path == '/dev/VolGroup/lv' + z = zap.Zap(['--osd-id', '0']) + z.main() + assert z.args.devices[0].path == '/dev/sdb' + mock_zap.assert_called_once() - def test_lv_is_matched_fsid(self, monkeypatch): + @patch('ceph_volume.devices.lvm.zap.Zap.zap') + def test_lv_is_matched_fsid(self, mock_zap, monkeypatch, is_root): tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,' +\ 'ceph.type=data' osd = api.Volume(lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv', lv_tags=tags) - volumes = [] - volumes.append(osd) + volumes = [osd] monkeypatch.setattr(zap.api, 'get_lvs', lambda **kw: deepcopy(volumes)) monkeypatch.setattr(process, 'call', lambda x, **kw: ('', '', 0)) - result = zap.find_associated_devices(osd_fsid='asdf-lkjh') - assert result[0].path == '/dev/VolGroup/lv' + z = zap.Zap(['--osd-fsid', 'asdf-lkjh']) + z.main() - def test_lv_is_matched_id_fsid(self, monkeypatch): + assert z.args.devices[0].path == '/dev/VolGroup/lv' + mock_zap.assert_called_once() + + @patch('ceph_volume.devices.lvm.zap.Zap.zap') + @patch('ceph_volume.devices.raw.list.List.filter_lvm_osd_devices', Mock(return_value='/dev/sdb')) + @patch('ceph_volume.process.call', Mock(side_effect=process_call)) + def test_raw_is_matched_fsid(self, mock_zap, monkeypatch, is_root): + volumes = [] + monkeypatch.setattr(zap.api, 'get_lvs', lambda **kw: volumes) + + z = zap.Zap(['--osd-fsid', 'd5a496bc-dcb9-4ad0-a12c-393d3200d2b6']) + z.main() + + assert z.args.devices[0].path == '/dev/sdb' + mock_zap.assert_called_once() + + @patch('ceph_volume.devices.lvm.zap.Zap.zap') + def test_lv_is_matched_id_fsid(self, mock_zap, monkeypatch, is_root): tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,' +\ 'ceph.type=data' osd = api.Volume(lv_name='volume1', lv_uuid='y', vg_name='', @@ -94,25 +180,43 @@ def test_lv_is_matched_id_fsid(self, monkeypatch): monkeypatch.setattr(zap.api, 'get_lvs', lambda **kw: volumes) monkeypatch.setattr(process, 'call', lambda x, **kw: ('', '', 0)) - result = zap.find_associated_devices(osd_id='0', osd_fsid='asdf-lkjh') - assert result[0].path == '/dev/VolGroup/lv' - + z = zap.Zap(['--osd-id', '0', '--osd-fsid', 'asdf-lkjh', '--no-systemd']) + z.main() -class TestEnsureAssociatedLVs(object): + assert z.args.devices[0].path == '/dev/VolGroup/lv' + mock_zap.assert_called_once() - def test_nothing_is_found(self): + @patch('ceph_volume.devices.lvm.zap.Zap.zap') + @patch('ceph_volume.devices.raw.list.List.filter_lvm_osd_devices', Mock(return_value='/dev/sdb')) + @patch('ceph_volume.process.call', Mock(side_effect=process_call)) + def test_raw_is_matched_id_fsid(self, mock_zap, monkeypatch, is_root): volumes = [] - result = zap.ensure_associated_lvs(volumes) - assert result == [] + monkeypatch.setattr(zap.api, 'get_lvs', lambda **kw: volumes) - def test_data_is_found(self, fake_call): - tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=data' - osd = api.Volume( - lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/data', lv_tags=tags) + z = zap.Zap(['--osd-id', '0', '--osd-fsid', 'd5a496bc-dcb9-4ad0-a12c-393d3200d2b6']) + z.main() + + assert z.args.devices[0].path == '/dev/sdb' + mock_zap.assert_called_once() + + @patch('ceph_volume.devices.lvm.zap.Zap.zap') + @patch('ceph_volume.devices.raw.list.List.filter_lvm_osd_devices', Mock(side_effect=['/dev/vdx', '/dev/vdy', '/dev/vdz', None])) + @patch('ceph_volume.process.call', Mock(side_effect=process_call)) +
def test_raw_multiple_devices(self, mock_zap, monkeypatch, is_root): volumes = [] - volumes.append(osd) - result = zap.ensure_associated_lvs(volumes) - assert result == ['/dev/VolGroup/data'] + monkeypatch.setattr(zap.api, 'get_lvs', lambda **kw: volumes) + z = zap.Zap(['--osd-id', '5']) + z.main() + + assert set([device.path for device in z.args.devices]) == {'/dev/vdx', '/dev/vdy', '/dev/vdz'} + mock_zap.assert_called_once() + + @patch('ceph_volume.devices.lvm.zap.direct_report', Mock(return_value={})) + @patch('ceph_volume.devices.lvm.zap.api.get_lvs', Mock(return_value=[])) + def test_nothing_is_found(self, is_root): + z = zap.Zap(['--osd-id', '0']) + with pytest.raises(SystemExit): + z.main() def test_block_is_found(self, fake_call): tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=block' @@ -120,7 +224,7 @@ def test_block_is_found(self, fake_call): lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/block', lv_tags=tags) volumes = [] volumes.append(osd) - result = zap.ensure_associated_lvs(volumes) + result = zap.Zap([]).ensure_associated_lvs(volumes) assert result == ['/dev/VolGroup/block'] def test_success_message_for_fsid(self, factory, is_root, capsys): @@ -139,38 +243,7 @@ def test_success_message_for_id(self, factory, is_root, capsys): out, err = capsys.readouterr() assert "Zapping successful for OSD: 1" in err - def test_block_and_partition_are_found(self, monkeypatch): - monkeypatch.setattr(zap.disk, 'get_device_from_partuuid', lambda x: '/dev/sdb1') - tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=block' - osd = api.Volume( - lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/block', lv_tags=tags) - volumes = [] - volumes.append(osd) - result = zap.ensure_associated_lvs(volumes) - assert '/dev/sdb1' in result - assert '/dev/VolGroup/block' in result - - def test_journal_is_found(self, fake_call): - tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=journal' - osd = api.Volume( - lv_name='volume1', lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv', lv_tags=tags) - volumes = [] - volumes.append(osd) - result = zap.ensure_associated_lvs(volumes) - assert result == ['/dev/VolGroup/lv'] - - def test_multiple_journals_are_found(self): - tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=journal' - volumes = [] - for i in range(3): - osd = api.Volume( - lv_name='volume%s' % i, lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv%s' % i, lv_tags=tags) - volumes.append(osd) - result = zap.ensure_associated_lvs(volumes) - assert '/dev/VolGroup/lv0' in result - assert '/dev/VolGroup/lv1' in result - assert '/dev/VolGroup/lv2' in result - + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_multiple_dbs_are_found(self): tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.journal_uuid=x,ceph.type=db' volumes = [] @@ -178,11 +251,12 @@ def test_multiple_dbs_are_found(self): osd = api.Volume( lv_name='volume%s' % i, lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv%s' % i, lv_tags=tags) volumes.append(osd) - result = zap.ensure_associated_lvs(volumes) + result = zap.Zap([]).ensure_associated_lvs(volumes) assert '/dev/VolGroup/lv0' in result assert '/dev/VolGroup/lv1' in result assert '/dev/VolGroup/lv2' in result + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_multiple_wals_are_found(self): tags = 'ceph.osd_id=0,ceph.osd_fsid=asdf-lkjh,ceph.wal_uuid=x,ceph.type=wal' volumes = [] @@ -190,11
+264,12 @@ def test_multiple_wals_are_found(self): osd = api.Volume( lv_name='volume%s' % i, lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv%s' % i, lv_tags=tags) volumes.append(osd) - result = zap.ensure_associated_lvs(volumes) + result = zap.Zap([]).ensure_associated_lvs(volumes) assert '/dev/VolGroup/lv0' in result assert '/dev/VolGroup/lv1' in result assert '/dev/VolGroup/lv2' in result + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) def test_multiple_backing_devs_are_found(self): volumes = [] for _type in ['journal', 'db', 'wal']: @@ -202,16 +277,15 @@ def test_multiple_backing_devs_are_found(self): osd = api.Volume( lv_name='volume%s' % _type, lv_uuid='y', vg_name='', lv_path='/dev/VolGroup/lv%s' % _type, lv_tags=tags) volumes.append(osd) - result = zap.ensure_associated_lvs(volumes) + result = zap.Zap([]).ensure_associated_lvs(volumes) assert '/dev/VolGroup/lvjournal' in result assert '/dev/VolGroup/lvwal' in result assert '/dev/VolGroup/lvdb' in result @patch('ceph_volume.devices.lvm.zap.api.get_lvs') def test_ensure_associated_lvs(self, m_get_lvs): - zap.ensure_associated_lvs([], lv_tags={'ceph.osd_id': '1'}) + zap.Zap([]).ensure_associated_lvs([], lv_tags={'ceph.osd_id': '1'}) calls = [ - call(tags={'ceph.type': 'journal', 'ceph.osd_id': '1'}), call(tags={'ceph.type': 'db', 'ceph.osd_id': '1'}), call(tags={'ceph.type': 'wal', 'ceph.osd_id': '1'}) ] diff --git a/src/ceph-volume/ceph_volume/tests/devices/raw/data_list.py b/src/ceph-volume/ceph_volume/tests/devices/raw/data_list.py new file mode 100644 index 000000000000..e1d1a48967a0 --- /dev/null +++ b/src/ceph-volume/ceph_volume/tests/devices/raw/data_list.py @@ -0,0 +1,102 @@ +ceph_bluestore_tool_show_label_output: str = '''{ + "/dev/sdb": { + "osd_uuid": "sdb-uuid", + "size": 1099511627776, + "btime": "2021-07-23T16:02:22.809186+0000", + "description": "main", + "bfm_blocks": "268435456", + "bfm_blocks_per_key": "128", + "bfm_bytes_per_block": "4096", + "bfm_size": "1099511627776", + "bluefs": "1", + "ceph_fsid": "sdb-fsid", + "kv_backend": "rocksdb", + "magic": "ceph osd volume v026", + "mkfs_done": "yes", + "osd_key": "AQAO6PpgK+y4CBAAixq/X7OVimbaezvwD/cDmg==", + "ready": "ready", + "require_osd_release": "16", + "type": "bluestore", + "whoami": "0" + }, + "/dev/sdb2": { + "osd_uuid": "sdb2-uuid", + "size": 1099511627776, + "btime": "2021-07-23T16:02:22.809186+0000", + "description": "main", + "bfm_blocks": "268435456", + "bfm_blocks_per_key": "128", + "bfm_bytes_per_block": "4096", + "bfm_size": "1099511627776", + "bluefs": "1", + "ceph_fsid": "sdb2-fsid", + "kv_backend": "rocksdb", + "magic": "ceph osd volume v026", + "mkfs_done": "yes", + "osd_key": "AQAO6PpgK+y4CBAAixq/X7OVimbaezvwD/cDmg==", + "ready": "ready", + "require_osd_release": "16", + "type": "bluestore", + "whoami": "2" + }, + "/dev/sde1": { + "osd_uuid": "sde1-uuid", + "size": 214747316224, + "btime": "2023-07-26T13:20:19.509457+0000", + "description": "main", + "bfm_blocks": "268435456", + "bfm_blocks_per_key": "128", + "bfm_bytes_per_block": "4096", + "bfm_size": "214747316224", + "bluefs": "1", + "ceph_fsid": "sde1-fsid", + "kv_backend": "rocksdb", + "magic": "ceph osd volume v026", + "mkfs_done": "yes", + "osd_key": "AQCSHcFkUeLIMBAAjKqANkXafjvVISkXt6FGCA==", + "ready": "ready", + "require_osd_release": "16", + "type": "bluestore", + "whoami": "1" + }, + "/dev/mapper/ceph--osd--block--1": { + "osd_uuid": "lvm-1-uuid", + "size": 549751619584, + "btime": "2021-07-23T16:04:37.881060+0000", + "description": "main", + 
"bfm_blocks": "134216704", + "bfm_blocks_per_key": "128", + "bfm_bytes_per_block": "4096", + "bfm_size": "549751619584", + "bluefs": "1", + "ceph_fsid": "lvm-1-fsid", + "kv_backend": "rocksdb", + "magic": "ceph osd volume v026", + "mkfs_done": "yes", + "osd_key": "AQCU6Ppgz+UcIRAAh6IUjtPjiXBlEXfwO8ixzw==", + "ready": "ready", + "require_osd_release": "16", + "type": "bluestore", + "whoami": "2" + }, + "/dev/mapper/ceph--osd--block--1": { + "osd_uuid": "lvm-1-uuid", + "size": 549751619584, + "btime": "2021-07-23T16:04:37.881060+0000", + "description": "main", + "bfm_blocks": "134216704", + "bfm_blocks_per_key": "128", + "bfm_bytes_per_block": "4096", + "bfm_size": "549751619584", + "bluefs": "1", + "ceph_fsid": "lvm-1-fsid", + "kv_backend": "rocksdb", + "magic": "ceph osd volume v026", + "mkfs_done": "yes", + "osd_key": "AQCU6Ppgz+UcIRAAh6IUjtPjiXBlEXfwO8ixzw==", + "ready": "ready", + "require_osd_release": "16", + "type": "bluestore", + "whoami": "2" + } +}''' \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/devices/raw/test_list.py b/src/ceph-volume/ceph_volume/tests/devices/raw/test_list.py index 5ad501bab94a..23d2bfdaa2c7 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/raw/test_list.py +++ b/src/ceph-volume/ceph_volume/tests/devices/raw/test_list.py @@ -1,5 +1,7 @@ +# type: ignore import pytest -from mock.mock import patch +from .data_list import ceph_bluestore_tool_show_label_output +from mock.mock import patch, Mock from ceph_volume.devices import raw # Sample lsblk output is below that overviews the test scenario. (--json output for reader clarity) @@ -44,23 +46,27 @@ def _devices_side_effect(): "/dev/sdb3": {}, "/dev/sdc": {}, "/dev/sdd": {}, + "/dev/sde": {}, + "/dev/sde1": {}, "/dev/mapper/ceph--osd--block--1": {}, "/dev/mapper/ceph--osd--block--2": {}, } def _lsblk_all_devices(abspath=True): return [ - {"NAME": "/dev/sda", "KNAME": "/dev/sda", "PKNAME": ""}, - {"NAME": "/dev/sda1", "KNAME": "/dev/sda1", "PKNAME": "/dev/sda"}, - {"NAME": "/dev/sda2", "KNAME": "/dev/sda2", "PKNAME": "/dev/sda"}, - {"NAME": "/dev/sda3", "KNAME": "/dev/sda3", "PKNAME": "/dev/sda"}, - {"NAME": "/dev/sdb", "KNAME": "/dev/sdb", "PKNAME": ""}, - {"NAME": "/dev/sdb2", "KNAME": "/dev/sdb2", "PKNAME": "/dev/sdb"}, - {"NAME": "/dev/sdb3", "KNAME": "/dev/sdb3", "PKNAME": "/dev/sdb"}, - {"NAME": "/dev/sdc", "KNAME": "/dev/sdc", "PKNAME": ""}, - {"NAME": "/dev/sdd", "KNAME": "/dev/sdd", "PKNAME": ""}, - {"NAME": "/dev/mapper/ceph--osd--block--1", "KNAME": "/dev/mapper/ceph--osd--block--1", "PKNAME": "/dev/sdd"}, - {"NAME": "/dev/mapper/ceph--osd--block--2", "KNAME": "/dev/mapper/ceph--osd--block--2", "PKNAME": "/dev/sdd"}, + {"NAME": "/dev/sda", "KNAME": "/dev/sda", "PKNAME": "", "TYPE": "disk"}, + {"NAME": "/dev/sda1", "KNAME": "/dev/sda1", "PKNAME": "/dev/sda", "TYPE": "part"}, + {"NAME": "/dev/sda2", "KNAME": "/dev/sda2", "PKNAME": "/dev/sda", "TYPE": "part"}, + {"NAME": "/dev/sda3", "KNAME": "/dev/sda3", "PKNAME": "/dev/sda", "TYPE": "part"}, + {"NAME": "/dev/sdb", "KNAME": "/dev/sdb", "PKNAME": "", "TYPE": "disk"}, + {"NAME": "/dev/sdb2", "KNAME": "/dev/sdb2", "PKNAME": "/dev/sdb", "TYPE": "part"}, + {"NAME": "/dev/sdb3", "KNAME": "/dev/sdb3", "PKNAME": "/dev/sdb", "TYPE": "part"}, + {"NAME": "/dev/sdc", "KNAME": "/dev/sdc", "PKNAME": "", "TYPE": "disk"}, + {"NAME": "/dev/sdd", "KNAME": "/dev/sdd", "PKNAME": "", "TYPE": "disk"}, + {"NAME": "/dev/sde", "KNAME": "/dev/sde", "PKNAME": "", "TYPE": "disk"}, + {"NAME": "/dev/sde1", "KNAME": "/dev/sde1", "PKNAME": "/dev/sde", 
"TYPE": "part"}, + {"NAME": "/dev/mapper/ceph--osd--block--1", "KNAME": "/dev/mapper/ceph--osd--block--1", "PKNAME": "/dev/sdd", "TYPE": "lvm"}, + {"NAME": "/dev/mapper/ceph--osd--block--2", "KNAME": "/dev/mapper/ceph--osd--block--2", "PKNAME": "/dev/sdd", "TYPE": "lvm"}, ] # dummy lsblk output for device with optional parent output @@ -70,75 +76,6 @@ def _lsblk_output(dev, parent=None): ret = 'NAME="{}" KNAME="{}" PKNAME="{}"'.format(dev, dev, parent) return [ret] # needs to be in a list form -def _bluestore_tool_label_output_sdb(): - return '''{ - "/dev/sdb": { - "osd_uuid": "sdb-uuid", - "size": 1099511627776, - "btime": "2021-07-23T16:02:22.809186+0000", - "description": "main", - "bfm_blocks": "268435456", - "bfm_blocks_per_key": "128", - "bfm_bytes_per_block": "4096", - "bfm_size": "1099511627776", - "bluefs": "1", - "ceph_fsid": "sdb-fsid", - "kv_backend": "rocksdb", - "magic": "ceph osd volume v026", - "mkfs_done": "yes", - "osd_key": "AQAO6PpgK+y4CBAAixq/X7OVimbaezvwD/cDmg==", - "ready": "ready", - "require_osd_release": "16", - "whoami": "0" - } -}''' - -def _bluestore_tool_label_output_sdb2(): - return '''{ - "/dev/sdb2": { - "osd_uuid": "sdb2-uuid", - "size": 1099511627776, - "btime": "2021-07-23T16:02:22.809186+0000", - "description": "main", - "bfm_blocks": "268435456", - "bfm_blocks_per_key": "128", - "bfm_bytes_per_block": "4096", - "bfm_size": "1099511627776", - "bluefs": "1", - "ceph_fsid": "sdb2-fsid", - "kv_backend": "rocksdb", - "magic": "ceph osd volume v026", - "mkfs_done": "yes", - "osd_key": "AQAO6PpgK+y4CBAAixq/X7OVimbaezvwD/cDmg==", - "ready": "ready", - "require_osd_release": "16", - "whoami": "2" - } -}''' - -def _bluestore_tool_label_output_dm_okay(): - return '''{ - "/dev/mapper/ceph--osd--block--1": { - "osd_uuid": "lvm-1-uuid", - "size": 549751619584, - "btime": "2021-07-23T16:04:37.881060+0000", - "description": "main", - "bfm_blocks": "134216704", - "bfm_blocks_per_key": "128", - "bfm_bytes_per_block": "4096", - "bfm_size": "549751619584", - "bluefs": "1", - "ceph_fsid": "lvm-1-fsid", - "kv_backend": "rocksdb", - "magic": "ceph osd volume v026", - "mkfs_done": "yes", - "osd_key": "AQCU6Ppgz+UcIRAAh6IUjtPjiXBlEXfwO8ixzw==", - "ready": "ready", - "require_osd_release": "16", - "whoami": "2" - } -}''' - def _process_call_side_effect(command, **kw): if "lsblk" in command: if "/dev/" in command[-1]: @@ -149,6 +86,8 @@ def _process_call_side_effect(command, **kw): return _lsblk_output(dev, parent="/dev/sdb"), '', 0 if dev == "/dev/sda" or dev == "/dev/sdb" or dev == "/dev/sdc" or dev == "/dev/sdd": return _lsblk_output(dev), '', 0 + if dev == "/dev/sde1": + return _lsblk_output(dev, parent="/dev/sde"), '', 0 if "mapper" in dev: return _lsblk_output(dev, parent="/dev/sdd"), '', 0 pytest.fail('dev {} needs behavior specified for it'.format(dev)) @@ -157,17 +96,7 @@ def _process_call_side_effect(command, **kw): pytest.fail('command {} needs behavior specified for it'.format(command)) if "ceph-bluestore-tool" in command: - if "/dev/sdb" in command: - # sdb is a bluestore OSD - return _bluestore_tool_label_output_sdb(), '', 0 - if "/dev/sdb2" in command: - # sdb2 is a phantom atari partition that appears to have some valid bluestore info - return _bluestore_tool_label_output_sdb2(), '', 0 - if "/dev/mapper/ceph--osd--block--1" in command: - # dm device 1 is a valid bluestore OSD (the other is corrupted/invalid) - return _bluestore_tool_label_output_dm_okay(), '', 0 - # sda and children, sdb's children, sdc, sdd, dm device 2 all do NOT have bluestore OSD data - 
return [], 'fake No such file or directory error', 1 + return ceph_bluestore_tool_show_label_output, '', 0 pytest.fail('command {} needs behavior specified for it'.format(command)) def _has_bluestore_label_side_effect(disk_path): @@ -181,6 +110,10 @@ def _has_bluestore_label_side_effect(disk_path): return False # empty disk if disk_path == "/dev/sdd": return False # has LVM subdevices + if disk_path == "/dev/sde": + return False # has partitions, it means it shouldn't be an OSD + if disk_path == "/dev/sde1": + return True # is a valid OSD if disk_path == "/dev/mapper/ceph--osd--block--1": return True # good OSD if disk_path == "/dev/mapper/ceph--osd--block--2": @@ -189,6 +122,7 @@ def _has_bluestore_label_side_effect(disk_path): class TestList(object): + @patch('ceph_volume.devices.raw.list.List.exclude_lvm_osd_devices', Mock()) @patch('ceph_volume.util.device.disk.get_devices') @patch('ceph_volume.util.disk.has_bluestore_label') @patch('ceph_volume.process.call') @@ -209,14 +143,20 @@ def test_raw_list(self, patched_disk_lsblk, patched_call, patched_bluestore_labe assert sdb['device'] == '/dev/sdb' assert sdb['ceph_fsid'] == 'sdb-fsid' assert sdb['type'] == 'bluestore' - lvm1 = result['lvm-1-uuid'] assert lvm1['osd_uuid'] == 'lvm-1-uuid' assert lvm1['osd_id'] == 2 assert lvm1['device'] == '/dev/mapper/ceph--osd--block--1' assert lvm1['ceph_fsid'] == 'lvm-1-fsid' assert lvm1['type'] == 'bluestore' - + sde1 = result['sde1-uuid'] + assert sde1['osd_uuid'] == 'sde1-uuid' + assert sde1['osd_id'] == 1 + assert sde1['device'] == '/dev/sde1' + assert sde1['ceph_fsid'] == 'sde1-fsid' + assert sde1['type'] == 'bluestore' + + @patch('ceph_volume.devices.raw.list.List.exclude_lvm_osd_devices', Mock()) @patch('ceph_volume.util.device.disk.get_devices') @patch('ceph_volume.util.disk.has_bluestore_label') @patch('ceph_volume.process.call') @@ -234,5 +174,5 @@ def _has_bluestore_label_side_effect_with_OSError(device_path): patched_get_devices.side_effect = _devices_side_effect result = raw.list.List([]).generate() - assert len(result) == 3 - assert 'sdb-uuid' in result + assert len(result) == 2 + assert {'sdb-uuid', 'sde1-uuid'} == set(result.keys()) diff --git a/src/ceph-volume/ceph_volume/tests/devices/raw/test_prepare.py b/src/ceph-volume/ceph_volume/tests/devices/raw/test_prepare.py index f814bbf136b7..ac0b1c4fb161 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/raw/test_prepare.py +++ b/src/ceph-volume/ceph_volume/tests/devices/raw/test_prepare.py @@ -1,7 +1,7 @@ import pytest from ceph_volume.devices import raw -from mock.mock import patch - +from mock.mock import patch, MagicMock +from ceph_volume import objectstore class TestRaw(object): @@ -22,15 +22,21 @@ def test_main_shows_prepare_subcommands(self, capsys): assert 'prepare ' in stdout assert 'Format a raw device' in stdout - +@patch('ceph_volume.util.prepare.create_key', return_value='fake-secret') class TestPrepare(object): + def _setup(self, **kw): + args = kw.get('args', []) + self.p = raw.prepare.Prepare([]) + self.p.objectstore = objectstore.rawbluestore.RawBlueStore(args=args) + for k, v in kw.items(): + setattr(self.p.objectstore, k, v) - def test_main_spits_help_with_no_arguments(self, capsys): + def test_main_spits_help_with_no_arguments(self, m_create_key, capsys): raw.prepare.Prepare([]).main() stdout, stderr = capsys.readouterr() assert 'Prepare an OSD by assigning an ID and FSID' in stdout - def test_main_shows_full_help(self, capsys): + def test_main_shows_full_help(self, m_create_key, capsys): with 
pytest.raises(SystemExit): raw.prepare.Prepare(argv=['--help']).main() stdout, stderr = capsys.readouterr() @@ -41,8 +47,13 @@ def test_main_shows_full_help(self, capsys): assert 'Path to bluestore block.wal block device' in stdout assert 'Enable device encryption via dm-crypt' in stdout + @patch('ceph_volume.util.arg_validators.set_dmcrypt_no_workqueue', return_value=MagicMock()) @patch('ceph_volume.util.arg_validators.ValidRawDevice.__call__') - def test_prepare_dmcrypt_no_secret_passed(self, m_valid_device, capsys): + def test_prepare_dmcrypt_no_secret_passed(self, + m_valid_device, + m_set_dmcrypt_no_workqueue, + m_create_key, + capsys): m_valid_device.return_value = '/dev/foo' with pytest.raises(SystemExit): raw.prepare.Prepare(argv=['--bluestore', '--data', '/dev/foo', '--dmcrypt']).main() @@ -52,43 +63,52 @@ def test_prepare_dmcrypt_no_secret_passed(self, m_valid_device, capsys): @patch('ceph_volume.util.encryption.luks_open') @patch('ceph_volume.util.encryption.luks_format') @patch('ceph_volume.util.disk.lsblk') - def test_prepare_dmcrypt_block(self, m_lsblk, m_luks_format, m_luks_open): + def test_prepare_dmcrypt_block(self, m_lsblk, m_luks_format, m_luks_open, m_create_key, factory): m_lsblk.return_value = {'KNAME': 'foo'} m_luks_format.return_value = True m_luks_open.return_value = True - result = raw.prepare.prepare_dmcrypt('foo', '/dev/foo', 'block', '123') - m_luks_open.assert_called_with('foo', '/dev/foo', 'ceph-123-foo-block-dmcrypt') - m_luks_format.assert_called_with('foo', '/dev/foo') - assert result == '/dev/mapper/ceph-123-foo-block-dmcrypt' + self._setup(block_device_path='/dev/foo', + osd_fsid='123', + secrets=dict(dmcrypt_key='foo')) + self.p.objectstore.prepare_dmcrypt() + m_luks_open.assert_called_with(self.p.objectstore.dmcrypt_key, '/dev/foo', 'ceph-123-foo-block-dmcrypt', 0) + m_luks_format.assert_called_with(self.p.objectstore.dmcrypt_key, '/dev/foo') + assert self.p.objectstore.__dict__['block_device_path'] == '/dev/mapper/ceph-123-foo-block-dmcrypt' @patch('ceph_volume.util.encryption.luks_open') @patch('ceph_volume.util.encryption.luks_format') @patch('ceph_volume.util.disk.lsblk') - def test_prepare_dmcrypt_db(self, m_lsblk, m_luks_format, m_luks_open): + def test_prepare_dmcrypt_db(self, m_lsblk, m_luks_format, m_luks_open, m_create_key): m_lsblk.return_value = {'KNAME': 'foo'} m_luks_format.return_value = True m_luks_open.return_value = True - result = raw.prepare.prepare_dmcrypt('foo', '/dev/foo', 'db', '123') - m_luks_open.assert_called_with('foo', '/dev/foo', 'ceph-123-foo-db-dmcrypt') - m_luks_format.assert_called_with('foo', '/dev/foo') - assert result == '/dev/mapper/ceph-123-foo-db-dmcrypt' + self._setup(db_device_path='/dev/db-foo', + osd_fsid='456', + secrets=dict(dmcrypt_key='foo')) + self.p.objectstore.prepare_dmcrypt() + m_luks_open.assert_called_with(self.p.objectstore.dmcrypt_key, '/dev/db-foo', 'ceph-456-foo-db-dmcrypt', 0) + m_luks_format.assert_called_with(self.p.objectstore.dmcrypt_key, '/dev/db-foo') + assert self.p.objectstore.__dict__['db_device_path'] == '/dev/mapper/ceph-456-foo-db-dmcrypt' @patch('ceph_volume.util.encryption.luks_open') @patch('ceph_volume.util.encryption.luks_format') @patch('ceph_volume.util.disk.lsblk') - def test_prepare_dmcrypt_wal(self, m_lsblk, m_luks_format, m_luks_open): + def test_prepare_dmcrypt_wal(self, m_lsblk, m_luks_format, m_luks_open, m_create_key): m_lsblk.return_value = {'KNAME': 'foo'} m_luks_format.return_value = True m_luks_open.return_value = True - result = 
raw.prepare.prepare_dmcrypt('foo', '/dev/foo', 'wal', '123') - m_luks_open.assert_called_with('foo', '/dev/foo', 'ceph-123-foo-wal-dmcrypt') - m_luks_format.assert_called_with('foo', '/dev/foo') - assert result == '/dev/mapper/ceph-123-foo-wal-dmcrypt' + self._setup(wal_device_path='/dev/wal-foo', + osd_fsid='789', + secrets=dict(dmcrypt_key='foo')) + self.p.objectstore.prepare_dmcrypt() + m_luks_open.assert_called_with(self.p.objectstore.dmcrypt_key, '/dev/wal-foo', 'ceph-789-foo-wal-dmcrypt', 0) + m_luks_format.assert_called_with(self.p.objectstore.dmcrypt_key, '/dev/wal-foo') + assert self.p.objectstore.__dict__['wal_device_path'] == '/dev/mapper/ceph-789-foo-wal-dmcrypt' - @patch('ceph_volume.devices.raw.prepare.rollback_osd') - @patch('ceph_volume.devices.raw.prepare.Prepare.prepare') + @patch('ceph_volume.objectstore.rawbluestore.rollback_osd') + @patch('ceph_volume.objectstore.rawbluestore.RawBlueStore.prepare') @patch('ceph_volume.util.arg_validators.ValidRawDevice.__call__') - def test_safe_prepare_exception_raised(self, m_valid_device, m_prepare, m_rollback_osd): + def test_safe_prepare_exception_raised(self, m_valid_device, m_prepare, m_rollback_osd, m_create_key): m_valid_device.return_value = '/dev/foo' m_prepare.side_effect=Exception('foo') m_rollback_osd.return_value = 'foobar' diff --git a/src/ceph-volume/ceph_volume/tests/devices/simple/test_activate.py b/src/ceph-volume/ceph_volume/tests/devices/simple/test_activate.py index 152ac9b09e23..ae7e52e518b8 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/simple/test_activate.py +++ b/src/ceph-volume/ceph_volume/tests/devices/simple/test_activate.py @@ -1,11 +1,13 @@ import os import pytest from ceph_volume.devices.simple import activate +from mock.mock import patch class TestActivate(object): - def test_no_data_uuid(self, factory, is_root, monkeypatch, capture, fake_filesystem): + @patch('ceph_volume.decorators.os.getuid', return_value=0) + def test_no_data_uuid(self, m_getuid, factory, capture, fake_filesystem): fake_filesystem.create_file('/tmp/json-config', contents='{}') args = factory(osd_id='0', osd_fsid='1234', json_config='/tmp/json-config') with pytest.raises(RuntimeError): @@ -22,7 +24,7 @@ def test_main_spits_help_with_no_arguments(self, capsys): stdout, stderr = capsys.readouterr() assert 'Activate OSDs by mounting devices previously configured' in stdout - def test_activate_all(self, is_root, monkeypatch): + def test_activate_all(self, monkeypatch): ''' make sure Activate calls activate for each file returned by glob ''' diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/Vagrantfile similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/Vagrantfile rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/Vagrantfile diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/group_vars/all similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/group_vars/all rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/group_vars/all diff 
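Note: the test_prepare.py hunks above follow the dm-crypt handling as it moves from raw.prepare.prepare_dmcrypt() into the objectstore layer (objectstore.rawbluestore.RawBlueStore.prepare_dmcrypt()); instead of returning the mapper path, the method rewrites the corresponding *_device_path attribute on the objectstore instance. The mapper name the tests expect combines the OSD fsid, the device's lsblk KNAME, and its role. A rough sketch of that naming convention follows, with open_dmcrypt_device as a purely illustrative stand-in; the trailing 0 simply mirrors the extra argument the tests pass to luks_open.

    from ceph_volume.util import disk, encryption

    def open_dmcrypt_device(dmcrypt_key: str, device_path: str,
                            device_type: str, osd_fsid: str) -> str:
        """Illustrative sketch of the naming scheme asserted by the tests above."""
        kname = disk.lsblk(device_path)['KNAME']
        # e.g. ceph-123-foo-block-dmcrypt for fsid '123', KNAME 'foo', role 'block'
        mapper_name = f'ceph-{osd_fsid}-{kname}-{device_type}-dmcrypt'
        encryption.luks_format(dmcrypt_key, device_path)
        encryption.luks_open(dmcrypt_key, device_path, mapper_name, 0)
        return f'/dev/mapper/{mapper_name}'

In the refactor itself the resulting /dev/mapper path is not returned but stored back on the RawBlueStore instance (block_device_path, db_device_path, or wal_device_path), which is what the assertions on self.p.objectstore.__dict__ check.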
--git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/hosts b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/hosts similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/hosts rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/hosts diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/setup.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/setup.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/setup.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/setup.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/test.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/test.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/test.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/test.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/test_zap.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/test_zap.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/test_zap.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/test_zap.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/vagrant_variables.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt-explicit/vagrant_variables.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt-explicit/vagrant_variables.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/Vagrantfile similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/Vagrantfile rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/Vagrantfile diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/group_vars/all similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/group_vars/all rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/group_vars/all diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/hosts 
b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/hosts similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/hosts rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/hosts diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/setup.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/setup.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/setup.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/setup.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/test.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/test.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/test.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/test.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/test_zap.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/test_zap.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/test_zap.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/test_zap.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/vagrant_variables.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-dmcrypt/vagrant_variables.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-dmcrypt/vagrant_variables.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/Vagrantfile similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/Vagrantfile rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/Vagrantfile diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/group_vars/all similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/group_vars/all rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/group_vars/all diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/hosts b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/hosts similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/hosts rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/hosts diff 
--git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/setup.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/setup.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/setup.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/setup.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/test.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/test.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/test.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/test.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/test_zap.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/test_zap.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/test_zap.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/test_zap.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/vagrant_variables.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type-explicit/vagrant_variables.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type-explicit/vagrant_variables.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/Vagrantfile similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/Vagrantfile rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/Vagrantfile diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/group_vars/all similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/group_vars/all rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/group_vars/all diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/hosts b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/hosts similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/hosts rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/hosts diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/setup.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/setup.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/setup.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/setup.yml diff --git 
a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/test.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/test.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/test.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/test.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/test_zap.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/test_zap.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/test_zap.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/test_zap.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/vagrant_variables.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/mixed-type/vagrant_variables.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/mixed-type/vagrant_variables.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/Vagrantfile similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/Vagrantfile rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/Vagrantfile diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/group_vars/all similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/group_vars/all rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/group_vars/all diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/hosts b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/hosts similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/hosts rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/hosts diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/setup.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/setup.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/setup.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/setup.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/test.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/test.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/test.yml rename to 
src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/test.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/test_zap.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/test_zap.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/test_zap.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/test_zap.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/vagrant_variables.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type-dmcrypt/vagrant_variables.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type-dmcrypt/vagrant_variables.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/Vagrantfile similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/Vagrantfile rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/Vagrantfile diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/group_vars/all similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/group_vars/all rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/group_vars/all diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/hosts b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/hosts similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/hosts rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/hosts diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/setup.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/setup.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/setup.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/setup.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/test.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/test.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/test.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/test.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/test_zap.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/test_zap.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/test_zap.yml rename to 
src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/test_zap.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/vagrant_variables.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/batch/centos8/bluestore/single-type/vagrant_variables.yml rename to src/ceph-volume/ceph_volume/tests/functional/batch/centos/bluestore/single-type/vagrant_variables.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test.yml index 5d5bc59f2918..17f200c9dd31 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test.yml +++ b/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test.yml @@ -14,10 +14,10 @@ tasks: - name: mark osds down - command: "ceph --cluster {{ cluster }} osd down osd.{{ item }}" + command: "ceph osd down osd.{{ item }}" with_items: "{{ osd_ids }}" - name: purge osds - command: "ceph --cluster {{ cluster }} osd purge osd.{{ item }} --yes-i-really-mean-it" + command: "ceph osd purge osd.{{ item }} --yes-i-really-mean-it" with_items: "{{ osd_ids }}" - hosts: osds @@ -25,18 +25,18 @@ tasks: - name: zap devices used for OSDs - command: "ceph-volume --cluster {{ cluster }} lvm zap {{ item }} --destroy" + command: "ceph-volume lvm zap {{ item }} --destroy" with_items: "{{ devices }}" environment: CEPH_VOLUME_DEBUG: 1 - name: batch create devices again - command: "ceph-volume --cluster {{ cluster }} lvm batch --yes --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices | join(' ') }}" + command: "ceph-volume lvm batch --yes --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices | join(' ') }}" environment: CEPH_VOLUME_DEBUG: 1 - name: ensure batch create is idempotent - command: "ceph-volume --cluster {{ cluster }} lvm batch --yes --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices | join(' ') }}" + command: "ceph-volume lvm batch --yes --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices | join(' ') }}" register: batch_cmd failed_when: false environment: @@ -50,7 +50,7 @@ - "'strategy changed' not in batch_cmd.stderr" - name: run batch --report to see if devices get filtered - command: "ceph-volume --cluster {{ cluster }} lvm batch --report --format=json --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices | join(' ') }}" + command: "ceph-volume lvm batch --report --format=json --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices | join(' ') }}" register: report_cmd failed_when: false environment: diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_explicit.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_explicit.yml index 1ff0acc9decf..2581f5c46156 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_explicit.yml +++ b/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_explicit.yml @@ -14,10 +14,10 @@ tasks: - name: mark osds down - command: "ceph --cluster {{ cluster }} osd down osd.{{ item }}" + command: "ceph osd down osd.{{ item }}" with_items: "{{ 
osd_ids }}" - name: purge osds - command: "ceph --cluster {{ cluster }} osd purge osd.{{ item }} --yes-i-really-mean-it" + command: "ceph osd purge osd.{{ item }} --yes-i-really-mean-it" with_items: "{{ osd_ids }}" - hosts: osds @@ -27,18 +27,18 @@ tasks: - name: zap devices used for OSDs - command: "ceph-volume --cluster {{ cluster }} lvm zap {{ item }} --destroy" + command: "ceph-volume lvm zap {{ item }} --destroy" with_items: "{{ devices }}" environment: CEPH_VOLUME_DEBUG: 1 - name: batch create devices again - command: "ceph-volume --cluster {{ cluster }} lvm batch --yes --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices[:2] | join(' ') }} {{ external_devices }} {{ devices[2:] | join(' ') }}" + command: "ceph-volume lvm batch --yes --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices[:2] | join(' ') }} {{ external_devices }} {{ devices[2:] | join(' ') }}" environment: CEPH_VOLUME_DEBUG: 1 - name: ensure batch create is idempotent when all data devices are filtered - command: "ceph-volume --cluster {{ cluster }} lvm batch --yes --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices[:2] | join(' ') }} {{ external_devices }} {{ devices[2:] | join(' ') }}" + command: "ceph-volume lvm batch --yes --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices[:2] | join(' ') }} {{ external_devices }} {{ devices[2:] | join(' ') }}" register: batch_cmd failed_when: false environment: @@ -51,7 +51,7 @@ - batch_cmd.rc != 0 - name: run batch --report to see if devices get filtered - command: "ceph-volume --cluster {{ cluster }} lvm batch --report --format=json --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices[:2] | join(' ') }} {{ external_devices }} {{ devices[2:] | join(' ') }}" + command: "ceph-volume lvm batch --report --format=json --{{ osd_objectstore|default('bluestore') }} {{ '--dmcrypt' if dmcrypt|default(false) else '' }} {{ devices[:2] | join(' ') }} {{ external_devices }} {{ devices[2:] | join(' ') }}" register: report_cmd failed_when: false environment: diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_zap.yml b/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_zap.yml index 9d63df9e0fc9..4408288c8d1d 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_zap.yml +++ b/src/ceph-volume/ceph_volume/tests/functional/batch/playbooks/test_zap.yml @@ -15,10 +15,10 @@ tasks: - name: mark osds down - command: "ceph --cluster {{ cluster }} osd down osd.{{ item }}" + command: "ceph osd down osd.{{ item }}" with_items: "{{ osd_ids }}" - name: purge osds - command: "ceph --cluster {{ cluster }} osd purge osd.{{ item }} --yes-i-really-mean-it" + command: "ceph osd purge osd.{{ item }} --yes-i-really-mean-it" with_items: "{{ osd_ids }}" @@ -27,7 +27,7 @@ tasks: - name: zap devices used for OSDs - command: "ceph-volume --cluster {{ cluster }} lvm zap --osd-id {{ item }} --destroy" + command: "ceph-volume lvm zap --osd-id {{ item }} --destroy" with_items: "{{ osd_ids }}" environment: CEPH_VOLUME_DEBUG: 1 diff --git a/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini b/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini index bc50be8101b4..ede3868b9fe2 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini +++ 
b/src/ceph-volume/ceph_volume/tests/functional/batch/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = centos8-bluestore-{single_type,single_type_dmcrypt,mixed_type,mixed_type_dmcrypt,mixed_type_explicit,mixed_type_dmcrypt_explicit} +envlist = centos-bluestore-{single_type,single_type_dmcrypt,mixed_type,mixed_type_dmcrypt,mixed_type_explicit,mixed_type_dmcrypt_explicit} skipsdist = True [testenv] @@ -18,17 +18,18 @@ setenv= VAGRANT_CWD = {changedir} CEPH_VOLUME_DEBUG = 1 DEBIAN_FRONTEND=noninteractive + ANSIBLE_COLLECTIONS_PATH = {envdir}/ansible_collections changedir= - centos8-bluestore-single_type: {toxinidir}/centos8/bluestore/single-type - centos8-bluestore-single_type_dmcrypt: {toxinidir}/centos8/bluestore/single-type-dmcrypt - centos8-bluestore-mixed_type: {toxinidir}/centos8/bluestore/mixed-type - centos8-bluestore-mixed_type_dmcrypt: {toxinidir}/centos8/bluestore/mixed-type-dmcrypt - centos8-bluestore-mixed_type_explicit: {toxinidir}/centos8/bluestore/mixed-type-explicit - centos8-bluestore-mixed_type_dmcrypt_explicit: {toxinidir}/centos8/bluestore/mixed-type-dmcrypt-explicit + centos-bluestore-single_type: {toxinidir}/centos/bluestore/single-type + centos-bluestore-single_type_dmcrypt: {toxinidir}/centos/bluestore/single-type-dmcrypt + centos-bluestore-mixed_type: {toxinidir}/centos/bluestore/mixed-type + centos-bluestore-mixed_type_dmcrypt: {toxinidir}/centos/bluestore/mixed-type-dmcrypt + centos-bluestore-mixed_type_explicit: {toxinidir}/centos/bluestore/mixed-type-explicit + centos-bluestore-mixed_type_dmcrypt_explicit: {toxinidir}/centos/bluestore/mixed-type-dmcrypt-explicit commands= git clone -b {env:CEPH_ANSIBLE_BRANCH:master} --single-branch {env:CEPH_ANSIBLE_CLONE:"https://github.com/ceph/ceph-ansible.git"} {envdir}/tmp/ceph-ansible python -m pip install -r {envdir}/tmp/ceph-ansible/tests/requirements.txt - ansible-galaxy install -r {envdir}/tmp/ceph-ansible/requirements.yml -v + ansible-galaxy collection install -r {envdir}/tmp/ceph-ansible/requirements.yml -v -p {envdir}/ansible_collections # bash {toxinidir}/../scripts/vagrant_up.sh {env:VAGRANT_UP_FLAGS:""} {posargs:--provider=virtualbox} bash {toxinidir}/../scripts/vagrant_up.sh {posargs:--provider=virtualbox} @@ -42,9 +43,6 @@ commands= # use ceph-ansible to deploy a ceph cluster on the vms ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/deploy.yml --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest} toxinidir={toxinidir}" - # prepare nodes for testing with testinfra - ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml - # test cluster state using testinfra py.test --reruns 5 --reruns-delay 10 -n 4 --sudo -v --connection=ansible --ssh-config={changedir}/vagrant_ssh_config --ansible-inventory={changedir}/hosts {toxinidir}/../tests diff --git a/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore b/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore index ca0146b19fee..1a4fadc10673 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore +++ b/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore @@ -1,10 +1,8 @@ --- ceph_dev: True -cluster: test public_network: "192.168.3.0/24" cluster_network: "192.168.4.0/24" -monitor_interface: eth1 osd_objectstore: "bluestore" osd_scenario: lvm num_osds: 2 diff --git a/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_lvm 
b/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_lvm index c333af3e522c..40abe4c8c6a1 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_lvm +++ b/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_lvm @@ -1,10 +1,8 @@ --- ceph_dev: True -cluster: test public_network: "192.168.3.0/24" cluster_network: "192.168.4.0/24" -monitor_interface: eth1 journal_size: 100 osd_objectstore: "bluestore" osd_scenario: lvm diff --git a/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_lvm_dmcrypt b/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_lvm_dmcrypt index 3cd68aaf1db3..5f8eb38274a0 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_lvm_dmcrypt +++ b/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_lvm_dmcrypt @@ -2,10 +2,8 @@ dmcrypt: True ceph_dev: True -cluster: test public_network: "192.168.3.0/24" cluster_network: "192.168.4.0/24" -monitor_interface: eth1 journal_size: 100 osd_objectstore: "bluestore" osd_scenario: lvm diff --git a/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_single b/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_single index e43b14a75a49..688d65352d85 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_single +++ b/src/ceph-volume/ceph_volume/tests/functional/group_vars/bluestore_single @@ -1,10 +1,8 @@ --- ceph_dev: True -cluster: test public_network: "192.168.3.0/24" cluster_network: "192.168.4.0/24" -monitor_interface: eth1 osd_objectstore: "bluestore" osd_scenario: lvm ceph_origin: 'repository' diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/Vagrantfile similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/Vagrantfile rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/Vagrantfile diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/group_vars/all similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/group_vars/all rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/group_vars/all diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/hosts b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/hosts similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/hosts rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/hosts diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/setup.yml b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/setup.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/setup.yml rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/setup.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/test.yml b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/test.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/test.yml rename to 
src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/test.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/vagrant_variables.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/create/vagrant_variables.yml rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/create/vagrant_variables.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/Vagrantfile similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/Vagrantfile rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/Vagrantfile diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/group_vars/all similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/group_vars/all rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/group_vars/all diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/hosts b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/hosts similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/hosts rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/hosts diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/setup.yml b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/setup.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/setup.yml rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/setup.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/test.yml b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/test.yml new file mode 100644 index 000000000000..c35591ca0333 --- /dev/null +++ b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/test.yml @@ -0,0 +1,123 @@ +- hosts: osds + become: yes + tasks: + + - name: stop ceph-osd@2 daemon + service: + name: ceph-osd@2 + state: stopped + + - name: stop ceph-osd@0 daemon + service: + name: ceph-osd@0 + state: stopped + +- hosts: mons + become: yes + tasks: + - name: mark osds down + command: "ceph osd down osd.{{ item }}" + with_items: + - 0 + - 2 + + - name: destroy osd.2 + command: "ceph osd destroy osd.2 --yes-i-really-mean-it" + register: result + retries: 30 + delay: 1 + until: result is succeeded + + - name: destroy osd.0 + command: "ceph osd destroy osd.0 --yes-i-really-mean-it" + register: result + retries: 30 + delay: 1 + until: result is succeeded + +- hosts: osds + become: yes + tasks: + + # osd.2 device + - name: zap /dev/vdd1 + command: "ceph-volume lvm zap /dev/vdd1 --destroy" + environment: + CEPH_VOLUME_DEBUG: 1 + + # partitions have been completely removed, so re-create them again + - name: re-create partition /dev/vdd for lvm data usage + parted: + device: /dev/vdd + number: 1 + part_start: 0% + part_end: 50% + unit: '%' + label: gpt + state: present + + - 
name: redeploy osd.2 using /dev/vdd1 + command: "ceph-volume lvm create --bluestore --data /dev/vdd1 --osd-id 2" + environment: + CEPH_VOLUME_DEBUG: 1 + + # osd.0 lv + - name: zap test_group/data-lv1 + command: "ceph-volume lvm zap test_group/data-lv1" + environment: + CEPH_VOLUME_DEBUG: 1 + + - name: redeploy osd.0 using test_group/data-lv1 + command: "ceph-volume lvm create --bluestore --data test_group/data-lv1 --osd-id 0" + environment: + CEPH_VOLUME_DEBUG: 1 + + - name: stop ceph-osd@0 daemon + service: + name: ceph-osd@0 + state: stopped + + +- hosts: mons + become: yes + tasks: + - name: mark osds down + command: "ceph osd down osd.0" + + - name: destroy osd.0 + command: "ceph osd destroy osd.0 --yes-i-really-mean-it" + register: result + retries: 30 + delay: 1 + until: result is succeeded + + +- hosts: osds + become: yes + tasks: + + + - name: zap test_group/data-lv1 + command: "ceph-volume lvm zap test_group/data-lv1" + environment: + CEPH_VOLUME_DEBUG: 1 + + - name: prepare osd.0 using test_group/data-lv1 + command: "ceph-volume lvm prepare --bluestore --data test_group/data-lv1 --osd-id 0" + environment: + CEPH_VOLUME_DEBUG: 1 + + - name: activate all to start the previously prepared osd.0 + command: "ceph-volume lvm activate --all" + environment: + CEPH_VOLUME_DEBUG: 1 + + - name: node inventory + command: "ceph-volume inventory" + environment: + CEPH_VOLUME_DEBUG: 1 + + - name: list all OSDs + command: "ceph-volume lvm list" + environment: + CEPH_VOLUME_DEBUG: 1 diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/vagrant_variables.yml similarity index 100% rename from src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/vagrant_variables.yml rename to src/ceph-volume/ceph_volume/tests/functional/lvm/centos/bluestore/dmcrypt/vagrant_variables.yml diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/test.yml b/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/test.yml deleted file mode 100644 index 0a47b5eb851e..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/lvm/centos8/bluestore/dmcrypt/test.yml +++ /dev/null @@ -1,123 +0,0 @@ -- hosts: osds - become: yes - tasks: - - - name: stop ceph-osd@2 daemon - service: - name: ceph-osd@2 - state: stopped - - - name: stop ceph-osd@0 daemon - service: - name: ceph-osd@0 - state: stopped - -- hosts: mons - become: yes - tasks: - - name: mark osds down - command: "ceph --cluster {{ cluster }} osd down osd.{{ item }}" - with_items: - - 0 - - 2 - - - name: destroy osd.2 - command: "ceph --cluster {{ cluster }} osd destroy osd.2 --yes-i-really-mean-it" - register: result - retries: 30 - delay: 1 - until: result is succeeded - - - name: destroy osd.0 - command: "ceph --cluster {{ cluster }} osd destroy osd.0 --yes-i-really-mean-it" - register: result - retries: 30 - delay: 1 - until: result is succeeded - -- hosts: osds - become: yes - tasks: - - # osd.2 device - - name: zap /dev/vdd1 - command: "ceph-volume --cluster {{ cluster }} lvm zap /dev/vdd1 --destroy" - environment: - CEPH_VOLUME_DEBUG: 1 - - # partitions have been completely removed, so re-create them again - - name: re-create partition /dev/vdd for lvm data usage - parted: - device: /dev/vdd - number: 1 - part_start: 0% - part_end: 50% - unit: '%' - label: gpt - state: present - - - name: redeploy osd.2 using /dev/vdd1 - command: "ceph-volume --cluster {{ 
cluster }} lvm create --bluestore --data /dev/vdd1 --osd-id 2" - environment: - CEPH_VOLUME_DEBUG: 1 - - # osd.0 lv - - name: zap test_group/data-lv1 - command: "ceph-volume --cluster {{ cluster }} lvm zap test_group/data-lv1" - environment: - CEPH_VOLUME_DEBUG: 1 - - - name: redeploy osd.0 using test_group/data-lv1 - command: "ceph-volume --cluster {{ cluster }} lvm create --bluestore --data test_group/data-lv1 --osd-id 0" - environment: - CEPH_VOLUME_DEBUG: 1 - - - name: stop ceph-osd@0 daemon - service: - name: ceph-osd@0 - state: stopped - - -- hosts: mons - become: yes - tasks: - - name: mark osds down - command: "ceph --cluster {{ cluster }} osd down osd.0" - - - name: destroy osd.0 - command: "ceph --cluster {{ cluster }} osd destroy osd.0 --yes-i-really-mean-it" - register: result - retries: 30 - delay: 1 - until: result is succeeded - - -- hosts: osds - become: yes - tasks: - - - - name: zap test_group/data-lv1 - command: "ceph-volume --cluster {{ cluster }} lvm zap test_group/data-lv1" - environment: - CEPH_VOLUME_DEBUG: 1 - - - name: prepare osd.0 using test_group/data-lv1 - command: "ceph-volume --cluster {{ cluster }} lvm prepare --bluestore --data test_group/data-lv1 --osd-id 0" - environment: - CEPH_VOLUME_DEBUG: 1 - - - name: activate all to start the previously prepared osd.0 - command: "ceph-volume lvm activate --all" - environment: - CEPH_VOLUME_DEBUG: 1 - - - name: node inventory - command: "ceph-volume inventory" - environment: - CEPH_VOLUME_DEBUG: 1 - - - name: list all OSDs - command: "ceph-volume lvm list" - environment: - CEPH_VOLUME_DEBUG: 1 diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml b/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml index 97d77a7f4601..b6b038c90be0 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml +++ b/src/ceph-volume/ceph_volume/tests/functional/lvm/playbooks/test_bluestore.yml @@ -18,20 +18,20 @@ become: yes tasks: - name: mark osds down - command: "ceph --cluster {{ cluster }} osd down osd.{{ item }}" + command: "ceph osd down osd.{{ item }}" with_items: - 0 - 2 - name: destroy osd.2 - command: "ceph --cluster {{ cluster }} osd destroy osd.2 --yes-i-really-mean-it" + command: "ceph osd destroy osd.2 --yes-i-really-mean-it" register: result retries: 30 delay: 1 until: result is succeeded - name: destroy osd.0 - command: "ceph --cluster {{ cluster }} osd destroy osd.0 --yes-i-really-mean-it" + command: "ceph osd destroy osd.0 --yes-i-really-mean-it" register: result retries: 30 delay: 1 @@ -44,7 +44,7 @@ # osd.2 device - name: zap /dev/vdd1 - command: "ceph-volume --cluster {{ cluster }} lvm zap /dev/vdd1 --destroy" + command: "ceph-volume lvm zap /dev/vdd1 --destroy" environment: CEPH_VOLUME_DEBUG: 1 @@ -60,18 +60,18 @@ state: present - name: redeploy osd.2 using /dev/vdd1 - command: "ceph-volume --cluster {{ cluster }} lvm create --bluestore --data /dev/vdd1 --osd-id 2" + command: "ceph-volume lvm create --bluestore --data /dev/vdd1 --osd-id 2" environment: CEPH_VOLUME_DEBUG: 1 # osd.0 device (zap without --destroy that removes the LV) - name: zap test_group/data-lv1 - command: "ceph-volume --cluster {{ cluster }} lvm zap test_group/data-lv1" + command: "ceph-volume lvm zap test_group/data-lv1" environment: CEPH_VOLUME_DEBUG: 1 - name: prepare osd.0 again using test_group/data-lv1 - command: "ceph-volume --cluster {{ cluster }} lvm prepare --bluestore --data test_group/data-lv1 --osd-id 0" + command: "ceph-volume lvm prepare 
--bluestore --data test_group/data-lv1 --osd-id 0" environment: CEPH_VOLUME_DEBUG: 1 @@ -151,11 +151,11 @@ # zapping the first lv shouldn't remove the vg, allowing the second zap to succeed - name: zap test_zap/data-lv1 - command: "ceph-volume --cluster {{ cluster }} lvm zap --destroy test_zap/data-lv1" + command: "ceph-volume lvm zap --destroy test_zap/data-lv1" environment: CEPH_VOLUME_DEBUG: 1 - name: zap test_zap/data-lv2 - command: "ceph-volume --cluster {{ cluster }} lvm zap --destroy test_zap/data-lv2" + command: "ceph-volume lvm zap --destroy test_zap/data-lv2" environment: CEPH_VOLUME_DEBUG: 1 diff --git a/src/ceph-volume/ceph_volume/tests/functional/lvm/tox.ini b/src/ceph-volume/ceph_volume/tests/functional/lvm/tox.ini index fe60c7db2289..4c76c3ef9147 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/lvm/tox.ini +++ b/src/ceph-volume/ceph_volume/tests/functional/lvm/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = centos8-bluestore-{create,prepare_activate,dmcrypt} +envlist = centos-bluestore-{create,prepare_activate,dmcrypt} skipsdist = True [testenv] @@ -18,18 +18,20 @@ setenv= VAGRANT_CWD = {changedir} CEPH_VOLUME_DEBUG = 1 DEBIAN_FRONTEND=noninteractive + ANSIBLE_COLLECTIONS_PATH = {envdir}/ansible_collections + CEPH_ANSIBLE_VAGRANT_BOX = centos/stream9 changedir= # plain/unencrypted - centos8-bluestore-create: {toxinidir}/centos8/bluestore/create + centos-bluestore-create: {toxinidir}/centos/bluestore/create # dmcrypt - centos8-bluestore-dmcrypt: {toxinidir}/centos8/bluestore/dmcrypt + centos-bluestore-dmcrypt: {toxinidir}/centos/bluestore/dmcrypt # TODO: these are placeholders for now, eventually we want to # test the prepare/activate workflow of ceph-volume as well - centos8-bluestore-prepare_activate: {toxinidir}/xenial/bluestore/prepare_activate + centos-bluestore-prepare_activate: {toxinidir}/xenial/bluestore/prepare_activate commands= - git clone -b {env:CEPH_ANSIBLE_BRANCH:master} --single-branch {env:CEPH_ANSIBLE_CLONE:"https://github.com/ceph/ceph-ansible.git"} {envdir}/tmp/ceph-ansible + git clone -b {env:CEPH_ANSIBLE_BRANCH:main} --single-branch {env:CEPH_ANSIBLE_CLONE:"https://github.com/ceph/ceph-ansible.git"} {envdir}/tmp/ceph-ansible pip install -r {envdir}/tmp/ceph-ansible/tests/requirements.txt - ansible-galaxy install -r {envdir}/tmp/ceph-ansible/requirements.yml -v + ansible-galaxy collection install -r {envdir}/tmp/ceph-ansible/requirements.yml -v -p {envdir}/ansible_collections bash {toxinidir}/../scripts/vagrant_up.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox} bash {toxinidir}/../scripts/generate_ssh_config.sh {changedir} @@ -43,10 +45,7 @@ commands= cp {toxinidir}/../playbooks/deploy.yml {envdir}/tmp/ceph-ansible # use ceph-ansible to deploy a ceph cluster on the vms - ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/deploy.yml --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest} toxinidir={toxinidir}" - - # prepare nodes for testing with testinfra - ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml + ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/deploy.yml --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:main} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest} toxinidir={toxinidir}" # test cluster state using testinfra py.test --reruns 5 --reruns-delay 10 -n 4 --sudo -v --connection=ansible --ssh-config={changedir}/vagrant_ssh_config 
--ansible-inventory={changedir}/hosts {toxinidir}/../tests diff --git a/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml b/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml index 0ac200c6bc0d..036c4daf5046 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml +++ b/src/ceph-volume/ceph_volume/tests/functional/playbooks/deploy.yml @@ -21,20 +21,6 @@ DEBIAN_FRONTEND: noninteractive pre_tasks: - # If we can't get python2 installed before any module is used we will fail - # so just try what we can to get it installed - - name: check for python2 - stat: - path: /usr/bin/python - ignore_errors: yes - register: systempython2 - - - name: install python2 for debian based systems - raw: sudo apt-get -y install python-simplejson - ignore_errors: yes - when: - - systempython2.stat is undefined or systempython2.stat.exists == false - # Ansible will try to auto-install python-apt, in some systems this might be # python3-apt, or python-apt, and it has caused whole runs to fail because # it is trying to do an interactive prompt @@ -46,18 +32,6 @@ - python-apt - aptitude - - name: install python2 for fedora - raw: sudo dnf -y install python creates=/usr/bin/python - ignore_errors: yes - when: - - systempython2.stat is undefined or systempython2.stat.exists == false - - - name: install python2 for opensuse - raw: sudo zypper -n install python-base creates=/usr/bin/python2.7 - ignore_errors: yes - when: - - systempython2.stat is undefined or systempython2.stat.exists == false - - name: gather facts setup: when: @@ -93,6 +67,12 @@ state: latest when: not is_atomic | bool + - name: install net-tools + package: + name: net-tools + state: present + when: not is_atomic | bool + - name: update the system command: dnf update -y changed_when: false diff --git a/src/ceph-volume/ceph_volume/tests/functional/scripts/vagrant_up.sh b/src/ceph-volume/ceph_volume/tests/functional/scripts/vagrant_up.sh index 8f4cd3bca9ba..104ab118c98c 100644 --- a/src/ceph-volume/ceph_volume/tests/functional/scripts/vagrant_up.sh +++ b/src/ceph-volume/ceph_volume/tests/functional/scripts/vagrant_up.sh @@ -2,6 +2,15 @@ set -e +CEPH_ANSIBLE_VAGRANT_BOX="${CEPH_ANSIBLE_VAGRANT_BOX:-centos/stream9}" + +if [[ "${CEPH_ANSIBLE_VAGRANT_BOX}" =~ "centos/stream" ]]; then + EL_VERSION="${CEPH_ANSIBLE_VAGRANT_BOX: -1}" + LATEST_IMAGE="$(curl -s https://cloud.centos.org/centos/${EL_VERSION}-stream/x86_64/images/CHECKSUM | sed -nE 's/^SHA256.*\((.*-([0-9]+).*vagrant-libvirt.box)\).*$/\1/p' | sort -u | tail -n1)" + vagrant box remove "${CEPH_ANSIBLE_VAGRANT_BOX}" --all --force || true + vagrant box add --force --provider libvirt --name "${CEPH_ANSIBLE_VAGRANT_BOX}" "https://cloud.centos.org/centos/${EL_VERSION}-stream/x86_64/images/${LATEST_IMAGE}" --force +fi + retries=0 until [ $retries -ge 5 ] do diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/Vagrantfile deleted file mode 120000 index 16076e424520..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/Vagrantfile +++ /dev/null @@ -1 +0,0 @@ -../../../../Vagrantfile \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/group_vars/all deleted file mode 100644 index c265e783b07d..000000000000 --- 
a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/group_vars/all +++ /dev/null @@ -1,19 +0,0 @@ ---- - -ceph_dev: True -cluster: test -public_network: "192.168.1.0/24" -cluster_network: "192.168.2.0/24" -monitor_interface: eth1 -journal_size: 100 -osd_objectstore: "bluestore" -ceph_origin: 'repository' -ceph_repository: 'dev' -copy_admin_key: false -os_tuning_params: - - { name: kernel.pid_max, value: 4194303 } - - { name: fs.file-max, value: 26234859 } -ceph_conf_overrides: - global: - osd_pool_default_pg_num: 8 - osd_pool_default_size: 1 diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/host_vars/osd0.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/host_vars/osd0.yml deleted file mode 100644 index 2e1c7ee9e895..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/host_vars/osd0.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- - -devices: - - '/dev/sdb' -dedicated_devices: - - '/dev/sdc' -osd_scenario: "non-collocated" diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/host_vars/osd1.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/host_vars/osd1.yml deleted file mode 100644 index 7e90071c9b16..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/host_vars/osd1.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- - -devices: - - '/dev/sdb' - - '/dev/sdc' -osd_scenario: "collocated" diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/hosts b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/hosts deleted file mode 100644 index e0c08b94659a..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/hosts +++ /dev/null @@ -1,9 +0,0 @@ -[mons] -mon0 monitor_interface=eth1 - -[osds] -osd0 -osd1 - -[mgrs] -mon0 diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/test.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/test.yml deleted file mode 100644 index 24e2c0353c94..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/test.yml +++ /dev/null @@ -1,31 +0,0 @@ ---- - -- hosts: osds - become: yes - tasks: - - - name: list all OSD directories - find: - paths: /var/lib/ceph/osd - file_type: directory - register: osd_paths - - - name: scan all OSD directories - command: "ceph-volume --cluster={{ cluster }} simple scan {{ item.path }}" - environment: - CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_paths.files }}" - - - name: list all OSD JSON files - find: - paths: /etc/ceph/osd - file_type: file - register: osd_configs - - - name: activate all scanned OSDs - command: "ceph-volume --cluster={{ cluster }} simple activate --file {{ item.path }}" - environment: - CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_configs.files }}" diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/vagrant_variables.yml deleted file mode 100644 index 63700c3c902d..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/activate/vagrant_variables.yml +++ /dev/null @@ -1,73 +0,0 @@ ---- - -# DEPLOY CONTAINERIZED DAEMONS -docker: false - -# DEFINE THE NUMBER OF VMS TO RUN 
-mon_vms: 1 -osd_vms: 2 -mds_vms: 0 -rgw_vms: 0 -nfs_vms: 0 -rbd_mirror_vms: 0 -client_vms: 0 -iscsi_gw_vms: 0 -mgr_vms: 0 - - -# INSTALL SOURCE OF CEPH -# valid values are 'stable' and 'dev' -ceph_install_source: stable - -# SUBNETS TO USE FOR THE VMS -public_subnet: 192.168.1 -cluster_subnet: 192.168.2 - -# MEMORY -# set 1024 for CentOS -memory: 512 - -# Ethernet interface name -# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial -eth: 'eth1' - -# Disks -# For libvirt use disks: "[ '/dev/vdb', '/dev/vdc' ]" -# For CentOS7 use disks: "[ '/dev/sda', '/dev/sdb' ]" -disks: "[ '/dev/sdb', '/dev/sdc' ]" - -# VAGRANT BOX -# Ceph boxes are *strongly* suggested. They are under better control and will -# not get updated frequently unless required for build systems. These are (for -# now): -# -# * ceph/ubuntu-xenial -# -# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64 -# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet -# libvirt CentOS: centos/7 -# parallels Ubuntu: parallels/ubuntu-14.04 -# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller' -# For more boxes have a look at: -# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q= -# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/ -vagrant_box: centos/7 -#ssh_private_key_path: "~/.ssh/id_rsa" -# The sync directory changes based on vagrant box -# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant -#vagrant_sync_dir: /home/vagrant/sync -#vagrant_sync_dir: / -# Disables synced folder creation. Not needed for testing, will skip mounting -# the vagrant directory on the remote box regardless of the provider. -vagrant_disable_synced_folder: true -# VAGRANT URL -# This is a URL to download an image from an alternate location. vagrant_box -# above should be set to the filename of the image. 
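
The box selection that vagrant_up.sh now performs is dense as a shell one-liner; below is a minimal Python sketch of the same CHECKSUM parsing, assuming the `SHA256 (<file>) = <digest>` line layout that the sed expression above relies on. The function name and the use of urllib are illustrative only and are not part of the change.

# Illustrative sketch: mirrors the CHECKSUM parsing done by vagrant_up.sh,
# assuming cloud.centos.org publishes "SHA256 (<image>.vagrant-libvirt.box) = <digest>" lines.
import re
import urllib.request

def latest_stream_box(box: str = "centos/stream9") -> str:
    el_version = box[-1]  # "9" from "centos/stream9"
    base = f"https://cloud.centos.org/centos/{el_version}-stream/x86_64/images"
    with urllib.request.urlopen(f"{base}/CHECKSUM") as resp:
        checksum = resp.read().decode()
    # same capture as the sed expression: the vagrant-libvirt box file name
    images = re.findall(r"^SHA256.*\((.*-([0-9]+).*vagrant-libvirt\.box)\).*$",
                        checksum, flags=re.MULTILINE)
    if not images:
        raise RuntimeError(f"no vagrant-libvirt image found in {base}/CHECKSUM")
    # emulate `sort -u | tail -n1`: lexically newest unique entry wins
    newest = sorted({name for name, _ in images})[-1]
    return f"{base}/{newest}"

if __name__ == "__main__":
    print(latest_stream_box())
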
-# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box -# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box -# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box - -os_tuning_params: - - { name: kernel.pid_max, value: 4194303 } - - { name: fs.file-max, value: 26234859 } - diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/Vagrantfile deleted file mode 120000 index 16076e424520..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/Vagrantfile +++ /dev/null @@ -1 +0,0 @@ -../../../../Vagrantfile \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/group_vars/all deleted file mode 100644 index 885c2c82f4e5..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/group_vars/all +++ /dev/null @@ -1,22 +0,0 @@ ---- - -dmcrypt: True -ceph_dev: True -cluster: test -public_network: "192.168.1.0/24" -cluster_network: "192.168.2.0/24" -monitor_interface: eth1 -journal_size: 100 -osd_objectstore: "bluestore" -ceph_origin: 'repository' -ceph_repository: 'dev' -copy_admin_key: false -os_tuning_params: - - { name: kernel.pid_max, value: 4194303 } - - { name: fs.file-max, value: 26234859 } -ceph_conf_overrides: - global: - osd_pool_default_pg_num: 8 - osd_pool_default_size: 1 - osd: - osd_dmcrypt_type: luks diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/host_vars/osd0.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/host_vars/osd0.yml deleted file mode 100644 index 2e1c7ee9e895..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/host_vars/osd0.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- - -devices: - - '/dev/sdb' -dedicated_devices: - - '/dev/sdc' -osd_scenario: "non-collocated" diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/host_vars/osd1.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/host_vars/osd1.yml deleted file mode 100644 index 7e90071c9b16..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/host_vars/osd1.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- - -devices: - - '/dev/sdb' - - '/dev/sdc' -osd_scenario: "collocated" diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/hosts b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/hosts deleted file mode 100644 index e0c08b94659a..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/hosts +++ /dev/null @@ -1,9 +0,0 @@ -[mons] -mon0 monitor_interface=eth1 - -[osds] -osd0 -osd1 - -[mgrs] -mon0 diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/test.yml 
b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/test.yml deleted file mode 100644 index 55ae7cc8eb94..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/test.yml +++ /dev/null @@ -1,15 +0,0 @@ ---- - -- hosts: osds - become: yes - tasks: - - - name: scan all running OSDs - command: "ceph-volume --cluster={{ cluster }} simple scan" - environment: - CEPH_VOLUME_DEBUG: 1 - - - name: activate all scanned OSDs - command: "ceph-volume --cluster={{ cluster }} simple activate --all" - environment: - CEPH_VOLUME_DEBUG: 1 diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/vagrant_variables.yml deleted file mode 100644 index 63700c3c902d..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-luks/vagrant_variables.yml +++ /dev/null @@ -1,73 +0,0 @@ ---- - -# DEPLOY CONTAINERIZED DAEMONS -docker: false - -# DEFINE THE NUMBER OF VMS TO RUN -mon_vms: 1 -osd_vms: 2 -mds_vms: 0 -rgw_vms: 0 -nfs_vms: 0 -rbd_mirror_vms: 0 -client_vms: 0 -iscsi_gw_vms: 0 -mgr_vms: 0 - - -# INSTALL SOURCE OF CEPH -# valid values are 'stable' and 'dev' -ceph_install_source: stable - -# SUBNETS TO USE FOR THE VMS -public_subnet: 192.168.1 -cluster_subnet: 192.168.2 - -# MEMORY -# set 1024 for CentOS -memory: 512 - -# Ethernet interface name -# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial -eth: 'eth1' - -# Disks -# For libvirt use disks: "[ '/dev/vdb', '/dev/vdc' ]" -# For CentOS7 use disks: "[ '/dev/sda', '/dev/sdb' ]" -disks: "[ '/dev/sdb', '/dev/sdc' ]" - -# VAGRANT BOX -# Ceph boxes are *strongly* suggested. They are under better control and will -# not get updated frequently unless required for build systems. These are (for -# now): -# -# * ceph/ubuntu-xenial -# -# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64 -# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet -# libvirt CentOS: centos/7 -# parallels Ubuntu: parallels/ubuntu-14.04 -# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller' -# For more boxes have a look at: -# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q= -# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/ -vagrant_box: centos/7 -#ssh_private_key_path: "~/.ssh/id_rsa" -# The sync directory changes based on vagrant box -# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant -#vagrant_sync_dir: /home/vagrant/sync -#vagrant_sync_dir: / -# Disables synced folder creation. Not needed for testing, will skip mounting -# the vagrant directory on the remote box regardless of the provider. -vagrant_disable_synced_folder: true -# VAGRANT URL -# This is a URL to download an image from an alternate location. vagrant_box -# above should be set to the filename of the image. 
-# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box -# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box -# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box - -os_tuning_params: - - { name: kernel.pid_max, value: 4194303 } - - { name: fs.file-max, value: 26234859 } - diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/Vagrantfile b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/Vagrantfile deleted file mode 120000 index 16076e424520..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/Vagrantfile +++ /dev/null @@ -1 +0,0 @@ -../../../../Vagrantfile \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/group_vars/all b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/group_vars/all deleted file mode 100644 index 30bcf5be7c6f..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/group_vars/all +++ /dev/null @@ -1,22 +0,0 @@ ---- - -dmcrypt: True -ceph_dev: True -cluster: test -public_network: "192.168.1.0/24" -cluster_network: "192.168.2.0/24" -monitor_interface: eth1 -journal_size: 100 -osd_objectstore: "bluestore" -ceph_origin: 'repository' -ceph_repository: 'dev' -copy_admin_key: false -os_tuning_params: - - { name: kernel.pid_max, value: 4194303 } - - { name: fs.file-max, value: 26234859 } -ceph_conf_overrides: - global: - osd_pool_default_pg_num: 8 - osd_pool_default_size: 1 - osd: - osd_dmcrypt_type: plain diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/host_vars/osd0.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/host_vars/osd0.yml deleted file mode 100644 index 2e1c7ee9e895..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/host_vars/osd0.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- - -devices: - - '/dev/sdb' -dedicated_devices: - - '/dev/sdc' -osd_scenario: "non-collocated" diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/host_vars/osd1.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/host_vars/osd1.yml deleted file mode 100644 index 7e90071c9b16..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/host_vars/osd1.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- - -devices: - - '/dev/sdb' - - '/dev/sdc' -osd_scenario: "collocated" diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/hosts b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/hosts deleted file mode 100644 index e0c08b94659a..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/hosts +++ /dev/null @@ -1,9 +0,0 @@ -[mons] -mon0 monitor_interface=eth1 - -[osds] -osd0 -osd1 - -[mgrs] -mon0 diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/test.yml 
b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/test.yml deleted file mode 100644 index 24e2c0353c94..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/test.yml +++ /dev/null @@ -1,31 +0,0 @@ ---- - -- hosts: osds - become: yes - tasks: - - - name: list all OSD directories - find: - paths: /var/lib/ceph/osd - file_type: directory - register: osd_paths - - - name: scan all OSD directories - command: "ceph-volume --cluster={{ cluster }} simple scan {{ item.path }}" - environment: - CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_paths.files }}" - - - name: list all OSD JSON files - find: - paths: /etc/ceph/osd - file_type: file - register: osd_configs - - - name: activate all scanned OSDs - command: "ceph-volume --cluster={{ cluster }} simple activate --file {{ item.path }}" - environment: - CEPH_VOLUME_DEBUG: 1 - with_items: - - "{{ osd_configs.files }}" diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/vagrant_variables.yml b/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/vagrant_variables.yml deleted file mode 100644 index 63700c3c902d..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/centos7/bluestore/dmcrypt-plain/vagrant_variables.yml +++ /dev/null @@ -1,73 +0,0 @@ ---- - -# DEPLOY CONTAINERIZED DAEMONS -docker: false - -# DEFINE THE NUMBER OF VMS TO RUN -mon_vms: 1 -osd_vms: 2 -mds_vms: 0 -rgw_vms: 0 -nfs_vms: 0 -rbd_mirror_vms: 0 -client_vms: 0 -iscsi_gw_vms: 0 -mgr_vms: 0 - - -# INSTALL SOURCE OF CEPH -# valid values are 'stable' and 'dev' -ceph_install_source: stable - -# SUBNETS TO USE FOR THE VMS -public_subnet: 192.168.1 -cluster_subnet: 192.168.2 - -# MEMORY -# set 1024 for CentOS -memory: 512 - -# Ethernet interface name -# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial -eth: 'eth1' - -# Disks -# For libvirt use disks: "[ '/dev/vdb', '/dev/vdc' ]" -# For CentOS7 use disks: "[ '/dev/sda', '/dev/sdb' ]" -disks: "[ '/dev/sdb', '/dev/sdc' ]" - -# VAGRANT BOX -# Ceph boxes are *strongly* suggested. They are under better control and will -# not get updated frequently unless required for build systems. These are (for -# now): -# -# * ceph/ubuntu-xenial -# -# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64 -# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet -# libvirt CentOS: centos/7 -# parallels Ubuntu: parallels/ubuntu-14.04 -# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller' -# For more boxes have a look at: -# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q= -# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/ -vagrant_box: centos/7 -#ssh_private_key_path: "~/.ssh/id_rsa" -# The sync directory changes based on vagrant box -# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant -#vagrant_sync_dir: /home/vagrant/sync -#vagrant_sync_dir: / -# Disables synced folder creation. Not needed for testing, will skip mounting -# the vagrant directory on the remote box regardless of the provider. -vagrant_disable_synced_folder: true -# VAGRANT URL -# This is a URL to download an image from an alternate location. vagrant_box -# above should be set to the filename of the image. 
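
For reference, the scan/activate sequence exercised by the deleted `simple` test.yml playbooks above boils down to two loops; the following is a rough Python sketch of that flow, assuming the same /var/lib/ceph/osd and /etc/ceph/osd locations and the "test" cluster name used by the removed tasks. The helper function itself is illustrative and not part of ceph-volume.

# Rough sketch of what the removed Ansible tasks drove: scan every OSD
# directory, then activate each scanned OSD from its JSON file.
import os
import subprocess

def scan_and_activate(cluster: str = "test") -> None:
    env = dict(os.environ, CEPH_VOLUME_DEBUG="1")

    # equivalent of the "scan all OSD directories" task
    for entry in os.scandir("/var/lib/ceph/osd"):
        if entry.is_dir():
            subprocess.run(["ceph-volume", f"--cluster={cluster}",
                            "simple", "scan", entry.path],
                           check=True, env=env)

    # equivalent of the "activate all scanned OSDs" task
    for entry in os.scandir("/etc/ceph/osd"):
        if entry.is_file():
            subprocess.run(["ceph-volume", f"--cluster={cluster}",
                            "simple", "activate", "--file", entry.path],
                           check=True, env=env)
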
-# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box -# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box -# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box - -os_tuning_params: - - { name: kernel.pid_max, value: 4194303 } - - { name: fs.file-max, value: 26234859 } - diff --git a/src/ceph-volume/ceph_volume/tests/functional/simple/tox.ini b/src/ceph-volume/ceph_volume/tests/functional/simple/tox.ini deleted file mode 100644 index c910754c337d..000000000000 --- a/src/ceph-volume/ceph_volume/tests/functional/simple/tox.ini +++ /dev/null @@ -1,56 +0,0 @@ -[tox] -envlist = centos7-bluestore-{activate,dmcrypt_plain,dmcrypt_luks} -skipsdist = True - -[testenv] -deps = mock -allowlist_externals = - vagrant - bash - git - sleep - cp -passenv=* -setenv= - ANSIBLE_CONFIG = {envdir}/tmp/ceph-ansible/ansible.cfg - ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config -o ControlMaster=auto -o ControlPersist=600s -o PreferredAuthentications=publickey - ANSIBLE_STDOUT_CALLBACK = debug - VAGRANT_CWD = {changedir} - CEPH_VOLUME_DEBUG = 1 - DEBIAN_FRONTEND=noninteractive -changedir= - centos7-bluestore-activate: {toxinidir}/centos7/bluestore/activate - centos7-bluestore-dmcrypt_plain: {toxinidir}/centos7/bluestore/dmcrypt-plain - centos7-bluestore-dmcrypt_luks: {toxinidir}/centos7/bluestore/dmcrypt-luks -commands= - git clone -b {env:CEPH_ANSIBLE_BRANCH:master} --single-branch https://github.com/ceph/ceph-ansible.git {envdir}/tmp/ceph-ansible - pip install -r {envdir}/tmp/ceph-ansible/tests/requirements.txt - ansible-galaxy install -r {envdir}/tmp/ceph-ansible/requirements.yml -v - - bash {toxinidir}/../scripts/vagrant_up.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox} - bash {toxinidir}/../scripts/generate_ssh_config.sh {changedir} - - cp {toxinidir}/../playbooks/deploy.yml {envdir}/tmp/ceph-ansible - - # use ceph-ansible to deploy a ceph cluster on the vms - ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/deploy.yml --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest} toxinidir={toxinidir}" - - # prepare nodes for testing with testinfra - ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml - - # test cluster state testinfra - py.test --reruns 5 --reruns-delay 10 -n 4 --sudo -v --connection=ansible --ssh-config={changedir}/vagrant_ssh_config --ansible-inventory={changedir}/hosts {toxinidir}/../tests - - # make ceph-volume simple take over all the OSDs that got deployed, disabling ceph-disk - ansible-playbook -vv -i {changedir}/hosts {changedir}/test.yml - - # reboot all vms - bash {toxinidir}/../scripts/vagrant_reload.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox} - - # wait 2 minutes for services to be ready - sleep 120 - - # retest to ensure cluster came back up correctly after rebooting - py.test --reruns 5 --reruns-delay 10 -n 4 --sudo -v --connection=ansible --ssh-config={changedir}/vagrant_ssh_config --ansible-inventory={changedir}/hosts {toxinidir}/../tests - - vagrant destroy {env:VAGRANT_DESTROY_FLAGS:"--force"} diff --git 
a/src/ceph-volume/ceph_volume/tests/objectstore/test_baseobjectstore.py b/src/ceph-volume/ceph_volume/tests/objectstore/test_baseobjectstore.py new file mode 100644 index 000000000000..248adf66e9e4 --- /dev/null +++ b/src/ceph-volume/ceph_volume/tests/objectstore/test_baseobjectstore.py @@ -0,0 +1,162 @@ +import pytest +from mock.mock import patch, Mock, call +from ceph_volume.objectstore.baseobjectstore import BaseObjectStore +from ceph_volume.util import system + + +@patch('ceph_volume.objectstore.baseobjectstore.prepare_utils.create_key', Mock(return_value=['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='])) +class TestBaseObjectStore: + def test_init_dmcrypt(self, factory): + args = factory(dmcrypt=True) + bo = BaseObjectStore(args) + assert bo.encrypted == 1 + assert bo.cephx_lockbox_secret == ['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='] + assert bo.secrets['cephx_lockbox_secret'] == ['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='] + + @patch('ceph_volume.process.call', Mock(return_value=(['c6798f59-01'], '', 0))) + def test_get_ptuuid_ok(self): + """ + Test that the ptuuid is returned + """ + assert BaseObjectStore([]).get_ptuuid('/dev/sda') == 'c6798f59-01' + + @patch('ceph_volume.process.call', Mock(return_value=('', '', 0))) + def test_get_ptuuid_raises_runtime_error(self, capsys): + """ + Test that the ptuuid is returned + """ + with pytest.raises(RuntimeError) as error: + bo = BaseObjectStore([]) + bo.get_ptuuid('/dev/sda') + stdout, stderr = capsys.readouterr() + assert 'blkid could not detect a PARTUUID for device: /dev/sda' in stderr + assert str(error.value) == 'unable to use device' + + @patch.dict('os.environ', {'CEPH_VOLUME_OSDSPEC_AFFINITY': 'foo'}) + def test_get_osdspec_affinity(self): + assert BaseObjectStore([]).get_osdspec_affinity() == 'foo' + + def test_pre_prepare(self): + with pytest.raises(NotImplementedError): + BaseObjectStore([]).pre_prepare() + + def test_prepare_data_device(self): + with pytest.raises(NotImplementedError): + BaseObjectStore([]).prepare_data_device('foo', 'bar') + + def test_safe_prepare(self): + with pytest.raises(NotImplementedError): + BaseObjectStore([]).safe_prepare(args=None) + + def test_add_objectstore_opts(self): + with pytest.raises(NotImplementedError): + BaseObjectStore([]).add_objectstore_opts() + + @patch('ceph_volume.util.prepare.create_osd_path') + @patch('ceph_volume.util.prepare.link_block') + @patch('ceph_volume.util.prepare.get_monmap') + @patch('ceph_volume.util.prepare.write_keyring') + def test_prepare_osd_req(self, m_write_keyring, m_get_monmap, m_link_block, m_create_osd_path): + bo = BaseObjectStore([]) + bo.osd_id = '123' + bo.block_device_path = '/dev/foo' + bo.prepare_osd_req() + assert m_create_osd_path.mock_calls == [call('123', tmpfs=True)] + assert m_link_block.mock_calls == [call('/dev/foo', '123')] + assert m_get_monmap.mock_calls == [call('123')] + assert m_write_keyring.mock_calls == [call('123', ['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='])] + + def test_prepare(self): + with pytest.raises(NotImplementedError): + BaseObjectStore([]).prepare() + + def test_prepare_dmcrypt(self): + with pytest.raises(NotImplementedError): + BaseObjectStore([]).prepare_dmcrypt() + + def test_cluster_fsid_from_args(self, factory): + args = factory(cluster_fsid='abcd') + bo = BaseObjectStore(args) + assert bo.get_cluster_fsid() == 'abcd' + + def test_cluster_fsid_from_conf(self, conf_ceph_stub, factory): + args = factory(cluster_fsid=None) + conf_ceph_stub('[global]\nfsid = abcd-123') + bo = BaseObjectStore([]) + bo.args = args + 
assert bo.get_cluster_fsid() == 'abcd-123' + + @patch('ceph_volume.conf.cluster', 'ceph') + def test_get_osd_path(self): + bo = BaseObjectStore([]) + bo.osd_id = '123' + assert bo.get_osd_path() == '/var/lib/ceph/osd/ceph-123/' + + @patch('ceph_volume.conf.cluster', 'ceph') + def test_build_osd_mkfs_cmd_base(self): + bo = BaseObjectStore([]) + bo.osd_path = '/var/lib/ceph/osd/ceph-123/' + bo.osd_fsid = 'abcd-1234' + bo.objectstore = 'my-fake-objectstore' + bo.osd_id = '123' + bo.monmap = '/etc/ceph/ceph.monmap' + result = bo.build_osd_mkfs_cmd() + + assert result == ['ceph-osd', + '--cluster', + 'ceph', + '--osd-objectstore', + 'my-fake-objectstore', + '--mkfs', '-i', '123', + '--monmap', + '/etc/ceph/ceph.monmap', + '--keyfile', '-', + '--osd-data', + '/var/lib/ceph/osd/ceph-123/', + '--osd-uuid', 'abcd-1234', + '--setuser', 'ceph', + '--setgroup', 'ceph'] + + def test_osd_mkfs_ok(self, monkeypatch, fake_call): + bo = BaseObjectStore([]) + bo.get_osd_path = lambda: '/var/lib/ceph/osd/ceph-123/' + bo.build_osd_mkfs_cmd = lambda: ['ceph-osd', '--mkfs', 'some', 'fake', 'args'] + monkeypatch.setattr(system, 'chown', lambda path: 0) + bo.osd_mkfs() + assert fake_call.calls == [ + { + 'args': (['ceph-osd', + '--mkfs', + 'some', + 'fake', + 'args'],), + 'kwargs': { + 'stdin': ['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='], + 'terminal_verbose': True, + 'show_command': True} + } + ] + + @patch('ceph_volume.process.call', Mock(return_value=([], [], 999))) + def test_osd_mkfs_fails(self, monkeypatch): + bo = BaseObjectStore([]) + bo.get_osd_path = lambda: '/var/lib/ceph/osd/ceph-123/' + bo.build_osd_mkfs_cmd = lambda: ['ceph-osd', '--mkfs', 'some', 'fake', 'args'] + monkeypatch.setattr(system, 'chown', lambda path: 0) + with pytest.raises(RuntimeError) as error: + bo.osd_mkfs() + assert str(error.value) == 'Command failed with exit code 999: ceph-osd --mkfs some fake args' + + @patch('time.sleep', Mock()) + @patch('ceph_volume.process.call', return_value=([], [], 11)) + def test_osd_mkfs_fails_EWOULDBLOCK(self, m_call, monkeypatch): + bo = BaseObjectStore([]) + bo.get_osd_path = lambda: '/var/lib/ceph/osd/ceph-123/' + bo.build_osd_mkfs_cmd = lambda: ['ceph-osd', '--mkfs', 'some', 'fake', 'args'] + monkeypatch.setattr(system, 'chown', lambda path: 0) + bo.osd_mkfs() + assert m_call.call_count == 5 + + def test_activate(self): + with pytest.raises(NotImplementedError): + BaseObjectStore([]).activate() diff --git a/src/ceph-volume/ceph_volume/tests/objectstore/test_bluestore.py b/src/ceph-volume/ceph_volume/tests/objectstore/test_bluestore.py new file mode 100644 index 000000000000..77bb383284ee --- /dev/null +++ b/src/ceph-volume/ceph_volume/tests/objectstore/test_bluestore.py @@ -0,0 +1,27 @@ +from mock import patch, Mock +from ceph_volume.objectstore.bluestore import BlueStore + + +class TestBlueStore: + @patch('ceph_volume.objectstore.baseobjectstore.prepare_utils.create_key', Mock(return_value=['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='])) + def setup_method(self, m_create_key): + self.b = BlueStore([]) + self.b.osd_mkfs_cmd = ['binary', 'arg1'] + + def test_add_objectstore_opts_wal_device_path(self, monkeypatch): + monkeypatch.setattr('ceph_volume.util.system.chown', lambda path: 0) + self.b.wal_device_path = '/dev/nvme0n1' + self.b.add_objectstore_opts() + assert self.b.osd_mkfs_cmd == ['binary', 'arg1', '--bluestore-block-wal-path', '/dev/nvme0n1'] + + def test_add_objectstore_opts_db_device_path(self, monkeypatch): + monkeypatch.setattr('ceph_volume.util.system.chown', lambda path: 0) + 
self.b.db_device_path = '/dev/ssd1' + self.b.add_objectstore_opts() + assert self.b.osd_mkfs_cmd == ['binary', 'arg1', '--bluestore-block-db-path', '/dev/ssd1'] + + def test_add_objectstore_opts_osdspec_affinity(self, monkeypatch): + monkeypatch.setattr('ceph_volume.util.system.chown', lambda path: 0) + self.b.get_osdspec_affinity = lambda: 'foo' + self.b.add_objectstore_opts() + assert self.b.osd_mkfs_cmd == ['binary', 'arg1', '--osdspec-affinity', 'foo'] \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/objectstore/test_lvmbluestore.py b/src/ceph-volume/ceph_volume/tests/objectstore/test_lvmbluestore.py new file mode 100644 index 000000000000..2dc089267a4b --- /dev/null +++ b/src/ceph-volume/ceph_volume/tests/objectstore/test_lvmbluestore.py @@ -0,0 +1,627 @@ +import pytest +from mock import patch, Mock, MagicMock, call +from ceph_volume.objectstore.lvmbluestore import LvmBlueStore +from ceph_volume.api.lvm import Volume +from ceph_volume.util import system + + +class TestLvmBlueStore: + @patch('ceph_volume.objectstore.lvmbluestore.prepare_utils.create_key', Mock(return_value=['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='])) + def setup_method(self, m_create_key): + self.lvm_bs = LvmBlueStore([]) + + @patch('ceph_volume.conf.cluster', 'ceph') + @patch('ceph_volume.api.lvm.get_single_lv') + @patch('ceph_volume.objectstore.lvmbluestore.prepare_utils.create_id', Mock(return_value='111')) + def test_pre_prepare_lv(self, m_get_single_lv, factory): + args = factory(cluster_fsid='abcd', + osd_fsid='abc123', + crush_device_class='ssd', + osd_id='111', + data='vg_foo/lv_foo') + m_get_single_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + self.lvm_bs.encrypted = True + self.lvm_bs.dmcrypt_key = 'fake-dmcrypt-key' + self.lvm_bs.args = args + self.lvm_bs.pre_prepare() + assert self.lvm_bs.secrets['dmcrypt_key'] == 'fake-dmcrypt-key' + assert self.lvm_bs.secrets['crush_device_class'] == 'ssd' + assert self.lvm_bs.osd_id == '111' + assert self.lvm_bs.block_device_path == '/fake-path' + assert self.lvm_bs.tags == {'ceph.osd_fsid': 'abc123', + 'ceph.osd_id': '111', + 'ceph.cluster_fsid': 'abcd', + 'ceph.cluster_name': 'ceph', + 'ceph.crush_device_class': 'ssd', + 'ceph.osdspec_affinity': '', + 'ceph.block_device': '/fake-path', + 'ceph.block_uuid': 'fake-uuid', + 'ceph.cephx_lockbox_secret': '', + 'ceph.encrypted': True, + 'ceph.vdo': '0', + 'ceph.with_tpm': 0} + + @patch('ceph_volume.conf.cluster', 'ceph') + @patch('ceph_volume.api.lvm.get_single_lv') + @patch('ceph_volume.objectstore.lvmbluestore.prepare_utils.create_id', Mock(return_value='111')) + def test_pre_prepare_lv_with_dmcrypt_and_tpm(self, m_get_single_lv, factory): + args = factory(cluster_fsid='abcd', + osd_fsid='abc123', + crush_device_class='ssd', + osd_id='111', + data='vg_foo/lv_foo', + dmcrypt=True, + with_tpm=True) + m_get_single_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + self.lvm_bs.encrypted = True + self.lvm_bs.with_tpm = True + self.lvm_bs.dmcrypt_key = 'fake-dmcrypt-key-tpm2' + self.lvm_bs.args = args + self.lvm_bs.pre_prepare() + assert 'dmcrypt_key' not in self.lvm_bs.secrets.keys() + assert self.lvm_bs.secrets['crush_device_class'] == 'ssd' + assert self.lvm_bs.osd_id == '111' + assert self.lvm_bs.block_device_path == '/fake-path' + assert self.lvm_bs.tags == {'ceph.osd_fsid': 'abc123', + 'ceph.osd_id': '111', + 'ceph.cluster_fsid': 'abcd', + 
'ceph.cluster_name': 'ceph', + 'ceph.crush_device_class': 'ssd', + 'ceph.osdspec_affinity': '', + 'ceph.block_device': '/fake-path', + 'ceph.block_uuid': 'fake-uuid', + 'ceph.cephx_lockbox_secret': '', + 'ceph.encrypted': True, + 'ceph.vdo': '0', + 'ceph.with_tpm': 1} + + @patch('ceph_volume.objectstore.lvmbluestore.prepare_utils.create_id', Mock(return_value='111')) + def test_pre_prepare_no_lv(self, factory): + args = factory(cluster_fsid='abcd', + osd_fsid='abc123', + crush_device_class='ssd', + osd_id='111', + data='/dev/foo', + dmcrypt_key='fake-dmcrypt-key') + self.lvm_bs.prepare_data_device = lambda x, y: Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + self.lvm_bs.encrypted = True + self.lvm_bs.dmcrypt_key = 'fake-dmcrypt-key' + self.lvm_bs.args = args + self.lvm_bs.pre_prepare() + assert self.lvm_bs.secrets['dmcrypt_key'] == 'fake-dmcrypt-key' + assert self.lvm_bs.secrets['crush_device_class'] == 'ssd' + assert self.lvm_bs.osd_id == '111' + assert self.lvm_bs.block_device_path == '/fake-path' + assert self.lvm_bs.tags == {'ceph.osd_fsid': 'abc123', + 'ceph.osd_id': '111', + 'ceph.cluster_fsid': 'abcd', + 'ceph.cluster_name': None, + 'ceph.crush_device_class': 'ssd', + 'ceph.osdspec_affinity': '', + 'ceph.block_device': '/fake-path', + 'ceph.block_uuid': 'fake-uuid', + 'ceph.cephx_lockbox_secret': '', + 'ceph.encrypted': True, + 'ceph.vdo': '0', + 'ceph.with_tpm': 0} + + @patch('ceph_volume.util.disk.is_partition', Mock(return_value=True)) + @patch('ceph_volume.api.lvm.create_lv') + def test_prepare_data_device(self, m_create_lv, factory): + args = factory(data='/dev/foo', + data_slots=1, + data_size=102400) + self.lvm_bs.args = args + m_create_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='abcd') + assert self.lvm_bs.prepare_data_device('block', 'abcd') == m_create_lv.return_value + assert self.lvm_bs.args.data_size == 102400 + + @patch('ceph_volume.util.disk.is_device', Mock(return_value=False)) + @patch('ceph_volume.util.disk.is_partition', Mock(return_value=False)) + def test_prepare_data_device_fails(self, factory): + args = factory(data='/dev/foo') + self.lvm_bs.args = args + with pytest.raises(RuntimeError) as error: + self.lvm_bs.prepare_data_device('block', 'abcd') + assert ('Cannot use device (/dev/foo). 
' + 'A vg/lv path or an existing device is needed') == str(error.value) + + @patch('ceph_volume.api.lvm.is_ceph_device', Mock(return_value=True)) + @patch('ceph_volume.api.lvm.get_single_lv') + def test_safe_prepare_is_ceph_device(self, m_get_single_lv, factory): + args = factory(data='/dev/foo') + self.lvm_bs.args = args + m_get_single_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + self.lvm_bs.prepare = MagicMock() + with pytest.raises(RuntimeError) as error: + self.lvm_bs.safe_prepare(args) + assert str(error.value) == 'skipping /dev/foo, it is already prepared' + + @patch('ceph_volume.api.lvm.is_ceph_device', Mock(return_value=False)) + @patch('ceph_volume.api.lvm.get_single_lv') + def test_safe_prepare(self, m_get_single_lv, factory): + args = factory(data='vg_foo/lv_foo') + self.lvm_bs.args = args + m_get_single_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + self.lvm_bs.prepare = MagicMock() + self.lvm_bs.safe_prepare() + assert self.lvm_bs.prepare.called + + @patch('ceph_volume.objectstore.lvmbluestore.LvmBlueStore.prepare', Mock(side_effect=Exception)) + @patch('ceph_volume.api.lvm.is_ceph_device', Mock(return_value=False)) + # @patch('ceph_volume.devices.lvm.common.rollback_osd') + @patch('ceph_volume.objectstore.lvmbluestore.rollback_osd') + @patch('ceph_volume.api.lvm.get_single_lv') + def test_safe_prepare_raises_exception(self, m_get_single_lv, m_rollback_osd, factory): + args = factory(data='/dev/foo') + self.lvm_bs.args = args + self.lvm_bs.osd_id = '111' + m_get_single_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + m_rollback_osd.return_value = MagicMock() + with pytest.raises(Exception): + self.lvm_bs.safe_prepare() + assert m_rollback_osd.mock_calls == [call(self.lvm_bs.args, '111')] + + @patch('ceph_volume.objectstore.baseobjectstore.BaseObjectStore.get_ptuuid', Mock(return_value='c6798f59-01')) + @patch('ceph_volume.api.lvm.Volume.set_tags', MagicMock()) + @patch('ceph_volume.api.lvm.get_single_lv') + def test_prepare(self, m_get_single_lv, is_root, factory): + m_get_single_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + args = factory(data='vg_foo/lv_foo', + block_wal='/dev/foo1', + block_db='/dev/foo2', + block_wal_size=123, + block_db_size=123, + block_wal_slots=1, + block_db_slots=1, + with_tpm=False + ) + self.lvm_bs.args = args + self.lvm_bs.pre_prepare = lambda: None + self.lvm_bs.block_lv = MagicMock() + self.lvm_bs.prepare_osd_req = MagicMock() + self.lvm_bs.osd_mkfs = MagicMock() + self.lvm_bs.prepare_dmcrypt = MagicMock() + self.lvm_bs.secrets['dmcrypt_key'] = 'fake-secret' + self.lvm_bs.prepare() + assert self.lvm_bs.wal_device_path == '/dev/foo1' + assert self.lvm_bs.db_device_path == '/dev/foo2' + assert self.lvm_bs.block_lv.set_tags.mock_calls == [call({'ceph.type': 'block', 'ceph.vdo': '0', 'ceph.wal_uuid': 'c6798f59-01', 'ceph.wal_device': '/dev/foo1', 'ceph.db_uuid': 'c6798f59-01', 'ceph.db_device': '/dev/foo2'})] + assert not self.lvm_bs.prepare_dmcrypt.called + assert self.lvm_bs.osd_mkfs.called + assert self.lvm_bs.prepare_osd_req.called + + def test_prepare_dmcrypt(self): + self.lvm_bs.secrets = {'dmcrypt_key': 'fake-secret'} + self.lvm_bs.tags = {'ceph.block_uuid': 'block-uuid1', + 'ceph.db_uuid': 'db-uuid2', + 'ceph.wal_uuid': 
'wal-uuid3', + 'ceph.with_tpm': 0} + self.lvm_bs.block_device_path = '/dev/sdb' + self.lvm_bs.db_device_path = '/dev/sdc' + self.lvm_bs.wal_device_path = '/dev/sdb' + self.lvm_bs.luks_format_and_open = lambda *a: f'/dev/mapper/{a[2]["ceph."+a[1]+"_uuid"]}' + self.lvm_bs.prepare_dmcrypt() + assert self.lvm_bs.block_device_path == '/dev/mapper/block-uuid1' + assert self.lvm_bs.db_device_path == '/dev/mapper/db-uuid2' + assert self.lvm_bs.wal_device_path == '/dev/mapper/wal-uuid3' + + @patch('ceph_volume.objectstore.lvmbluestore.encryption_utils.luks_open') + @patch('ceph_volume.objectstore.lvmbluestore.encryption_utils.luks_format') + def test_luks_format_and_open(self, m_luks_format, m_luks_open): + result = self.lvm_bs.luks_format_and_open('/dev/foo', + 'block', + {'ceph.block_uuid': 'block-uuid1'}) + assert result == '/dev/mapper/block-uuid1' + + @patch('ceph_volume.objectstore.lvmbluestore.LvmBlueStore.enroll_tpm2', Mock(return_value=MagicMock())) + @patch('ceph_volume.objectstore.lvmbluestore.encryption_utils.luks_open') + @patch('ceph_volume.objectstore.lvmbluestore.encryption_utils.luks_format') + def test_luks_format_and_open_with_tpm(self, m_luks_format, m_luks_open): + self.lvm_bs.with_tpm = True + result = self.lvm_bs.luks_format_and_open('/dev/foo', + 'block', + {'ceph.block_uuid': 'block-uuid1'}) + assert result == '/dev/mapper/block-uuid1' + self.lvm_bs.enroll_tpm2.assert_called_once() + + def test_luks_format_and_open_not_device(self): + result = self.lvm_bs.luks_format_and_open('', + 'block', + {}) + assert result == '' + + def test_setup_device_is_none(self): + result = self.lvm_bs.setup_device('block', + None, + {}, + 1, + 1) + assert result == ('', '', {}) + + @patch('ceph_volume.api.lvm.Volume.set_tags', return_value=MagicMock()) + @patch('ceph_volume.util.system.generate_uuid', + Mock(return_value='d83fa1ca-bd68-4c75-bdc2-464da58e8abd')) + @patch('ceph_volume.api.lvm.create_lv') + @patch('ceph_volume.util.disk.is_device', Mock(return_value=True)) + def test_setup_device_is_device(self, m_create_lv, m_set_tags): + m_create_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + result = self.lvm_bs.setup_device('block', + '/dev/foo', + {}, + 1, + 1) + assert m_create_lv.mock_calls == [call('osd-block', + 'd83fa1ca-bd68-4c75-bdc2-464da58e8abd', + device='/dev/foo', + tags={'ceph.type': 'block', + 'ceph.vdo': '0', + 'ceph.block_device': '/fake-path', + 'ceph.block_uuid': 'fake-uuid'}, + slots=1, + size=1)] + assert result == ('/fake-path', + 'fake-uuid', + {'ceph.type': 'block', + 'ceph.vdo': '0', + 'ceph.block_device': '/fake-path', + 'ceph.block_uuid': 'fake-uuid' + }) + + @patch('ceph_volume.api.lvm.get_single_lv') + @patch('ceph_volume.api.lvm.Volume.set_tags', return_value=MagicMock()) + def test_setup_device_is_lv(self, m_set_tags, m_get_single_lv): + m_get_single_lv.return_value = Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid') + result = self.lvm_bs.setup_device('block', + 'vg_foo/lv_foo', + {}, + 1, + 1) + assert result == ('/fake-path', + 'fake-uuid', + {'ceph.type': 'block', + 'ceph.vdo': '0', + 'ceph.block_device': '/fake-path', + 'ceph.block_uuid': 'fake-uuid' + }) + + @patch('ceph_volume.api.lvm.Volume.set_tags', return_value=MagicMock()) + def test_setup_device_partition(self, m_set_tags): + self.lvm_bs.get_ptuuid = lambda x: 'c6798f59-01' + result = self.lvm_bs.setup_device('block', + '/dev/foo1', + {}, + 1, + 1) + assert result == 
('/dev/foo1', + 'c6798f59-01', + {'ceph.type': 'block', + 'ceph.vdo': '0', + 'ceph.block_uuid': 'c6798f59-01', + 'ceph.block_device': '/dev/foo1'}) + + def test_get_osd_device_path_lv_block(self): + lvs = [Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='ceph.type=block,ceph.block_uuid=fake-block-uuid', + lv_uuid='fake-block-uuid')] + assert self.lvm_bs.get_osd_device_path(lvs, 'block') == '/fake-path' + + @patch('ceph_volume.objectstore.lvmbluestore.encryption_utils.luks_open', MagicMock()) + def test_get_osd_device_path_lv_block_encrypted(self): + lvs = [Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='ceph.type=block,ceph.block_uuid=fake-block-uuid,ceph.encrypted=1', + lv_uuid='fake-block-uuid')] + assert self.lvm_bs.get_osd_device_path(lvs, 'block') == '/dev/mapper/fake-block-uuid' + + def test_get_osd_device_path_lv_db(self): + lvs = [Volume(lv_name='lv_foo-block', + lv_path='/fake-block-path', + vg_name='vg_foo', + lv_tags='ceph.type=block,ceph.block_uuid=fake-block-uuid,ceph.db_uuid=fake-db-uuid', + lv_uuid='fake-block-uuid'), + Volume(lv_name='lv_foo-db', + lv_path='/fake-db-path', + vg_name='vg_foo_db', + lv_tags='ceph.type=db,ceph.block_uuid=fake-block-uuid,ceph.db_uuid=fake-db-uuid', + lv_uuid='fake-db-uuid')] + assert self.lvm_bs.get_osd_device_path(lvs, 'db') == '/fake-db-path' + + def test_get_osd_device_path_no_device_uuid(self): + lvs = [Volume(lv_name='lv_foo-block', + lv_path='/fake-block-path', + vg_name='vg_foo', + lv_tags='ceph.type=block,ceph.block_uuid=fake-block-uuid', + lv_uuid='fake-block-uuid'), + Volume(lv_name='lv_foo-db', + lv_path='/fake-db-path', + vg_name='vg_foo_db', + lv_tags='ceph.type=db,ceph.block_uuid=fake-block-uuid', + lv_uuid='fake-db-uuid')] + assert not self.lvm_bs.get_osd_device_path(lvs, 'db') + + @patch('ceph_volume.util.disk.get_device_from_partuuid') + @patch('ceph_volume.objectstore.lvmbluestore.encryption_utils.luks_open', MagicMock()) + def test_get_osd_device_path_phys_encrypted(self, m_get_device_from_partuuid): + m_get_device_from_partuuid.return_value = '/dev/sda1' + lvs = [Volume(lv_name='lv_foo-block', + lv_path='/fake-block-path', + vg_name='vg_foo', + lv_tags='ceph.type=block,ceph.block_uuid=fake-block-uuid,ceph.db_uuid=fake-db-uuid,ceph.osd_id=0,ceph.osd_fsid=abcd,ceph.cluster_name=ceph,ceph.encrypted=1', + lv_uuid='fake-block-uuid')] + assert self.lvm_bs.get_osd_device_path(lvs, 'db') == '/dev/mapper/fake-db-uuid' + + @patch('ceph_volume.util.disk.get_device_from_partuuid') + def test_get_osd_device_path_phys(self, m_get_device_from_partuuid): + m_get_device_from_partuuid.return_value = '/dev/sda1' + lvs = [Volume(lv_name='lv_foo-block', + lv_path='/fake-block-path', + vg_name='vg_foo', + lv_tags='ceph.type=block,ceph.block_uuid=fake-block-uuid,ceph.db_uuid=fake-db-uuid,ceph.osd_id=0,ceph.osd_fsid=abcd,ceph.cluster_name=ceph', + lv_uuid='fake-block-uuid')] + self.lvm_bs.get_osd_device_path(lvs, 'db') + + @patch('ceph_volume.util.disk.get_device_from_partuuid') + def test_get_osd_device_path_phys_raises_exception(self, m_get_device_from_partuuid): + m_get_device_from_partuuid.return_value = '' + lvs = [Volume(lv_name='lv_foo-block', + lv_path='/fake-block-path', + vg_name='vg_foo', + lv_tags='ceph.type=block,ceph.block_uuid=fake-block-uuid,ceph.db_uuid=fake-db-uuid,ceph.osd_id=0,ceph.osd_fsid=abcd,ceph.cluster_name=ceph', + lv_uuid='fake-block-uuid')] + with pytest.raises(RuntimeError): + self.lvm_bs.get_osd_device_path(lvs, 'db') + + def 
test__activate_raises_exception(self): + lvs = [Volume(lv_name='lv_foo-db', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='ceph.type=db,ceph.db_uuid=fake-db-uuid', + lv_uuid='fake-db-uuid')] + with pytest.raises(RuntimeError) as error: + self.lvm_bs._activate(lvs) + assert str(error.value) == 'could not find a bluestore OSD to activate' + + @patch('ceph_volume.objectstore.lvmbluestore.encryption_utils.write_lockbox_keyring', MagicMock()) + @patch('ceph_volume.objectstore.lvmbluestore.encryption_utils.get_dmcrypt_key', MagicMock()) + @patch('ceph_volume.objectstore.lvmbluestore.prepare_utils.create_osd_path') + @patch('ceph_volume.terminal.success') + @pytest.mark.parametrize("encrypted", ["ceph.encrypted=0", "ceph.encrypted=1"]) + def test__activate(self, + m_success, m_create_osd_path, + monkeypatch, fake_run, fake_call, encrypted, conf_ceph_stub): + conf_ceph_stub('[global]\nfsid=asdf-lkjh') + monkeypatch.setattr(system, 'chown', lambda path: 0) + monkeypatch.setattr('ceph_volume.configuration.load', lambda: None) + monkeypatch.setattr('ceph_volume.util.system.path_is_mounted', lambda path: False) + m_create_osd_path.return_value = MagicMock() + m_success.return_value = MagicMock() + lvs = [Volume(lv_name='lv_foo-block', + lv_path='/fake-block-path', + vg_name='vg_foo', + lv_tags=f'ceph.type=block,ceph.db_uuid=fake-db-uuid,ceph.block_uuid=fake-block-uuid,ceph.wal_uuid=fake-wal-uuid,ceph.osd_id=0,ceph.osd_fsid=abcd,ceph.cluster_name=ceph,{encrypted},ceph.cephx_lockbox_secret=abcd', + lv_uuid='fake-block-uuid'), + Volume(lv_name='lv_foo-db', + lv_path='/fake-db-path', + vg_name='vg_foo_db', + lv_tags=f'ceph.type=db,ceph.db_uuid=fake-db-uuid,ceph.block_uuid=fake-block-uuid,ceph.wal_uuid=fake-wal-uuid,ceph.osd_id=0,ceph.osd_fsid=abcd,ceph.cluster_name=ceph,{encrypted},ceph.cephx_lockbox_secret=abcd', + lv_uuid='fake-db-uuid'), + Volume(lv_name='lv_foo-db', + lv_path='/fake-wal-path', + vg_name='vg_foo_wal', + lv_tags=f'ceph.type=wal,ceph.block_uuid=fake-block-uuid,ceph.wal_uuid=fake-wal-uuid,ceph.db_uuid=fake-db-uuid,ceph.osd_id=0,ceph.osd_fsid=abcd,ceph.cluster_name=ceph,{encrypted},ceph.cephx_lockbox_secret=abcd', + lv_uuid='fake-wal-uuid')] + self.lvm_bs._activate(lvs) + if encrypted == "ceph.encrypted=0": + assert fake_run.calls == [{'args': (['ceph-bluestore-tool', '--cluster=ceph', + 'prime-osd-dir', '--dev', '/fake-block-path', + '--path', '/var/lib/ceph/osd/ceph-0', '--no-mon-config'],), + 'kwargs': {}}, + {'args': (['ln', '-snf', '/fake-block-path', + '/var/lib/ceph/osd/ceph-0/block'],), + 'kwargs': {}}, + {'args': (['ln', '-snf', '/fake-db-path', + '/var/lib/ceph/osd/ceph-0/block.db'],), + 'kwargs': {}}, + {'args': (['ln', '-snf', '/fake-wal-path', + '/var/lib/ceph/osd/ceph-0/block.wal'],), + 'kwargs': {}}, + {'args': (['systemctl', 'enable', + 'ceph-volume@lvm-0-abcd'],), + 'kwargs': {}}, + {'args': (['systemctl', 'enable', '--runtime', 'ceph-osd@0'],), + 'kwargs': {}}, + {'args': (['systemctl', 'start', 'ceph-osd@0'],), + 'kwargs': {}}] + else: + assert fake_run.calls == [{'args': (['ceph-bluestore-tool', '--cluster=ceph', + 'prime-osd-dir', '--dev', '/dev/mapper/fake-block-uuid', + '--path', '/var/lib/ceph/osd/ceph-0', '--no-mon-config'],), + 'kwargs': {}}, + {'args': (['ln', '-snf', '/dev/mapper/fake-block-uuid', + '/var/lib/ceph/osd/ceph-0/block'],), + 'kwargs': {}}, + {'args': (['ln', '-snf', '/dev/mapper/fake-db-uuid', + '/var/lib/ceph/osd/ceph-0/block.db'],), + 'kwargs': {}}, + {'args': (['ln', '-snf', '/dev/mapper/fake-wal-uuid', + 
'/var/lib/ceph/osd/ceph-0/block.wal'],), + 'kwargs': {}}, + {'args': (['systemctl', 'enable', 'ceph-volume@lvm-0-abcd'],), + 'kwargs': {}}, + {'args': (['systemctl', 'enable', '--runtime', 'ceph-osd@0'],), + 'kwargs': {}}, + {'args': (['systemctl', 'start', 'ceph-osd@0'],), + 'kwargs': {}}] + assert m_success.mock_calls == [call('ceph-volume lvm activate successful for osd ID: 0')] + + @patch('ceph_volume.systemd.systemctl.osd_is_active', return_value=False) + def test_activate_all(self, + m_create_key, + mock_lvm_direct_report, + is_root, + factory, + fake_run): + args = factory(no_systemd=True) + self.lvm_bs.args = args + self.lvm_bs.activate = MagicMock() + self.lvm_bs.activate_all() + assert self.lvm_bs.activate.mock_calls == [call(args, + osd_id='1', + osd_fsid='824f7edf-371f-4b75-9231-4ab62a32d5c0'), + call(args, + osd_id='0', + osd_fsid='a0e07c5b-bee1-4ea2-ae07-cb89deda9b27')] + + @patch('ceph_volume.systemd.systemctl.osd_is_active', return_value=False) + def test_activate_all_no_osd_found(self, + m_create_key, + is_root, + factory, + fake_run, + monkeypatch, + capsys): + monkeypatch.setattr('ceph_volume.objectstore.lvmbluestore.direct_report', lambda: {}) + args = factory(no_systemd=True) + self.lvm_bs.args = args + self.lvm_bs.activate_all() + stdout, stderr = capsys.readouterr() + assert "Was unable to find any OSDs to activate" in stderr + assert "Verify OSDs are present with" in stderr + + @patch('ceph_volume.api.lvm.process.call', Mock(return_value=('', '', 0))) + @patch('ceph_volume.systemd.systemctl.osd_is_active', return_value=True) + def test_activate_all_osd_is_active(self, + mock_lvm_direct_report, + is_root, + factory, + fake_run): + args = factory(no_systemd=False) + self.lvm_bs.args = args + self.lvm_bs.activate = MagicMock() + self.lvm_bs.activate_all() + assert self.lvm_bs.activate.mock_calls == [] + + @patch('ceph_volume.api.lvm.get_lvs') + def test_activate_osd_id_and_fsid(self, + m_get_lvs, + is_root, + factory): + args = factory(osd_id='1', + osd_fsid='824f7edf', + no_systemd=True) + lvs = [Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags=f'ceph.osd_id={args.osd_id},ceph.osd_fsid={args.osd_fsid}', + lv_uuid='fake-uuid')] + m_get_lvs.return_value = lvs + self.lvm_bs.args = args + self.lvm_bs._activate = MagicMock() + self.lvm_bs.activate() + assert self.lvm_bs._activate.mock_calls == [call(lvs, True, False)] + assert m_get_lvs.mock_calls == [call(tags={'ceph.osd_id': '1', + 'ceph.osd_fsid': '824f7edf'})] + + @patch('ceph_volume.api.lvm.get_lvs') + def test_activate_not_osd_id_and_fsid(self, + m_get_lvs, + is_root, + factory): + args = factory(no_systemd=True, + osd_id=None, + osd_fsid='824f7edf') + lvs = [Volume(lv_name='lv_foo', + lv_path='/fake-path', + vg_name='vg_foo', + lv_tags='', + lv_uuid='fake-uuid')] + m_get_lvs.return_value = lvs + self.lvm_bs.args = args + self.lvm_bs._activate = MagicMock() + self.lvm_bs.activate() + assert self.lvm_bs._activate.mock_calls == [call(lvs, True, False)] + assert m_get_lvs.mock_calls == [call(tags={'ceph.osd_fsid': '824f7edf'})] + + def test_activate_osd_id_and_not_fsid(self, + is_root, + factory): + args = factory(no_systemd=True, + osd_id='1', + osd_fsid=None) + self.lvm_bs.args = args + self.lvm_bs._activate = MagicMock() + with pytest.raises(RuntimeError) as error: + self.lvm_bs.activate() + assert str(error.value) == 'could not activate osd.1, please provide the osd_fsid too' + + def test_activate_not_osd_id_and_not_fsid(self, + is_root, + factory): + args = factory(no_systemd=True, + 
osd_id=None, + osd_fsid=None) + self.lvm_bs.args = args + self.lvm_bs._activate = MagicMock() + with pytest.raises(RuntimeError) as error: + self.lvm_bs.activate() + assert str(error.value) == 'Please provide both osd_id and osd_fsid' + + @patch('ceph_volume.api.lvm.get_lvs') + def test_activate_couldnt_find_osd(self, + m_get_lvs, + is_root, + factory): + args = factory(osd_id='1', + osd_fsid='824f7edf', + no_systemd=True) + lvs = [] + m_get_lvs.return_value = lvs + self.lvm_bs.args = args + self.lvm_bs._activate = MagicMock() + with pytest.raises(RuntimeError) as error: + self.lvm_bs.activate() + assert str(error.value) == 'could not find osd.1 with osd_fsid 824f7edf' \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/objectstore/test_rawbluestore.py b/src/ceph-volume/ceph_volume/tests/objectstore/test_rawbluestore.py new file mode 100644 index 000000000000..fd7c468037c5 --- /dev/null +++ b/src/ceph-volume/ceph_volume/tests/objectstore/test_rawbluestore.py @@ -0,0 +1,219 @@ +import pytest +from mock import patch, Mock, MagicMock, call +from ceph_volume.objectstore.rawbluestore import RawBlueStore +from ceph_volume.util import system + + +class TestRawBlueStore: + @patch('ceph_volume.objectstore.rawbluestore.prepare_utils.create_key', Mock(return_value=['AQCee6ZkzhOrJRAAZWSvNC3KdXOpC2w8ly4AZQ=='])) + def setup_method(self, m_create_key): + self.raw_bs = RawBlueStore([]) + + def test_prepare_dmcrypt(self, + device_info, + fake_call, + key_size): + self.raw_bs.secrets = {'dmcrypt_key': 'foo'} + self.raw_bs.block_device_path = '/dev/foo0' + self.raw_bs.db_device_path = '/dev/foo1' + self.raw_bs.wal_device_path = '/dev/foo2' + lsblk = {"TYPE": "disk", + "NAME": "foo0", + 'KNAME': 'foo0'} + device_info(lsblk=lsblk) + self.raw_bs.prepare_dmcrypt() + assert self.raw_bs.block_device_path == "/dev/mapper/ceph--foo0-block-dmcrypt" + assert self.raw_bs.db_device_path == "/dev/mapper/ceph--foo0-db-dmcrypt" + assert self.raw_bs.wal_device_path == "/dev/mapper/ceph--foo0-wal-dmcrypt" + + @patch('ceph_volume.objectstore.rawbluestore.RawBlueStore.enroll_tpm2', Mock(return_value=MagicMock())) + def test_prepare_dmcrypt_with_tpm(self, + device_info, + fake_call, + key_size): + self.raw_bs.block_device_path = '/dev/foo0' + self.raw_bs.db_device_path = '/dev/foo1' + self.raw_bs.wal_device_path = '/dev/foo2' + self.raw_bs.with_tpm = 1 + lsblk = {"TYPE": "disk", + "NAME": "foo0", + 'KNAME': 'foo0'} + device_info(lsblk=lsblk) + self.raw_bs.prepare_dmcrypt() + assert 'dmcrypt_key' not in self.raw_bs.secrets.keys() + assert self.raw_bs.block_device_path == "/dev/mapper/ceph--foo0-block-dmcrypt" + assert self.raw_bs.db_device_path == "/dev/mapper/ceph--foo0-db-dmcrypt" + assert self.raw_bs.wal_device_path == "/dev/mapper/ceph--foo0-wal-dmcrypt" + assert self.raw_bs.enroll_tpm2.mock_calls == [call('/dev/foo0'), call('/dev/foo1'), call('/dev/foo2')] + + @patch('ceph_volume.objectstore.rawbluestore.rollback_osd') + @patch('ceph_volume.objectstore.rawbluestore.RawBlueStore.prepare') + def test_safe_prepare_raises_exception(self, + m_prepare, + m_rollback_osd, + factory, + capsys): + m_prepare.side_effect = Exception + m_rollback_osd.return_value = MagicMock() + args = factory(osd_id='1') + self.raw_bs.args = args + self.raw_bs.osd_id = self.raw_bs.args.osd_id + with pytest.raises(Exception): + self.raw_bs.safe_prepare() + assert m_rollback_osd.mock_calls == [call(self.raw_bs.args, '1')] + + @patch('ceph_volume.objectstore.rawbluestore.RawBlueStore.prepare', MagicMock()) + def 
test_safe_prepare(self, + factory, + capsys): + args = factory(dmcrypt=True, + data='/dev/foo') + self.raw_bs.safe_prepare(args) + _, stderr = capsys.readouterr() + assert "prepare successful for: /dev/foo" in stderr + + @patch.dict('os.environ', {'CEPH_VOLUME_DMCRYPT_SECRET': 'dmcrypt-key'}) + @patch('ceph_volume.objectstore.rawbluestore.prepare_utils.create_id') + @patch('ceph_volume.objectstore.rawbluestore.system.generate_uuid') + def test_prepare(self, m_generate_uuid, m_create_id, is_root, factory): + m_generate_uuid.return_value = 'fake-uuid' + m_create_id.return_value = MagicMock() + self.raw_bs.prepare_dmcrypt = MagicMock() + self.raw_bs.prepare_osd_req = MagicMock() + self.raw_bs.osd_mkfs = MagicMock() + args = factory(crush_device_class='foo', + no_tmpfs=False, + block_wal='/dev/foo1', + block_db='/dev/foo2',) + self.raw_bs.args = args + self.raw_bs.secrets = dict() + self.raw_bs.encrypted = True + self.raw_bs.prepare() + assert self.raw_bs.prepare_osd_req.mock_calls == [call(tmpfs=True)] + assert self.raw_bs.osd_mkfs.called + assert self.raw_bs.prepare_dmcrypt.called + + @patch('ceph_volume.conf.cluster', 'ceph') + @patch('ceph_volume.objectstore.rawbluestore.prepare_utils.link_wal') + @patch('ceph_volume.objectstore.rawbluestore.prepare_utils.link_db') + @patch('ceph_volume.objectstore.rawbluestore.prepare_utils.link_block') + @patch('os.path.exists') + @patch('os.unlink') + @patch('ceph_volume.objectstore.rawbluestore.prepare_utils.create_osd_path') + @patch('ceph_volume.objectstore.rawbluestore.process.run') + def test__activate(self, + m_run, + m_create_osd_path, + m_unlink, + m_exists, + m_link_block, + m_link_db, + m_link_wal, + monkeypatch, + factory): + args = factory(no_tmpfs=False) + self.raw_bs.args = args + self.raw_bs.block_device_path = '/dev/sda' + self.raw_bs.db_device_path = '/dev/sdb' + self.raw_bs.wal_device_path = '/dev/sdc' + m_run.return_value = MagicMock() + m_exists.side_effect = lambda path: True + m_create_osd_path.return_value = MagicMock() + m_unlink.return_value = MagicMock() + monkeypatch.setattr(system, 'chown', lambda path: 0) + monkeypatch.setattr(system, 'path_is_mounted', lambda path: 0) + self.raw_bs._activate('1', True) + calls = [call('/var/lib/ceph/osd/ceph-1/block'), + call('/var/lib/ceph/osd/ceph-1/block.db'), + call('/var/lib/ceph/osd/ceph-1/block.wal')] + assert m_run.mock_calls == [call(['ceph-bluestore-tool', + 'prime-osd-dir', + '--path', '/var/lib/ceph/osd/ceph-1', + '--no-mon-config', '--dev', '/dev/sda'])] + assert m_unlink.mock_calls == calls + assert m_exists.mock_calls == calls + assert m_create_osd_path.mock_calls == [call('1', tmpfs=True)] + + def test_activate_raises_exception(self, + is_root, + mock_raw_direct_report): + with pytest.raises(RuntimeError) as error: + self.raw_bs.osd_id = '1' + self.raw_bs.activate() + assert str(error.value) == 'did not find any matching OSD to activate' + + def test_activate_osd_id_and_fsid(self, + is_root, + mock_raw_direct_report): + self.raw_bs._activate = MagicMock() + self.raw_bs.osd_id = '8' + self.raw_bs.osd_fsid = '824f7edf-371f-4b75-9231-4ab62a32d5c0' + self.raw_bs.activate() + self.raw_bs._activate.mock_calls == [call({'ceph_fsid': '7dccab18-14cf-11ee-837b-5254008f8ca5', + 'device': '/dev/mapper/ceph--40bc7bd7--4aee--483e--ba95--89a64bc8a4fd-osd--block--824f7edf--371f--4b75--9231--4ab62a32d5c0', + 'device_db': '/dev/mapper/ceph--73d6d4db--6528--48f2--a4e2--1c82bc87a9ac-osd--db--b82d920d--be3c--4e4d--ba64--18f7e8445892', + 'osd_id': 8, + 'osd_uuid': 
'824f7edf-371f-4b75-9231-4ab62a32d5c0', + 'type': 'bluestore'}, + tmpfs=True)] + + @patch('ceph_volume.objectstore.rawbluestore.encryption_utils.rename_mapper', Mock(return_value=MagicMock())) + @patch('ceph_volume.util.disk.get_bluestore_header') + @patch('ceph_volume.objectstore.rawbluestore.encryption_utils.luks_close', Mock(return_value=MagicMock())) + @patch('ceph_volume.objectstore.rawbluestore.encryption_utils.luks_open', Mock(return_value=MagicMock())) + def test_activate_dmcrypt_tpm(self, m_bs_header, rawbluestore, fake_lsblk_all, mock_raw_direct_report, is_root) -> None: + m_bs_header.return_value = { + "/dev/mapper/activating-sdb": { + "osd_uuid": "db32a338-b640-4cbc-af17-f63808b1c36e", + "size": 20000572178432, + "btime": "2024-06-13T12:16:57.607442+0000", + "description": "main", + "bfm_blocks": "4882952192", + "bfm_blocks_per_key": "128", + "bfm_bytes_per_block": "4096", + "bfm_size": "20000572178432", + "bluefs": "1", + "ceph_fsid": "c301d0aa-288d-11ef-b535-c84bd6975560", + "ceph_version_when_created": "ceph version 19.0.0-4242-gf2f7cc60 (f2f7cc609cdbae767486cf2fe6872a4789adffb2) squid (dev)", + "created_at": "2024-06-13T12:17:20.122565Z", + "elastic_shared_blobs": "1", + "kv_backend": "rocksdb", + "magic": "ceph osd volume v026", + "mkfs_done": "yes", + "osd_key": "AQAk42pmt7tqFxAAHlaETFm33yFtEuoQAh/cpQ==", + "ready": "ready", + "whoami": "0"} + } + mock_luks2_1 = Mock() + mock_luks2_1.is_ceph_encrypted = True + mock_luks2_1.is_tpm2_enrolled = True + mock_luks2_1.osd_fsid = 'db32a338-b640-4cbc-af17-f63808b1c36e' + + mock_luks2_2 = Mock() + mock_luks2_2.is_ceph_encrypted = True + mock_luks2_2.is_tpm2_enrolled = False + mock_luks2_2.osd_fsid = 'db32a338-b640-4cbc-af17-f63808b1c36e' + + mock_luks2_3 = Mock() + mock_luks2_3.is_ceph_encrypted = False + mock_luks2_3.is_tpm2_enrolled = False + mock_luks2_3.osd_fsid = '' + + mock_luks2_4 = Mock() + mock_luks2_4.is_ceph_encrypted = True + mock_luks2_4.is_tpm2_enrolled = True + mock_luks2_4.osd_fsid = 'abcd' + with patch('ceph_volume.objectstore.rawbluestore.encryption_utils.CephLuks2', side_effect=[mock_luks2_1, + mock_luks2_2, + mock_luks2_3, + mock_luks2_4]): + fake_lsblk_all([{'NAME': '/dev/sdb', 'FSTYPE': 'crypto_LUKS'}, + {'NAME': '/dev/sdc', 'FSTYPE': 'crypto_LUKS'}, + {'NAME': '/dev/sdd', 'FSTYPE': ''}]) + rawbluestore.osd_fsid = 'db32a338-b640-4cbc-af17-f63808b1c36e' + rawbluestore.osd_id = '0' + rawbluestore._activate = MagicMock() + rawbluestore.activate() + assert rawbluestore._activate.mock_calls == [call(0, 'db32a338-b640-4cbc-af17-f63808b1c36e')] + assert rawbluestore.block_device_path == '/dev/mapper/ceph-db32a338-b640-4cbc-af17-f63808b1c36e-sdb-block-dmcrypt' + assert rawbluestore.db_device_path == '/dev/mapper/ceph-db32a338-b640-4cbc-af17-f63808b1c36e-sdc-db-dmcrypt' diff --git a/src/ceph-volume/ceph_volume/tests/systemd/test_main.py b/src/ceph-volume/ceph_volume/tests/systemd/test_main.py index be13438f6fb6..3156d50ddfa3 100644 --- a/src/ceph-volume/ceph_volume/tests/systemd/test_main.py +++ b/src/ceph-volume/ceph_volume/tests/systemd/test_main.py @@ -31,15 +31,15 @@ class TestMain(object): def setup_method(self): conf.log_path = '/tmp/' - def test_no_arguments_parsing_error(self): + def test_no_arguments_parsing_error(self, fake_filesystem): with pytest.raises(RuntimeError): main(args=[]) - def test_parsing_suffix_error(self): + def test_parsing_suffix_error(self, fake_filesystem): with pytest.raises(exceptions.SuffixParsingError): main(args=['asdf']) - def test_correct_command(self, monkeypatch): + def 
test_correct_command(self, monkeypatch, fake_filesystem): run = Capture() monkeypatch.setattr(process, 'run', run) main(args=['ceph-volume-systemd', 'lvm-8715BEB4-15C5-49DE-BA6F-401086EC7B41-0' ]) diff --git a/src/ceph-volume/ceph_volume/tests/test_ceph_volume.py b/src/ceph-volume/ceph_volume/tests/test_ceph_volume.py new file mode 100644 index 000000000000..0336e2cdc26d --- /dev/null +++ b/src/ceph-volume/ceph_volume/tests/test_ceph_volume.py @@ -0,0 +1,26 @@ +import os +from ceph_volume import AllowLoopDevices, allow_loop_devices +from typing import Any + + +class TestAllowLoopDevsWarning: + def setup_method(self) -> None: + AllowLoopDevices.allow = False + AllowLoopDevices.warned = False + self.teardown_method() + + def teardown_method(self) -> None: + AllowLoopDevices.allow = False + AllowLoopDevices.warned = False + if os.environ.get('CEPH_VOLUME_ALLOW_LOOP_DEVICES'): + os.environ.pop('CEPH_VOLUME_ALLOW_LOOP_DEVICES') + + def test_loop_dev_warning(self, fake_call: Any, caplog: Any) -> None: + AllowLoopDevices.warned = False + assert allow_loop_devices() is False + assert not caplog.records + os.environ['CEPH_VOLUME_ALLOW_LOOP_DEVICES'] = "y" + assert allow_loop_devices() is True + log = caplog.records[0] + assert log.levelname == "WARNING" + assert "will never be supported in production" in log.message diff --git a/src/ceph-volume/ceph_volume/tests/test_inventory.py b/src/ceph-volume/ceph_volume/tests/test_inventory.py index 785d8b56e86b..29cd1fc4e4db 100644 --- a/src/ceph-volume/ceph_volume/tests/test_inventory.py +++ b/src/ceph-volume/ceph_volume/tests/test_inventory.py @@ -118,7 +118,7 @@ def device_data(device_info): class TestInventory(object): expected_keys = [ - 'ceph_device', + 'ceph_device_lvm', 'path', 'rejected_reasons', 'sys_api', @@ -126,6 +126,7 @@ class TestInventory(object): 'lvs', 'device_id', 'lsm_data', + 'being_replaced' ] expected_sys_api_keys = [ diff --git a/src/ceph-volume/ceph_volume/tests/test_main.py b/src/ceph-volume/ceph_volume/tests/test_main.py index d03d405d5538..65689bf4f3b2 100644 --- a/src/ceph-volume/ceph_volume/tests/test_main.py +++ b/src/ceph-volume/ceph_volume/tests/test_main.py @@ -32,7 +32,7 @@ def test_flags_are_parsed_with_help(self, capsys): assert '--cluster' in stdout assert '--log-path' in stdout - def test_log_ignoring_missing_ceph_conf(self, caplog): + def test_log_ignoring_missing_ceph_conf(self, caplog, fake_filesystem): with pytest.raises(SystemExit) as error: main.Volume(argv=['ceph-volume', '--cluster', 'barnacle', 'lvm', '--help']) # make sure we aren't causing an actual error @@ -41,7 +41,7 @@ def test_log_ignoring_missing_ceph_conf(self, caplog): assert log.message == 'ignoring inability to load ceph.conf' assert log.levelname == 'WARNING' - def test_logs_current_command(self, caplog): + def test_logs_current_command(self, caplog, fake_filesystem): with pytest.raises(SystemExit) as error: main.Volume(argv=['ceph-volume', '--cluster', 'barnacle', 'lvm', '--help']) # make sure we aren't causing an actual error @@ -50,7 +50,7 @@ def test_logs_current_command(self, caplog): assert log.message == 'Running command: ceph-volume --cluster barnacle lvm --help' assert log.levelname == 'INFO' - def test_logs_set_level_warning(self, caplog): + def test_logs_set_level_warning(self, caplog, fake_filesystem): with pytest.raises(SystemExit) as error: main.Volume(argv=['ceph-volume', '--log-level', 'warning', '--cluster', 'barnacle', 'lvm', '--help']) # make sure we aren't causing an actual error diff --git 
a/src/ceph-volume/ceph_volume/tests/test_terminal.py b/src/ceph-volume/ceph_volume/tests/test_terminal.py index e59a036baa80..3c420f15e19c 100644 --- a/src/ceph-volume/ceph_volume/tests/test_terminal.py +++ b/src/ceph-volume/ceph_volume/tests/test_terminal.py @@ -131,13 +131,3 @@ def test_writer(self, encoding, stream, monkeypatch, capsys, caplog): writer.seek(0) val = buffer.getvalue() assert self.octpus_and_squid_en.encode(encoding) in val - - def test_writer_uses_log_on_unicodeerror(self, stream, monkeypatch, capture): - - if sys.version_info > (3,): - pytest.skip("Something breaks inside of pytest's capsys") - monkeypatch.setattr(terminal.terminal_logger, 'info', capture) - buffer = io.BytesIO() - writer = stream(buffer, 'ascii') - terminal._Write(_writer=writer).raw(self.message) - assert self.octpus_and_squid_en in capture.calls[0]['args'][0] diff --git a/src/ceph-volume/ceph_volume/tests/util/test_arg_validators.py b/src/ceph-volume/ceph_volume/tests/util/test_arg_validators.py index c6349308ee7a..abbf1d57f332 100644 --- a/src/ceph-volume/ceph_volume/tests/util/test_arg_validators.py +++ b/src/ceph-volume/ceph_volume/tests/util/test_arg_validators.py @@ -1,6 +1,5 @@ import argparse import pytest -import os from ceph_volume import exceptions, process from ceph_volume.util import arg_validators from mock.mock import patch, MagicMock @@ -12,23 +11,22 @@ def setup_method(self): self.validator = arg_validators.OSDPath() def test_is_not_root(self, monkeypatch): - monkeypatch.setattr(os, 'getuid', lambda: 100) + monkeypatch.setattr('ceph_volume.decorators.os.getuid', lambda : 100) with pytest.raises(exceptions.SuperUserError): self.validator('') - def test_path_is_not_a_directory(self, is_root, monkeypatch, fake_filesystem): + def test_path_is_not_a_directory(self, monkeypatch, fake_filesystem): fake_file = fake_filesystem.create_file('/tmp/foo') + monkeypatch.setattr('ceph_volume.decorators.os.getuid', lambda : 0) monkeypatch.setattr(arg_validators.disk, 'is_partition', lambda x: False) - validator = arg_validators.OSDPath() with pytest.raises(argparse.ArgumentError): - validator(fake_file.path) + self.validator(fake_file.path) - def test_files_are_missing(self, is_root, tmpdir, monkeypatch): - tmppath = str(tmpdir) - monkeypatch.setattr(arg_validators.disk, 'is_partition', lambda x: False) - validator = arg_validators.OSDPath() + @patch('ceph_volume.decorators.os.getuid', return_value=0) + @patch('ceph_volume.util.arg_validators.disk.is_partition', return_value=False) + def test_files_are_missing(self, m_is_partition, m_getuid, fake_filesystem): with pytest.raises(argparse.ArgumentError) as error: - validator(tmppath) + self.validator('/tmp/osdpath') assert 'Required file (ceph_fsid) was not found in OSD' in str(error.value) diff --git a/src/ceph-volume/ceph_volume/tests/util/test_device.py b/src/ceph-volume/ceph_volume/tests/util/test_device.py index e382981d9232..9a41d9683213 100644 --- a/src/ceph-volume/ceph_volume/tests/util/test_device.py +++ b/src/ceph-volume/ceph_volume/tests/util/test_device.py @@ -47,7 +47,8 @@ def test_lvm_size_rounds_down(self, fake_call, device_info): disk = device.Device("/dev/sda") assert disk.lvm_size.gb == 4 - def test_is_lv(self, fake_call, device_info): + def test_is_lv(self, fake_call, device_info, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) data = {"lv_path": "vg/lv", "vg_name": "vg", "name": "lv"} lsblk = {"TYPE": "lvm", "NAME": "vg-lv"} device_info(lv=data,lsblk=lsblk) @@ -152,14 +153,6 @@ def 
test_disk_is_device(self, fake_call, device_info): disk = device.Device("/dev/sda") assert disk.is_device is True - @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) - def test_is_partition(self, fake_call, device_info): - data = {"/dev/sda1": {"foo": "bar"}} - lsblk = {"TYPE": "part", "NAME": "sda1", "PKNAME": "sda"} - device_info(devices=data, lsblk=lsblk) - disk = device.Device("/dev/sda1") - assert disk.is_partition - @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) def test_mpath_device_is_device(self, fake_call, device_info): data = {"/dev/foo": {"foo": "bar"}} @@ -241,7 +234,7 @@ def test_is_ceph_disk_member_not_available_blkid(self, fake_call, monkeypatch, p @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) def test_reject_removable_device(self, fake_call, device_info): - data = {"/dev/sdb": {"removable": 1}} + data = {"/dev/sdb": {"removable": "1"}} lsblk = {"TYPE": "disk", "NAME": "sdb"} device_info(devices=data,lsblk=lsblk) disk = device.Device("/dev/sdb") @@ -249,7 +242,7 @@ def test_reject_removable_device(self, fake_call, device_info): @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) def test_reject_device_with_gpt_headers(self, fake_call, device_info): - data = {"/dev/sdb": {"removable": 0, "size": 5368709120}} + data = {"/dev/sdb": {"removable": "0", "size": 5368709120}} lsblk = {"TYPE": "disk", "NAME": "sdb"} blkid= {"PTTYPE": "gpt"} device_info( @@ -262,7 +255,7 @@ def test_reject_device_with_gpt_headers(self, fake_call, device_info): @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) def test_accept_non_removable_device(self, fake_call, device_info): - data = {"/dev/sdb": {"removable": 0, "size": 5368709120}} + data = {"/dev/sdb": {"removable": "0", "size": 5368709120}} lsblk = {"TYPE": "disk", "NAME": "sdb"} device_info(devices=data,lsblk=lsblk) disk = device.Device("/dev/sdb") @@ -286,7 +279,7 @@ def test_accept_symlink_to_device(self, fake_call): m_os_path_islink.return_value = True m_os_path_realpath.return_value = '/dev/sdb' - data = {"/dev/sdb": {"ro": 0, "size": 5368709120}} + data = {"/dev/sdb": {"ro": "0", "size": 5368709120}} lsblk = {"TYPE": "disk"} device_info(devices=data,lsblk=lsblk) disk = device.Device("/dev/test_symlink") @@ -304,7 +297,7 @@ def test_reject_symlink_to_device_mapper(self, fake_call): m_os_path_islink.return_value = True m_os_readlink.return_value = '/dev/dm-0' - data = {"/dev/mapper/mpatha": {"ro": 0, "size": 5368709120}} + data = {"/dev/mapper/mpatha": {"ro": "0", "size": 5368709120}} lsblk = {"TYPE": "disk"} device_info(devices=data,lsblk=lsblk) disk = device.Device("/dev/mapper/mpatha") @@ -312,12 +305,28 @@ def test_reject_symlink_to_device_mapper(self, @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) def test_reject_readonly_device(self, fake_call, device_info): - data = {"/dev/cdrom": {"ro": 1}} + data = {"/dev/cdrom": {"ro": "1"}} lsblk = {"TYPE": "disk", "NAME": "cdrom"} device_info(devices=data,lsblk=lsblk) disk = device.Device("/dev/cdrom") assert not disk.available + @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) + @patch('ceph_volume.util.device.os.path.realpath') + @patch('ceph_volume.util.device.os.path.islink') + def test_reject_lv_symlink_to_device(self, + m_os_path_islink, + m_os_path_realpath, + device_info, + fake_call): + m_os_path_islink.return_value = True + m_os_path_realpath.return_value = '/dev/mapper/vg-lv' + lv = {"lv_path": "/dev/vg/lv", "vg_name": "vg", "name": "lv"} + lsblk 
= {"TYPE": "lvm", "NAME": "vg-lv"} + device_info(lv=lv,lsblk=lsblk) + disk = device.Device("/dev/vg/lv") + assert disk.path == '/dev/vg/lv' + @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) def test_reject_smaller_than_5gb(self, fake_call, device_info): data = {"/dev/sda": {"size": 5368709119}} @@ -328,7 +337,7 @@ def test_reject_smaller_than_5gb(self, fake_call, device_info): @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) def test_accept_non_readonly_device(self, fake_call, device_info): - data = {"/dev/sda": {"ro": 0, "size": 5368709120}} + data = {"/dev/sda": {"ro": "0", "size": 5368709120}} lsblk = {"TYPE": "disk", "NAME": "sda"} device_info(devices=data,lsblk=lsblk) disk = device.Device("/dev/sda") @@ -536,7 +545,8 @@ def test_mapper_is_not_encrypted_plain(self, fake_call, device_info, monkeypatch assert disk.is_encrypted is False @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) - def test_lv_is_encrypted_blkid(self, fake_call, device_info): + def test_lv_is_encrypted_blkid(self, fake_call, device_info, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) lsblk = {'TYPE': 'lvm', 'NAME': 'sda'} blkid = {'TYPE': 'crypto_LUKS'} device_info(lsblk=lsblk, blkid=blkid) @@ -545,7 +555,8 @@ def test_lv_is_encrypted_blkid(self, fake_call, device_info): assert disk.is_encrypted is True @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) - def test_lv_is_not_encrypted_blkid(self, fake_call, factory, device_info): + def test_lv_is_not_encrypted_blkid(self, fake_call, factory, device_info, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) lsblk = {'TYPE': 'lvm', 'NAME': 'sda'} blkid = {'TYPE': 'xfs'} device_info(lsblk=lsblk, blkid=blkid) @@ -554,7 +565,8 @@ def test_lv_is_not_encrypted_blkid(self, fake_call, factory, device_info): assert disk.is_encrypted is False @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) - def test_lv_is_encrypted_lsblk(self, fake_call, device_info): + def test_lv_is_encrypted_lsblk(self, fake_call, device_info, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) lsblk = {'FSTYPE': 'crypto_LUKS', 'NAME': 'sda', 'TYPE': 'lvm'} blkid = {'TYPE': 'mapper'} device_info(lsblk=lsblk, blkid=blkid) @@ -563,7 +575,8 @@ def test_lv_is_encrypted_lsblk(self, fake_call, device_info): assert disk.is_encrypted is True @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) - def test_lv_is_not_encrypted_lsblk(self, fake_call, factory, device_info): + def test_lv_is_not_encrypted_lsblk(self, fake_call, factory, device_info, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) lsblk = {'FSTYPE': 'xfs', 'NAME': 'sda', 'TYPE': 'lvm'} blkid = {'TYPE': 'mapper'} device_info(lsblk=lsblk, blkid=blkid) @@ -572,7 +585,8 @@ def test_lv_is_not_encrypted_lsblk(self, fake_call, factory, device_info): assert disk.is_encrypted is False @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) - def test_lv_is_encrypted_lvm_api(self, fake_call, factory, device_info): + def test_lv_is_encrypted_lvm_api(self, fake_call, factory, device_info, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) lsblk = {'FSTYPE': 'xfs', 'NAME': 'sda', 'TYPE': 'lvm'} blkid = {'TYPE': 'mapper'} device_info(lsblk=lsblk, blkid=blkid) @@ -581,7 +595,8 @@ def test_lv_is_encrypted_lvm_api(self, fake_call, factory, 
device_info): assert disk.is_encrypted is True @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) - def test_lv_is_not_encrypted_lvm_api(self, fake_call, factory, device_info): + def test_lv_is_not_encrypted_lvm_api(self, fake_call, factory, device_info, monkeypatch): + monkeypatch.setattr('ceph_volume.util.device.Device.is_lv', lambda: True) lsblk = {'FSTYPE': 'xfs', 'NAME': 'sda', 'TYPE': 'lvm'} blkid = {'TYPE': 'mapper'} device_info(lsblk=lsblk, blkid=blkid) @@ -594,10 +609,10 @@ class TestDeviceOrdering(object): def setup_method(self): self.data = { - "/dev/sda": {"removable": 0}, - "/dev/sdb": {"removable": 1}, # invalid - "/dev/sdc": {"removable": 0}, - "/dev/sdd": {"removable": 1}, # invalid + "/dev/sda": {"removable": "0"}, + "/dev/sdb": {"removable": "1"}, # invalid + "/dev/sdc": {"removable": "0"}, + "/dev/sdd": {"removable": "1"}, # invalid } @patch("ceph_volume.util.disk.has_bluestore_label", lambda x: False) diff --git a/src/ceph-volume/ceph_volume/tests/util/test_disk.py b/src/ceph-volume/ceph_volume/tests/util/test_disk.py index ce1f9466fd56..8c27ce402fbc 100644 --- a/src/ceph-volume/ceph_volume/tests/util/test_disk.py +++ b/src/ceph-volume/ceph_volume/tests/util/test_disk.py @@ -1,7 +1,8 @@ -import os import pytest +import stat from ceph_volume.util import disk -from mock.mock import patch, MagicMock +from mock.mock import patch, Mock, MagicMock, mock_open +from pyfakefs.fake_filesystem_unittest import TestCase class TestFunctions: @@ -33,6 +34,31 @@ def test_is_device_type_mpath(self): def test_is_device_type_part(self): assert not disk.is_device('/dev/foo1') + @patch('ceph_volume.util.disk.os.path.exists', MagicMock(return_value=True)) + @patch('ceph_volume.util.disk.get_partitions', MagicMock(return_value={"sda1": "sda"})) + def test_is_partition(self): + assert disk.is_partition('sda1') + + + @patch('os.path.exists', Mock(return_value=True)) + def test_get_lvm_mapper_path_from_dm(self): + with patch('builtins.open', mock_open(read_data='test--foo--vg-test--foo--lv')): + assert disk.get_lvm_mapper_path_from_dm('/dev/dm-123') == '/dev/mapper/test--foo--vg-test--foo--lv' + + @patch('ceph_volume.util.disk.get_block_device_holders', MagicMock(return_value={'/dev/dmcrypt-mapper-123': '/dev/sda'})) + @patch('os.path.realpath', MagicMock(return_value='/dev/sda')) + def test_has_holders_true(self): + assert disk.has_holders('/dev/sda') + + @patch('ceph_volume.util.disk.get_block_device_holders', MagicMock(return_value={'/dev/dmcrypt-mapper-123': '/dev/sda'})) + @patch('os.path.realpath', MagicMock(return_value='/dev/sdb')) + def test_has_holders_false(self): + assert not disk.has_holders('/dev/sda') + + @patch('ceph_volume.util.disk.get_block_device_holders', MagicMock(return_value={'/dev/dmcrypt-mapper-123': '/dev/sda'})) + @patch('os.path.realpath', MagicMock(return_value='/dev/foobar')) + def test_has_holders_device_does_not_exist(self): + assert not disk.has_holders('/dev/foobar') class TestLsblkParser(object): @@ -255,64 +281,72 @@ def test_no_devices_are_found(self, tmpdir, patched_get_block_devs_sysfs): result = disk.get_devices(_sys_block_path=str(tmpdir)) assert result == {} - def test_sda_block_is_found(self, patched_get_block_devs_sysfs, fake_filesystem): + @patch('ceph_volume.util.disk.udevadm_property') + def test_sda_block_is_found(self, m_udev_adm_property, patched_get_block_devs_sysfs, fake_filesystem): sda_path = '/dev/sda' - patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk']] + patched_get_block_devs_sysfs.return_value = 
[[sda_path, sda_path, 'disk', sda_path]] result = disk.get_devices() assert len(result.keys()) == 1 assert result[sda_path]['human_readable_size'] == '0.00 B' assert result[sda_path]['model'] == '' assert result[sda_path]['partitions'] == {} - def test_sda_size(self, patched_get_block_devs_sysfs, fake_filesystem): + @patch('ceph_volume.util.disk.udevadm_property') + def test_sda_size(self, m_udev_adm_property, patched_get_block_devs_sysfs, fake_filesystem): sda_path = '/dev/sda' - patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk']] + patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk', sda_path]] fake_filesystem.create_file('/sys/block/sda/size', contents = '1024') result = disk.get_devices() assert list(result.keys()) == [sda_path] assert result[sda_path]['human_readable_size'] == '512.00 KB' - def test_sda_sectorsize_fallsback(self, patched_get_block_devs_sysfs, fake_filesystem): + @patch('ceph_volume.util.disk.udevadm_property') + def test_sda_sectorsize_fallsback(self, m_udev_adm_property, patched_get_block_devs_sysfs, fake_filesystem): # if no sectorsize, it will use queue/hw_sector_size sda_path = '/dev/sda' - patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk']] + patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk', sda_path]] fake_filesystem.create_file('/sys/block/sda/queue/hw_sector_size', contents = '1024') result = disk.get_devices() assert list(result.keys()) == [sda_path] assert result[sda_path]['sectorsize'] == '1024' - def test_sda_sectorsize_from_logical_block(self, patched_get_block_devs_sysfs, fake_filesystem): + @patch('ceph_volume.util.disk.udevadm_property') + def test_sda_sectorsize_from_logical_block(self, m_udev_adm_property, patched_get_block_devs_sysfs, fake_filesystem): sda_path = '/dev/sda' - patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk']] + patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk', sda_path]] fake_filesystem.create_file('/sys/block/sda/queue/logical_block_size', contents = '99') result = disk.get_devices() assert result[sda_path]['sectorsize'] == '99' - def test_sda_sectorsize_does_not_fallback(self, patched_get_block_devs_sysfs, fake_filesystem): + @patch('ceph_volume.util.disk.udevadm_property') + def test_sda_sectorsize_does_not_fallback(self, m_udev_adm_property, patched_get_block_devs_sysfs, fake_filesystem): sda_path = '/dev/sda' - patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk']] + patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk', sda_path]] fake_filesystem.create_file('/sys/block/sda/queue/logical_block_size', contents = '99') fake_filesystem.create_file('/sys/block/sda/queue/hw_sector_size', contents = '1024') result = disk.get_devices() assert result[sda_path]['sectorsize'] == '99' - def test_is_rotational(self, patched_get_block_devs_sysfs, fake_filesystem): + @patch('ceph_volume.util.disk.udevadm_property') + def test_is_rotational(self, m_udev_adm_property, patched_get_block_devs_sysfs, fake_filesystem): sda_path = '/dev/sda' - patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk']] + patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk', sda_path]] fake_filesystem.create_file('/sys/block/sda/queue/rotational', contents = '1') result = disk.get_devices() assert result[sda_path]['rotational'] == '1' - def test_is_ceph_rbd(self, patched_get_block_devs_sysfs, fake_filesystem): + 
@patch('ceph_volume.util.disk.udevadm_property') + def test_is_ceph_rbd(self, m_udev_adm_property, patched_get_block_devs_sysfs, fake_filesystem): rbd_path = '/dev/rbd0' - patched_get_block_devs_sysfs.return_value = [[rbd_path, rbd_path, 'disk']] + patched_get_block_devs_sysfs.return_value = [[rbd_path, rbd_path, 'disk', rbd_path]] result = disk.get_devices() assert rbd_path not in result - def test_actuator_device(self, patched_get_block_devs_sysfs, fake_filesystem): + @patch('ceph_volume.util.disk.udevadm_property') + def test_actuator_device(self, m_udev_adm_property, patched_get_block_devs_sysfs, fake_filesystem): sda_path = '/dev/sda' fake_actuator_nb = 2 - patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk']] + patched_get_block_devs_sysfs.return_value = [[sda_path, sda_path, 'disk', sda_path]] for actuator in range(0, fake_actuator_nb): fake_filesystem.create_dir(f'/sys/block/sda/queue/independent_access_ranges/{actuator}') result = disk.get_devices() @@ -538,19 +572,176 @@ def test_formatting_tb(self): assert result == "1027.00 TB" -class TestAllowLoopDevsWarning(object): - def test_loop_dev_warning(self, fake_call, caplog): - assert disk.allow_loop_devices() is False - assert not caplog.records - os.environ['CEPH_VOLUME_ALLOW_LOOP_DEVICES'] = "y" - assert disk.allow_loop_devices() is True - log = caplog.records[0] - assert log.levelname == "WARNING" - assert "will never be supported in production" in log.message - - class TestHasBlueStoreLabel(object): def test_device_path_is_a_path(self, fake_filesystem): device_path = '/var/lib/ceph/osd/ceph-0' fake_filesystem.create_dir(device_path) - assert not disk.has_bluestore_label(device_path) \ No newline at end of file + assert not disk.has_bluestore_label(device_path) + + +class TestBlockSysFs(TestCase): + def setUp(self) -> None: + self.setUpPyfakefs() + self.fs.create_dir('/fake-area/foo/holders') + self.fs.create_dir('/fake-area/bar2/holders') + self.fs.create_file('/fake-area/bar2/holders/dm-0') + self.fs.create_file('/fake-area/foo/holders/dm-1') + self.fs.create_file('/fake-area/bar2/partition', contents='2') + self.fs.create_dir('/sys/dev/block') + self.fs.create_dir('/sys/block/foo') + self.fs.create_symlink('/sys/dev/block/8:0', '/fake-area/foo') + self.fs.create_symlink('/sys/dev/block/252:2', '/fake-area/bar2') + self.fs.create_file('/sys/block/dm-0/dm/uuid', contents='CRYPT-LUKS2-1234-abcdef') + self.fs.create_file('/sys/block/dm-1/dm/uuid', contents='LVM-abcdef') + + def test_init(self) -> None: + b = disk.BlockSysFs('/dev/foo') + assert b.path == '/dev/foo' + assert b.sys_dev_block == '/sys/dev/block' + assert b.sys_block == '/sys/block' + + def test_get_sys_dev_block_path(self) -> None: + b = disk.BlockSysFs('/dev/foo') + assert b.get_sys_dev_block_path == '/sys/dev/block/8:0' + + def test_is_partition_true(self) -> None: + b = disk.BlockSysFs('/dev/bar2') + assert b.is_partition + + def test_is_partition_false(self) -> None: + b = disk.BlockSysFs('/dev/foo') + assert not b.is_partition + + def test_holders(self) -> None: + b1 = disk.BlockSysFs('/dev/bar2') + b2 = disk.BlockSysFs('/dev/foo') + assert b1.holders == ['dm-0'] + assert b2.holders == ['dm-1'] + + def test_has_active_dmcrypt_mapper(self) -> None: + b = disk.BlockSysFs('/dev/bar2') + assert b.has_active_dmcrypt_mapper + + def test_has_active_mappers(self) -> None: + b = disk.BlockSysFs('/dev/foo') + assert b.has_active_mappers + + def test_active_mappers_dmcrypt(self) -> None: + b = disk.BlockSysFs('/dev/bar2') + assert b.active_mappers() + 
assert b.active_mappers()['dm-0'] + assert b.active_mappers()['dm-0']['type'] == 'CRYPT' + assert b.active_mappers()['dm-0']['dmcrypt_mapping'] == 'abcdef' + assert b.active_mappers()['dm-0']['dmcrypt_type'] == 'LUKS2' + assert b.active_mappers()['dm-0']['dmcrypt_uuid'] == '1234' + + def test_active_mappers_lvm(self) -> None: + b = disk.BlockSysFs('/dev/foo') + assert b.active_mappers() + assert b.active_mappers()['dm-1'] + assert b.active_mappers()['dm-1']['type'] == 'LVM' + assert b.active_mappers()['dm-1']['uuid'] == 'abcdef' + + +class TestUdevData(TestCase): + def setUp(self) -> None: + udev_data_lv_device: str = """ +S:disk/by-id/dm-uuid-LVM-1f1RaxWlzQ61Sbc7oCIHRMdh0M8zRTSnU03ekuStqWuiA6eEDmwoGg3cWfFtE2li +S:mapper/vg1-lv1 +S:disk/by-id/dm-name-vg1-lv1 +S:vg1/lv1 +I:837060642207 +E:DM_UDEV_DISABLE_OTHER_RULES_FLAG= +E:DM_UDEV_DISABLE_LIBRARY_FALLBACK_FLAG=1 +E:DM_UDEV_PRIMARY_SOURCE_FLAG=1 +E:DM_UDEV_RULES_VSN=2 +E:DM_NAME=fake_vg1-fake-lv1 +E:DM_UUID=LVM-1f1RaxWlzQ61Sbc7oCIHRMdh0M8zRTSnU03ekuStqWuiA6eEDmwoGg3cWfFtE2li +E:DM_SUSPENDED=0 +E:DM_VG_NAME=fake_vg1 +E:DM_LV_NAME=fake-lv1 +E:DM_LV_LAYER= +E:NVME_HOST_IFACE=none +E:SYSTEMD_READY=1 +G:systemd +Q:systemd +V:1""" + udev_data_bare_device: str = """ +S:disk/by-path/pci-0000:00:02.0 +S:disk/by-path/virtio-pci-0000:00:02.0 +S:disk/by-diskseq/1 +I:3037919 +E:ID_PATH=pci-0000:00:02.0 +E:ID_PATH_TAG=pci-0000_00_02_0 +E:ID_PART_TABLE_UUID=baefa409 +E:ID_PART_TABLE_TYPE=dos +E:NVME_HOST_IFACE=none +G:systemd +Q:systemd +V:1""" + self.fake_device: str = '/dev/cephtest' + self.setUpPyfakefs() + self.fs.create_file(self.fake_device, st_mode=(stat.S_IFBLK | 0o600)) + self.fs.create_file('/run/udev/data/b999:0', create_missing_dirs=True, contents=udev_data_bare_device) + self.fs.create_file('/run/udev/data/b998:1', create_missing_dirs=True, contents=udev_data_lv_device) + + def test_device_not_found(self) -> None: + self.fs.remove(self.fake_device) + with pytest.raises(RuntimeError): + disk.UdevData(self.fake_device) + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', Mock(return_value=0)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=999)) + def test_no_data(self) -> None: + self.fs.remove('/run/udev/data/b999:0') + with pytest.raises(RuntimeError): + disk.UdevData(self.fake_device) + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', Mock(return_value=0)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=999)) + def test_is_dm_false(self) -> None: + assert not disk.UdevData(self.fake_device).is_dm + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', Mock(return_value=1)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=998)) + def test_is_dm_true(self) -> None: + assert disk.UdevData(self.fake_device).is_dm + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', Mock(return_value=1)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=998)) + def test_is_lvm_true(self) -> None: + assert disk.UdevData(self.fake_device).is_dm + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', Mock(return_value=0)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=999)) + def test_is_lvm_false(self) -> None: + assert not disk.UdevData(self.fake_device).is_dm + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', 
Mock(return_value=1)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=998)) + def test_slashed_path_with_lvm(self) -> None: + assert disk.UdevData(self.fake_device).slashed_path == '/dev/fake_vg1/fake-lv1' + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', Mock(return_value=1)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=998)) + def test_dashed_path_with_lvm(self) -> None: + assert disk.UdevData(self.fake_device).dashed_path == '/dev/mapper/fake_vg1-fake-lv1' + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', Mock(return_value=0)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=999)) + def test_slashed_path_with_bare_device(self) -> None: + assert disk.UdevData(self.fake_device).slashed_path == '/dev/cephtest' + + @patch('ceph_volume.util.disk.os.stat', MagicMock()) + @patch('ceph_volume.util.disk.os.minor', Mock(return_value=0)) + @patch('ceph_volume.util.disk.os.major', Mock(return_value=999)) + def test_dashed_path_with_bare_device(self) -> None: + assert disk.UdevData(self.fake_device).dashed_path == '/dev/cephtest' \ No newline at end of file diff --git a/src/ceph-volume/ceph_volume/tests/util/test_encryption.py b/src/ceph-volume/ceph_volume/tests/util/test_encryption.py index cd2ea8f187fc..c155df691a6a 100644 --- a/src/ceph-volume/ceph_volume/tests/util/test_encryption.py +++ b/src/ceph-volume/ceph_volume/tests/util/test_encryption.py @@ -1,6 +1,46 @@ from ceph_volume.util import encryption -from mock.mock import patch +from mock.mock import call, patch, Mock, MagicMock +from typing import Any import base64 +import pytest +import json + + +class TestNoWorkqueue: + def setup_method(self): + encryption.conf.dmcrypt_no_workqueue = None + + @patch('ceph_volume.util.encryption.process.call', + Mock(return_value=(['cryptsetup 2.7.2 flags: UDEV BLKID KEYRING' \ + 'FIPS KERNEL_CAPI PWQUALITY '], [''], 0))) + def test_set_dmcrypt_no_workqueue_true(self): + encryption.set_dmcrypt_no_workqueue() + assert encryption.conf.dmcrypt_no_workqueue + + @patch('ceph_volume.util.encryption.process.call', + Mock(return_value=(['cryptsetup 2.0.0'], [''], 0))) + def test_set_dmcrypt_no_workqueue_false(self): + encryption.set_dmcrypt_no_workqueue() + assert encryption.conf.dmcrypt_no_workqueue is None + + @patch('ceph_volume.util.encryption.process.call', + Mock(return_value=([''], ['fake error'], 1))) + def test_set_dmcrypt_no_workqueue_cryptsetup_version_fails(self): + with pytest.raises(RuntimeError): + encryption.set_dmcrypt_no_workqueue() + + @patch('ceph_volume.util.encryption.process.call', + Mock(return_value=(['unexpected output'], [''], 0))) + def test_set_dmcrypt_no_workqueue_pattern_not_found(self): + with pytest.raises(RuntimeError): + encryption.set_dmcrypt_no_workqueue() + + @patch('ceph_volume.util.encryption.process.call', + Mock(return_value=([], [''], 0))) + def test_set_dmcrypt_no_workqueue_index_error(self): + with pytest.raises(RuntimeError): + encryption.set_dmcrypt_no_workqueue() + class TestGetKeySize(object): def test_get_size_from_conf_default(self, conf_ceph_stub): @@ -103,8 +143,9 @@ def test_luks_format_command_with_custom_size(self, m_call, conf_ceph_stub): class TestLuksOpen(object): + @patch('ceph_volume.util.encryption.bypass_workqueue', return_value=False) @patch('ceph_volume.util.encryption.process.call') - def test_luks_open_command_with_default_size(self, m_call, conf_ceph_stub): + def test_luks_open_command_with_default_size(self, 
m_call, m_bypass_workqueue, conf_ceph_stub): conf_ceph_stub('[global]\nfsid=abcd') expected = [ 'cryptsetup', @@ -120,8 +161,9 @@ def test_luks_open_command_with_default_size(self, m_call, conf_ceph_stub): encryption.luks_open('abcd', '/dev/foo', '/dev/bar') assert m_call.call_args[0][0] == expected + @patch('ceph_volume.util.encryption.bypass_workqueue', return_value=False) @patch('ceph_volume.util.encryption.process.call') - def test_luks_open_command_with_custom_size(self, m_call, conf_ceph_stub): + def test_luks_open_command_with_custom_size(self, m_call, m_bypass_workqueue, conf_ceph_stub): conf_ceph_stub('[global]\nfsid=abcd\n[osd]\nosd_dmcrypt_key_size=256') expected = [ 'cryptsetup', @@ -136,3 +178,145 @@ def test_luks_open_command_with_custom_size(self, m_call, conf_ceph_stub): ] encryption.luks_open('abcd', '/dev/foo', '/dev/bar') assert m_call.call_args[0][0] == expected + + @patch('ceph_volume.util.encryption.bypass_workqueue', return_value=False) + @patch('ceph_volume.util.encryption.process.call') + def test_luks_open_command_with_tpm(self, m_call, m_bypass_workqueue, conf_ceph_stub): + fake_mapping: str = 'fake-mapping' + fake_device: str = 'fake-device' + expected = [ + '/usr/lib/systemd/systemd-cryptsetup', + 'attach', + fake_mapping, + fake_device, + '-', + 'tpm2-device=auto,discard,headless=true,nofail', + ] + encryption.luks_open('', fake_device, fake_mapping, 1) + assert m_call.call_args[0][0] == expected + + @patch('ceph_volume.util.encryption.bypass_workqueue', return_value=True) + @patch('ceph_volume.util.encryption.process.call') + def test_luks_open_command_with_tpm_bypass_workqueue(self, m_call, m_bypass_workqueue, conf_ceph_stub): + fake_mapping: str = 'fake-mapping' + fake_device: str = 'fake-device' + expected = [ + '/usr/lib/systemd/systemd-cryptsetup', + 'attach', + fake_mapping, + fake_device, + '-', + 'tpm2-device=auto,discard,headless=true,nofail,no-read-workqueue,no-write-workqueue', + ] + encryption.luks_open('', fake_device, fake_mapping, 1) + assert m_call.call_args[0][0] == expected + + +class TestCephLuks2: + @patch.object(encryption.CephLuks2, 'get_osd_fsid', Mock(return_value='abcd-1234')) + @patch.object(encryption.CephLuks2, 'is_ceph_encrypted', Mock(return_value=True)) + def test_init_ceph_encrypted(self) -> None: + assert encryption.CephLuks2('/dev/foo').osd_fsid == 'abcd-1234' + + @patch.object(encryption.CephLuks2, 'get_osd_fsid', Mock(return_value='')) + @patch.object(encryption.CephLuks2, 'is_ceph_encrypted', Mock(return_value=False)) + def test_init_not_ceph_encrypted(self) -> None: + assert encryption.CephLuks2('/dev/foo').osd_fsid == '' + + def test_has_luks2_signature(self) -> None: + with patch('ceph_volume.util.encryption._dd_read', return_value='LUKS'): + assert encryption.CephLuks2('/dev/foo').has_luks2_signature + + @patch('ceph_volume.util.encryption._dd_read', side_effect=Exception('foo')) + def test_has_luks2_signature_raises_exception(self, m_dd_read: Any) -> None: + with pytest.raises(RuntimeError): + encryption.CephLuks2('/dev/foo').has_luks2_signature + + @patch.object(encryption.CephLuks2, 'get_subsystem', Mock(return_value='ceph_fsid=abcd')) + @patch.object(encryption.CephLuks2, 'has_luks2_signature', Mock(return_value=True)) + def test_is_ceph_encrypted(self) -> None: + assert encryption.CephLuks2('/dev/foo').is_ceph_encrypted + + @patch.object(encryption.CephLuks2, 'get_label', Mock(return_value='')) + @patch.object(encryption.CephLuks2, 'has_luks2_signature', Mock(return_value=True)) + def 
test_is_not_ceph_encrypted(self) -> None: + assert not encryption.CephLuks2('/dev/foo').is_ceph_encrypted + + @patch('ceph_volume.util.encryption.process.call', Mock(return_value=MagicMock())) + def test_config_luks2_invalid_config(self) -> None: + with pytest.raises(RuntimeError): + encryption.CephLuks2('/dev/foo').config_luks2({'subsystem': 'ceph_fsid=1234-abcd', 'label': 'foo', 'foo': 'bar'}) + + @patch('ceph_volume.util.encryption.process.call', Mock(return_value=MagicMock())) + def test_config_luks2_invalid_config_keys(self) -> None: + with pytest.raises(RuntimeError): + encryption.CephLuks2('/dev/foo').config_luks2({'fake': 'fake-value', 'subsystem': 'ceph_fsid=1234-abcd'}) + + @patch('ceph_volume.util.encryption.process.call') + def test_config_luks2_ok(self, m_call: Any) -> None: + m_call.return_value = ('', '', 0) + encryption.CephLuks2('/dev/foo').config_luks2({'label': 'foo', 'subsystem': 'ceph_fsid=1234-abcd'}) + assert m_call.mock_calls == [call(['cryptsetup', 'config', '/dev/foo', '--label', 'foo', '--subsystem', 'ceph_fsid=1234-abcd'], verbose_on_failure=False)] + + @patch('ceph_volume.util.encryption.process.call') + def test_config_luks2_raises_exception(self, m_call: Any) -> None: + m_call.return_value = ('', '', 1) + with pytest.raises(RuntimeError): + encryption.CephLuks2('/dev/foo').config_luks2({'label': 'foo', 'subsystem': 'ceph_fsid=1234-abcd'}) + + def test_get_label(self) -> None: + with patch('ceph_volume.util.encryption._dd_read', return_value='fake-luks2-label'): + label: str = encryption.CephLuks2('/dev/foo').get_label() + assert label == 'fake-luks2-label' + + def test_get_label_raises_exception(self) -> None: + with patch('ceph_volume.util.encryption._dd_read', side_effect=Exception('fake-error')): + with pytest.raises(RuntimeError): + encryption.CephLuks2('/dev/foo').get_label() + + @patch.object(encryption.CephLuks2, 'get_subsystem', Mock(return_value='ceph_fsid=abcd')) + def test_get_osd_fsid(self) -> None: + assert encryption.CephLuks2('/dev/foo').get_osd_fsid() == 'abcd' + + @patch.object(encryption.CephLuks2, 'get_label', Mock(return_value='ceph')) + def test_get_osd_fsid_error(self) -> None: + result: str = encryption.CephLuks2('/dev/foo').get_osd_fsid() + assert result == '' + + def test_get_subsystem(self) -> None: + with patch('ceph_volume.util.encryption._dd_read', return_value='fake-luks2-subsystem'): + assert encryption.CephLuks2('/dev/foo').get_subsystem() == 'fake-luks2-subsystem' + + def test_get_subsystem_raises_exception(self) -> None: + with patch('ceph_volume.util.encryption._dd_read', side_effect=Exception('fake-error')): + with pytest.raises(RuntimeError): + encryption.CephLuks2('/dev/foo').get_subsystem() + + def test_get_json_area(self) -> None: + mock_json_data = '{"tokens": {"1": {"type": "systemd-tpm2"}}}' + with patch('ceph_volume.util.encryption._dd_read', return_value=mock_json_data): + assert encryption.CephLuks2('/dev/foo').get_json_area() == json.loads(mock_json_data) + + def test_get_json_area_invalid(self) -> None: + with patch('ceph_volume.util.encryption._dd_read', return_value='invalid-json-data'): + with pytest.raises(RuntimeError): + encryption.CephLuks2('/dev/foo').get_json_area() + + def test_get_json_area_exception_caught(self) -> None: + with patch('ceph_volume.util.encryption._dd_read', side_effect=OSError): + with pytest.raises(OSError): + encryption.CephLuks2('/dev/foo').get_json_area() + + @patch('ceph_volume.util.encryption.lsblk', Mock(return_value={'FSTYPE': 'crypto_LUKS'})) + 
@patch.object(encryption.CephLuks2, 'get_json_area', Mock(return_value={"tokens": {"1": {"type": "systemd-tpm2"}}})) + def test_is_tpm2_enrolled_true(self) -> None: + assert encryption.CephLuks2('/dev/foo').is_tpm2_enrolled + + @patch('ceph_volume.util.encryption.lsblk', Mock(return_value={'FSTYPE': 'whatever'})) + def test_is_tpm2_enrolled_false_not_a_luks_device(self) -> None: + assert not encryption.CephLuks2('/dev/foo').is_tpm2_enrolled + + @patch('ceph_volume.util.encryption.lsblk', Mock(return_value={'FSTYPE': 'crypto_LUKS'})) + @patch.object(encryption.CephLuks2, 'get_json_area', Mock(return_value={"whatever": "fake-value"})) + def test_is_tpm2_enrolled_false_not_enrolled_with_tpm2(self) -> None: + assert not encryption.CephLuks2('/dev/foo').is_tpm2_enrolled diff --git a/src/ceph-volume/ceph_volume/tests/util/test_prepare.py b/src/ceph-volume/ceph_volume/tests/util/test_prepare.py index ee9774ecc833..d1f53bdddc78 100644 --- a/src/ceph-volume/ceph_volume/tests/util/test_prepare.py +++ b/src/ceph-volume/ceph_volume/tests/util/test_prepare.py @@ -5,6 +5,8 @@ from ceph_volume.util.prepare import system from ceph_volume import conf from ceph_volume.tests.conftest import Factory +from ceph_volume import objectstore +from mock.mock import patch class TestOSDIDAvailable(object): @@ -117,28 +119,50 @@ def test_underscore_options_are_used(self, conf_ceph_stub, fake_run): class TestOsdMkfsBluestore(object): + def setup_method(self): + conf.cluster = 'ceph' def test_keyring_is_added(self, fake_call, monkeypatch): monkeypatch.setattr(system, 'chown', lambda path: True) - prepare.osd_mkfs_bluestore(1, 'asdf', keyring='secret') - assert '--keyfile' in fake_call.calls[0]['args'][0] - - def test_keyring_is_not_added(self, fake_call, monkeypatch): + o = objectstore.baseobjectstore.BaseObjectStore([]) + o.osd_id = '1' + o.osd_fsid = 'asdf' + o.osd_mkfs() + assert '--keyfile' in fake_call.calls[2]['args'][0] + + def test_keyring_is_not_added(self, fake_call, monkeypatch, factory): + args = factory(dmcrypt=False) monkeypatch.setattr(system, 'chown', lambda path: True) - prepare.osd_mkfs_bluestore(1, 'asdf') + o = objectstore.bluestore.BlueStore([]) + o.args = args + o.osd_id = '1' + o.osd_fsid = 'asdf' + o.osd_mkfs() assert '--keyfile' not in fake_call.calls[0]['args'][0] - def test_wal_is_added(self, fake_call, monkeypatch): + def test_wal_is_added(self, fake_call, monkeypatch, objectstore_bluestore, factory): + args = factory(dmcrypt=False) monkeypatch.setattr(system, 'chown', lambda path: True) - prepare.osd_mkfs_bluestore(1, 'asdf', wal='/dev/smm1') - assert '--bluestore-block-wal-path' in fake_call.calls[0]['args'][0] - assert '/dev/smm1' in fake_call.calls[0]['args'][0] - - def test_db_is_added(self, fake_call, monkeypatch): + bs = objectstore_bluestore(objecstore='bluestore', + osd_id='1', + osd_fid='asdf', + wal_device_path='/dev/smm1', + cephx_secret='foo', + dmcrypt=False) + bs.args = args + bs.osd_mkfs() + assert '--bluestore-block-wal-path' in fake_call.calls[2]['args'][0] + assert '/dev/smm1' in fake_call.calls[2]['args'][0] + + def test_db_is_added(self, fake_call, monkeypatch, factory): + args = factory(dmcrypt=False) monkeypatch.setattr(system, 'chown', lambda path: True) - prepare.osd_mkfs_bluestore(1, 'asdf', db='/dev/smm2') - assert '--bluestore-block-db-path' in fake_call.calls[0]['args'][0] - assert '/dev/smm2' in fake_call.calls[0]['args'][0] + bs = objectstore.bluestore.BlueStore([]) + bs.args = args + bs.db_device_path = '/dev/smm2' + bs.osd_mkfs() + assert 
'--bluestore-block-db-path' in fake_call.calls[2]['args'][0] + assert '/dev/smm2' in fake_call.calls[2]['args'][0] class TestMountOSD(object): @@ -263,23 +287,29 @@ def test_normalize_strings_duplicate_flags(self, flags): result = sorted(prepare._normalize_mount_flags(flags, extras=['discard','rw']).split(',')) assert ','.join(result) == 'auto,discard,exec,rw' - +@patch('ceph_volume.util.prepare.create_key', return_value='fake-secret') class TestMkfsBluestore(object): - def test_non_zero_exit_status(self, stub_call, monkeypatch): + def test_non_zero_exit_status(self, m_create_key, stub_call, monkeypatch, objectstore_bluestore): conf.cluster = 'ceph' monkeypatch.setattr('ceph_volume.util.prepare.system.chown', lambda x: True) stub_call(([], [], 1)) + bs = objectstore_bluestore(osd_id='1', + osd_fsid='asdf-1234', + cephx_secret='keyring') with pytest.raises(RuntimeError) as error: - prepare.osd_mkfs_bluestore('1', 'asdf-1234', keyring='keyring') + bs.osd_mkfs() assert "Command failed with exit code 1" in str(error.value) - def test_non_zero_exit_formats_command_correctly(self, stub_call, monkeypatch): + def test_non_zero_exit_formats_command_correctly(self, m_create_key, stub_call, monkeypatch, objectstore_bluestore): conf.cluster = 'ceph' monkeypatch.setattr('ceph_volume.util.prepare.system.chown', lambda x: True) stub_call(([], [], 1)) + bs = objectstore_bluestore(osd_id='1', + osd_fsid='asdf-1234', + cephx_secret='keyring') with pytest.raises(RuntimeError) as error: - prepare.osd_mkfs_bluestore('1', 'asdf-1234', keyring='keyring') + bs.osd_mkfs() expected = ' '.join([ 'ceph-osd', '--cluster', diff --git a/src/ceph-volume/ceph_volume/util/arg_validators.py b/src/ceph-volume/ceph_volume/util/arg_validators.py index 1abb5165ec00..e75b34e550e3 100644 --- a/src/ceph-volume/ceph_volume/util/arg_validators.py +++ b/src/ceph-volume/ceph_volume/util/arg_validators.py @@ -4,11 +4,23 @@ from ceph_volume import terminal, decorators, process from ceph_volume.util.device import Device from ceph_volume.util import disk +from ceph_volume.util.encryption import set_dmcrypt_no_workqueue + + +mlogger = terminal.MultiLogger(__name__) def valid_osd_id(val): return str(int(val)) +class DmcryptAction(argparse._StoreTrueAction): + def __init__(self, *args, **kwargs): + super(DmcryptAction, self).__init__(*args, **kwargs) + + def __call__(self, *args, **kwargs): + set_dmcrypt_no_workqueue() + super(DmcryptAction, self).__call__(*args, **kwargs) + class ValidDevice(object): def __init__(self, as_string=False, gpt_ok=False): @@ -61,6 +73,17 @@ def _is_valid_device(self, raise_sys_exit=True): return self._device +class ValidClearReplaceHeaderDevice(ValidDevice): + def __call__(self, dev_path: str) -> str: + super().get_device(dev_path) + return self._format_device(self._is_valid_device()) + + def _is_valid_device(self) -> Device: + if not self._device.is_being_replaced: + mlogger.info(f'{self.dev_path} has no replacement header.') + return self._device + + class ValidDataDevice(ValidDevice): def __call__(self, dev_path): super().get_device(dev_path) @@ -83,6 +106,9 @@ def __call__(self, dev_path): super().get_device(dev_path) return self._format_device(self._is_valid_device()) + def _format_device(self, device: Device) -> str: + return device.path + def _is_valid_device(self, raise_sys_exit=True): out, err, rc = process.call([ 'ceph-bluestore-tool', 'show-label', diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py index d61222afe0a0..04eefeac750d 100644 --- 
a/src/ceph-volume/ceph_volume/util/device.py +++ b/src/ceph-volume/ceph_volume/util/device.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- - +# type: ignore import logging import os from functools import total_ordering -from ceph_volume import sys_info +from ceph_volume import sys_info, allow_loop_devices, BEING_REPLACED_HEADER from ceph_volume.api import lvm from ceph_volume.util import disk, system from ceph_volume.util.lsmdisk import LSMDisk from ceph_volume.util.constants import ceph_disk_guids -from ceph_volume.util.disk import allow_loop_devices +from typing import List, Tuple logger = logging.getLogger(__name__) @@ -86,13 +86,14 @@ class Device(object): {attr:<25} {value}""" report_fields = [ - 'ceph_device', + 'ceph_device_lvm', 'rejected_reasons', 'available', 'path', 'sys_api', 'device_id', 'lsm_data', + 'being_replaced' ] pretty_report_sys_fields = [ 'actuators', @@ -119,15 +120,10 @@ def __init__(self, path, with_lsm=False, lvs=None, lsblk_all=None, all_devices_v self.symlink = self.path real_path = os.path.realpath(self.path) # check if we are not a device mapper - if "dm-" not in real_path: + if "dm-" not in real_path and not self.is_lv: self.path = real_path - if not sys_info.devices: - if self.path: - sys_info.devices = disk.get_devices(device=self.path) - else: - sys_info.devices = disk.get_devices() - if sys_info.devices.get(self.path, {}): - self.device_nodes = sys_info.devices[self.path]['device_nodes'] + if not sys_info.devices.get(self.path): + sys_info.devices = disk.get_devices() self.sys_api = sys_info.devices.get(self.path, {}) self.partitions = self._get_partitions() self.lv_api = None @@ -141,8 +137,11 @@ def __init__(self, path, with_lsm=False, lvs=None, lsblk_all=None, all_devices_v self.blkid_api = None self._exists = None self._is_lvm_member = None - self.ceph_device = False + self.ceph_device_lvm = False + self.being_replaced: bool = self.is_being_replaced self._parse() + if self.path in sys_info.devices.keys(): + self.device_nodes = sys_info.devices[self.path]['device_nodes'] self.lsm_data = self.fetch_lsm(with_lsm) self.available_lvm, self.rejected_reasons_lvm = self._check_lvm_reject_reasons() @@ -215,12 +214,21 @@ def _parse(self): lv = _lv break else: + filters = {} if self.path[0] == '/': - lv = lvm.get_single_lv(filters={'lv_path': self.path}) + lv_mapper_path: str = self.path + field: str = 'lv_path' + + if self.path.startswith('/dev/mapper') or self.path.startswith('/dev/dm-'): + path = os.path.realpath(self.path) if self.path.startswith('/dev/mapper') else self.path + lv_mapper_path = disk.get_lvm_mapper_path_from_dm(path) + field = 'lv_dm_path' + + filters = {field: lv_mapper_path} else: vgname, lvname = self.path.split('/') - lv = lvm.get_single_lv(filters={'lv_name': lvname, - 'vg_name': vgname}) + filters = {'lv_name': lvname, 'vg_name': vgname} + lv = lvm.get_single_lv(filters=filters) if lv: self.lv_api = lv @@ -228,7 +236,7 @@ def _parse(self): self.path = lv.lv_path self.vg_name = lv.vg_name self.lv_name = lv.name - self.ceph_device = lvm.is_ceph_device(lv) + self.ceph_device_lvm = lvm.is_ceph_device(lv) else: self.lvs = [] if self.lsblk_all: @@ -293,7 +301,7 @@ def report(self): rot=self.rotational, available=self.available, model=self.model, - device_nodes=self.device_nodes + device_nodes=','.join(self.device_nodes) ) def json_report(self): @@ -358,7 +366,7 @@ def _set_lvm_membership(self): self._is_lvm_member = True self.lvs.extend(lvm.get_device_lvs(path)) if self.lvs: - self.ceph_device = any([True if lv.tags.get('ceph.osd_id') else 
False for lv in self.lvs]) + self.ceph_device_lvm = any([True if lv.tags.get('ceph.osd_id') else False for lv in self.lvs]) def _get_partitions(self): """ @@ -460,27 +468,28 @@ def is_mapper(self): def device_type(self): self.load_blkid_api() if 'type' in self.sys_api: - return self.sys_api['type'] + return self.sys_api.get('type') elif self.disk_api: - return self.disk_api['TYPE'] + return self.disk_api.get('TYPE') elif self.blkid_api: - return self.blkid_api['TYPE'] + return self.blkid_api.get('TYPE') @property def is_mpath(self): return self.device_type == 'mpath' @property - def is_lv(self): - return self.lv_api is not None + def is_lv(self) -> bool: + path = os.path.realpath(self.path) + return path in disk.get_lvm_mappers() @property def is_partition(self): self.load_blkid_api() if self.disk_api: - return self.disk_api['TYPE'] == 'part' + return self.disk_api.get('TYPE') == 'part' elif self.blkid_api: - return self.blkid_api['TYPE'] == 'part' + return self.blkid_api.get('TYPE') == 'part' return False @property @@ -584,7 +593,7 @@ def vg_free(self): return [vg_free] @property - def has_partitions(self): + def has_partitions(self) -> bool: ''' Boolean to determine if a given device has partitions. ''' @@ -592,10 +601,17 @@ def has_partitions(self): return True return False - def _check_generic_reject_reasons(self): + @property + def is_being_replaced(self) -> bool: + ''' + Boolean to indicate if the device is being replaced. + ''' + return disk._dd_read(self.path, 26) == BEING_REPLACED_HEADER + + def _check_generic_reject_reasons(self) -> List[str]: reasons = [ - ('removable', 1, 'removable'), - ('ro', 1, 'read-only'), + ('id_bus', 'usb', 'id_bus'), + ('ro', '1', 'read-only'), ] rejected = [reason for (k, v, reason) in reasons if self.sys_api.get(k, '') == v] @@ -633,9 +649,11 @@ def _check_generic_reject_reasons(self): rejected.append('Has partitions') if self.has_fs: rejected.append('Has a FileSystem') + if self.is_being_replaced: + rejected.append('Is being replaced') return rejected - def _check_lvm_reject_reasons(self): + def _check_lvm_reject_reasons(self) -> Tuple[bool, List[str]]: rejected = [] if self.vgs: available_vgs = [vg for vg in self.vgs if int(vg.vg_free_count) > 10] @@ -648,7 +666,7 @@ def _check_lvm_reject_reasons(self): return len(rejected) == 0, rejected - def _check_raw_reject_reasons(self): + def _check_raw_reject_reasons(self) -> Tuple[bool, List[str]]: rejected = self._check_generic_reject_reasons() if len(self.vgs) > 0: rejected.append('LVM detected') diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py index ee061b724007..77b55314f660 100644 --- a/src/ceph-volume/ceph_volume/util/disk.py +++ b/src/ceph-volume/ceph_volume/util/disk.py @@ -3,9 +3,11 @@ import re import stat import time -from ceph_volume import process +import json +from ceph_volume import process, allow_loop_devices from ceph_volume.api import lvm from ceph_volume.util.system import get_file_contents +from typing import Dict, List, Any, Union, Optional logger = logging.getLogger(__name__) @@ -249,7 +251,9 @@ def lsblk(device, columns=None, abspath=False): return result[0] -def lsblk_all(device='', columns=None, abspath=False): +def lsblk_all(device: str = '', + columns: Optional[List[str]] = None, + abspath: bool = False) -> List[Dict[str, str]]: """ Create a dictionary of identifying values for a device using ``lsblk``. 
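# [Editor's example] The docstring above refers to lsblk's raw, all-uppercase
# column names. A minimal sketch of turning one `lsblk -P -o NAME,TYPE,FSTYPE`
# record into such a dict; the sample line is illustrative and this is not
# necessarily the exact parsing ceph-volume performs.
import shlex

line = 'NAME="sda1" TYPE="part" FSTYPE="crypto_LUKS"'
record = dict(field.split('=', 1) for field in shlex.split(line))
# -> {'NAME': 'sda1', 'TYPE': 'part', 'FSTYPE': 'crypto_LUKS'}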
Each supported column is a key, in its *raw* format (all uppercase @@ -330,7 +334,6 @@ def lsblk_all(device='', columns=None, abspath=False): if device: base_command.append('--nodeps') base_command.append(device) - out, err, rc = process.call(base_command) if rc != 0: @@ -364,30 +367,18 @@ def is_device(dev): return TYPE in ['disk', 'mpath'] # fallback to stat - return _stat_is_device(os.lstat(dev).st_mode) + return _stat_is_device(os.lstat(dev).st_mode) and not is_partition(dev) -def is_partition(dev): +def is_partition(dev: str) -> bool: """ Boolean to determine if a given device is a partition, like /dev/sda1 """ if not os.path.exists(dev): return False - # use lsblk first, fall back to using stat - TYPE = lsblk(dev).get('TYPE') - if TYPE: - return TYPE == 'part' - # fallback to stat - stat_obj = os.stat(dev) - if _stat_is_device(stat_obj.st_mode): - return False - - major = os.major(stat_obj.st_rdev) - minor = os.minor(stat_obj.st_rdev) - if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)): - return True - return False + partitions = get_partitions() + return dev.split("/")[-1] in partitions def is_ceph_rbd(dev): @@ -738,61 +729,34 @@ def is_mapper_device(device_name): return device_name.startswith(('/dev/mapper', '/dev/dm-')) -class AllowLoopDevices(object): - allow = False - warned = False - - @classmethod - def __call__(cls): - val = os.environ.get("CEPH_VOLUME_ALLOW_LOOP_DEVICES", "false").lower() - if val not in ("false", 'no', '0'): - cls.allow = True - if not cls.warned: - logger.warning( - "CEPH_VOLUME_ALLOW_LOOP_DEVICES is set in your " - "environment, so we will allow the use of unattached loop" - " devices as disks. This feature is intended for " - "development purposes only and will never be supported in" - " production. Issues filed based on this behavior will " - "likely be ignored." 
- ) - cls.warned = True - return cls.allow - - -allow_loop_devices = AllowLoopDevices() - - -def get_block_devs_sysfs(_sys_block_path='/sys/block', _sys_dev_block_path='/sys/dev/block', device=''): - def holder_inner_loop(): +def get_block_devs_sysfs(_sys_block_path: str = '/sys/block', _sys_dev_block_path: str = '/sys/dev/block', device: str = '') -> List[List[str]]: + def holder_inner_loop() -> bool: for holder in holders: # /sys/block/sdy/holders/dm-8/dm/uuid - holder_dm_type = get_file_contents(os.path.join(_sys_block_path, dev, f'holders/{holder}/dm/uuid')).split('-')[0].lower() + holder_dm_type: str = get_file_contents(os.path.join(_sys_block_path, dev, f'holders/{holder}/dm/uuid')).split('-')[0].lower() if holder_dm_type == 'mpath': return True # First, get devices that are _not_ partitions - result = list() + result: List[List[str]] = list() if not device: - dev_names = os.listdir(_sys_block_path) + dev_names: List[str] = os.listdir(_sys_block_path) else: dev_names = [device] for dev in dev_names: - name = kname = os.path.join("/dev", dev) + name = kname = pname = os.path.join("/dev", dev) if not os.path.exists(name): continue - type_ = 'disk' - holders = os.listdir(os.path.join(_sys_block_path, dev, 'holders')) - if get_file_contents(os.path.join(_sys_block_path, dev, 'removable')) == "1": - continue + type_: str = 'disk' + holders: List[str] = os.listdir(os.path.join(_sys_block_path, dev, 'holders')) if holder_inner_loop(): continue - dm_dir_path = os.path.join(_sys_block_path, dev, 'dm') + dm_dir_path: str = os.path.join(_sys_block_path, dev, 'dm') if os.path.isdir(dm_dir_path): - dm_type = get_file_contents(os.path.join(dm_dir_path, 'uuid')) - type_ = dm_type.split('-')[0].lower() - basename = get_file_contents(os.path.join(dm_dir_path, 'name')) - name = os.path.join("/dev/mapper", basename) + dm_type: str = get_file_contents(os.path.join(dm_dir_path, 'uuid')) + type_: List[str] = dm_type.split('-')[0].lower() + basename: str = get_file_contents(os.path.join(dm_dir_path, 'name')) + name: str = os.path.join("/dev/mapper", basename) if dev.startswith('loop'): if not allow_loop_devices(): continue @@ -800,28 +764,36 @@ def holder_inner_loop(): if not os.path.exists(os.path.join(_sys_block_path, dev, 'loop')): continue type_ = 'loop' - result.append([kname, name, type_]) + result.append([kname, name, type_, pname]) # Next, look for devices that _are_ partitions - for item in os.listdir(_sys_dev_block_path): - is_part = get_file_contents(os.path.join(_sys_dev_block_path, item, 'partition')) == "1" - dev = os.path.basename(os.readlink(os.path.join(_sys_dev_block_path, item))) - if not is_part: - continue - name = kname = os.path.join("/dev", dev) - result.append([name, kname, "part"]) + partitions: Dict[str, str] = get_partitions() + for partition in partitions.keys(): + name = kname = os.path.join("/dev", partition) + result.append([name, kname, "part", partitions[partition]]) return sorted(result, key=lambda x: x[0]) -def get_partitions(_sys_dev_block_path ='/sys/dev/block'): - devices = os.listdir(_sys_dev_block_path) - result = dict() +def get_partitions(_sys_dev_block_path: str ='/sys/dev/block') -> Dict[str, str]: + """ + Retrieves a dictionary mapping partition system names to their parent device names. + + Args: + _sys_dev_block_path (str, optional): The path to the system's block device directory. + Defaults to '/sys/dev/block'. + + Returns: + Dict[str, str]: A dictionary where the keys are partition system names, and the values are + the corresponding parent device names. 
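# [Editor's example] Standalone sketch of the /sys/dev/block walk documented
# above, assuming the usual Linux sysfs layout; the result shown in the final
# comment is illustrative only.
import os

def partition_parents(sys_dev_block: str = '/sys/dev/block') -> dict:
    result = {}
    for entry in os.listdir(sys_dev_block):
        entry_path = os.path.join(sys_dev_block, entry)
        # only partitions expose a 'partition' file (it holds the partition index)
        if not os.path.exists(os.path.join(entry_path, 'partition')):
            continue
        real = os.path.realpath(entry_path)          # e.g. .../block/sda/sda1
        result[os.path.basename(real)] = real.split('/')[-2]
    return result

# expected shape: {'sda1': 'sda', 'nvme0n1p1': 'nvme0n1'}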
+ """ + devices: List[str] = os.listdir(_sys_dev_block_path) + result: Dict[str, str] = {} for device in devices: - device_path = os.path.join(_sys_dev_block_path, device) - is_partition = get_file_contents(os.path.join(device_path, 'partition')) == "1" + device_path: str = os.path.join(_sys_dev_block_path, device) + is_partition: bool = int(get_file_contents(os.path.join(device_path, 'partition'), '0')) > 0 if not is_partition: continue - partition_sys_name = os.path.basename(os.readlink(device_path)) - parent_device_sys_name = os.readlink(device_path).split('/')[-2:-1][0] + partition_sys_name: str = os.path.basename(os.path.realpath(device_path)) + parent_device_sys_name: str = os.path.realpath(device_path).split('/')[-2:-1][0] result[partition_sys_name] = parent_device_sys_name return result @@ -839,23 +811,22 @@ def get_devices(_sys_block_path='/sys/block', device=''): device_facts = {} block_devs = get_block_devs_sysfs(_sys_block_path) - partitions = get_partitions() block_types = ['disk', 'mpath', 'lvm', 'part'] if allow_loop_devices(): block_types.append('loop') for block in block_devs: + metadata: Dict[str, Any] = {} if block[2] == 'lvm': - block[1] = lvm.get_lv_path_from_mapper(block[1]) + block[1] = UdevData(block[1]).slashed_path devname = os.path.basename(block[0]) diskname = block[1] if block[2] not in block_types: continue sysdir = os.path.join(_sys_block_path, devname) if block[2] == 'part': - sysdir = os.path.join(_sys_block_path, partitions[devname], devname) - metadata = {} + sysdir = os.path.join(_sys_block_path, block[3], devname) # If the device is ceph rbd it gets excluded if is_ceph_rbd(diskname): @@ -882,17 +853,19 @@ def get_devices(_sys_block_path='/sys/block', device=''): for key, file_ in facts: metadata[key] = get_file_contents(os.path.join(sysdir, file_)) + device_slaves = [] if block[2] != 'part': device_slaves = os.listdir(os.path.join(sysdir, 'slaves')) metadata['partitions'] = get_partitions_facts(sysdir) + metadata['device_nodes'] = [] if device_slaves: - metadata['device_nodes'] = ','.join(device_slaves) + metadata['device_nodes'].extend(device_slaves) else: if block[2] == 'part': - metadata['device_nodes'] = partitions[devname] + metadata['device_nodes'].append(block[3]) else: - metadata['device_nodes'] = devname + metadata['device_nodes'].append(devname) metadata['actuators'] = None if os.path.isdir(sysdir + "/queue/independent_access_ranges/"): @@ -920,7 +893,13 @@ def get_devices(_sys_block_path='/sys/block', device=''): metadata['size'] = float(size) * 512 metadata['human_readable_size'] = human_readable_size(metadata['size']) metadata['path'] = diskname + metadata['devname'] = devname metadata['type'] = block[2] + metadata['parent'] = block[3] + + # some facts from udevadm + p = udevadm_property(sysdir) + metadata['id_bus'] = p.get('ID_BUS', '') device_facts[diskname] = metadata return device_facts @@ -941,3 +920,477 @@ def has_bluestore_label(device_path): logger.info(f'{device_path} is a directory, skipping.') return isBluestore + +def get_lvm_mappers(sys_block_path: str = '/sys/block') -> List[str]: + """ + Retrieve a list of Logical Volume Manager (LVM) device mappers. + + This function scans the given system block path for device mapper (dm) devices + and identifies those that are managed by LVM. For each LVM device found, it adds + the corresponding paths to the result list. + + Args: + sys_block_path (str, optional): The path to the system block directory. Defaults to '/sys/block'. 
+ + Returns: + List[str]: A list of strings representing the paths of LVM device mappers. + Each LVM device will have two entries: the /dev/mapper/ path and the /dev/ path. + """ + result: List[str] = [] + for device in os.listdir(sys_block_path): + path: str = os.path.join(sys_block_path, device, 'dm') + uuid_path: str = os.path.join(path, 'uuid') + name_path: str = os.path.join(path, 'name') + + if os.path.exists(uuid_path): + with open(uuid_path, 'r') as f: + mapper_type: str = f.read().split('-')[0] + + if mapper_type == 'LVM': + with open(name_path, 'r') as f: + name: str = f.read() + result.append(f'/dev/mapper/{name.strip()}') + result.append(f'/dev/{device}') + return result + +def _dd_read(device: str, count: int, skip: int = 0) -> str: + """Read bytes from a device + + Args: + device (str): The device to read bytes from. + count (int): The number of bytes to read. + skip (int, optional): The number of bytes to skip at the beginning. Defaults to 0. + + Returns: + str: A string containing the read bytes. + """ + result: str = '' + try: + with open(device, 'rb') as b: + b.seek(skip) + data: bytes = b.read(count) + result = data.decode('utf-8').replace('\x00', '') + except OSError: + logger.warning(f"Can't read from {device}") + pass + except UnicodeDecodeError: + pass + except Exception as e: + logger.error(f"An error occurred while reading from {device}: {e}") + raise + + return result + +def _dd_write(device: str, data: Union[str, bytes], skip: int = 0) -> None: + """Write bytes to a device + + Args: + device (str): The device to write bytes to. + data (str): The data to write to the device. + skip (int, optional): The number of bytes to skip at the beginning. Defaults to 0. + + Raises: + OSError: If there is an error opening or writing to the device. + Exception: If any other error occurs during the write operation. + """ + + if isinstance(data, str): + data = data.encode('utf-8') + + try: + with open(device, 'r+b') as b: + b.seek(skip) + b.write(data) + except OSError: + logger.warning(f"Can't write to {device}") + raise + except Exception as e: + logger.error(f"An error occurred while writing to {device}: {e}") + raise + +def get_bluestore_header(device: str) -> Dict[str, Any]: + """Retrieve BlueStore header information from a given device. + + This function retrieves BlueStore header information from the specified 'device'. + It first checks if the device exists. If the device does not exist, a RuntimeError + is raised. Then, it calls the 'ceph-bluestore-tool' command to show the label + information of the device. If the command execution is successful, it parses the + JSON output containing the BlueStore header information and returns it as a dictionary. + + Args: + device (str): The path to the device. + + Returns: + Dict[str, Any]: A dictionary containing BlueStore header information. + """ + data: Dict[str, Any] = {} + + if os.path.exists(device): + out, err, rc = process.call([ + 'ceph-bluestore-tool', 'show-label', + '--dev', device], verbose_on_failure=False) + if rc: + logger.debug(f'device {device} is not BlueStore; ceph-bluestore-tool failed to get info from device: {out}\n{err}') + else: + data = json.loads(''.join(out)) + else: + logger.warning(f'device {device} not found.') + return data + +def bluestore_info(device: str, bluestore_labels: Dict[str, Any]) -> Dict[str, Any]: + """Build a dict representation of a BlueStore header + + Args: + device (str): The path of the BlueStore device. 
+ bluestore_labels (Dict[str, Any]): Plain text output from `ceph-bluestore-tool show-label` + + Returns: + Dict[str, Any]: Generated dict representation of the BlueStore header + """ + result: Dict[str, Any] = {} + result['osd_uuid'] = bluestore_labels[device]['osd_uuid'] + if bluestore_labels[device]['description'] == 'main': + whoami = bluestore_labels[device]['whoami'] + result.update({ + 'type': bluestore_labels[device].get('type', 'bluestore'), + 'osd_id': int(whoami), + 'ceph_fsid': bluestore_labels[device]['ceph_fsid'], + 'device': device, + }) + if bluestore_labels[device].get('db_device_uuid', ''): + result['db_device_uuid'] = bluestore_labels[device].get('db_device_uuid') + if bluestore_labels[device].get('wal_device_uuid', ''): + result['wal_device_uuid'] = bluestore_labels[device].get('wal_device_uuid') + elif bluestore_labels[device]['description'] == 'bluefs db': + result['device_db'] = device + elif bluestore_labels[device]['description'] == 'bluefs wal': + result['device_wal'] = device + return result + +def get_block_device_holders(sys_block: str = '/sys/block') -> Dict[str, Any]: + """Get a dictionary of device mappers with their corresponding parent devices. + + This function retrieves information about device mappers and their parent devices + from the '/sys/block' directory. It iterates through each directory within 'sys_block', + and for each directory, it checks if a 'holders' directory exists. If so, it lists + the contents of the 'holders' directory and constructs a dictionary where the keys + are the device mappers and the values are their corresponding parent devices. + + Args: + sys_block (str, optional): The path to the '/sys/block' directory. Defaults to '/sys/block'. + + Returns: + Dict[str, Any]: A dictionary where keys are device mappers (e.g., '/dev/mapper/...') and + values are their corresponding parent devices (e.g., '/dev/sdX'). + """ + result: Dict[str, Any] = {} + for b in os.listdir(sys_block): + path: str = os.path.join(sys_block, b, 'holders') + if os.path.exists(path): + for h in os.listdir(path): + result[f'/dev/{h}'] = f'/dev/{b}' + + return result + +def has_holders(device: str) -> bool: + """Check if a given device has any associated holders. + + This function determines whether the specified device has associated holders + (e.g., other devices that depend on it) by checking if the device's real path + appears in the values of the dictionary returned by `get_block_device_holders`. + + Args: + device (str): The path to the device (e.g., '/dev/sdX') to check. + + Returns: + bool: True if the device has holders, False otherwise. + """ + return os.path.realpath(device) in get_block_device_holders().values() + +def get_parent_device_from_mapper(mapper: str, abspath: bool = True) -> str: + """Get the parent device corresponding to a given device mapper. + + This function retrieves the parent device corresponding to a given device mapper + from the dictionary returned by the 'get_block_device_holders' function. It first + checks if the specified 'mapper' exists. If it does, it resolves the real path of + the mapper using 'os.path.realpath'. Then, it attempts to retrieve the parent device + from the dictionary. If the mapper is not found in the dictionary, an empty string + is returned. + + Args: + mapper (str): The path to the device mapper. + abspath (bool, optional): If True (default), returns the absolute path of the parent device. + If False, returns only the basename of the parent device. 
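# [Editor's example] Sketch of the holders mapping the helpers above build from
# /sys/block/<dev>/holders; the device names in the trailing comment are
# hypothetical.
import os

def holders_map(sys_block: str = '/sys/block') -> dict:
    result = {}
    for dev in os.listdir(sys_block):
        holders_dir = os.path.join(sys_block, dev, 'holders')
        if os.path.isdir(holders_dir):
            for holder in os.listdir(holders_dir):
                result[f'/dev/{holder}'] = f'/dev/{dev}'
    return result

# e.g. {'/dev/dm-0': '/dev/sdb'}; a /dev/mapper path is first resolved with
# os.path.realpath() before being looked up, as get_parent_device_from_mapper does.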
+ + Returns: + str: The parent device corresponding to the given device mapper, or an empty string + if the mapper is not found in the dictionary of device mappers. + """ + result: str = '' + if os.path.exists(mapper): + _mapper: str = os.path.realpath(mapper) + try: + result = get_block_device_holders()[_mapper] + if not abspath: + result = os.path.basename(result) + except KeyError: + pass + return result + +def get_lvm_mapper_path_from_dm(path: str, sys_block: str = '/sys/block') -> str: + """Retrieve the logical volume path for a given device. + + This function takes the path of a device and returns the corresponding + logical volume path by reading the 'dm/name' file within the sysfs + directory. + + Args: + path (str): The device path for which to retrieve the logical volume path. + sys_block (str, optional): The base sysfs block directory. Defaults to '/sys/block'. + + Returns: + str: The device mapper path in the 'dashed form' of '/dev/mapper/vg-lv'. + """ + result: str = '' + dev: str = os.path.basename(path) + sys_block_path: str = os.path.join(sys_block, dev, 'dm/name') + if os.path.exists(sys_block_path): + with open(sys_block_path, 'r') as f: + content: str = f.read() + result = f'/dev/mapper/{content}' + return result.strip() + + +class BlockSysFs: + def __init__(self, + path: str, + sys_dev_block: str = '/sys/dev/block', + sys_block: str = '/sys/block') -> None: + """ + Initializes a BlockSysFs object. + + Args: + path (str): The path to the block device. + sys_dev_block (str, optional): Path to the sysfs directory containing block devices. + Defaults to '/sys/dev/block'. + sys_block (str, optional): Path to the sysfs directory containing block information. + Defaults to '/sys/block'. + """ + self.path: str = path + self.name: str = os.path.basename(os.path.realpath(self.path)) + self.sys_dev_block: str = sys_dev_block + self.sys_block: str = sys_block + + @property + def is_partition(self) -> bool: + """ + Checks if the current block device is a partition. + + Returns: + bool: True if it is a partition, False otherwise. + """ + path: str = os.path.join(self.get_sys_dev_block_path, 'partition') + return os.path.exists(path) + + @property + def holders(self) -> List[str]: + """ + Retrieves the holders of the current block device. + + Returns: + List[str]: A list of holders (other devices) associated with this block device. + """ + result: List[str] = [] + path: str = os.path.join(self.get_sys_dev_block_path, 'holders') + if os.path.exists(path): + result = os.listdir(path) + return result + + @property + def get_sys_dev_block_path(self) -> str: + """ + Gets the sysfs path for the current block device. + + Returns: + str: The sysfs path corresponding to this block device. + """ + sys_dev_block_path: str = '' + devices: List[str] = os.listdir(self.sys_dev_block) + for device in devices: + path = os.path.join(self.sys_dev_block, device) + if os.path.realpath(path).split('/')[-1:][0] == self.name: + sys_dev_block_path = path + return sys_dev_block_path + + @property + def has_active_mappers(self) -> bool: + """ + Checks if there are any active device mappers for the current block device. + + Returns: + bool: True if active mappers exist, False otherwise. + """ + return len(self.active_mappers()) > 0 + + @property + def has_active_dmcrypt_mapper(self) -> bool: + """ + Checks if there is an active dm-crypt (disk encryption) mapper for the current block device. + + Returns: + bool: True if an active dm-crypt mapper exists, False otherwise. 
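# [Editor's example] Sketch of classifying a '/sys/block/<holder>/dm/uuid'
# string the way active_mappers() below does; the sample value is illustrative,
# following the kernel's 'TYPE-...' naming convention.
def classify_dm_uuid(content: str) -> dict:
    parts = content.strip().split('-', maxsplit=3)
    info = {'type': parts[0]}
    if parts[0] == 'CRYPT':
        info['dmcrypt_type'] = parts[1]       # e.g. LUKS2 or PLAIN
        info['dmcrypt_uuid'] = parts[2]
        info['dmcrypt_mapping'] = parts[3]
    elif parts[0] == 'LVM':
        info['uuid'] = parts[1]
    return info

# classify_dm_uuid('CRYPT-LUKS2-0b27bab7-osd--block--dmcrypt')['type'] == 'CRYPT'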
+ """ + return any(value.get('type') == 'CRYPT' for value in self.active_mappers().values()) + + def active_mappers(self) -> Dict[str, Any]: + """ + Retrieves information about active device mappers for the current block device. + + Returns: + Dict[str, Any]: A dictionary containing details about active device mappers. + Keys are the holders, and values provide details like type, + dm-crypt metadata, and LVM UUIDs. + """ + result: Dict[str, Any] = {} + for holder in self.holders: + path: str = os.path.join(self.sys_block, holder, 'dm/uuid') + if os.path.exists(path): + result[holder] = {} + with open(path, 'r') as f: + content: str = f.read().strip() + content_split: List[str] = content.split('-', maxsplit=3) + mapper_type: str = content_split[0] + result[holder]['type'] = mapper_type + if mapper_type == 'CRYPT': + result[holder]['dmcrypt_type'] = content_split[1] + result[holder]['dmcrypt_uuid'] = content_split[2] + result[holder]['dmcrypt_mapping'] = content_split[3] + if mapper_type == 'LVM': + result[holder]['uuid'] = content_split[1] + return result + +class UdevData: + """ + Class representing udev data for a specific device. + This class extracts and stores relevant information about the device from udev files. + + Attributes: + ----------- + path : str + The initial device path (e.g., /dev/sda). + realpath : str + The resolved real path of the device. + stats : os.stat_result + The result of the os.stat() call to retrieve device metadata. + major : int + The device's major number. + minor : int + The device's minor number. + udev_data_path : str + The path to the udev metadata for the device (e.g., /run/udev/data/b:). + symlinks : List[str] + A list of symbolic links pointing to the device. + id : str + A unique identifier for the device. + environment : Dict[str, str] + A dictionary containing environment variables extracted from the udev data. + group : str + The group associated with the device. + queue : str + The queue associated with the device. + version : str + The version of the device or its metadata. + """ + def __init__(self, path: str) -> None: + """Initialize an instance of the UdevData class and load udev information. + + Args: + path (str): The path to the device to be analyzed (e.g., /dev/sda). + + Raises: + RuntimeError: Raised if no udev data file is found for the specified device. 
+ """ + if not os.path.exists(path): + raise RuntimeError(f'{path} not found.') + self.path: str = path + self.realpath: str = os.path.realpath(self.path) + self.stats: os.stat_result = os.stat(self.realpath) + self.major: int = os.major(self.stats.st_rdev) + self.minor: int = os.minor(self.stats.st_rdev) + self.udev_data_path: str = f'/run/udev/data/b{self.major}:{self.minor}' + self.symlinks: List[str] = [] + self.id: str = '' + self.environment: Dict[str, str] = {} + self.group: str = '' + self.queue: str = '' + self.version: str = '' + + if not os.path.exists(self.udev_data_path): + raise RuntimeError(f'No udev data could be retrieved for {self.path}') + + with open(self.udev_data_path, 'r') as f: + content: str = f.read().strip() + self.raw_data: List[str] = content.split('\n') + + for line in self.raw_data: + data_type, data = line.split(':', 1) + if data_type == 'S': + self.symlinks.append(data) + if data_type == 'I': + self.id = data + if data_type == 'E': + key, value = data.split('=') + self.environment[key] = value + if data_type == 'G': + self.group = data + if data_type == 'Q': + self.queue = data + if data_type == 'V': + self.version = data + + @property + def is_dm(self) -> bool: + """Check if the device is a device mapper (DM). + + Returns: + bool: True if the device is a device mapper, otherwise False. + """ + return 'DM_UUID' in self.environment.keys() + + @property + def is_lvm(self) -> bool: + """Check if the device is a Logical Volume Manager (LVM) volume. + + Returns: + bool: True if the device is an LVM volume, otherwise False. + """ + return self.environment.get('DM_UUID', '').startswith('LVM') + + @property + def slashed_path(self) -> str: + """Get the LVM path structured with slashes. + + Returns: + str: A path using slashes if the device is an LVM volume (e.g., /dev/vgname/lvname), + otherwise the original path. + """ + result: str = self.path + if self.is_lvm: + vg: str = self.environment.get('DM_VG_NAME', '') + lv: str = self.environment.get('DM_LV_NAME', '') + result = f'/dev/{vg}/{lv}' + return result + + @property + def dashed_path(self) -> str: + """Get the LVM path structured with dashes. + + Returns: + str: A path using dashes if the device is an LVM volume (e.g., /dev/mapper/vgname-lvname), + otherwise the original path. + """ + result: str = self.path + if self.is_lvm: + name: str = self.environment.get('DM_NAME', '') + result = f'/dev/mapper/{name}' + return result diff --git a/src/ceph-volume/ceph_volume/util/encryption.py b/src/ceph-volume/ceph_volume/util/encryption.py index f8aea80b4935..5de77d21a9a1 100644 --- a/src/ceph-volume/ceph_volume/util/encryption.py +++ b/src/ceph-volume/ceph_volume/util/encryption.py @@ -1,15 +1,71 @@ import base64 import os import logging +import re +import json from ceph_volume import process, conf, terminal from ceph_volume.util import constants, system from ceph_volume.util.device import Device from .prepare import write_keyring -from .disk import lsblk, device_family, get_part_entry_type +from .disk import lsblk, device_family, get_part_entry_type, _dd_read +from packaging import version +from typing import Any, Dict, List logger = logging.getLogger(__name__) mlogger = terminal.MultiLogger(__name__) +def set_dmcrypt_no_workqueue(target_version: str = '2.3.4') -> None: + """Set `conf.dmcrypt_no_workqueue` to `True` if the installed version + of `cryptsetup` is greater than or equal to the specified `target_version`. + + Depending on the crypsetup version, `cryptsetup --version` output can be different. 
+ Eg: + + CentOS Stream9: + $ cryptsetup --version + cryptsetup 2.6.0 flags: UDEV BLKID KEYRING FIPS KERNEL_CAPI PWQUALITY + + CentOS Stream8: + $ cryptsetup --version + cryptsetup 2.3.7 + + Args: + target_version (str, optional): The minimum version required for setting + `conf.dmcrypt_no_workqueue` to `True`. Defaults to '2.3.4'. + + Raises: + RuntimeError: If failed to retrieve the cryptsetup version. + RuntimeError: If failed to parse the cryptsetup version. + RuntimeError: If failed to compare the cryptsetup version with the target version. + """ + command = ["cryptsetup", "--version"] + out, err, rc = process.call(command) + + # This regex extracts the version number from + # the `cryptsetup --version` output + pattern: str = r'(\d+\.?)+' + + if rc: + raise RuntimeError(f"Can't retrieve cryptsetup version: {err}") + + try: + cryptsetup_version = re.search(pattern, out[0]) + + if cryptsetup_version is None: + _output: str = "\n".join(out) + raise RuntimeError('Error while checking cryptsetup version.\n', + '`cryptsetup --version` output:\n', + f'{_output}') + + if version.parse(cryptsetup_version.group(0)) >= version.parse(target_version): + conf.dmcrypt_no_workqueue = True + except IndexError: + mlogger.debug(f'cryptsetup version check: rc={rc} out={out} err={err}') + raise RuntimeError("Couldn't check the cryptsetup version.") + +def bypass_workqueue(device: str) -> bool: + return not Device(device).rotational and conf.dmcrypt_no_workqueue + def get_key_size_from_conf(): """ Return the osd dmcrypt key size from config file. @@ -28,7 +84,7 @@ def get_key_size_from_conf(): return key_size -def create_dmcrypt_key(): +def create_dmcrypt_key() -> str: """ Create the secret dm-crypt key (KEK) used to encrypt/decrypt the Volume Key. """ @@ -37,7 +93,7 @@ def create_dmcrypt_key(): return key -def luks_format(key, device): +def luks_format(key: str, device: str) -> None: """ Decrypt (open) an encrypted device, previously prepared with cryptsetup @@ -79,10 +135,54 @@ def plain_open(key, device, mapping): '--key-size', '256', ] + if bypass_workqueue(device): + command.extend(['--perf-no_read_workqueue', + '--perf-no_write_workqueue']) + process.call(command, stdin=key, terminal_verbose=True, show_command=True) -def luks_open(key, device, mapping): +def luks_close(mapping: str) -> None: + """Close a LUKS2 mapper device. + + Args: + mapping (str): the name of the mapper to be closed. + """ + command: List[str] = ['cryptsetup', + 'luksClose', + mapping] + + process.call(command, + terminal_verbose=True, + show_command=True) + + +def rename_mapper(current: str, new: str) -> None: + """Rename a mapper + + Args: + old (str): current name + new (str): new name + """ + + command: List[str] = [ + 'dmsetup', + 'rename', + current, + new + ] + + _, err, rc = process.call(command, + terminal_verbose=True, + show_command=True) + if rc: + raise RuntimeError(f"Can't rename mapper '{current}' to '{new}': {err}") + + +def luks_open(key: str, + device: str, + mapping: str, + with_tpm: int = 0) -> None: """ Decrypt (open) an encrypted device, previously prepared with cryptsetup @@ -91,19 +191,40 @@ def luks_open(key, device, mapping): :param key: dmcrypt secret key :param device: absolute path to device :param mapping: mapping name used to correlate device. Usually a UUID + :param with_tpm: whether to use tpm2 token enrollment. 
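# [Editor's example] Sketch of the cryptsetup version probe added above
# (set_dmcrypt_no_workqueue), run against a canned output line instead of the
# real binary; requires the 'packaging' module that this patch adds to setup.py.
import re
from packaging import version

sample_output = 'cryptsetup 2.6.0 flags: UDEV BLKID KEYRING FIPS KERNEL_CAPI PWQUALITY'
match = re.search(r'(\d+\.?)+', sample_output)
if match and version.parse(match.group(0)) >= version.parse('2.3.4'):
    # the patch records this as conf.dmcrypt_no_workqueue and later appends the
    # --perf-no_read_workqueue / --perf-no_write_workqueue flags for
    # non-rotational devices
    print('workqueue bypass supported')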
""" - command = [ - 'cryptsetup', - '--key-size', - get_key_size_from_conf(), - '--key-file', - '-', - '--allow-discards', # allow discards (aka TRIM) requests for device - 'luksOpen', - device, - mapping, - ] - process.call(command, stdin=key, terminal_verbose=True, show_command=True) + command: List[str] = [] + if with_tpm: + command = ['/usr/lib/systemd/systemd-cryptsetup', + 'attach', + mapping, + device, + '-', + 'tpm2-device=auto,discard,headless=true,nofail'] + if bypass_workqueue(device): + command[-1] += ',no-read-workqueue,no-write-workqueue' + else: + command = [ + 'cryptsetup', + '--key-size', + get_key_size_from_conf(), + '--key-file', + '-', + '--allow-discards', # allow discards (aka TRIM) requests for device + 'luksOpen', + device, + mapping, + ] + + if bypass_workqueue(device): + command.extend(['--perf-no_read_workqueue', + '--perf-no_write_workqueue']) + + process.call(command, + run_on_host=with_tpm, + stdin=key, + terminal_verbose=True, + show_command=True) def dmcrypt_close(mapping, skip_path_check=False): @@ -292,3 +413,160 @@ def prepare_dmcrypt(key, device, mapping): mapping ) return '/dev/mapper/%s' % mapping + + +class CephLuks2: + def __init__(self, device: str) -> None: + self.device: str = device + self.osd_fsid: str = '' + if self.is_ceph_encrypted: + self.osd_fsid = self.get_osd_fsid() + + @property + def has_luks2_signature(self) -> bool: + try: + return _dd_read(self.device, 4) == 'LUKS' + except Exception as e: + raise RuntimeError(e) + + @property + def is_ceph_encrypted(self) -> bool: + """Check whether a device is used for a Ceph encrypted OSD + + Args: + device (str): The path of the device being checked. + + Returns: + bool: `True` if the device is used by an encrypted Ceph OSD, else `False`. + """ + result: bool = False + try: + result = self.has_luks2_signature and 'ceph_fsid=' in self.get_subsystem() + except RuntimeError: + pass + return result + + def config_luks2(self, config: Dict[str, str]) -> None: + """Set the subsystem of a LUKS2 device + + Args: + config (str): The config to apply to the LUKS2 device. + + Raises: + RuntimeError: If it can't set LUKS2 configuration. + """ + if not (0 < len(config) <= 2): + raise RuntimeError(f'Invalid config for LUKS2 device {self.device}') + + valid_keys = ['label', 'subsystem'] + if not all(key in valid_keys for key in config.keys()): + raise RuntimeError(f'LUKS2 config for device {self.device} can only be "label" and/or "subsystem".') + + command: List[str] = ['cryptsetup', 'config', + self.device] + for k, v in config.items(): + command.extend([f'--{k}', v]) + _, err, rc = process.call(command, verbose_on_failure=False) + if rc: + raise RuntimeError(f"Can't set luks2 config to {self.device}:\n{err}") + + def get_label(self) -> str: + """Get the label of a LUKS2 device + + Args: + device (str): The device to get the LUKS label from. + + Returns: + str: The LUKS2 label of the device. + """ + result: str = '' + try: + result = _dd_read(self.device, 48, 24) + except Exception: + raise RuntimeError(f"Can't get luks2 label from {self.device}") + return result + + def get_osd_fsid(self) -> str: + """Get the osd fsid. + + Returns: + str: The OSD fsid + """ + + result: str = '' + try: + subsystem = self.get_subsystem() + result = subsystem.split('=')[1] + except IndexError: + logger.debug(f"LUKS2 device {self.device} doesn't have ceph osd fsid detail. 
Please check LUKS2 label for this device.") + return result + + def get_subsystem(self) -> str: + """Get the subsystem of a LUKS2 device + + Args: + device (str): The device to get the LUKS subsystem from. + + Returns: + str: The LUKS2 subsystem of the device. + """ + result: str = '' + try: + result = _dd_read(self.device, 48, 208) + except Exception as e: + raise RuntimeError(f"Can't get luks2 label from {self.device}:\n{e}") + return result + + def get_json_area(self) -> Dict[str, Any]: + """Retrieve the LUKS2 JSON configuration area from a given device. + + This function reads the LUKS2 JSON configuration area from the specified 'device'. + It first checks if the device contains a LUKS2 signature. If not, an empty dictionary + is returned. If a LUKS2 signature is found, it reads the JSON configuration area + starting from byte offset 4096 (4 KB) and extracts the configuration data. + + Args: + device (str): The path to the device. + + Raises: + RuntimeError: If the LUKS2 JSON area on the device is invalid or cannot be decoded. + + Returns: + Dict[str, Any]: A dictionary containing the extracted LUKS2 JSON configuration data. + """ + result: Dict[str, Any] = {} + try: + data: str = _dd_read(self.device, 12288, 4096) + result = json.loads(data) + except json.JSONDecodeError: + msg: str = f"LUKS2 json area for device {self.device} seems invalid." + raise RuntimeError(msg) + except Exception: + raise + + return result + + @property + def is_tpm2_enrolled(self) -> bool: + """Check if a given device is enrolled with TPM2. + + This function checks if the specified 'device' is enrolled with TPM2. + It first determines if the device is a LUKS encrypted volume by checking + its filesystem type using lsblk. If the filesystem type is 'crypto_LUKS', + it extracts the LUKS2 JSON configuration area from the device using the + 'get_luks2_json_area' function. If the JSON area contains a 'systemd-tpm2' + token, it indicates that the device is enrolled with TPM2. + + Args: + device (str): The path to the device. + + Returns: + bool: True if the device is enrolled with TPM2, False otherwise. + """ + if lsblk(self.device).get('FSTYPE', '') == 'crypto_LUKS': + json_area: Dict[str, Any] = self.get_json_area() + if 'tokens' in json_area.keys(): + for token in json_area['tokens'].keys(): + if json_area['tokens'][token].get('type', '') == 'systemd-tpm2': + return True + return False diff --git a/src/ceph-volume/ceph_volume/util/prepare.py b/src/ceph-volume/ceph_volume/util/prepare.py index 576c08617084..9c863b83d938 100644 --- a/src/ceph-volume/ceph_volume/util/prepare.py +++ b/src/ceph-volume/ceph_volume/util/prepare.py @@ -4,11 +4,9 @@ may want to change some part of the process, while others might want to consume the single-call helper """ -import errno import os import logging import json -import time from ceph_volume import process, conf, terminal from ceph_volume.util import system, constants, str_to_int, disk @@ -379,82 +377,3 @@ def get_monmap(osd_id): '--keyring', bootstrap_keyring, 'mon', 'getmap', '-o', monmap_destination ]) - - -def get_osdspec_affinity(): - return os.environ.get('CEPH_VOLUME_OSDSPEC_AFFINITY', '') - - -def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False): - """ - Create the files for the OSD to function. 
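# [Editor's example] Sketch of the token scan is_tpm2_enrolled() performs on the
# LUKS2 JSON area above, using a hand-written dict shaped like the one the unit
# tests earlier in this patch stub out via get_json_area().
def tpm2_enrolled(json_area: dict) -> bool:
    return any(token.get('type') == 'systemd-tpm2'
               for token in json_area.get('tokens', {}).values())

assert tpm2_enrolled({'tokens': {'1': {'type': 'systemd-tpm2'}}})
assert not tpm2_enrolled({'tokens': {'1': {'type': 'something-else'}}})
assert not tpm2_enrolled({'whatever': 'fake-value'})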
A normal call will look like: - - ceph-osd --cluster ceph --mkfs --mkkey -i 0 \ - --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \ - --osd-data /var/lib/ceph/osd/ceph-0 \ - --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \ - --keyring /var/lib/ceph/osd/ceph-0/keyring \ - --setuser ceph --setgroup ceph - - In some cases it is required to use the keyring, when it is passed in as - a keyword argument it is used as part of the ceph-osd command - """ - path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id) - monmap = os.path.join(path, 'activate.monmap') - - system.chown(path) - - base_command = [ - 'ceph-osd', - '--cluster', conf.cluster, - '--osd-objectstore', 'bluestore', - '--mkfs', - '-i', osd_id, - '--monmap', monmap, - ] - - supplementary_command = [ - '--osd-data', path, - '--osd-uuid', fsid, - '--setuser', 'ceph', - '--setgroup', 'ceph' - ] - - if keyring is not None: - base_command.extend(['--keyfile', '-']) - - if wal: - base_command.extend( - ['--bluestore-block-wal-path', wal] - ) - system.chown(wal) - - if db: - base_command.extend( - ['--bluestore-block-db-path', db] - ) - system.chown(db) - - if get_osdspec_affinity(): - base_command.extend(['--osdspec-affinity', get_osdspec_affinity()]) - - command = base_command + supplementary_command - - """ - When running in containers the --mkfs on raw device sometimes fails - to acquire a lock through flock() on the device because systemd-udevd holds one temporarily. - See KernelDevice.cc and _lock() to understand how ceph-osd acquires the lock. - Because this is really transient, we retry up to 5 times and wait for 1 sec in-between - """ - for retry in range(5): - _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True) - if returncode == 0: - break - else: - if returncode == errno.EWOULDBLOCK: - time.sleep(1) - logger.info('disk is held by another process, trying to mkfs again... 
(%s/5 attempt)' % retry) - continue - else: - raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command))) - diff --git a/src/ceph-volume/ceph_volume/util/system.py b/src/ceph-volume/ceph_volume/util/system.py index 590a0599b56b..4b44d31336cc 100644 --- a/src/ceph-volume/ceph_volume/util/system.py +++ b/src/ceph-volume/ceph_volume/util/system.py @@ -134,7 +134,7 @@ def mkdir_p(path, chown=True): A `mkdir -p` that defaults to chown the path to the ceph user """ try: - os.mkdir(path) + os.makedirs(path) except OSError as e: if e.errno == errno.EEXIST: pass diff --git a/src/ceph-volume/setup.py b/src/ceph-volume/setup.py index 44a0d0e46899..fa49a95cdd05 100644 --- a/src/ceph-volume/setup.py +++ b/src/ceph-volume/setup.py @@ -14,7 +14,10 @@ keywords='ceph volume disk devices lvm', url="https://github.com/ceph/ceph", zip_safe = False, - install_requires='ceph', + install_requires=[ + 'ceph', + 'packaging', + ], dependency_links=[''.join(['file://', os.path.join(os.getcwd(), '../', 'python-common#egg=ceph-1.0.0')])], tests_require=[ diff --git a/src/ceph-volume/tox.ini b/src/ceph-volume/tox.ini index 696d6dcc837a..f7d294a9aadc 100644 --- a/src/ceph-volume/tox.ini +++ b/src/ceph-volume/tox.ini @@ -11,7 +11,7 @@ deps= allowlist_externals= ./tox_install_command.sh install_command=./tox_install_command.sh {opts} {packages} -commands=py.test --numprocesses=auto -vv {posargs:ceph_volume/tests} --ignore=ceph_volume/tests/functional +commands=py.test -vv {posargs:ceph_volume/tests} --ignore=ceph_volume/tests/functional [testenv:py3-flake8] deps=flake8 diff --git a/src/ceph.in b/src/ceph.in index 2ba2c74768cf..51743dd9ae8c 100755 --- a/src/ceph.in +++ b/src/ceph.in @@ -336,6 +336,8 @@ def parse_cmdargs(args=None, target='') -> Tuple[argparse.ArgumentParser, parser.add_argument('--concise', dest='verbose', action="store_false", help="make less verbose") + parser.add_argument('--daemon-output-file', dest='daemon_output_file', + help="output file location local to the daemon for JSON produced by tell commands") parser.add_argument('-f', '--format', choices=['json', 'json-pretty', 'xml', 'xml-pretty', 'plain', 'yaml'], help="Note: yaml is only valid for orch commands", dest='output_format') @@ -580,6 +582,8 @@ def do_command(parsed_args, target, cmdargs, sigdict, inbuf, verbose): if valid_dict: if parsed_args.output_format: valid_dict['format'] = parsed_args.output_format + if parsed_args.daemon_output_file: + valid_dict['output-file'] = parsed_args.daemon_output_file if verbose: print("Submitting command: ", valid_dict, file=sys.stderr) else: @@ -1310,7 +1314,7 @@ def main(): if final_e: raise final_e - # Block until command completion (currently scrub and deep_scrub only) + # Block until command completion (currently scrub and deep scrub only) if block: wait(childargs, waitdata) diff --git a/src/ceph_fuse.cc b/src/ceph_fuse.cc index 3fa5346b4634..68fe30760a77 100644 --- a/src/ceph_fuse.cc +++ b/src/ceph_fuse.cc @@ -81,9 +81,10 @@ static void fuse_usage() void usage() { cout << -"usage: ceph-fuse [-n client.username] [-m mon-ip-addr:mon-port] [OPTIONS]\n" -" --client_mountpoint/-r \n" -" use sub_directory as the mounted root, rather than the full Ceph tree.\n" +"\nusage: ceph-fuse [-n client.username] [-m mon-ip-addr:mon-port] [--client_fs ] [--client_mountpoint/-r ] [OPTIONS]\n\n" + +" --client_mountpoint/-r: use sub_directory as the mounted root, rather than the full CephFS tree.\n" +" --client_fs: named file system to mount (default: usually the first file system 
created).\n" "\n"; fuse_usage(); generic_client_usage(); diff --git a/src/ceph_mds.cc b/src/ceph_mds.cc index 5a917fa807c4..ba8726a2be36 100644 --- a/src/ceph_mds.cc +++ b/src/ceph_mds.cc @@ -81,7 +81,7 @@ static void handle_mds_signal(int signum) int main(int argc, const char **argv) { - ceph_pthread_setname(pthread_self(), "ceph-mds"); + ceph_pthread_setname("ceph-mds"); auto args = argv_to_vec(argc, argv); if (args.empty()) { diff --git a/src/ceph_mgr.cc b/src/ceph_mgr.cc index 67bda0c51bed..bd2c643bc6bd 100644 --- a/src/ceph_mgr.cc +++ b/src/ceph_mgr.cc @@ -41,7 +41,7 @@ static void usage() */ int main(int argc, const char **argv) { - ceph_pthread_setname(pthread_self(), "ceph-mgr"); + ceph_pthread_setname("ceph-mgr"); auto args = argv_to_vec(argc, argv); if (args.empty()) { diff --git a/src/ceph_mon.cc b/src/ceph_mon.cc index 279fdb20ccbf..63eb252e38f5 100644 --- a/src/ceph_mon.cc +++ b/src/ceph_mon.cc @@ -250,7 +250,7 @@ int main(int argc, const char **argv) { // reset our process name, in case we did a respawn, so that it's not // left as "exe". - ceph_pthread_setname(pthread_self(), "ceph-mon"); + ceph_pthread_setname("ceph-mon"); int err; diff --git a/src/ceph_nvmeof_monitor_client.cc b/src/ceph_nvmeof_monitor_client.cc new file mode 100644 index 000000000000..fa41bed08ad7 --- /dev/null +++ b/src/ceph_nvmeof_monitor_client.cc @@ -0,0 +1,79 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 IBM Inc + * + * Author: Alexander Indenbaum + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include + +#include "include/types.h" +#include "include/compat.h" +#include "common/config.h" +#include "common/ceph_argparse.h" +#include "common/errno.h" +#include "common/pick_address.h" +#include "global/global_init.h" + +#include "nvmeof/NVMeofGwMonitorClient.h" + +static void usage() +{ + std::cout << "usage: ceph-nvmeof-monitor-client\n" + " --gateway-name \n" + " --gateway-address \n" + " --gateway-pool \n" + " --gateway-group \n" + " --monitor-group-address \n" + " [flags]\n" + << std::endl; + generic_server_usage(); +} + +/** + * A short main() which just instantiates a Nvme and + * hands over control to that. 
+ */ +int main(int argc, const char **argv) +{ + ceph_pthread_setname("ceph-nvmeof-monitor-client"); + + auto args = argv_to_vec(argc, argv); + if (args.empty()) { + std::cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, // maybe later use CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + + pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); + + global_init_daemonize(g_ceph_context); + global_init_chdir(g_ceph_context); + common_init_finish(g_ceph_context); + + NVMeofGwMonitorClient gw_monitor_client(argc, argv); + int rc = gw_monitor_client.init(); + if (rc != 0) { + std::cerr << "Error in initialization: " << cpp_strerror(rc) << std::endl; + return rc; + } + + return gw_monitor_client.main(args); +} + diff --git a/src/ceph_osd.cc b/src/ceph_osd.cc index c0bd5b33ad4e..52988843c832 100644 --- a/src/ceph_osd.cc +++ b/src/ceph_osd.cc @@ -375,8 +375,9 @@ int main(int argc, const char **argv) << " for osd." << whoami << " fsid " << g_conf().get_val("fsid") << dendl; + forker.exit(0); } - if (mkfs || mkkey) { + if (mkkey) { forker.exit(0); } if (mkjournal) { diff --git a/src/ceph_release b/src/ceph_release index 5640b4491a0d..67f3c2f5ae34 100644 --- a/src/ceph_release +++ b/src/ceph_release @@ -1,3 +1,3 @@ -18 -reef +19 +squid dev diff --git a/src/ceph_release.h.in.cmake b/src/ceph_release.h.in.cmake new file mode 100644 index 000000000000..f622fc565f16 --- /dev/null +++ b/src/ceph_release.h.in.cmake @@ -0,0 +1,8 @@ +#ifndef CEPH_RELEASE_H +#define CEPH_RELEASE_H + +#define CEPH_RELEASE @CEPH_RELEASE@ +#define CEPH_RELEASE_NAME "@CEPH_RELEASE_NAME@" +#define CEPH_RELEASE_TYPE "@CEPH_RELEASE_TYPE@" + +#endif diff --git a/src/ceph_ver.h.in.cmake b/src/ceph_ver.h.in.cmake index d7e1c8e9bddf..028a1c527b44 100644 --- a/src/ceph_ver.h.in.cmake +++ b/src/ceph_ver.h.in.cmake @@ -3,8 +3,7 @@ #define CEPH_GIT_VER @CEPH_GIT_VER@ #define CEPH_GIT_NICE_VER "@CEPH_GIT_NICE_VER@" -#define CEPH_RELEASE @CEPH_RELEASE@ -#define CEPH_RELEASE_NAME "@CEPH_RELEASE_NAME@" -#define CEPH_RELEASE_TYPE "@CEPH_RELEASE_TYPE@" + +#include "ceph_release.h" #endif diff --git a/src/cephadm/CMakeLists.txt b/src/cephadm/CMakeLists.txt index 8b969bc33e7b..c8b7c74a985a 100644 --- a/src/cephadm/CMakeLists.txt +++ b/src/cephadm/CMakeLists.txt @@ -1,10 +1,14 @@ if(WITH_TESTS) include(AddCephTest) - add_tox_test(cephadm TOX_ENVS py3 mypy flake8) + add_tox_test(cephadm TOX_ENVS __tox_defaults__) endif() set(bin_target_file ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/cephadm) +if(NOT DEFINED CEPHADM_BUNDLED_DEPENDENCIES) + set(CEPHADM_BUNDLED_DEPENDENCIES "pip") +endif() + add_custom_command( OUTPUT "${bin_target_file}" DEPENDS @@ -17,6 +21,7 @@ add_custom_command( --set-version-var=CEPH_RELEASE=${CEPH_RELEASE} --set-version-var=CEPH_RELEASE_NAME=${CEPH_RELEASE_NAME} --set-version-var=CEPH_RELEASE_TYPE=${CEPH_RELEASE_TYPE} + --bundled-dependencies=${CEPHADM_BUNDLED_DEPENDENCIES} ${bin_target_file} ) diff --git a/src/cephadm/box/box.py b/src/cephadm/box/box.py index db2f24233512..fd9de7fe3e3e 100755 --- a/src/cephadm/box/box.py +++ b/src/cephadm/box/box.py @@ -6,9 +6,8 @@ import sys import host import osd -from multiprocessing import Process, Pool +from multiprocessing import Pool from util import ( - BoxType, Config, Target, ensure_inside_container, @@ -19,12 +18,9 @@ run_dc_shell_commands, get_container_engine, run_shell_command, - 
run_shell_commands, - ContainerEngine, DockerEngine, PodmanEngine, colored, - engine, engine_compose, Colors, get_seed_name diff --git a/src/cephadm/box/host.py b/src/cephadm/box/host.py index aae16d07f453..6b49def23189 100644 --- a/src/cephadm/box/host.py +++ b/src/cephadm/box/host.py @@ -12,7 +12,6 @@ run_dc_shell_command, run_shell_command, engine, - BoxType ) diff --git a/src/cephadm/box/osd.py b/src/cephadm/box/osd.py index 827a4de36c0f..3e559b2fe8c1 100644 --- a/src/cephadm/box/osd.py +++ b/src/cephadm/box/osd.py @@ -5,7 +5,6 @@ from typing import Dict from util import ( - BoxType, Config, Target, ensure_inside_container, diff --git a/src/cephadm/box/util.py b/src/cephadm/box/util.py index 7dcf883f8a37..4aa5645b26b9 100644 --- a/src/cephadm/box/util.py +++ b/src/cephadm/box/util.py @@ -417,5 +417,4 @@ def up(self, hosts: int): def get_container_engine() -> ContainerEngine: if engine() == 'docker': return DockerEngine() - else: - return PodmanEngine() + return PodmanEngine() diff --git a/src/cephadm/build.py b/src/cephadm/build.py index 0680abad21a0..43bc58a40034 100755 --- a/src/cephadm/build.py +++ b/src/cephadm/build.py @@ -7,13 +7,17 @@ import argparse import compileall +import enum +import functools +import json import logging import os import pathlib +import shlex import shutil import subprocess -import tempfile import sys +import tempfile HAS_ZIPAPP = False try: @@ -27,6 +31,66 @@ log = logging.getLogger(__name__) +# Fill in the package requirements for the zipapp build below. The PY36_REQUIREMENTS +# list applies *only* to python 3.6. The PY_REQUIREMENTS list applies to all other +# python versions. Python lower than 3.6 is not supported by this script. +# +# Each item must be a dict with the following fields: +# - package_spec (REQUIRED, str): A python package requirement in the same style as +# requirements.txt and pip. +# - from_source (bool): Try to force a clean no-binaries build using source packages. +# - unique (bool): If true, this requirement should not be combined with any other +# on the pip command line. +# - ignore_suffixes (list of str): A list of file and directory suffixes to EXCLUDE +# from the final zipapp. +# - ignore_exact (list of str): A list of exact file and directory names to EXCLUDE +# from the final zipapp. +# - custom_pip_args (list of str): A list of additional custom arguments to pass +# to pip when installing this dependency. +# +PY36_REQUIREMENTS = [ + { + 'package_spec': 'MarkupSafe >= 2.0.1, <2.2', + 'from_source': True, + 'unique': True, + }, + { + 'package_spec': 'Jinja2 >= 3.0.2, <3.2', + 'from_source': True, + 'unique': True, + }, + { + 'package_spec': 'PyYAML >= 6.0, <6.1', + # do not include the stub package for compatibility with + # old versions of the extension module. We are going out of our + # way to avoid the binary extension module for our zipapp, no + # point in pulling this unnecessary module for wrapping it. + 'ignore_exact': ['_yaml'], + }, +] +PY_REQUIREMENTS = [ + {'package_spec': 'MarkupSafe >= 2.1.3, <2.2', 'from_source': True}, + {'package_spec': 'Jinja2 >= 3.1.2, <3.2', 'from_source': True}, + # We can not install PyYAML using sources. Unlike MarkupSafe it requires + # Cython to build and Cython must be compiled and there's not clear way past + # the requirement in pyyaml's pyproject.toml. Instead, rely on fetching + # a platform specific pyyaml wheel and then stripping of the binary shared + # object. 
+ { + 'package_spec': 'PyYAML >= 6.0, <6.1', + # do not include the stub package for compatibility with + # old versions of the extension module. We are going out of our + # way to avoid the binary extension module for our zipapp, no + # point in pulling this unnecessary module for wrapping it. + 'ignore_exact': ['_yaml'], + }, +] +# IMPORTANT to be fully compatible with all the distros ceph is built for we +# need to work around various old versions of python/pip. As such it's easier +# to repeat our requirements in this script than it is to parse zipapp-reqs.txt. +# You *must* keep the PY_REQUIREMENTS list in sync with the contents of +# zipapp-reqs.txt manually. + _VALID_VERS_VARS = [ "CEPH_GIT_VER", "CEPH_GIT_NICE_VER", @@ -36,6 +100,123 @@ ] +class InstallSpec: + def __init__( + self, + package_spec, + custom_pip_args=None, + unique=False, + from_source=False, + ignore_suffixes=None, + ignore_exact=None, + **kwargs, + ): + self.package_spec = package_spec + self.name = package_spec.split()[0] + self.custom_pip_args = custom_pip_args or [] + self.unique = unique + self.from_source = from_source + self.ignore_suffixes = ignore_suffixes or [] + self.ignore_exact = ignore_exact or [] + self.extra = kwargs + + @property + def pip_args(self): + args = [] + if self.from_source: + args.append("--no-binary") + args.append(":all:") + return args + self.custom_pip_args + + @property + def pip_args_and_package(self): + return self.pip_args + [self.package_spec] + + def compatible(self, other): + return ( + other + and not self.unique + and not other.unique + and self.pip_args == other.pip_args + ) + + +class PipEnv(enum.Enum): + never = enum.auto() + auto = enum.auto() + required = enum.auto() + + @property + def enabled(self): + return self == self.auto or self == self.required + + +class DependencyMode(enum.Enum): + pip = enum.auto() + rpm = enum.auto() + none = enum.auto() + + +class Config: + def __init__(self, cli_args): + self.cli_args = cli_args + self._maj_min = sys.version_info[0:2] + self.install_dependencies = True + self.deps_mode = DependencyMode[cli_args.bundled_dependencies] + if self.deps_mode == DependencyMode.none: + self.install_dependencies = False + if self.deps_mode == DependencyMode.pip: + self._setup_pip() + elif self.deps_mode == DependencyMode.rpm: + self._setup_rpm() + + def _setup_pip(self): + if self._maj_min == (3, 6): + self.requirements = [InstallSpec(**v) for v in PY36_REQUIREMENTS] + else: + self.requirements = [InstallSpec(**v) for v in PY_REQUIREMENTS] + self.pip_venv = PipEnv[self.cli_args.pip_use_venv] + + def _setup_rpm(self): + self.requirements = [InstallSpec(**v) for v in PY_REQUIREMENTS] + + +class DependencyInfo: + """Type for tracking bundled dependencies.""" + + def __init__(self, config): + self._config = config + self._deps = [] + self._reqs = { + s.name: s.package_spec for s in self._config.requirements + } + + @property + def requirements(self): + """Return requirements.""" + return self._config.requirements + + def add(self, name, **fields): + """Add a new bundled dependency to track.""" + vals = {'name': name} + vals.update({k: v for k, v in fields.items() if v is not None}) + if name in self._reqs: + vals['requirements_entry'] = self._reqs[name] + self._deps.append(vals) + + def save(self, path): + """Record bundled dependency meta-data to the supplied file.""" + with open(path, 'w') as fh: + json.dump(self._deps, fh) + + +def _run(command, *args, **kwargs): + log.info( + 'Running cmd: %s', ' '.join(shlex.quote(str(c)) for c in command) + ) + 
return subprocess.run(command, *args, **kwargs) + + def _reexec(python): """Switch to the selected version of python by exec'ing into the desired python path. @@ -54,35 +235,66 @@ def _did_rexec(): return bool(os.environ.get("_BUILD_PYTHON_SET", "")) -def _build(dest, src, versioning_vars=None): +def _build(dest, src, config): """Build the binary.""" os.chdir(src) tempdir = pathlib.Path(tempfile.mkdtemp(suffix=".cephadm.build")) log.debug("working in %s", tempdir) + dinfo = None + appdir = tempdir / "app" try: - if os.path.isfile("requirements.txt"): - _install_deps(tempdir) + if config.install_dependencies: + depsdir = tempdir / "deps" + dinfo = _install_deps(depsdir, config) + ignore_suffixes = [] + ignore_exact = [] + for ispec in config.requirements: + ignore_suffixes.extend(ispec.ignore_suffixes) + ignore_exact.extend(ispec.ignore_exact) + ignorefn = functools.partial( + _ignore_cephadmlib, + ignore_suffixes=ignore_suffixes, + ignore_exact=ignore_exact, + ) + shutil.copytree(depsdir, appdir, ignore=ignorefn) log.info("Copying contents") # cephadmlib is cephadm's private library of modules shutil.copytree( - "cephadmlib", tempdir / "cephadmlib", ignore=_ignore_cephadmlib + "cephadmlib", appdir / "cephadmlib", ignore=_ignore_cephadmlib ) # cephadm.py is cephadm's main script for the "binary" # this must be renamed to __main__.py for the zipapp - shutil.copy("cephadm.py", tempdir / "__main__.py") + shutil.copy("cephadm.py", appdir / "__main__.py") + mdir = appdir / "_cephadmmeta" + mdir.mkdir(parents=True, exist_ok=True) + (mdir / "__init__.py").touch(exist_ok=True) + versioning_vars = config.cli_args.version_vars + shutil.copytree( + "../python-common/ceph", appdir / "ceph" + ) if versioning_vars: - generate_version_file(versioning_vars, tempdir / "_version.py") - _compile(dest, tempdir) + generate_version_file(versioning_vars, mdir / "version.py") + if dinfo: + dinfo.save(mdir / "deps.json") + _compile(dest, appdir) finally: shutil.rmtree(tempdir) -def _ignore_cephadmlib(source_dir, names): +def _ignore_cephadmlib( + source_dir, names, ignore_suffixes=None, ignore_exact=None +): # shutil.copytree callback: return the list of names *to ignore* + suffixes = ["~", ".old", ".swp", ".pyc", ".pyo", ".so", "__pycache__"] + exact = [] + if ignore_suffixes: + suffixes += ignore_suffixes + if ignore_exact: + exact += ignore_exact return [ name for name in names - if name.endswith(("~", ".old", ".swp", ".pyc", ".pyo", "__pycache__")) + if name.endswith(tuple(suffixes)) or name in exact ] @@ -116,23 +328,170 @@ def _compile(dest, tempdir): log.info("Zipapp created without compression") -def _install_deps(tempdir): +def _install_deps(tempdir, config): + if config.deps_mode == DependencyMode.pip: + return _install_pip_deps(tempdir, config) + if config.deps_mode == DependencyMode.rpm: + return _install_rpm_deps(tempdir, config) + raise ValueError(f'unexpected deps mode: {deps.mode}') + + +def _install_pip_deps(tempdir, config): """Install dependencies with pip.""" - # TODO we could explicitly pass a python version here - log.info("Installing dependencies") - # apparently pip doesn't have an API, just a cli. 
- subprocess.check_call( - [ - sys.executable, - "-m", - "pip", - "install", - "--requirement", - "requirements.txt", - "--target", - tempdir, - ] + log.info("Installing dependencies using pip") + + executable = sys.executable + venv = config.pip_venv + has_venv = _has_python_venv(sys.executable) if venv.enabled else False + venv = None + if venv == PipEnv.required and not has_venv: + raise RuntimeError('venv (virtual environment) module not found') + if has_venv: + log.info('Attempting to create a virtualenv') + venv = tempdir / "_venv_" + _run([sys.executable, '-m', 'venv', str(venv)]) + executable = str(venv / "bin" / pathlib.Path(executable).name) + # try to upgrade pip in the virtualenv. if it fails ignore the error + _run([executable, '-m', 'pip', 'install', '-U', 'pip']) + else: + log.info('Continuing without a virtualenv...') + if not _has_python_pip(executable): + raise RuntimeError('pip module not found') + + # best effort to disable compilers, packages in the zipapp + # must be pure python. + env = os.environ.copy() + env['CC'] = '/bin/false' + env['CXX'] = '/bin/false' + env['LC_ALL'] = 'C.UTF-8' # work around some env issues with pip + if env.get('PYTHONPATH'): + env['PYTHONPATH'] = env['PYTHONPATH'] + f':{tempdir}' + else: + env['PYTHONPATH'] = f'{tempdir}' + + pip_args = [] + prev = None + for ispec in config.requirements: + if ispec.compatible(prev) and pip_args: + pip_args[0].append(ispec.package_spec) + else: + pip_args.append(ispec.pip_args_and_package) + prev = ispec + for batch in pip_args: + _run( + [ + executable, + "-m", + "pip", + "install", + "--target", + tempdir, + ] + + batch, + env=env, + check=True, + ) + + dinfo = DependencyInfo(config) + res = _run( + [executable, '-m', 'pip', 'list', '--format=json', '--path', tempdir], + check=True, + stdout=subprocess.PIPE, + ) + pkgs = json.loads(res.stdout) + for pkg in pkgs: + dinfo.add( + pkg['name'], + version=pkg['version'], + package_source='pip', + ) + + if venv: + shutil.rmtree(venv) + return dinfo + + +def _has_python_venv(executable): + res = _run( + [executable, '-m', 'venv', '--help'], stdout=subprocess.DEVNULL + ) + return res.returncode == 0 + + +def _has_python_pip(executable): + res = _run( + [executable, '-m', 'venv', '--help'], stdout=subprocess.DEVNULL + ) + return res.returncode == 0 + + +def _install_rpm_deps(tempdir, config): + log.info("Installing dependencies using RPMs") + dinfo = DependencyInfo(config) + for pkg in config.requirements: + log.info(f"Looking for rpm package for: {pkg.name!r}") + _deps_from_rpm(tempdir, config, dinfo, pkg.name) + return dinfo + + +def _deps_from_rpm(tempdir, config, dinfo, pkg): + # first, figure out what rpm provides a particular python lib + dist = f'python3.{sys.version_info.minor}dist({pkg})'.lower() + try: + res = subprocess.run( + ['rpm', '-q', '--whatprovides', dist], + check=True, + stdout=subprocess.PIPE, + ) + except subprocess.CalledProcessError as err: + log.error(f"Command failed: {err.args[1]!r}") + log.error(f"An installed RPM package for {pkg} was not found") + sys.exit(1) + rpmname = res.stdout.strip().decode('utf8') + # get version information about said rpm + res = subprocess.run( + ['rpm', '-q', '--qf', '%{version} %{release} %{epoch}\\n', rpmname], + check=True, + stdout=subprocess.PIPE, + ) + vers = res.stdout.decode('utf8').splitlines()[0].split() + log.info(f"RPM Package: {rpmname} ({vers})") + dinfo.add( + pkg, + rpm_name=rpmname, + version=vers[0], + rpm_release=vers[1], + rpm_epoch=vers[2], + package_source='rpm', ) + # get the list of 
files provided by the rpm + res = subprocess.run( + ['rpm', '-ql', rpmname], check=True, stdout=subprocess.PIPE + ) + paths = [l.decode('utf8') for l in res.stdout.splitlines()] + # the top_level.txt file can be used to determine where the python packages + # actually are. We need all of those and the meta-data dir (parent of + # top_level.txt) to be included in our zipapp + top_level = None + for path in paths: + if path.endswith('top_level.txt'): + top_level = pathlib.Path(path) + if not top_level: + raise ValueError('top_level not found') + meta_dir = top_level.parent + pkg_dirs = [ + top_level.parent.parent / p + for p in top_level.read_text().splitlines() + ] + meta_dest = tempdir / meta_dir.name + log.info(f"Copying {meta_dir} to {meta_dest}") + # copy the meta data directory + shutil.copytree(meta_dir, meta_dest, ignore=_ignore_cephadmlib) + # copy all the package directories + for pkg_dir in pkg_dirs: + pkg_dest = tempdir / pkg_dir.name + log.info(f"Copying {pkg_dir} to {pkg_dest}") + shutil.copytree(pkg_dir, pkg_dest, ignore=_ignore_cephadmlib) def generate_version_file(versioning_vars, dest): @@ -178,6 +537,19 @@ def main(): action="append", help="Set a key=value pair in the generated version info file", ) + parser.add_argument( + '--pip-use-venv', + choices=[e.name for e in PipEnv], + default=PipEnv.auto.name, + help='Configure pip to use a virtual environment when bundling dependencies', + ) + parser.add_argument( + "--bundled-dependencies", + "-B", + choices=[e.name for e in DependencyMode], + default=DependencyMode.pip.name, + help="Source for bundled dependencies", + ) args = parser.parse_args() if not _did_rexec() and args.python: @@ -188,7 +560,8 @@ def main(): v=sys.version_info ) ) - log.info("Args: %s", vars(args)) + for argkey, argval in vars(args).items(): + log.info("Argument: %s=%r", argkey, argval) if not HAS_ZIPAPP: # Unconditionally display an error that the version of python # lacks zipapp (probably too old). 
@@ -206,7 +579,7 @@ def main(): dest = pathlib.Path(args.dest).absolute() log.info("Source Dir: %s", source) log.info("Destination Path: %s", dest) - _build(dest, source, versioning_vars=args.version_vars) + _build(dest, source, Config(args)) if __name__ == "__main__": diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 4901abf42cd3..d2ddf5641169 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -18,7 +18,7 @@ import time import errno import ssl -from typing import Dict, List, Tuple, Optional, Union, Any, Callable, IO, Sequence, TypeVar, cast, Iterable, TextIO +from typing import Dict, List, Tuple, Optional, Union, Any, Callable, Sequence, TypeVar, cast import re import uuid @@ -28,29 +28,14 @@ from glob import glob from io import StringIO from threading import Thread, Event -from urllib.error import HTTPError, URLError -from urllib.request import urlopen, Request from pathlib import Path +from configparser import ConfigParser from cephadmlib.constants import ( # default images - DEFAULT_ALERT_MANAGER_IMAGE, - DEFAULT_ELASTICSEARCH_IMAGE, - DEFAULT_GRAFANA_IMAGE, - DEFAULT_HAPROXY_IMAGE, DEFAULT_IMAGE, DEFAULT_IMAGE_IS_MAIN, DEFAULT_IMAGE_RELEASE, - DEFAULT_JAEGER_AGENT_IMAGE, - DEFAULT_JAEGER_COLLECTOR_IMAGE, - DEFAULT_JAEGER_QUERY_IMAGE, - DEFAULT_KEEPALIVED_IMAGE, - DEFAULT_LOKI_IMAGE, - DEFAULT_NODE_EXPORTER_IMAGE, - DEFAULT_NVMEOF_IMAGE, - DEFAULT_PROMETHEUS_IMAGE, - DEFAULT_PROMTAIL_IMAGE, - DEFAULT_SNMP_GATEWAY_IMAGE, # other constant values CEPH_CONF, CEPH_CONF_DIR, @@ -59,7 +44,6 @@ CEPH_DEFAULT_PUBKEY, CEPH_KEYRING, CEPH_PUBKEY, - CGROUPS_SPLIT_PODMAN_VERSION, CONTAINER_INIT, CUSTOM_PS1, DATA_DIR, @@ -71,9 +55,9 @@ LOGROTATE_DIR, LOG_DIR, LOG_DIR_MODE, - PIDS_LIMIT_UNLIMITED_PODMAN_VERSION, SYSCTL_DIR, UNIT_DIR, + DAEMON_FAILED_ERROR, ) from cephadmlib.context import CephadmContext from cephadmlib.context_getters import ( @@ -84,12 +68,12 @@ get_config_and_keyring, get_parm, read_configuration_source, - should_log_to_journald, ) from cephadmlib.exceptions import ( ClusterAlreadyExists, Error, UnauthorizedRegistryError, + DaemonStartException, ) from cephadmlib.exe_utils import find_executable, find_program from cephadmlib.call_wrappers import ( @@ -101,14 +85,13 @@ concurrent_tasks, ) from cephadmlib.container_engines import ( - Docker, Podman, check_container_engine, find_container_engine, + pull_command, registry_login, ) from cephadmlib.data_utils import ( - dict_get, dict_get_join, get_legacy_config_fsid, is_fsid, @@ -116,15 +99,16 @@ try_convert_datetime, read_config, with_units_to_int, + _extract_host_info_from_applied_spec, ) from cephadmlib.file_utils import ( get_file_timestamp, makedirs, pathify, - populate_files, read_file, recursive_chown, touch, + unlink_file, write_new, write_tmp, ) @@ -135,7 +119,6 @@ check_subnet, get_fqdn, get_hostname, - get_ip_addresses, get_short_hostname, ip_in_subnets, is_ipv6, @@ -148,12 +131,21 @@ from cephadmlib.locking import FileLock from cephadmlib.daemon_identity import DaemonIdentity, DaemonSubIdentity from cephadmlib.packagers import create_packager, Packager -from cephadmlib.logging import cephadm_init_logging, Highlight, LogDestination -from cephadmlib.systemd import check_unit, check_units +from cephadmlib.logging import ( + cephadm_init_logging, + Highlight, + LogDestination, +) +from cephadmlib.systemd import check_unit, check_units, terminate_service +from cephadmlib import systemd_unit +from cephadmlib import runscripts from cephadmlib.container_types import ( CephContainer, InitContainer, + 
SidecarContainer, + extract_uid_gid, is_container_running, + get_mgr_images, ) from cephadmlib.decorators import ( deprecated_command, @@ -169,9 +161,32 @@ register as register_daemon_form, ) from cephadmlib.deploy import DeploymentType -from cephadmlib.container_daemon_form import ContainerDaemonForm +from cephadmlib.container_daemon_form import ( + ContainerDaemonForm, + daemon_to_container, +) from cephadmlib.sysctl import install_sysctl, migrate_sysctl_dir from cephadmlib.firewalld import Firewalld, update_firewalld +from cephadmlib import templating +from cephadmlib.daemons.ceph import get_ceph_mounts_for_type, ceph_daemons +from cephadmlib.daemons import ( + Ceph, + CephExporter, + CephIscsi, + CephNvmeof, + CustomContainer, + HAproxy, + Keepalived, + Monitoring, + NFSGanesha, + SMB, + SNMPGateway, + MgmtGateway, + OAuth2Proxy, + Tracing, + NodeProxy, +) +from cephadmlib.agent import http_query FuncT = TypeVar('FuncT', bound=Callable) @@ -207,1364 +222,9 @@ def __eq__(self, other: Any) -> bool: ################################## -@register_daemon_form -class Ceph(DaemonForm): - daemons = ('mon', 'mgr', 'osd', 'mds', 'rgw', 'rbd-mirror', - 'crash', 'cephfs-mirror', 'ceph-exporter') - gateways = ('iscsi', 'nfs', 'nvmeof') - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - # TODO: figure out a way to un-special-case osd - return daemon_type in cls.daemons and daemon_type != 'osd' - - def __init__(self, ident: DaemonIdentity) -> None: - self._identity = ident - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Ceph': - return cls(ident) - - @property - def identity(self) -> DaemonIdentity: - return self._identity - - def firewall_service_name(self) -> str: - if self.identity.daemon_type == 'mon': - return 'ceph-mon' - elif self.identity.daemon_type in ['mgr', 'mds']: - return 'ceph' - return '' - -################################## - - -@register_daemon_form -class OSD(Ceph): - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - # TODO: figure out a way to un-special-case osd - return daemon_type == 'osd' - - @staticmethod - def get_sysctl_settings() -> List[str]: - return [ - '# allow a large number of OSDs', - 'fs.aio-max-nr = 1048576', - 'kernel.pid_max = 4194304', - ] - - def firewall_service_name(self) -> str: - return 'ceph' - - -################################## - - -@register_daemon_form -class SNMPGateway(ContainerDaemonForm): - """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks""" - daemon_type = 'snmp-gateway' - SUPPORTED_VERSIONS = ['V2c', 'V3'] - default_image = DEFAULT_SNMP_GATEWAY_IMAGE - DEFAULT_PORT = 9464 - env_filename = 'snmp-gateway.conf' - - @classmethod - def for_daemon_type(cls, daemon_type: str) -> bool: - return cls.daemon_type == daemon_type - - def __init__(self, - ctx: CephadmContext, - fsid: str, - daemon_id: Union[int, str], - config_json: Dict[str, Any], - image: Optional[str] = None) -> None: - self.ctx = ctx - self.fsid = fsid - self.daemon_id = daemon_id - self.image = image or SNMPGateway.default_image - - self.uid = config_json.get('uid', 0) - self.gid = config_json.get('gid', 0) - - self.destination = config_json.get('destination', '') - self.snmp_version = config_json.get('snmp_version', 'V2c') - self.snmp_community = config_json.get('snmp_community', 'public') - self.log_level = config_json.get('log_level', 'info') - self.snmp_v3_auth_username = config_json.get('snmp_v3_auth_username', '') - self.snmp_v3_auth_password = 
config_json.get('snmp_v3_auth_password', '') - self.snmp_v3_auth_protocol = config_json.get('snmp_v3_auth_protocol', '') - self.snmp_v3_priv_protocol = config_json.get('snmp_v3_priv_protocol', '') - self.snmp_v3_priv_password = config_json.get('snmp_v3_priv_password', '') - self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '') - - self.validate() - - @classmethod - def init(cls, ctx: CephadmContext, fsid: str, - daemon_id: Union[int, str]) -> 'SNMPGateway': - cfgs = fetch_configs(ctx) - assert cfgs # assert some config data was found - return cls(ctx, fsid, daemon_id, cfgs, ctx.image) - - @classmethod - def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'SNMPGateway': - return cls.init(ctx, ident.fsid, ident.daemon_id) - - @property - def identity(self) -> DaemonIdentity: - return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id) - - @staticmethod - def get_version(ctx: CephadmContext, fsid: str, daemon_id: str) -> Optional[str]: - """Return the version of the notifier from it's http endpoint""" - path = os.path.join(ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta') - try: - with open(path, 'r') as env: - metadata = json.loads(env.read()) - except (OSError, json.JSONDecodeError): - return None - - ports = metadata.get('ports', []) - if not ports: - return None - - try: - with urlopen(f'http://127.0.0.1:{ports[0]}/') as r: - html = r.read().decode('utf-8').split('\n') - except (HTTPError, URLError): - return None - - for h in html: - stripped = h.strip() - if stripped.startswith(('
    <pre>', '<Pre>')) and \
    -               stripped.endswith(('</pre>', '</Pre>')):
    -                # <pre>(version=1.2.1, branch=HEAD, revision=7...
    -                return stripped.split(',')[0].split('version=')[1]
    -
    -        return None
    -
    -    @property
    -    def port(self) -> int:
    -        endpoints = fetch_endpoints(self.ctx)
    -        if not endpoints:
    -            return self.DEFAULT_PORT
    -        return endpoints[0].port
    -
    -    def get_daemon_args(self) -> List[str]:
    -        v3_args = []
    -        base_args = [
    -            f'--web.listen-address=:{self.port}',
    -            f'--snmp.destination={self.destination}',
    -            f'--snmp.version={self.snmp_version}',
    -            f'--log.level={self.log_level}',
    -            '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
    -        ]
    -
    -        if self.snmp_version == 'V3':
    -            # common auth settings
    -            v3_args.extend([
    -                '--snmp.authentication-enabled',
    -                f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
    -                f'--snmp.security-engine-id={self.snmp_v3_engine_id}'
    -            ])
    -            # authPriv setting is applied if we have a privacy protocol setting
    -            if self.snmp_v3_priv_protocol:
    -                v3_args.extend([
    -                    '--snmp.private-enabled',
    -                    f'--snmp.private-protocol={self.snmp_v3_priv_protocol}'
    -                ])
    -
    -        return base_args + v3_args
    -
    -    @property
    -    def data_dir(self) -> str:
    -        return os.path.join(self.ctx.data_dir, self.ctx.fsid, f'{self.daemon_type}.{self.daemon_id}')
    -
    -    @property
    -    def conf_file_path(self) -> str:
    -        return os.path.join(self.data_dir, self.env_filename)
    -
    -    def create_daemon_conf(self) -> None:
    -        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
    -        with write_new(self.conf_file_path) as f:
    -            if self.snmp_version == 'V2c':
    -                f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
    -            else:
    -                f.write(f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n')
    -                f.write(f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n')
    -                if self.snmp_v3_priv_password:
    -                    f.write(f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n')
    -
    -    def validate(self) -> None:
    -        """Validate the settings
    -
    -        Raises:
    -            Error: if the fsid doesn't look like an fsid
    -            Error: if the snmp version is not supported
    -            Error: destination IP and port address missing
    -        """
    -        if not is_fsid(self.fsid):
    -            raise Error(f'not a valid fsid: {self.fsid}')
    -
    -        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
    -            raise Error(f'not a valid snmp version: {self.snmp_version}')
    -
    -        if not self.destination:
    -            raise Error('config is missing destination attribute(<ip>:<port>) of the target SNMP listener')
    -
    -    def container(self, ctx: CephadmContext) -> CephContainer:
    -        return get_deployment_container(ctx, self.identity)
    -
    -    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    -        return self.uid, self.gid
    -
    -
    -##################################
    -@register_daemon_form
    -class Monitoring(DaemonForm):
    -    """Define the configs for the monitoring containers"""
    -
    -    port_map = {
    -        'prometheus': [9095],  # Avoid default 9090, due to conflict with cockpit UI
    -        'node-exporter': [9100],
    -        'grafana': [3000],
    -        'alertmanager': [9093, 9094],
    -        'loki': [3100],
    -        'promtail': [9080]
    -    }
    -
    -    components = {
    -        'prometheus': {
    -            'image': DEFAULT_PROMETHEUS_IMAGE,
    -            'cpus': '2',
    -            'memory': '4GB',
    -            'args': [
    -                '--config.file=/etc/prometheus/prometheus.yml',
    -                '--storage.tsdb.path=/prometheus',
    -            ],
    -            'config-json-files': [
    -                'prometheus.yml',
    -            ],
    -        },
    -        'loki': {
    -            'image': DEFAULT_LOKI_IMAGE,
    -            'cpus': '1',
    -            'memory': '1GB',
    -            'args': [
    -                '--config.file=/etc/loki/loki.yml',
    -            ],
    -            'config-json-files': [
    -                'loki.yml'
    -            ],
    -        },
    -        'promtail': {
    -            'image': DEFAULT_PROMTAIL_IMAGE,
    -            'cpus': '1',
    -            'memory': '1GB',
    -            'args': [
    -                '--config.file=/etc/promtail/promtail.yml',
    -            ],
    -            'config-json-files': [
    -                'promtail.yml',
    -            ],
    -        },
    -        'node-exporter': {
    -            'image': DEFAULT_NODE_EXPORTER_IMAGE,
    -            'cpus': '1',
    -            'memory': '1GB',
    -            'args': [
    -                '--no-collector.timex'
    -            ],
    -        },
    -        'grafana': {
    -            'image': DEFAULT_GRAFANA_IMAGE,
    -            'cpus': '2',
    -            'memory': '4GB',
    -            'args': [],
    -            'config-json-files': [
    -                'grafana.ini',
    -                'provisioning/datasources/ceph-dashboard.yml',
    -                'certs/cert_file',
    -                'certs/cert_key',
    -            ],
    -        },
    -        'alertmanager': {
    -            'image': DEFAULT_ALERT_MANAGER_IMAGE,
    -            'cpus': '2',
    -            'memory': '2GB',
    -            'args': [
    -                '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
    -            ],
    -            'config-json-files': [
    -                'alertmanager.yml',
    -            ],
    -            'config-json-args': [
    -                'peers',
    -            ],
    -        },
    -    }  # type: ignore
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return daemon_type in cls.components
    -
    -    @staticmethod
    -    def get_version(ctx, container_id, daemon_type):
    -        # type: (CephadmContext, str, str) -> str
    -        """
    -        :param: daemon_type Either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
    -        """
    -        assert daemon_type in ('prometheus', 'alertmanager', 'node-exporter', 'loki', 'promtail')
    -        cmd = daemon_type.replace('-', '_')
    -        code = -1
    -        err = ''
    -        out = ''
    -        version = ''
    -        if daemon_type == 'alertmanager':
    -            for cmd in ['alertmanager', 'prometheus-alertmanager']:
    -                out, err, code = call(ctx, [
    -                    ctx.container_engine.path, 'exec', container_id, cmd,
    -                    '--version'
    -                ], verbosity=CallVerbosity.QUIET)
    -                if code == 0:
    -                    break
    -            cmd = 'alertmanager'  # reset cmd for version extraction
    -        else:
    -            out, err, code = call(ctx, [
    -                ctx.container_engine.path, 'exec', container_id, cmd, '--version'
    -            ], verbosity=CallVerbosity.QUIET)
    -        if code == 0:
    -            if err.startswith('%s, version ' % cmd):
    -                version = err.split(' ')[2]
    -            elif out.startswith('%s, version ' % cmd):
    -                version = out.split(' ')[2]
    -        return version
    -
    -    def __init__(self, ident: DaemonIdentity) -> None:
    -        self._identity = ident
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Monitoring':
    -        return cls(ident)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return self._identity
    -
    -##################################
    -
    -
    -@register_daemon_form
    -class NFSGanesha(ContainerDaemonForm):
    -    """Defines a NFS-Ganesha container"""
    -
    -    daemon_type = 'nfs'
    -    entrypoint = '/usr/bin/ganesha.nfsd'
    -    daemon_args = ['-F', '-L', 'STDERR']
    -
    -    required_files = ['ganesha.conf']
    -
    -    port_map = {
    -        'nfs': 2049,
    -    }
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return cls.daemon_type == daemon_type
    -
    -    def __init__(self,
    -                 ctx,
    -                 fsid,
    -                 daemon_id,
    -                 config_json,
    -                 image=DEFAULT_IMAGE):
    -        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
    -        self.ctx = ctx
    -        self.fsid = fsid
    -        self.daemon_id = daemon_id
    -        self.image = image
    -
    -        # config-json options
    -        self.pool = dict_get(config_json, 'pool', require=True)
    -        self.namespace = dict_get(config_json, 'namespace')
    -        self.userid = dict_get(config_json, 'userid')
    -        self.extra_args = dict_get(config_json, 'extra_args', [])
    -        self.files = dict_get(config_json, 'files', {})
    -        self.rgw = dict_get(config_json, 'rgw', {})
    -
    -        # validate the supplied args
    -        self.validate()
    -
    -    @classmethod
    -    def init(cls, ctx, fsid, daemon_id):
    -        # type: (CephadmContext, str, Union[int, str]) -> NFSGanesha
    -        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'NFSGanesha':
    -        return cls.init(ctx, ident.fsid, ident.daemon_id)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    -
    -    def get_container_mounts(self, data_dir):
    -        # type: (str) -> Dict[str, str]
    -        mounts = dict()
    -        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
    -        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
    -        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
    -        if self.rgw:
    -            cluster = self.rgw.get('cluster', 'ceph')
    -            rgw_user = self.rgw.get('user', 'admin')
    -            mounts[os.path.join(data_dir, 'keyring.rgw')] = \
    -                '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
    -        return mounts
    -
    -    @staticmethod
    -    def get_container_envs():
    -        # type: () -> List[str]
    -        envs = [
    -            'CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)
    -        ]
    -        return envs
    -
    -    @staticmethod
    -    def get_version(ctx, container_id):
    -        # type: (CephadmContext, str) -> Optional[str]
    -        version = None
    -        out, err, code = call(ctx,
    -                              [ctx.container_engine.path, 'exec', container_id,
    -                               NFSGanesha.entrypoint, '-v'],
    -                              verbosity=CallVerbosity.QUIET)
    -        if code == 0:
    -            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
    -            if match:
    -                version = match.group(1)
    -        return version
    -
    -    def validate(self):
    -        # type: () -> None
    -        if not is_fsid(self.fsid):
    -            raise Error('not an fsid: %s' % self.fsid)
    -        if not self.daemon_id:
    -            raise Error('invalid daemon_id: %s' % self.daemon_id)
    -        if not self.image:
    -            raise Error('invalid image: %s' % self.image)
    -
    -        # check for the required files
    -        if self.required_files:
    -            for fname in self.required_files:
    -                if fname not in self.files:
    -                    raise Error('required file missing from config-json: %s' % fname)
    -
    -        # check for an RGW config
    -        if self.rgw:
    -            if not self.rgw.get('keyring'):
    -                raise Error('RGW keyring is missing')
    -            if not self.rgw.get('user'):
    -                raise Error('RGW user is missing')
    -
    -    def get_daemon_name(self):
    -        # type: () -> str
    -        return '%s.%s' % (self.daemon_type, self.daemon_id)
    -
    -    def get_container_name(self, desc=None):
    -        # type: (Optional[str]) -> str
    -        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    -        if desc:
    -            cname = '%s-%s' % (cname, desc)
    -        return cname
    -
    -    def get_daemon_args(self):
    -        # type: () -> List[str]
    -        return self.daemon_args + self.extra_args
    -
    -    def create_daemon_dirs(self, data_dir, uid, gid):
    -        # type: (str, int, int) -> None
    -        """Create files under the container data dir"""
    -        if not os.path.isdir(data_dir):
    -            raise OSError('data_dir is not a directory: %s' % (data_dir))
    -
    -        logger.info('Creating ganesha config...')
    -
    -        # create the ganesha conf dir
    -        config_dir = os.path.join(data_dir, 'etc/ganesha')
    -        makedirs(config_dir, uid, gid, 0o755)
    -
    -        # populate files from the config-json
    -        populate_files(config_dir, self.files, uid, gid)
    -
    -        # write the RGW keyring
    -        if self.rgw:
    -            keyring_path = os.path.join(data_dir, 'keyring.rgw')
    -            with write_new(keyring_path, owner=(uid, gid)) as f:
    -                f.write(self.rgw.get('keyring', ''))
    -
    -    def firewall_service_name(self) -> str:
    -        return 'nfs'
    -
    -    def container(self, ctx: CephadmContext) -> CephContainer:
    -        return get_deployment_container(ctx, self.identity)
    -
    -    def customize_container_endpoints(
    -        self, endpoints: List[EndPoint], deployment_type: DeploymentType
    -    ) -> None:
    -        if deployment_type == DeploymentType.DEFAULT and not endpoints:
    -            nfs_ports = list(NFSGanesha.port_map.values())
    -            endpoints.extend([EndPoint('0.0.0.0', p) for p in nfs_ports])
    -
    -    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    -        # TODO: extract ganesha uid/gid (997, 994) ?
    -        return extract_uid_gid(ctx)
    -
    -    def config_and_keyring(
    -        self, ctx: CephadmContext
    -    ) -> Tuple[Optional[str], Optional[str]]:
    -        return get_config_and_keyring(ctx)
    -
    -##################################
    -
    -
    -@register_daemon_form
    -class CephIscsi(DaemonForm):
    -    """Defines a Ceph-Iscsi container"""
    -
    -    daemon_type = 'iscsi'
    -    entrypoint = '/usr/bin/rbd-target-api'
    -
    -    required_files = ['iscsi-gateway.cfg']
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return cls.daemon_type == daemon_type
    -
    -    def __init__(self,
    -                 ctx,
    -                 fsid,
    -                 daemon_id,
    -                 config_json,
    -                 image=DEFAULT_IMAGE):
    -        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
    -        self.ctx = ctx
    -        self.fsid = fsid
    -        self.daemon_id = daemon_id
    -        self.image = image
    -
    -        # config-json options
    -        self.files = dict_get(config_json, 'files', {})
    -
    -        # validate the supplied args
    -        self.validate()
    -
    -    @classmethod
    -    def init(cls, ctx, fsid, daemon_id):
    -        # type: (CephadmContext, str, Union[int, str]) -> CephIscsi
    -        return cls(ctx, fsid, daemon_id,
    -                   fetch_configs(ctx), ctx.image)
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephIscsi':
    -        return cls.init(ctx, ident.fsid, ident.daemon_id)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    -
    -    @staticmethod
    -    def get_container_mounts(data_dir, log_dir):
    -        # type: (str, str) -> Dict[str, str]
    -        mounts = dict()
    -        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
    -        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
    -        mounts[os.path.join(data_dir, 'iscsi-gateway.cfg')] = '/etc/ceph/iscsi-gateway.cfg:z'
    -        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
    -        mounts[os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')] = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
    -        mounts[log_dir] = '/var/log:z'
    -        mounts['/dev'] = '/dev'
    -        return mounts
    -
    -    @staticmethod
    -    def get_container_binds():
    -        # type: () -> List[List[str]]
    -        binds = []
    -        lib_modules = ['type=bind',
    -                       'source=/lib/modules',
    -                       'destination=/lib/modules',
    -                       'ro=true']
    -        binds.append(lib_modules)
    -        return binds
    -
    -    @staticmethod
    -    def get_version(ctx, container_id):
    -        # type: (CephadmContext, str) -> Optional[str]
    -        version = None
    -        out, err, code = call(ctx,
    -                              [ctx.container_engine.path, 'exec', container_id,
    -                               '/usr/bin/python3', '-c',
    -                               "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"],
    -                              verbosity=CallVerbosity.QUIET)
    -        if code == 0:
    -            version = out.strip()
    -        return version
    -
    -    def validate(self):
    -        # type: () -> None
    -        if not is_fsid(self.fsid):
    -            raise Error('not an fsid: %s' % self.fsid)
    -        if not self.daemon_id:
    -            raise Error('invalid daemon_id: %s' % self.daemon_id)
    -        if not self.image:
    -            raise Error('invalid image: %s' % self.image)
    -
    -        # check for the required files
    -        if self.required_files:
    -            for fname in self.required_files:
    -                if fname not in self.files:
    -                    raise Error('required file missing from config-json: %s' % fname)
    -
    -    def get_daemon_name(self):
    -        # type: () -> str
    -        return '%s.%s' % (self.daemon_type, self.daemon_id)
    -
    -    def get_container_name(self, desc=None):
    -        # type: (Optional[str]) -> str
    -        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    -        if desc:
    -            cname = '%s-%s' % (cname, desc)
    -        return cname
    -
    -    def create_daemon_dirs(self, data_dir, uid, gid):
    -        # type: (str, int, int) -> None
    -        """Create files under the container data dir"""
    -        if not os.path.isdir(data_dir):
    -            raise OSError('data_dir is not a directory: %s' % (data_dir))
    -
    -        logger.info('Creating ceph-iscsi config...')
    -        configfs_dir = os.path.join(data_dir, 'configfs')
    -        makedirs(configfs_dir, uid, gid, 0o755)
    -
    -        # set up the tcmu-runner entrypoint script
    -        # to be mounted into the container. For more info
    -        # on why we need this script, see the
    -        # tcmu_runner_entrypoint_script function
    -        self.files['tcmu-runner-entrypoint.sh'] = self.tcmu_runner_entrypoint_script()
    -
    -        # populate files from the config-json
    -        populate_files(data_dir, self.files, uid, gid)
    -
    -        # we want the tcmu runner entrypoint script to be executable
    -        # populate_files will give it 0o600 by default
    -        os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700)
    -
    -    @staticmethod
    -    def configfs_mount_umount(data_dir, mount=True):
    -        # type: (str, bool) -> List[str]
    -        mount_path = os.path.join(data_dir, 'configfs')
    -        if mount:
    -            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
    -                  'mount -t configfs none {0}; fi'.format(mount_path)
    -        else:
    -            cmd = 'if grep -qs {0} /proc/mounts; then ' \
    -                  'umount {0}; fi'.format(mount_path)
    -        return cmd.split()
    -
    -    @staticmethod
    -    def tcmu_runner_entrypoint_script() -> str:
    -        # since we are having tcmu-runner be a background
    -        # process in its systemd unit (rbd-target-api being
    -        # the main process) systemd will not restart it when
    -        # it fails. in order to try and get around that for now
    -        # we can have a script mounted in the container that
    -        # that attempts to do the restarting for us. This script
    -        # can then become the entrypoint for the tcmu-runner
    -        # container
    -
    -        # This is intended to be dropped for a better solution
    -        # for at least the squid release onward
    -        return """#!/bin/bash
    -RUN_DIR=/var/run/tcmu-runner
    -
    -if [ ! -d "${RUN_DIR}" ] ; then
    -    mkdir -p "${RUN_DIR}"
    -fi
    -
    -rm -rf "${RUN_DIR}"/*
    -
    -while true
    -do
    -    touch "${RUN_DIR}"/start-up-$(date -Ins)
    -    /usr/bin/tcmu-runner
    -
    -    # If we got around 3 kills/segfaults in the last minute,
    -    # don't start anymore
    -    if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then
    -        exit 0
    -    fi
    -
    -    sleep 1
    -done
    -"""
    -
    -    def get_tcmu_runner_container(self):
    -        # type: () -> CephContainer
    -        # daemon_id, is used to generated the cid and pid files used by podman but as both tcmu-runner
    -        # and rbd-target-api have the same daemon_id, it conflits and prevent the second container from
    -        # starting. .tcmu runner is appended to the daemon_id to fix that.
    -        subident = DaemonSubIdentity(
    -            self.fsid, self.daemon_type, self.daemon_id, 'tcmu'
    -        )
    -        tcmu_container = get_deployment_container(self.ctx, subident)
    -        # TODO: Eventually we don't want to run tcmu-runner through this script.
    -        # This is intended to be a workaround backported to older releases
    -        # and should eventually be removed in at least squid onward
    -        tcmu_container.entrypoint = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
    -        tcmu_container.cname = self.get_container_name(desc='tcmu')
    -        return tcmu_container
    -
    -
    -##################################
    -
    -
    -@register_daemon_form
    -class CephNvmeof(DaemonForm):
    -    """Defines a Ceph-Nvmeof container"""
    -
    -    daemon_type = 'nvmeof'
    -    required_files = ['ceph-nvmeof.conf']
    -    default_image = DEFAULT_NVMEOF_IMAGE
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return cls.daemon_type == daemon_type
    -
    -    def __init__(self,
    -                 ctx,
    -                 fsid,
    -                 daemon_id,
    -                 config_json,
    -                 image=DEFAULT_NVMEOF_IMAGE):
    -        # type: (CephadmContext, str, Union[int, str], Dict, str) -> None
    -        self.ctx = ctx
    -        self.fsid = fsid
    -        self.daemon_id = daemon_id
    -        self.image = image
    -
    -        # config-json options
    -        self.files = dict_get(config_json, 'files', {})
    -
    -        # validate the supplied args
    -        self.validate()
    -
    -    @classmethod
    -    def init(cls, ctx, fsid, daemon_id):
    -        # type: (CephadmContext, str, Union[int, str]) -> CephNvmeof
    -        return cls(ctx, fsid, daemon_id,
    -                   fetch_configs(ctx), ctx.image)
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephNvmeof':
    -        return cls.init(ctx, ident.fsid, ident.daemon_id)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    -
    -    @staticmethod
    -    def get_container_mounts(data_dir: str) -> Dict[str, str]:
    -        mounts = dict()
    -        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
    -        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
    -        mounts[os.path.join(data_dir, 'ceph-nvmeof.conf')] = '/src/ceph-nvmeof.conf:z'
    -        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
    -        mounts['/dev/hugepages'] = '/dev/hugepages'
    -        mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
    -        return mounts
    -
    -    @staticmethod
    -    def get_container_binds():
    -        # type: () -> List[List[str]]
    -        binds = []
    -        lib_modules = ['type=bind',
    -                       'source=/lib/modules',
    -                       'destination=/lib/modules',
    -                       'ro=true']
    -        binds.append(lib_modules)
    -        return binds
    -
    -    @staticmethod
    -    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
    -        out, err, ret = call(ctx,
    -                             [ctx.container_engine.path, 'inspect',
    -                              '--format', '{{index .Config.Labels "io.ceph.version"}}',
    -                              ctx.image])
    -        version = None
    -        if ret == 0:
    -            version = out.strip()
    -        return version
    -
    -    def validate(self):
    -        # type: () -> None
    -        if not is_fsid(self.fsid):
    -            raise Error('not an fsid: %s' % self.fsid)
    -        if not self.daemon_id:
    -            raise Error('invalid daemon_id: %s' % self.daemon_id)
    -        if not self.image:
    -            raise Error('invalid image: %s' % self.image)
    -
    -        # check for the required files
    -        if self.required_files:
    -            for fname in self.required_files:
    -                if fname not in self.files:
    -                    raise Error('required file missing from config-json: %s' % fname)
    -
    -    def get_daemon_name(self):
    -        # type: () -> str
    -        return '%s.%s' % (self.daemon_type, self.daemon_id)
    -
    -    def get_container_name(self, desc=None):
    -        # type: (Optional[str]) -> str
    -        cname = '%s-%s' % (self.fsid, self.get_daemon_name())
    -        if desc:
    -            cname = '%s-%s' % (cname, desc)
    -        return cname
    -
    -    def create_daemon_dirs(self, data_dir, uid, gid):
    -        # type: (str, int, int) -> None
    -        """Create files under the container data dir"""
    -        if not os.path.isdir(data_dir):
    -            raise OSError('data_dir is not a directory: %s' % (data_dir))
    -
    -        logger.info('Creating ceph-nvmeof config...')
    -        configfs_dir = os.path.join(data_dir, 'configfs')
    -        makedirs(configfs_dir, uid, gid, 0o755)
    -
    -        # populate files from the config-json
    -        populate_files(data_dir, self.files, uid, gid)
    -
    -    @staticmethod
    -    def configfs_mount_umount(data_dir, mount=True):
    -        # type: (str, bool) -> List[str]
    -        mount_path = os.path.join(data_dir, 'configfs')
    -        if mount:
    -            cmd = 'if ! grep -qs {0} /proc/mounts; then ' \
    -                  'mount -t configfs none {0}; fi'.format(mount_path)
    -        else:
    -            cmd = 'if grep -qs {0} /proc/mounts; then ' \
    -                  'umount {0}; fi'.format(mount_path)
    -        return cmd.split()
    -
    -    @staticmethod
    -    def get_sysctl_settings() -> List[str]:
    -        return [
    -            'vm.nr_hugepages = 4096',
    -        ]
    -
    -
    -##################################
    -
    -
    -@register_daemon_form
    -class CephExporter(DaemonForm):
    -    """Defines a Ceph exporter container"""
    -
    -    daemon_type = 'ceph-exporter'
    -    entrypoint = '/usr/bin/ceph-exporter'
    -    DEFAULT_PORT = 9926
    -    port_map = {
    -        'ceph-exporter': DEFAULT_PORT,
    -    }
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return cls.daemon_type == daemon_type
    -
    -    def __init__(self,
    -                 ctx: CephadmContext,
    -                 fsid: str, daemon_id: Union[int, str],
    -                 config_json: Dict[str, Any],
    -                 image: str = DEFAULT_IMAGE) -> None:
    -        self.ctx = ctx
    -        self.fsid = fsid
    -        self.daemon_id = daemon_id
    -        self.image = image
    -
    -        self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
    -        ipv4_addrs, _ = get_ip_addresses(get_hostname())
    -        addrs = '0.0.0.0' if ipv4_addrs else '::'
    -        self.addrs = config_json.get('addrs', addrs)
    -        self.port = config_json.get('port', self.DEFAULT_PORT)
    -        self.prio_limit = config_json.get('prio-limit', 5)
    -        self.stats_period = config_json.get('stats-period', 5)
    -
    -        self.validate()
    -
    -    @classmethod
    -    def init(cls, ctx: CephadmContext, fsid: str,
    -             daemon_id: Union[int, str]) -> 'CephExporter':
    -        return cls(ctx, fsid, daemon_id,
    -                   fetch_configs(ctx), ctx.image)
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CephExporter':
    -        return cls.init(ctx, ident.fsid, ident.daemon_id)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    -
    -    @staticmethod
    -    def get_container_mounts() -> Dict[str, str]:
    -        mounts = dict()
    -        mounts['/var/run/ceph'] = '/var/run/ceph:z'
    -        return mounts
    -
    -    def get_daemon_args(self) -> List[str]:
    -        args = [
    -            f'--sock-dir={self.sock_dir}',
    -            f'--addrs={self.addrs}',
    -            f'--port={self.port}',
    -            f'--prio-limit={self.prio_limit}',
    -            f'--stats-period={self.stats_period}',
    -        ]
    -        return args
    -
    -    def validate(self) -> None:
    -        if not os.path.isdir(self.sock_dir):
    -            raise Error(f'Directory does not exist. Got: {self.sock_dir}')
    -
    -
    -##################################
    -
    -
    -@register_daemon_form
    -class HAproxy(DaemonForm):
    -    """Defines an HAproxy container"""
    -    daemon_type = 'haproxy'
    -    required_files = ['haproxy.cfg']
    -    default_image = DEFAULT_HAPROXY_IMAGE
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return cls.daemon_type == daemon_type
    -
    -    def __init__(self,
    -                 ctx: CephadmContext,
    -                 fsid: str, daemon_id: Union[int, str],
    -                 config_json: Dict, image: str) -> None:
    -        self.ctx = ctx
    -        self.fsid = fsid
    -        self.daemon_id = daemon_id
    -        self.image = image
    -
    -        # config-json options
    -        self.files = dict_get(config_json, 'files', {})
    -
    -        self.validate()
    -
    -    @classmethod
    -    def init(cls, ctx: CephadmContext,
    -             fsid: str, daemon_id: Union[int, str]) -> 'HAproxy':
    -        return cls(ctx, fsid, daemon_id, fetch_configs(ctx),
    -                   ctx.image)
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'HAproxy':
    -        return cls.init(ctx, ident.fsid, ident.daemon_id)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    -
    -    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    -        """Create files under the container data dir"""
    -        if not os.path.isdir(data_dir):
    -            raise OSError('data_dir is not a directory: %s' % (data_dir))
    -
    -        # create additional directories in data dir for HAproxy to use
    -        if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
    -            makedirs(os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE)
    -
    -        data_dir = os.path.join(data_dir, 'haproxy')
    -        populate_files(data_dir, self.files, uid, gid)
    -
    -    def get_daemon_args(self) -> List[str]:
    -        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
    -
    -    def validate(self):
    -        # type: () -> None
    -        if not is_fsid(self.fsid):
    -            raise Error('not an fsid: %s' % self.fsid)
    -        if not self.daemon_id:
    -            raise Error('invalid daemon_id: %s' % self.daemon_id)
    -        if not self.image:
    -            raise Error('invalid image: %s' % self.image)
    -
    -        # check for the required files
    -        if self.required_files:
    -            for fname in self.required_files:
    -                if fname not in self.files:
    -                    raise Error('required file missing from config-json: %s' % fname)
    -
    -    def get_daemon_name(self):
    -        # type: () -> str
    -        return '%s.%s' % (self.daemon_type, self.daemon_id)
    -
    -    def get_container_name(self, desc=None):
    -        # type: (Optional[str]) -> str
    -        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    -        if desc:
    -            cname = '%s-%s' % (cname, desc)
    -        return cname
    -
    -    def extract_uid_gid_haproxy(self) -> Tuple[int, int]:
    -        # better directory for this?
    -        return extract_uid_gid(self.ctx, file_path='/var/lib')
    -
    -    @staticmethod
    -    def get_container_mounts(data_dir: str) -> Dict[str, str]:
    -        mounts = dict()
    -        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
    -        return mounts
    -
    -    @staticmethod
    -    def get_sysctl_settings() -> List[str]:
    -        return [
    -            '# IP forwarding and non-local bind',
    -            'net.ipv4.ip_forward = 1',
    -            'net.ipv4.ip_nonlocal_bind = 1',
    -        ]
    -
    -##################################
    -
    -
    -@register_daemon_form
    -class Keepalived(DaemonForm):
    -    """Defines an Keepalived container"""
    -    daemon_type = 'keepalived'
    -    required_files = ['keepalived.conf']
    -    default_image = DEFAULT_KEEPALIVED_IMAGE
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return cls.daemon_type == daemon_type
    -
    -    def __init__(self,
    -                 ctx: CephadmContext,
    -                 fsid: str, daemon_id: Union[int, str],
    -                 config_json: Dict, image: str) -> None:
    -        self.ctx = ctx
    -        self.fsid = fsid
    -        self.daemon_id = daemon_id
    -        self.image = image
    -
    -        # config-json options
    -        self.files = dict_get(config_json, 'files', {})
    -
    -        self.validate()
    -
    -    @classmethod
    -    def init(cls, ctx: CephadmContext, fsid: str,
    -             daemon_id: Union[int, str]) -> 'Keepalived':
    -        return cls(ctx, fsid, daemon_id,
    -                   fetch_configs(ctx), ctx.image)
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Keepalived':
    -        return cls.init(ctx, ident.fsid, ident.daemon_id)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    -
    -    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    -        """Create files under the container data dir"""
    -        if not os.path.isdir(data_dir):
    -            raise OSError('data_dir is not a directory: %s' % (data_dir))
    -
    -        # create additional directories in data dir for keepalived to use
    -        if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
    -            makedirs(os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE)
    -
    -        # populate files from the config-json
    -        populate_files(data_dir, self.files, uid, gid)
    -
    -    def validate(self):
    -        # type: () -> None
    -        if not is_fsid(self.fsid):
    -            raise Error('not an fsid: %s' % self.fsid)
    -        if not self.daemon_id:
    -            raise Error('invalid daemon_id: %s' % self.daemon_id)
    -        if not self.image:
    -            raise Error('invalid image: %s' % self.image)
    -
    -        # check for the required files
    -        if self.required_files:
    -            for fname in self.required_files:
    -                if fname not in self.files:
    -                    raise Error('required file missing from config-json: %s' % fname)
    -
    -    def get_daemon_name(self):
    -        # type: () -> str
    -        return '%s.%s' % (self.daemon_type, self.daemon_id)
    -
    -    def get_container_name(self, desc=None):
    -        # type: (Optional[str]) -> str
    -        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    -        if desc:
    -            cname = '%s-%s' % (cname, desc)
    -        return cname
    -
    -    @staticmethod
    -    def get_container_envs():
    -        # type: () -> List[str]
    -        envs = [
    -            'KEEPALIVED_AUTOCONF=false',
    -            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
    -            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
    -            'KEEPALIVED_DEBUG=false'
    -        ]
    -        return envs
    -
    -    @staticmethod
    -    def get_sysctl_settings() -> List[str]:
    -        return [
    -            '# IP forwarding and non-local bind',
    -            'net.ipv4.ip_forward = 1',
    -            'net.ipv4.ip_nonlocal_bind = 1',
    -        ]
    -
    -    def extract_uid_gid_keepalived(self) -> Tuple[int, int]:
    -        # better directory for this?
    -        return extract_uid_gid(self.ctx, file_path='/var/lib')
    -
    -    @staticmethod
    -    def get_container_mounts(data_dir: str) -> Dict[str, str]:
    -        mounts = dict()
    -        mounts[os.path.join(data_dir, 'keepalived.conf')] = '/etc/keepalived/keepalived.conf'
    -        return mounts
    -
    -##################################
    -
    -
    -@register_daemon_form
    -class Tracing(DaemonForm):
    -    """Define the configs for the jaeger tracing containers"""
    -
    -    components: Dict[str, Dict[str, Any]] = {
    -        'elasticsearch': {
    -            'image': DEFAULT_ELASTICSEARCH_IMAGE,
    -            'envs': ['discovery.type=single-node']
    -        },
    -        'jaeger-agent': {
    -            'image': DEFAULT_JAEGER_AGENT_IMAGE,
    -        },
    -        'jaeger-collector': {
    -            'image': DEFAULT_JAEGER_COLLECTOR_IMAGE,
    -        },
    -        'jaeger-query': {
    -            'image': DEFAULT_JAEGER_QUERY_IMAGE,
    -        },
    -    }  # type: ignore
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return daemon_type in cls.components
    -
    -    @staticmethod
    -    def set_configuration(config: Dict[str, str], daemon_type: str) -> None:
    -        if daemon_type in ['jaeger-collector', 'jaeger-query']:
    -            assert 'elasticsearch_nodes' in config
    -            Tracing.components[daemon_type]['envs'] = [
    -                'SPAN_STORAGE_TYPE=elasticsearch',
    -                f'ES_SERVER_URLS={config["elasticsearch_nodes"]}']
    -        if daemon_type == 'jaeger-agent':
    -            assert 'collector_nodes' in config
    -            Tracing.components[daemon_type]['daemon_args'] = [
    -                f'--reporter.grpc.host-port={config["collector_nodes"]}',
    -                '--processor.jaeger-compact.server-host-port=6799'
    -            ]
    -
    -    def __init__(self, ident: DaemonIdentity) -> None:
    -        self._identity = ident
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Tracing':
    -        return cls(ident)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return self._identity
    -
    -##################################
    -
    -
    -@register_daemon_form
    -class CustomContainer(ContainerDaemonForm):
    -    """Defines a custom container"""
    -    daemon_type = 'container'
    -
    -    @classmethod
    -    def for_daemon_type(cls, daemon_type: str) -> bool:
    -        return cls.daemon_type == daemon_type
    -
    -    def __init__(self,
    -                 fsid: str, daemon_id: Union[int, str],
    -                 config_json: Dict, image: str) -> None:
    -        self.fsid = fsid
    -        self.daemon_id = daemon_id
    -        self.image = image
    -
    -        # config-json options
    -        self.entrypoint = dict_get(config_json, 'entrypoint')
    -        self.uid = dict_get(config_json, 'uid', 65534)  # nobody
    -        self.gid = dict_get(config_json, 'gid', 65534)  # nobody
    -        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
    -        self.args = dict_get(config_json, 'args', [])
    -        self.envs = dict_get(config_json, 'envs', [])
    -        self.privileged = dict_get(config_json, 'privileged', False)
    -        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
    -        self.ports = dict_get(config_json, 'ports', [])
    -        self.dirs = dict_get(config_json, 'dirs', [])
    -        self.files = dict_get(config_json, 'files', {})
    -
    -    @classmethod
    -    def init(cls, ctx: CephadmContext,
    -             fsid: str, daemon_id: Union[int, str]) -> 'CustomContainer':
    -        return cls(fsid, daemon_id,
    -                   fetch_configs(ctx), ctx.image)
    -
    -    @classmethod
    -    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'CustomContainer':
    -        return cls.init(ctx, ident.fsid, ident.daemon_id)
    -
    -    @property
    -    def identity(self) -> DaemonIdentity:
    -        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    -
    -    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    -        """
    -        Create dirs/files below the container data directory.
    -        """
    -        logger.info('Creating custom container configuration '
    -                    'dirs/files in {} ...'.format(data_dir))
    -
    -        if not os.path.isdir(data_dir):
    -            raise OSError('data_dir is not a directory: %s' % data_dir)
    -
    -        for dir_path in self.dirs:
    -            logger.info('Creating directory: {}'.format(dir_path))
    -            dir_path = os.path.join(data_dir, dir_path.strip('/'))
    -            makedirs(dir_path, uid, gid, 0o755)
    -
    -        for file_path in self.files:
    -            logger.info('Creating file: {}'.format(file_path))
    -            content = dict_get_join(self.files, file_path)
    -            file_path = os.path.join(data_dir, file_path.strip('/'))
    -            with write_new(file_path, owner=(uid, gid), encoding='utf-8') as f:
    -                f.write(content)
    -
    -    def get_daemon_args(self) -> List[str]:
    -        return []
    -
    -    def get_container_args(self) -> List[str]:
    -        return self.args
    -
    -    def get_container_envs(self) -> List[str]:
    -        return self.envs
    -
    -    def get_container_mounts(self, data_dir: str) -> Dict[str, str]:
    -        """
    -        Get the volume mounts. Relative source paths will be located below
    -        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
    -
    -        Example:
    -        {
    -            /foo/conf: /conf
    -            foo/conf: /conf
    -        }
    -        becomes
    -        {
    -            /foo/conf: /conf
    -            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
    -        }
    -        """
    -        mounts = {}
    -        for source, destination in self.volume_mounts.items():
    -            source = os.path.join(data_dir, source)
    -            mounts[source] = destination
    -        return mounts
    -
    -    def get_container_binds(self, data_dir: str) -> List[List[str]]:
    -        """
    -        Get the bind mounts. Relative `source=...` paths will be located below
    -        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
    -
    -        Example:
    -        [
    -            'type=bind',
    -            'source=lib/modules',
    -            'destination=/lib/modules',
    -            'ro=true'
    -        ]
    -        becomes
    -        [
    -            ...
    -            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
    -            ...
    -        ]
    -        """
    -        binds = self.bind_mounts.copy()
    -        for bind in binds:
    -            for index, value in enumerate(bind):
    -                match = re.match(r'^source=(.+)$', value)
    -                if match:
    -                    bind[index] = 'source={}'.format(os.path.join(
    -                        data_dir, match.group(1)))
    -        return binds
    -
    -    # Cache the container so we don't need to rebuild it again when calling
    -    # into init_containers
    -    _container: Optional[CephContainer] = None
    -
    -    def container(self, ctx: CephadmContext) -> CephContainer:
    -        if self._container is None:
    -            self._container = get_deployment_container(
    -                ctx,
    -                self.identity,
    -                privileged=self.privileged,
    -                ptrace=ctx.allow_ptrace,
    -            )
    -        return self._container
    -
    -    def init_containers(self, ctx: CephadmContext) -> List[InitContainer]:
    -        primary = self.container(ctx)
    -        init_containers: List[Dict[str, Any]] = getattr(
    -            ctx, 'init_containers', []
    -        )
    -        return [
    -            InitContainer.from_primary_and_opts(ctx, primary, ic_opts)
    -            for ic_opts in init_containers
    -        ]
    -
    -    def customize_container_endpoints(
    -        self, endpoints: List[EndPoint], deployment_type: DeploymentType
    -    ) -> None:
    -        if deployment_type == DeploymentType.DEFAULT:
    -            endpoints.extend([EndPoint('0.0.0.0', p) for p in self.ports])
    -
    -    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    -        return self.uid, self.gid
    -
    -
    -##################################
    -
    -
     def get_supported_daemons():
         # type: () -> List[str]
    -    supported_daemons = list(Ceph.daemons)
    +    supported_daemons = ceph_daemons()
         supported_daemons.extend(Monitoring.components)
         supported_daemons.append(NFSGanesha.daemon_type)
         supported_daemons.append(CephIscsi.daemon_type)
    @@ -1574,16 +234,17 @@ def get_supported_daemons():
         supported_daemons.append(Keepalived.daemon_type)
         supported_daemons.append(CephadmAgent.daemon_type)
         supported_daemons.append(SNMPGateway.daemon_type)
    +    supported_daemons.append(MgmtGateway.daemon_type)
    +    supported_daemons.append(OAuth2Proxy.daemon_type)
         supported_daemons.extend(Tracing.components)
    +    supported_daemons.append(NodeProxy.daemon_type)
    +    supported_daemons.append(SMB.daemon_type)
         assert len(supported_daemons) == len(set(supported_daemons))
         return supported_daemons
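The trailing assert is a cheap guard against two daemon forms accidentally registering the same type string. A throwaway illustration (made-up names, not part of the patch) of what it catches:

    supported = ['mon', 'mgr', 'osd', 'haproxy', 'haproxy']  # 'haproxy' registered twice
    has_duplicates = len(supported) != len(set(supported))
    print(has_duplicates)  # True -> the assert above would fire at runtime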
     
     ##################################
     
     
    -##################################
    -
    -
     def json_loads_retry(cli_func: Callable[[], str]) -> Any:
         for sleep_secs in [1, 4, 4]:
             try:
    @@ -1811,6 +472,10 @@ def update_default_image(ctx: CephadmContext) -> None:
                 ctx.image = Keepalived.default_image
             if type_ == SNMPGateway.daemon_type:
                 ctx.image = SNMPGateway.default_image
    +        if type_ == MgmtGateway.daemon_type:
    +            ctx.image = MgmtGateway.default_image
    +        if type_ == OAuth2Proxy.daemon_type:
    +            ctx.image = OAuth2Proxy.default_image
             if type_ == CephNvmeof.daemon_type:
                 ctx.image = CephNvmeof.default_image
             if type_ in Tracing.components:
    @@ -1834,17 +499,66 @@ def daemon_name_or_type(daemon: Dict[str, str]) -> str:
         if by_name and '.' not in daemon_filter:
             logger.warning(f'Trying to get container info using invalid daemon name {daemon_filter}')
             return None
    -    daemons = list_daemons(ctx, detail=False)
    -    matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
    +    if by_name:
    +        matching_daemons = _get_matching_daemons_by_name(ctx, daemon_filter)
    +    else:
    +        # NOTE: we pass detail=False here because, when not matching by_name,
    +        # we really only need the names of the daemons. Additionally, when not
    +        # doing it by_name we are getting info for all daemons on the host, and
    +        # doing that with detail=True tends to be slow.
    +        daemons = list_daemons(ctx, detail=False)
    +        matching_daemons = [d for d in daemons if daemon_name_or_type(d) == daemon_filter and d['fsid'] == ctx.fsid]
         if matching_daemons:
    -        d_type, d_id = matching_daemons[0]['name'].split('.', 1)
    -        out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
    -        if not code:
    -            (container_id, image_name, image_id, start, version) = out.strip().split(',')
    -            return ContainerInfo(container_id, image_name, image_id, start, version)
    +        if (
    +            by_name
    +            and 'state' in matching_daemons[0]
    +            and matching_daemons[0]['state'] != 'running'
    +            and 'container_image_name' in matching_daemons[0]
    +            and matching_daemons[0]['container_image_name']
    +        ):
    +            # this daemon container is not running so the regular `podman/docker inspect` on the
    +            # container will not help us. If we have the image name from the list_daemons output
    +            # we can try that.
    +            image_name = matching_daemons[0]['container_image_name']
    +            out, _, code = get_container_stats_by_image_name(ctx, ctx.container_engine.path, image_name)
    +            if not code:
    +                # keep in mind, the daemon container is not running, so no container id here
    +                (image_id, start, version) = out.strip().split(',')
    +                return ContainerInfo(
    +                    container_id='',
    +                    image_name=image_name,
    +                    image_id=image_id,
    +                    start=start,
    +                    version=version)
    +        else:
    +            d_type, d_id = matching_daemons[0]['name'].split('.', 1)
    +            out, _, code = get_container_stats(ctx, ctx.container_engine.path, ctx.fsid, d_type, d_id)
    +            if not code:
    +                (container_id, image_name, image_id, start, version) = out.strip().split(',')
    +                return ContainerInfo(container_id, image_name, image_id, start, version)
         return None
     
     
    +def _get_matching_daemons_by_name(ctx: CephadmContext, daemon_filter: str) -> List[Dict[str, str]]:
    +    # NOTE: we are not passing detail=False to this list_daemons call
    +    # because we want the container_image name when matching by name,
    +    # and that field is skipped when detail=False
    +    matching_daemons = list_daemons(ctx, daemon_name=daemon_filter)
    +    if len(matching_daemons) > 1:
    +        logger.warning(f'Found multiple daemons sharing same name: {daemon_filter}')
    +        # Take the first daemon we find that is actually running, or just the
    +        # first in the list if none are running
    +        matched_daemon = None
    +        for d in matching_daemons:
    +            if 'state' in d and d['state'] == 'running':
    +                matched_daemon = d
    +                break
    +        if not matched_daemon:
    +            matched_daemon = matching_daemons[0]
    +        matching_daemons = [matched_daemon]
    +    return matching_daemons
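A minimal, self-contained sketch (hypothetical data, not part of the patch) of the selection rule implemented above: prefer the first matching daemon reported as running, otherwise fall back to the first match.

    candidates = [
        {'name': 'rgw.foo.host1', 'state': 'stopped'},
        {'name': 'rgw.foo.host1', 'state': 'running'},
    ]
    matched = next((d for d in candidates if d.get('state') == 'running'), candidates[0])
    print(matched['state'])  # running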
    +
    +
     def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional[str]:
         """
          Infer the local ceph image based on the following priority criteria:
    @@ -1870,7 +584,7 @@ def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional
     
         container_info = None
         daemon_name = ctx.name if ('name' in ctx and ctx.name and '.' in ctx.name) else None
    -    daemons_ls = [daemon_name] if daemon_name is not None else Ceph.daemons  # daemon types: 'mon', 'mgr', etc
    +    daemons_ls = [daemon_name] if daemon_name is not None else ceph_daemons()  # daemon types: 'mon', 'mgr', etc
         for daemon in daemons_ls:
             container_info = get_container_info(ctx, daemon, daemon_name is not None)
             if container_info is not None:
    @@ -1885,6 +599,8 @@ def infer_local_ceph_image(ctx: CephadmContext, container_path: str) -> Optional
                 if digest and not digest.endswith('@'):
                     logger.info(f"Using ceph image with id '{image_id}' and tag '{tag}' created on {created_date}\n{digest}")
                     return digest
    +    if container_info is not None:
    +        logger.warning(f"Not using image '{container_info.image_id}' as it's not in list of non-dangling images with ceph=True label")
         return None
     
     
    @@ -2013,7 +729,7 @@ def get_unit_name(
         return DaemonIdentity(fsid, daemon_type, daemon_id).unit_name
     
     
    -def get_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
    +def lookup_unit_name_by_daemon_name(ctx: CephadmContext, fsid: str, name: str) -> str:
         daemon = get_daemon_description(ctx, fsid, name)
         try:
             return daemon['systemd_unit']
    @@ -2042,117 +758,6 @@ def get_legacy_daemon_fsid(ctx, cluster,
         return fsid
     
     
    -def get_daemon_args(ctx: CephadmContext, ident: 'DaemonIdentity') -> List[str]:
    -    r = list()  # type: List[str]
    -
    -    daemon_type = ident.daemon_type
    -    if daemon_type in Ceph.daemons and daemon_type not in ['crash', 'ceph-exporter']:
    -        r += [
    -            '--setuser', 'ceph',
    -            '--setgroup', 'ceph',
    -            '--default-log-to-file=false',
    -        ]
    -        log_to_journald = should_log_to_journald(ctx)
    -        if log_to_journald:
    -            r += [
    -                '--default-log-to-journald=true',
    -                '--default-log-to-stderr=false',
    -            ]
    -        else:
    -            r += [
    -                '--default-log-to-stderr=true',
    -                '--default-log-stderr-prefix=debug ',
    -            ]
    -        if daemon_type == 'mon':
    -            r += [
    -                '--default-mon-cluster-log-to-file=false',
    -            ]
    -            if log_to_journald:
    -                r += [
    -                    '--default-mon-cluster-log-to-journald=true',
    -                    '--default-mon-cluster-log-to-stderr=false',
    -                ]
    -            else:
    -                r += ['--default-mon-cluster-log-to-stderr=true']
    -    elif daemon_type in Monitoring.components:
    -        metadata = Monitoring.components[daemon_type]
    -        r += metadata.get('args', list())
    -        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
    -        if daemon_type not in ['grafana', 'loki', 'promtail']:
    -            ip = ''
    -            port = Monitoring.port_map[daemon_type][0]
    -            meta = fetch_meta(ctx)
    -            if meta:
    -                if 'ip' in meta and meta['ip']:
    -                    ip = meta['ip']
    -                if 'ports' in meta and meta['ports']:
    -                    port = meta['ports'][0]
    -            r += [f'--web.listen-address={ip}:{port}']
    -            if daemon_type == 'prometheus':
    -                config = fetch_configs(ctx)
    -                retention_time = config.get('retention_time', '15d')
    -                retention_size = config.get('retention_size', '0')  # default to disabled
    -                r += [f'--storage.tsdb.retention.time={retention_time}']
    -                r += [f'--storage.tsdb.retention.size={retention_size}']
    -                scheme = 'http'
    -                host = get_fqdn()
    -                # in case host is not an fqdn then we use the IP to
    -                # avoid producing a broken web.external-url link
    -                if '.' not in host:
    -                    ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname())
    -                    # use the first ipv4 (if any) otherwise use the first ipv6
    -                    addr = next(iter(ipv4_addrs or ipv6_addrs), None)
    -                    host = wrap_ipv6(addr) if addr else host
    -                r += [f'--web.external-url={scheme}://{host}:{port}']
    -        if daemon_type == 'alertmanager':
    -            config = fetch_configs(ctx)
    -            peers = config.get('peers', list())  # type: ignore
    -            for peer in peers:
    -                r += ['--cluster.peer={}'.format(peer)]
    -            try:
    -                r += [f'--web.config.file={config["web_config"]}']
    -            except KeyError:
    -                pass
    -            # some alertmanager, by default, look elsewhere for a config
    -            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
    -        if daemon_type == 'promtail':
    -            r += ['--config.expand-env']
    -        if daemon_type == 'prometheus':
    -            config = fetch_configs(ctx)
    -            try:
    -                r += [f'--web.config.file={config["web_config"]}']
    -            except KeyError:
    -                pass
    -        if daemon_type == 'node-exporter':
    -            config = fetch_configs(ctx)
    -            try:
    -                r += [f'--web.config.file={config["web_config"]}']
    -            except KeyError:
    -                pass
    -            r += ['--path.procfs=/host/proc',
    -                  '--path.sysfs=/host/sys',
    -                  '--path.rootfs=/rootfs']
    -    elif daemon_type == 'jaeger-agent':
    -        r.extend(Tracing.components[daemon_type]['daemon_args'])
    -    elif daemon_type == NFSGanesha.daemon_type:
    -        nfs_ganesha = NFSGanesha.init(ctx, ident.fsid, ident.daemon_id)
    -        r += nfs_ganesha.get_daemon_args()
    -    elif daemon_type == CephExporter.daemon_type:
    -        ceph_exporter = CephExporter.init(ctx, ident.fsid, ident.daemon_id)
    -        r.extend(ceph_exporter.get_daemon_args())
    -    elif daemon_type == HAproxy.daemon_type:
    -        haproxy = HAproxy.init(ctx, ident.fsid, ident.daemon_id)
    -        r += haproxy.get_daemon_args()
    -    elif daemon_type == CustomContainer.daemon_type:
    -        cc = CustomContainer.init(ctx, ident.fsid, ident.daemon_id)
    -        r.extend(cc.get_daemon_args())
    -    elif daemon_type == SNMPGateway.daemon_type:
    -        sc = SNMPGateway.init(ctx, ident.fsid, ident.daemon_id)
    -        r.extend(sc.get_daemon_args())
    -
    -    return r
    -
    -
     def create_daemon_dirs(
         ctx: CephadmContext,
         ident: 'DaemonIdentity',
    @@ -2165,7 +770,7 @@ def create_daemon_dirs(
         fsid, daemon_type = ident.fsid, ident.daemon_type
         data_dir = make_data_dir(ctx, ident, uid=uid, gid=gid)
     
    -    if daemon_type in Ceph.daemons:
    +    if daemon_type in ceph_daemons():
             make_log_dir(ctx, fsid, uid=uid, gid=gid)
     
         if config:
    @@ -2198,8 +803,10 @@ def create_daemon_dirs(
                 makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
                 makedirs(os.path.join(data_dir_root, config_dir, 'certs'), uid, gid, 0o755)
                 makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/datasources'), uid, gid, 0o755)
    -            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
    +            makedirs(os.path.join(data_dir_root, config_dir, 'provisioning/dashboards'), uid, gid, 0o755)
    +            makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o472)
                 touch(os.path.join(data_dir_root, 'data', 'grafana.db'), uid, gid)
    +            recursive_chown(os.path.join(data_dir_root, 'data'), uid, gid)
             elif daemon_type == 'alertmanager':
                 data_dir_root = ident.data_dir(ctx.data_dir)
                 config_dir = 'etc/alertmanager'
    @@ -2263,6 +870,27 @@ def create_daemon_dirs(
             sg = SNMPGateway.init(ctx, fsid, ident.daemon_id)
             sg.create_daemon_conf()
     
    +    elif daemon_type == MgmtGateway.daemon_type:
    +        cg = MgmtGateway.init(ctx, fsid, ident.daemon_id)
    +        cg.create_daemon_dirs(data_dir, uid, gid)
    +
    +    elif daemon_type == OAuth2Proxy.daemon_type:
    +        co = OAuth2Proxy.init(ctx, fsid, ident.daemon_id)
    +        co.create_daemon_dirs(data_dir, uid, gid)
    +
    +    elif daemon_type == NodeProxy.daemon_type:
    +        node_proxy = NodeProxy.init(ctx, fsid, ident.daemon_id)
    +        node_proxy.create_daemon_dirs(data_dir, uid, gid)
    +
    +    elif daemon_type == CephExporter.daemon_type:
    +        ceph_exporter = CephExporter.init(ctx, fsid, ident.daemon_id)
    +        ceph_exporter.create_daemon_dirs(data_dir, uid, gid)
    +
    +    else:
    +        daemon = daemon_form_create(ctx, ident)
    +        if isinstance(daemon, ContainerDaemonForm):
    +            daemon.prepare_data_dir(data_dir, uid, gid)
    +
         _write_custom_conf_files(ctx, ident, uid, gid)
     
     
    @@ -2301,17 +929,10 @@ def _write_custom_conf_files(
     def get_container_binds(
         ctx: CephadmContext, ident: 'DaemonIdentity'
     ) -> List[List[str]]:
    -    binds = list()
    -
    -    if ident.daemon_type == CephIscsi.daemon_type:
    -        binds.extend(CephIscsi.get_container_binds())
    -    if ident.daemon_type == CephNvmeof.daemon_type:
    -        binds.extend(CephNvmeof.get_container_binds())
    -    elif ident.daemon_type == CustomContainer.daemon_type:
    -        cc = CustomContainer.init(ctx, ident.fsid, ident.daemon_id)
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        binds.extend(cc.get_container_binds(data_dir))
    -
    +    binds: List[List[str]] = list()
    +    daemon = daemon_form_create(ctx, ident)
    +    assert isinstance(daemon, ContainerDaemonForm)
    +    daemon.customize_container_binds(ctx, binds)
         return binds
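The relative `source=` rewriting that the removed CustomContainer.get_container_binds performed (see the deleted class above) is easy to reproduce standalone; a sketch with a placeholder data_dir, purely for illustration:

    import os
    import re

    data_dir = '/var/lib/ceph/FSID/container.foo'  # FSID is a placeholder, not a real cluster id
    bind = ['type=bind', 'source=lib/modules', 'destination=/lib/modules', 'ro=true']
    for index, value in enumerate(bind):
        match = re.match(r'^source=(.+)$', value)
        if match:
            bind[index] = 'source={}'.format(os.path.join(data_dir, match.group(1)))
    # bind[1] is now 'source=/var/lib/ceph/FSID/container.foo/lib/modules'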
     
     
    @@ -2321,76 +942,11 @@ def get_container_mounts_for_type(
         """Return a dictionary mapping container-external paths to container-internal
         paths given an fsid and daemon_type.
         """
    -    mounts = _get_container_mounts_for_type(ctx, fsid, daemon_type)
    +    mounts = get_ceph_mounts_for_type(ctx, fsid, daemon_type)
         _update_podman_mounts(ctx, mounts)
         return mounts
     
     
    -def _get_container_mounts_for_type(
    -    ctx: CephadmContext, fsid: str, daemon_type: str
    -) -> Dict[str, str]:
    -    """The main implementation of get_container_mounts_for_type minus the call
    -    to _update_podman_mounts so that this can be called from
    -    get_container_mounts.
    -    """
    -    mounts = dict()
    -
    -    if daemon_type in Ceph.daemons:
    -        if fsid:
    -            run_path = os.path.join('/var/run/ceph', fsid)
    -            if os.path.exists(run_path):
    -                mounts[run_path] = '/var/run/ceph:z'
    -            log_dir = get_log_dir(fsid, ctx.log_dir)
    -            mounts[log_dir] = '/var/log/ceph:z'
    -            crash_dir = '/var/lib/ceph/%s/crash' % fsid
    -            if os.path.exists(crash_dir):
    -                mounts[crash_dir] = '/var/lib/ceph/crash:z'
    -            if daemon_type != 'crash' and should_log_to_journald(ctx):
    -                journald_sock_dir = '/run/systemd/journal'
    -                mounts[journald_sock_dir] = journald_sock_dir
    -
    -    if daemon_type in ['mon', 'osd', 'clusterless-ceph-volume']:
    -        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
    -        mounts['/run/udev'] = '/run/udev'
    -    if daemon_type in ['osd', 'clusterless-ceph-volume']:
    -        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
    -        mounts['/run/lvm'] = '/run/lvm'
    -        mounts['/run/lock/lvm'] = '/run/lock/lvm'
    -    if daemon_type == 'osd':
    -        # selinux-policy in the container may not match the host.
    -        if HostFacts(ctx).selinux_enabled:
    -            cluster_dir = f'{ctx.data_dir}/{fsid}'
    -            selinux_folder = f'{cluster_dir}/selinux'
    -            if os.path.exists(cluster_dir):
    -                if not os.path.exists(selinux_folder):
    -                    os.makedirs(selinux_folder, mode=0o755)
    -                mounts[selinux_folder] = '/sys/fs/selinux:ro'
    -            else:
    -                logger.error(f'Cluster direcotry {cluster_dir} does not exist.')
    -        mounts['/'] = '/rootfs'
    -
    -    try:
    -        if ctx.shared_ceph_folder:  # make easy manager modules/ceph-volume development
    -            ceph_folder = pathify(ctx.shared_ceph_folder)
    -            if os.path.exists(ceph_folder):
    -                cephadm_binary = ceph_folder + '/src/cephadm/cephadm'
    -                if not os.path.exists(pathify(cephadm_binary)):
    -                    raise Error("cephadm binary does not exist. Please run './build.sh cephadm' from ceph/src/cephadm/ directory.")
    -                mounts[cephadm_binary] = '/usr/sbin/cephadm'
    -                mounts[ceph_folder + '/src/ceph-volume/ceph_volume'] = '/usr/lib/python3.6/site-packages/ceph_volume'
    -                mounts[ceph_folder + '/src/pybind/mgr'] = '/usr/share/ceph/mgr'
    -                mounts[ceph_folder + '/src/python-common/ceph'] = '/usr/lib/python3.6/site-packages/ceph'
    -                mounts[ceph_folder + '/monitoring/ceph-mixin/dashboards_out'] = '/etc/grafana/dashboards/ceph-dashboard'
    -                mounts[ceph_folder + '/monitoring/ceph-mixin/prometheus_alerts.yml'] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
    -            else:
    -                logger.error(
    -                    'Ceph shared source folder does not exist.',
    -                    extra=Highlight.FAILURE.extra())
    -    except AttributeError:
    -        pass
    -    return mounts
    -
    -
     def get_container_mounts(
         ctx: CephadmContext, ident: 'DaemonIdentity', no_config: bool = False
     ) -> Dict[str, str]:
    @@ -2398,82 +954,20 @@ def get_container_mounts(
         paths given a daemon identity.
         Setting `no_config` will skip mapping a daemon specific ceph.conf file.
         """
    -    # unpack fsid and daemon_type from ident because they're used very frequently
    -    fsid, daemon_type = ident.fsid, ident.daemon_type
    -    mounts = get_container_mounts_for_type(ctx, fsid, daemon_type)
    +    # unpack daemon_type from ident because it's used very frequently
    +    daemon_type = ident.daemon_type
    +    mounts: Dict[str, str] = {}
     
         assert ident.fsid
         assert ident.daemon_id
    -    if daemon_type in Ceph.daemons:
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        if daemon_type == 'rgw':
    -            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (ident.daemon_id)
    -        else:
    -            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (daemon_type, ident.daemon_id)
    -        if daemon_type != 'crash':
    -            mounts[data_dir] = cdata_dir + ':z'
    -        if not no_config:
    -            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
    -        if daemon_type in ['rbd-mirror', 'cephfs-mirror', 'crash', 'ceph-exporter']:
    -            # these do not search for their keyrings in a data directory
    -            mounts[data_dir + '/keyring'] = '/etc/ceph/ceph.client.%s.%s.keyring' % (daemon_type, ident.daemon_id)
    -
    -    if daemon_type in Monitoring.components:
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        log_dir = get_log_dir(fsid, ctx.log_dir)
    -        if daemon_type == 'prometheus':
    -            mounts[os.path.join(data_dir, 'etc/prometheus')] = '/etc/prometheus:Z'
    -            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
    -        elif daemon_type == 'loki':
    -            mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
    -            mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
    -        elif daemon_type == 'promtail':
    -            mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
    -            mounts[log_dir] = '/var/log/ceph:z'
    -            mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
    -        elif daemon_type == 'node-exporter':
    -            mounts[os.path.join(data_dir, 'etc/node-exporter')] = '/etc/node-exporter:Z'
    -            mounts['/proc'] = '/host/proc:ro'
    -            mounts['/sys'] = '/host/sys:ro'
    -            mounts['/'] = '/rootfs:ro'
    -        elif daemon_type == 'grafana':
    -            mounts[os.path.join(data_dir, 'etc/grafana/grafana.ini')] = '/etc/grafana/grafana.ini:Z'
    -            mounts[os.path.join(data_dir, 'etc/grafana/provisioning/datasources')] = '/etc/grafana/provisioning/datasources:Z'
    -            mounts[os.path.join(data_dir, 'etc/grafana/certs')] = '/etc/grafana/certs:Z'
    -            mounts[os.path.join(data_dir, 'data/grafana.db')] = '/var/lib/grafana/grafana.db:Z'
    -        elif daemon_type == 'alertmanager':
    -            mounts[os.path.join(data_dir, 'etc/alertmanager')] = '/etc/alertmanager:Z'
    -
    -    if daemon_type == NFSGanesha.daemon_type:
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        nfs_ganesha = NFSGanesha.init(ctx, fsid, ident.daemon_id)
    -        mounts.update(nfs_ganesha.get_container_mounts(data_dir))
    -
    -    if daemon_type == HAproxy.daemon_type:
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        mounts.update(HAproxy.get_container_mounts(data_dir))
    -
    -    if daemon_type == CephNvmeof.daemon_type:
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        mounts.update(CephNvmeof.get_container_mounts(data_dir))
    -
    -    if daemon_type == CephIscsi.daemon_type:
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        # Removes ending ".tcmu" from data_dir a tcmu-runner uses the same data_dir
    -        # as rbd-runner-api
    -        if data_dir.endswith('.tcmu'):
    -            data_dir = re.sub(r'\.tcmu$', '', data_dir)
    -        log_dir = get_log_dir(fsid, ctx.log_dir)
    -        mounts.update(CephIscsi.get_container_mounts(data_dir, log_dir))
    -
    -    if daemon_type == Keepalived.daemon_type:
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        mounts.update(Keepalived.get_container_mounts(data_dir))
    -
    -    if daemon_type == CustomContainer.daemon_type:
    -        cc = CustomContainer.init(ctx, fsid, ident.daemon_id)
    -        data_dir = ident.data_dir(ctx.data_dir)
    -        mounts.update(cc.get_container_mounts(data_dir))
    +    # Ceph daemon types are special cased here because of the no_config
    +    # option which JJM thinks is *only* used by cephadm shell
    +    if daemon_type in ceph_daemons():
    +        mounts = Ceph.get_ceph_mounts(ctx, ident, no_config=no_config)
    +    else:
    +        daemon = daemon_form_create(ctx, ident)
    +        assert isinstance(daemon, ContainerDaemonForm)
    +        daemon.customize_container_mounts(ctx, mounts)
     
         _update_podman_mounts(ctx, mounts)
         return mounts
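For non-Ceph daemon types the mounts now come from the form object itself. A rough sketch, assuming only the customize_container_mounts(ctx, mounts) hook called above (this is not one of the real cephadm classes):

    class ExampleMountsForm:
        """Illustrative only: contributes a single data-dir mount via the hook."""

        def __init__(self, data_dir: str) -> None:
            self.data_dir = data_dir

        def customize_container_mounts(self, ctx, mounts: dict) -> None:
            # expose the daemon's config directory inside the container
            mounts[self.data_dir + '/etc'] = '/etc/example:Z'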
    @@ -2481,17 +975,8 @@ def get_container_mounts(
     
     def _update_podman_mounts(ctx: CephadmContext, mounts: Dict[str, str]) -> None:
         """Update the given mounts dict with mounts specific to podman."""
    -    # Modifications podman makes to /etc/hosts causes issues with
    -    # certain daemons (specifically referencing "host.containers.internal" entry
    -    # being added to /etc/hosts in this case). To avoid that, but still
    -    # allow users to use /etc/hosts for hostname resolution, we can
    -    # mount the host's /etc/hosts file.
    -    # https://tracker.ceph.com/issues/58532
    -    # https://tracker.ceph.com/issues/57018
         if isinstance(ctx.container_engine, Podman):
    -        if os.path.exists('/etc/hosts'):
    -            if '/etc/hosts' not in mounts:
    -                mounts['/etc/hosts'] = '/etc/hosts:ro'
    +        ctx.container_engine.update_mounts(ctx, mounts)
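The podman-specific /etc/hosts handling documented in the comments removed above now lives behind the engine's update_mounts() hook; presumably it keeps doing what the deleted inline code did, roughly:

    import os

    def update_mounts_sketch(mounts: dict) -> None:
        # Mount the host's /etc/hosts read-only so the host.containers.internal
        # entry podman adds does not confuse daemons
        # (https://tracker.ceph.com/issues/58532, https://tracker.ceph.com/issues/57018).
        if os.path.exists('/etc/hosts') and '/etc/hosts' not in mounts:
            mounts['/etc/hosts'] = '/etc/hosts:ro'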
     
     
     def get_ceph_volume_container(ctx: CephadmContext,
    @@ -2521,189 +1006,27 @@ def get_ceph_volume_container(ctx: CephadmContext,
         )
     
     
    -def set_pids_limit_unlimited(ctx: CephadmContext, container_args: List[str]) -> None:
    -    # set container's pids-limit to unlimited rather than default (Docker 4096 / Podman 2048)
    -    # Useful for daemons like iscsi where the default pids-limit limits the number of luns
    -    # per iscsi target or rgw where increasing the rgw_thread_pool_size to a value near
    -    # the default pids-limit may cause the container to crash.
    -    if (
    -        isinstance(ctx.container_engine, Podman)
    -        and ctx.container_engine.version >= PIDS_LIMIT_UNLIMITED_PODMAN_VERSION
    -    ):
    -        container_args.append('--pids-limit=-1')
    -    else:
    -        container_args.append('--pids-limit=0')
    -
    -
     def get_container(
         ctx: CephadmContext,
         ident: 'DaemonIdentity',
    -    privileged: bool = False,
    -    ptrace: bool = False,
    -    container_args: Optional[List[str]] = None,
     ) -> 'CephContainer':
    -    entrypoint: str = ''
    -    name: str = ''
    -    ceph_args: List[str] = []
    -    envs: List[str] = []
    -    host_network: bool = True
    -
    -    daemon_type = ident.daemon_type
    -    if daemon_type in Ceph.daemons:
    -        envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
    -    if container_args is None:
    -        container_args = []
    -    if daemon_type in Ceph.daemons or daemon_type in Ceph.gateways:
    -        set_pids_limit_unlimited(ctx, container_args)
    -    if daemon_type in ['mon', 'osd']:
    -        # mon and osd need privileged in order for libudev to query devices
    -        privileged = True
    -    if daemon_type == 'rgw':
    -        entrypoint = '/usr/bin/radosgw'
    -        name = 'client.rgw.%s' % ident.daemon_id
    -    elif daemon_type == 'rbd-mirror':
    -        entrypoint = '/usr/bin/rbd-mirror'
    -        name = 'client.rbd-mirror.%s' % ident.daemon_id
    -    elif daemon_type == 'cephfs-mirror':
    -        entrypoint = '/usr/bin/cephfs-mirror'
    -        name = 'client.cephfs-mirror.%s' % ident.daemon_id
    -    elif daemon_type == 'crash':
    -        entrypoint = '/usr/bin/ceph-crash'
    -        name = 'client.crash.%s' % ident.daemon_id
    -    elif daemon_type in ['mon', 'mgr', 'mds', 'osd']:
    -        entrypoint = '/usr/bin/ceph-' + daemon_type
    -        name = ident.daemon_name
    -    elif daemon_type in Monitoring.components:
    -        entrypoint = ''
    -    elif daemon_type in Tracing.components:
    -        entrypoint = ''
    -        name = ident.daemon_name
    -        config = fetch_configs(ctx)
    -        Tracing.set_configuration(config, daemon_type)
    -        envs.extend(Tracing.components[daemon_type].get('envs', []))
    -    elif daemon_type == NFSGanesha.daemon_type:
    -        entrypoint = NFSGanesha.entrypoint
    -        name = ident.daemon_name
    -        envs.extend(NFSGanesha.get_container_envs())
    -    elif daemon_type == CephExporter.daemon_type:
    -        entrypoint = CephExporter.entrypoint
    -        name = 'client.ceph-exporter.%s' % ident.daemon_id
    -    elif daemon_type == HAproxy.daemon_type:
    -        name = ident.daemon_name
    -        container_args.extend(['--user=root'])  # haproxy 2.4 defaults to a different user
    -    elif daemon_type == Keepalived.daemon_type:
    -        name = ident.daemon_name
    -        envs.extend(Keepalived.get_container_envs())
    -        container_args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
    -    elif daemon_type == CephNvmeof.daemon_type:
    -        name = ident.daemon_name
    -        container_args.extend(['--ulimit', 'memlock=-1:-1'])
    -        container_args.extend(['--ulimit', 'nofile=10240'])
    -        container_args.extend(['--cap-add=SYS_ADMIN', '--cap-add=CAP_SYS_NICE'])
    -    elif daemon_type == CephIscsi.daemon_type:
    -        entrypoint = CephIscsi.entrypoint
    -        name = ident.daemon_name
    -        # So the container can modprobe iscsi_target_mod and have write perms
    -        # to configfs we need to make this a privileged container.
    -        privileged = True
    -    elif daemon_type == CustomContainer.daemon_type:
    -        cc = CustomContainer.init(ctx, ident.fsid, ident.daemon_id)
    -        entrypoint = cc.entrypoint
    -        host_network = False
    -        envs.extend(cc.get_container_envs())
    -        container_args.extend(cc.get_container_args())
    -
    -    if daemon_type in Monitoring.components:
    -        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
    -        monitoring_args = [
    -            '--user',
    -            str(uid),
    -            # FIXME: disable cpu/memory limits for the time being (not supported
    -            # by ubuntu 18.04 kernel!)
    -        ]
    -        container_args.extend(monitoring_args)
    -        if daemon_type == 'node-exporter':
    -            # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
    -            # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
    -            # between the node-exporter container and the host to avoid selinux denials
    -            container_args.extend(['--security-opt', 'label=disable'])
    -    elif daemon_type == 'crash':
    -        ceph_args = ['-n', name]
    -    elif daemon_type in Ceph.daemons:
    -        ceph_args = ['-n', name, '-f']
    -    elif daemon_type == SNMPGateway.daemon_type:
    -        sg = SNMPGateway.init(ctx, ident.fsid, ident.daemon_id)
    -        container_args.append(
    -            f'--env-file={sg.conf_file_path}'
    -        )
    -
    -    # if using podman, set -d, --conmon-pidfile & --cidfile flags
    -    # so service can have Type=Forking
    -    if isinstance(ctx.container_engine, Podman):
    -        runtime_dir = '/run'
    -        service_name = f'{ident.unit_name}.service'
    -        container_args.extend([
    -            '-d', '--log-driver', 'journald',
    -            '--conmon-pidfile',
    -            f'{runtime_dir}/{service_name}-pid',
    -            '--cidfile',
    -            f'{runtime_dir}/{service_name}-cid',
    -        ])
    -        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION and not ctx.no_cgroups_split:
    -            container_args.append('--cgroups=split')
    -        # if /etc/hosts doesn't exist, we can be confident
    -        # users aren't using it for host name resolution
    -        # and adding --no-hosts avoids bugs created in certain daemons
    -        # by modifications podman makes to /etc/hosts
    -        # https://tracker.ceph.com/issues/58532
    -        # https://tracker.ceph.com/issues/57018
    -        if not os.path.exists('/etc/hosts'):
    -            container_args.extend(['--no-hosts'])
    -
    -    return CephContainer.for_daemon(
    -        ctx,
    -        ident=ident,
    -        entrypoint=entrypoint,
    -        args=ceph_args + get_daemon_args(ctx, ident),
    -        container_args=container_args,
    -        volume_mounts=get_container_mounts(ctx, ident),
    -        bind_mounts=get_container_binds(ctx, ident),
    -        envs=envs,
    -        privileged=privileged,
    -        ptrace=ptrace,
    -        host_network=host_network,
    +    daemon = daemon_form_create(ctx, ident)
    +    assert isinstance(daemon, ContainerDaemonForm)
    +    privileged = ident.daemon_type in {'mon', 'osd', CephIscsi.daemon_type}
    +    host_network = ident.daemon_type != CustomContainer.daemon_type
    +    return daemon_to_container(
    +        ctx, daemon, privileged=privileged, host_network=host_network
         )
     
     
    -def extract_uid_gid(ctx, img='', file_path='/var/lib/ceph'):
    -    # type: (CephadmContext, str, Union[str, List[str]]) -> Tuple[int, int]
    -
    -    if not img:
    -        img = ctx.image
    -
    -    if isinstance(file_path, str):
    -        paths = [file_path]
    -    else:
    -        paths = file_path
    -
    -    ex: Optional[Tuple[str, RuntimeError]] = None
    -
    -    for fp in paths:
    -        try:
    -            out = CephContainer(
    -                ctx,
    -                image=img,
    -                entrypoint='stat',
    -                args=['-c', '%u %g', fp]
    -            ).run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    -            uid, gid = out.split(' ')
    -            return int(uid), int(gid)
    -        except RuntimeError as e:
    -            ex = (fp, e)
    -    if ex:
    -        raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')
    -
    -    raise RuntimeError('uid/gid not found')
    +def _update_container_args_for_podman(
    +    ctx: CephadmContext, ident: DaemonIdentity, container_args: List[str]
    +) -> None:
    +    if not isinstance(ctx.container_engine, Podman):
    +        return
    +    container_args.extend(
    +        ctx.container_engine.service_args(ctx, ident.service_name)
    +    )
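The Podman run flags that the removed get_container body built inline (-d, journald logging, conmon pidfile and cidfile; see the deletions above) are now expected to come from container_engine.service_args(). Roughly the set the old code produced, with an illustrative unit name:

    service_name = 'ceph-FSID@mon.host1.service'  # illustrative only; FSID is a placeholder
    runtime_dir = '/run'
    podman_service_args = [
        '-d', '--log-driver', 'journald',
        '--conmon-pidfile', f'{runtime_dir}/{service_name}-pid',
        '--cidfile', f'{runtime_dir}/{service_name}-cid',
    ]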
     
     
     def deploy_daemon(
    @@ -2718,6 +1041,7 @@ def deploy_daemon(
         deployment_type: DeploymentType = DeploymentType.DEFAULT,
         endpoints: Optional[List[EndPoint]] = None,
         init_containers: Optional[List['InitContainer']] = None,
    +    sidecars: Optional[List[SidecarContainer]] = None,
     ) -> None:
         endpoints = endpoints or []
         daemon_type = ident.daemon_type
    @@ -2761,7 +1085,7 @@ def deploy_daemon(
                     '--fsid', ident.fsid,
                     '-c', '/tmp/config',
                     '--keyring', '/tmp/keyring',
    -            ] + get_daemon_args(ctx, ident),
    +            ] + Ceph.create(ctx, ident).get_daemon_args(),
                 volume_mounts={
                     log_dir: '/var/log/ceph:z',
                     mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (ident.daemon_id),
    @@ -2797,6 +1121,7 @@ def deploy_daemon(
                         osd_fsid=osd_fsid,
                         endpoints=endpoints,
                         init_containers=init_containers,
    +                    sidecars=sidecars,
                     )
                 else:
                     raise RuntimeError('attempting to deploy a daemon without a container image')
    @@ -2811,111 +1136,21 @@ def deploy_daemon(
         update_firewalld(ctx, daemon_form_create(ctx, ident))
     
         # Open ports explicitly required for the daemon
    -    if endpoints:
    -        fw = Firewalld(ctx)
    -        fw.open_ports([e.port for e in endpoints] + fw.external_ports.get(daemon_type, []))
    -        fw.apply_rules()
    +    if not ('skip_firewalld' in ctx and ctx.skip_firewalld):
    +        if endpoints:
    +            fw = Firewalld(ctx)
    +            fw.open_ports([e.port for e in endpoints] + fw.external_ports.get(daemon_type, []))
    +            fw.apply_rules()
     
         # If this was a reconfig and the daemon is not a Ceph daemon, restart it
         # so it can pick up potential changes to its configuration files
    -    if deployment_type == DeploymentType.RECONFIG and daemon_type not in Ceph.daemons:
    +    if deployment_type == DeploymentType.RECONFIG and daemon_type not in ceph_daemons():
             # ceph daemons do not need a restart; others (presumably) do to pick
             # up the new config
             call_throws(ctx, ['systemctl', 'reset-failed', ident.unit_name])
             call_throws(ctx, ['systemctl', 'restart', ident.unit_name])
     
     
    -def _bash_cmd(
    -    fh: IO[str],
    -    cmd: List[str],
    -    check: bool = True,
    -    background: bool = False,
    -    stderr: bool = True,
    -) -> None:
    -    line = ' '.join(shlex.quote(arg) for arg in cmd)
    -    if not check:
    -        line = f'! {line}'
    -    if not stderr:
    -        line = f'{line} 2> /dev/null'
    -    if background:
    -        line = f'{line} &'
    -    fh.write(line)
    -    fh.write('\n')
    -
    -
    -def _write_container_cmd_to_bash(
    -    ctx: CephadmContext,
    -    file_obj: IO[str],
    -    container: 'CephContainer',
    -    comment: Optional[str] = None,
    -    background: Optional[bool] = False,
    -) -> None:
    -    if comment:
    -        # Sometimes adding a comment, especially if there are multiple containers in one
    -        # unit file, makes it easier to read and grok.
    -        assert '\n' not in comment
    -        file_obj.write(f'# {comment}\n')
    -    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    -    _bash_cmd(
    -        file_obj, container.rm_cmd(old_cname=True), check=False, stderr=False
    -    )
    -    _bash_cmd(file_obj, container.rm_cmd(), check=False, stderr=False)
    -
    -    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    -    if isinstance(ctx.container_engine, Podman):
    -        _bash_cmd(
    -            file_obj,
    -            container.rm_cmd(storage=True),
    -            check=False,
    -            stderr=False,
    -        )
    -        _bash_cmd(
    -            file_obj,
    -            container.rm_cmd(old_cname=True, storage=True),
    -            check=False,
    -            stderr=False,
    -        )
    -
    -    # container run command
    -    _bash_cmd(file_obj, container.run_cmd(), background=bool(background))
    -
    -
    -def _write_init_container_cmds(
    -    ctx: CephadmContext,
    -    file_obj: IO[str],
    -    index: int,
    -    init_container: 'InitContainer',
    -) -> None:
    -    file_obj.write(f'# init container {index}: {init_container.cname}\n')
    -    _bash_cmd(file_obj, init_container.run_cmd())
    -    _write_init_container_cmds_clean(ctx, file_obj, init_container, comment='')
    -
    -
    -def _write_init_container_cmds_clean(
    -    ctx: CephadmContext,
    -    file_obj: IO[str],
    -    init_container: 'InitContainer',
    -    comment: str = 'init container cleanup',
    -) -> None:
    -    if comment:
    -        assert '\n' not in comment
    -        file_obj.write(f'# {comment}\n')
    -    _bash_cmd(
    -        file_obj,
    -        init_container.rm_cmd(),
    -        check=False,
    -        stderr=False,
    -    )
    -    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    -    if isinstance(ctx.container_engine, Podman):
    -        _bash_cmd(
    -            file_obj,
    -            init_container.rm_cmd(storage=True),
    -            check=False,
    -            stderr=False,
    -        )
    -
    -
     def clean_cgroup(ctx: CephadmContext, fsid: str, unit_name: str) -> None:
         # systemd may fail to cleanup cgroups from previous stopped unit, which will cause next "systemctl start" to fail.
         # see https://tracker.ceph.com/issues/50998
    @@ -2951,87 +1186,60 @@ def deploy_daemon_units(
         start: bool = True,
         osd_fsid: Optional[str] = None,
         endpoints: Optional[List[EndPoint]] = None,
    -    init_containers: Optional[List['InitContainer']] = None,
    -) -> None:
    -    # cmd
    -
    -    # unpack values from ident because they're used very frequently
    -    fsid = ident.fsid
    -    daemon_type = ident.daemon_type
    -    daemon_id = ident.daemon_id
    -
    +    init_containers: Optional[List[InitContainer]] = None,
    +    sidecars: Optional[List[SidecarContainer]] = None,
    +) -> None:
         data_dir = ident.data_dir(ctx.data_dir)
    -    run_file_path = data_dir + '/unit.run'
    -    meta_file_path = data_dir + '/unit.meta'
    -    with write_new(run_file_path) as f, write_new(meta_file_path) as metaf:
    -
    -        f.write('set -e\n')
    -
    -        if daemon_type in Ceph.daemons:
    -            install_path = find_program('install')
    -            f.write('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=fsid, uid=uid, gid=gid))
    +    pre_start_commands: List[runscripts.Command] = []
    +    post_stop_commands: List[runscripts.Command] = []
    +
    +    if ident.daemon_type in ceph_daemons():
    +        install_path = find_program('install')
    +        pre_start_commands.append('{install_path} -d -m0770 -o {uid} -g {gid} /var/run/ceph/{fsid}\n'.format(install_path=install_path, fsid=ident.fsid, uid=uid, gid=gid))
    +    if ident.daemon_type == 'osd':
    +        assert osd_fsid
    +        pre_start_commands.extend(_osd_unit_run_commands(
    +            ctx, ident, osd_fsid, data_dir, uid, gid
    +        ))
    +        post_stop_commands.extend(
    +            _osd_unit_poststop_commands(ctx, ident, osd_fsid)
    +        )
    +    if ident.daemon_type == CephIscsi.daemon_type:
    +        pre_start_commands.append(
    +            CephIscsi.configfs_mount_umount(data_dir, mount=True)
    +        )
    +        post_stop_commands.append(
    +            CephIscsi.configfs_mount_umount(data_dir, mount=False)
    +        )
     
    -        # pre-start cmd(s)
    -        if daemon_type == 'osd':
    -            assert osd_fsid
    -            _write_osd_unit_run_commands(
    -                ctx, f, ident, osd_fsid, data_dir, uid, gid
    -            )
    -        elif daemon_type == CephIscsi.daemon_type:
    -            _write_iscsi_unit_run_commands(ctx, f, ident, data_dir)
    -        init_containers = init_containers or []
    -        if init_containers:
    -            _write_init_container_cmds_clean(ctx, f, init_containers[0])
    -        for idx, ic in enumerate(init_containers):
    -            _write_init_container_cmds(ctx, f, idx, ic)
    -
    -        _write_container_cmd_to_bash(ctx, f, container, '%s.%s' % (daemon_type, str(daemon_id)))
    -
    -        # some metadata about the deploy
    -        meta: Dict[str, Any] = fetch_meta(ctx)
    -        meta.update({
    -            'memory_request': int(ctx.memory_request) if ctx.memory_request else None,
    -            'memory_limit': int(ctx.memory_limit) if ctx.memory_limit else None,
    -        })
    -        if not meta.get('ports'):
    -            if endpoints:
    -                meta['ports'] = [e.port for e in endpoints]
    -            else:
    -                meta['ports'] = []
    -        metaf.write(json.dumps(meta, indent=4) + '\n')
    -
    -    timeout = 30 if daemon_type == 'osd' else None
    -    # post-stop command(s)
    -    with write_new(data_dir + '/unit.poststop') as f:
    -        # this is a fallback to eventually stop any underlying container that was not stopped properly by unit.stop,
    -        # this could happen in very slow setups as described in the issue https://tracker.ceph.com/issues/58242.
    -        _write_stop_actions(ctx, cast(TextIO, f), container, timeout)
    -        if daemon_type == 'osd':
    -            assert osd_fsid
    -            _write_osd_unit_poststop_commands(ctx, f, ident, osd_fsid)
    -        elif daemon_type == CephIscsi.daemon_type:
    -            _write_iscsi_unit_poststop_commands(ctx, f, ident, data_dir)
    -
    -    # post-stop command(s)
    -    with write_new(data_dir + '/unit.stop') as f:
    -        _write_stop_actions(ctx, cast(TextIO, f), container, timeout)
    -
    -    if container:
    -        with write_new(data_dir + '/unit.image') as f:
    -            f.write(container.image + '\n')
    +    runscripts.write_service_scripts(
    +        ctx,
    +        ident,
    +        container=container,
    +        init_containers=init_containers,
    +        sidecars=sidecars,
    +        endpoints=endpoints,
    +        pre_start_commands=pre_start_commands,
    +        post_stop_commands=post_stop_commands,
    +        timeout=30 if ident.daemon_type == 'osd' else None,
    +    )
     
         # sysctl
    -    install_sysctl(ctx, fsid, daemon_form_create(ctx, ident))
    +    install_sysctl(ctx, ident.fsid, daemon_form_create(ctx, ident))
     
         # systemd
    -    install_base_units(ctx, fsid)
    -    unit = get_unit_file(ctx, fsid)
    -    unit_file = 'ceph-%s@.service' % (fsid)
    -    with write_new(ctx.unit_dir + '/' + unit_file, perms=None) as f:
    -        f.write(unit)
    +    ic_ids = [
    +        DaemonSubIdentity.must(ic.identity) for ic in init_containers or []
    +    ]
    +    sc_ids = [
    +        DaemonSubIdentity.must(sc.identity) for sc in sidecars or []
    +    ]
    +    systemd_unit.update_files(
    +        ctx, ident, init_container_ids=ic_ids, sidecar_ids=sc_ids
    +    )
         call_throws(ctx, ['systemctl', 'daemon-reload'])
     
    -    unit_name = get_unit_name(fsid, daemon_type, daemon_id)
    +    unit_name = get_unit_name(ident.fsid, ident.daemon_type, ident.daemon_id)
         call(ctx, ['systemctl', 'stop', unit_name],
              verbosity=CallVerbosity.DEBUG)
         call(ctx, ['systemctl', 'reset-failed', unit_name],
    @@ -3039,38 +1247,31 @@ def deploy_daemon_units(
         if enable:
             call_throws(ctx, ['systemctl', 'enable', unit_name])
         if start:
    -        clean_cgroup(ctx, fsid, unit_name)
    -        call_throws(ctx, ['systemctl', 'start', unit_name])
    -
    -
    -def _write_stop_actions(
    -    ctx: CephadmContext, f: TextIO, container: 'CephContainer', timeout: Optional[int]
    -) -> None:
    -    # following generated script basically checks if the container exists
    -    # before stopping it. Exit code will be success either if it doesn't
    -    # exist or if it exists and is stopped successfully.
    -    container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
    -    f.write(f'! {container_exists % container.old_cname} || {" ".join(container.stop_cmd(old_cname=True, timeout=timeout))} \n')
    -    f.write(f'! {container_exists % container.cname} || {" ".join(container.stop_cmd(timeout=timeout))} \n')
    +        clean_cgroup(ctx, ident.fsid, unit_name)
    +        try:
    +            call_throws(ctx, ['systemctl', 'start', unit_name])
    +        except Exception as e:
    +            logger.error(f'systemctl start failed for {unit_name}: {str(e)}')
    +            raise DaemonStartException()
     
     
    -def _write_osd_unit_run_commands(
    +def _osd_unit_run_commands(
         ctx: CephadmContext,
    -    f: IO,
         ident: 'DaemonIdentity',
         osd_fsid: str,
         data_dir: str,
         uid: int,
         gid: int,
    -) -> None:
    +) -> List[runscripts.Command]:
    +    cmds: List[runscripts.Command] = []
         # osds have a pre-start step
         simple_fn = os.path.join('/etc/ceph/osd',
                                  '%s-%s.json.adopted-by-cephadm' % (ident.daemon_id, osd_fsid))
         if os.path.exists(simple_fn):
    -        f.write('# Simple OSDs need chown on startup:\n')
    +        cmds.append('# Simple OSDs need chown on startup:\n')
             for n in ['block', 'block.db', 'block.wal']:
                 p = os.path.join(data_dir, n)
    -            f.write('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
    +            cmds.append('[ ! -L {p} ] || chown {uid}:{gid} {p}\n'.format(p=p, uid=uid, gid=gid))
         else:
             # if ceph-volume does not support 'ceph-volume activate', we must
             # do 'ceph-volume lvm activate'.
    @@ -3110,21 +1311,13 @@ def _write_osd_unit_run_commands(
                 bind_mounts=get_container_binds(ctx, ident),
                 cname='ceph-%s-%s.%s-activate' % (fsid, daemon_type, daemon_id),
             )
    -        _write_container_cmd_to_bash(ctx, f, prestart, 'LVM OSDs use ceph-volume lvm activate')
    -
    -
    -def _write_iscsi_unit_run_commands(
    -    ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', data_dir: str
    -) -> None:
    -    f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=True)) + '\n')
    -    ceph_iscsi = CephIscsi.init(ctx, ident.fsid, ident.daemon_id)
    -    tcmu_container = ceph_iscsi.get_tcmu_runner_container()
    -    _write_container_cmd_to_bash(ctx, f, tcmu_container, 'iscsi tcmu-runner container', background=True)
    +        cmds.append(runscripts.ContainerCommand(prestart, comment='LVM OSDs use ceph-volume lvm activate'))
    +    return cmds
     
     
    -def _write_osd_unit_poststop_commands(
    -    ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', osd_fsid: str
    -) -> None:
    +def _osd_unit_poststop_commands(
    +    ctx: CephadmContext, ident: 'DaemonIdentity', osd_fsid: str
    +) -> List[runscripts.Command]:
         poststop = get_ceph_volume_container(
             ctx,
             args=[
    @@ -3135,156 +1328,8 @@ def _write_osd_unit_poststop_commands(
             bind_mounts=get_container_binds(ctx, ident),
             cname='ceph-%s-%s.%s-deactivate' % (ident.fsid, ident.daemon_type, ident.daemon_id),
         )
    -    _write_container_cmd_to_bash(ctx, f, poststop, 'deactivate osd')
    -
    -
    -def _write_iscsi_unit_poststop_commands(
    -    ctx: CephadmContext, f: IO, ident: 'DaemonIdentity', data_dir: str
    -) -> None:
    -    # make sure we also stop the tcmu container
    -    runtime_dir = '/run'
    -    ceph_iscsi = CephIscsi.init(ctx, ident.fsid, ident.daemon_id)
    -    tcmu_container = ceph_iscsi.get_tcmu_runner_container()
    -    f.write('! ' + ' '.join(tcmu_container.stop_cmd()) + '\n')
    -    f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-pid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n')
    -    f.write('! ' + 'rm ' + runtime_dir + '/ceph-%s@%s.%s.service-cid' % (ident.fsid, ident.daemon_type, ident.daemon_id + '.tcmu') + '\n')
    -    f.write(' '.join(CephIscsi.configfs_mount_umount(data_dir, mount=False)) + '\n')
    -
    -
    -def install_base_units(ctx, fsid):
    -    # type: (CephadmContext, str) -> None
    -    """
    -    Set up ceph.target and ceph-$fsid.target units.
    -    """
    -    # global unit
    -    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    -    with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f:
    -        f.write('[Unit]\n'
    -                'Description=All Ceph clusters and services\n'
    -                '\n'
    -                '[Install]\n'
    -                'WantedBy=multi-user.target\n')
    -    if not existed:
    -        # we disable before enable in case a different ceph.target
    -        # (from the traditional package) is present; while newer
    -        # systemd is smart enough to disable the old
    -        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
    -        # some older versions of systemd error out with EEXIST.
    -        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
    -        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
    -        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])
    -
    -    # cluster unit
    -    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    -    with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f:
    -        f.write(
    -            '[Unit]\n'
    -            'Description=Ceph cluster {fsid}\n'
    -            'PartOf=ceph.target\n'
    -            'Before=ceph.target\n'
    -            '\n'
    -            '[Install]\n'
    -            'WantedBy=multi-user.target ceph.target\n'.format(
    -                fsid=fsid)
    -        )
    -    if not existed:
    -        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
    -        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])
    -
    -    # don't overwrite file in order to allow users to manipulate it
    -    if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'):
    -        return
    -
    -    # logrotate for the cluster
    -    with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f:
    -        """
    -        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
    -        in all containers, but I don't see an elegant way to send SIGHUP *just* to
    -        the daemons for this cluster.  (1) systemd kill -s will get the signal to
    -        podman, but podman will exit.  (2) podman kill will get the signal to the
    -        first child (bash), but that isn't the ceph daemon.  This is simpler and
    -        should be harmless.
    -        """
    -        targets: List[str] = [
    -            'ceph-mon',
    -            'ceph-mgr',
    -            'ceph-mds',
    -            'ceph-osd',
    -            'ceph-fuse',
    -            'radosgw',
    -            'rbd-mirror',
    -            'cephfs-mirror',
    -            'tcmu-runner'
    -        ]
    +    return [runscripts.ContainerCommand(poststop, comment='deactivate osd')]
     
    -        f.write("""# created by cephadm
    -/var/log/ceph/%s/*.log {
    -    rotate 7
    -    daily
    -    compress
    -    sharedscripts
    -    postrotate
    -        killall -q -1 %s || pkill -1 -x '%s' || true
    -    endscript
    -    missingok
    -    notifempty
    -    su root root
    -}
    -""" % (fsid, ' '.join(targets), '|'.join(targets)))
    -
    -
    -def get_unit_file(ctx, fsid):
    -    # type: (CephadmContext, str) -> str
    -    extra_args = ''
    -    if isinstance(ctx.container_engine, Podman):
    -        extra_args = ('ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
    -                      'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid\n'
    -                      'Type=forking\n'
    -                      'PIDFile=%t/%n-pid\n')
    -        if ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION:
    -            extra_args += 'Delegate=yes\n'
    -
    -    docker = isinstance(ctx.container_engine, Docker)
    -    u = """# generated by cephadm
    -[Unit]
    -Description=Ceph %i for {fsid}
    -
    -# According to:
    -#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
    -# these can be removed once ceph-mon will dynamically change network
    -# configuration.
    -After=network-online.target local-fs.target time-sync.target{docker_after}
    -Wants=network-online.target local-fs.target time-sync.target
    -{docker_requires}
    -
    -PartOf=ceph-{fsid}.target
    -Before=ceph-{fsid}.target
    -
    -[Service]
    -LimitNOFILE=1048576
    -LimitNPROC=1048576
    -EnvironmentFile=-/etc/environment
    -ExecStart=/bin/bash {data_dir}/{fsid}/%i/unit.run
    -ExecStop=-/bin/bash -c 'bash {data_dir}/{fsid}/%i/unit.stop'
    -ExecStopPost=-/bin/bash {data_dir}/{fsid}/%i/unit.poststop
    -KillMode=none
    -Restart=on-failure
    -RestartSec=10s
    -TimeoutStartSec=200
    -TimeoutStopSec=120
    -StartLimitInterval=30min
    -StartLimitBurst=5
    -{extra_args}
    -[Install]
    -WantedBy=ceph-{fsid}.target
    -""".format(fsid=fsid,
    -           data_dir=ctx.data_dir,
    -           extra_args=extra_args,
    -           # if docker, we depend on docker.service
    -           docker_after=' docker.service' if docker else '',
    -           docker_requires='Requires=docker.service\n' if docker else '')
    -
    -    return u
     
     ##################################
     
    @@ -3330,12 +1375,13 @@ def run(self) -> None:
                             conn.send(err_str.encode())
                             logger.error(err_str)
                         else:
    -                        conn.send(b'ACK')
    -                        if 'config' in data:
    -                            self.agent.wakeup()
    -                        self.agent.ls_gatherer.wakeup()
    -                        self.agent.volume_gatherer.wakeup()
    -                        logger.debug(f'Got mgr message {data}')
    +                        if 'counter' in data:
    +                            conn.send(b'ACK')
    +                            if 'config' in data:
    +                                self.agent.wakeup()
    +                            self.agent.ls_gatherer.wakeup()
    +                            self.agent.volume_gatherer.wakeup()
    +                            logger.debug(f'Got mgr message {data}')
                 except Exception as e:
                     logger.error(f'Mgr Listener encountered exception: {e}')
     
    @@ -3343,17 +1389,20 @@ def shutdown(self) -> None:
             self.stop = True
     
         def handle_json_payload(self, data: Dict[Any, Any]) -> None:
    -        self.agent.ack = int(data['counter'])
    -        if 'config' in data:
    -            logger.info('Received new config from mgr')
    -            config = data['config']
    -            for filename in config:
    -                if filename in self.agent.required_files:
    -                    file_path = os.path.join(self.agent.daemon_dir, filename)
    -                    with write_new(file_path) as f:
    -                        f.write(config[filename])
    -            self.agent.pull_conf_settings()
    -            self.agent.wakeup()
    +        if 'counter' in data:
    +            self.agent.ack = int(data['counter'])
    +            if 'config' in data:
    +                logger.info('Received new config from mgr')
    +                config = data['config']
    +                for filename in config:
    +                    if filename in self.agent.required_files:
    +                        file_path = os.path.join(self.agent.daemon_dir, filename)
    +                        with write_new(file_path) as f:
    +                            f.write(config[filename])
    +                self.agent.pull_conf_settings()
    +                self.agent.wakeup()
    +        else:
    +            raise RuntimeError('No valid data received.')
     
     
     @register_daemon_form
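For context, handle_json_payload() now only acts on payloads that carry a 'counter' field; a minimal sketch of the shape it accepts is below. The filename and values are illustrative, and only names present in the agent's required_files are actually written to disk.

    # Illustrative mgr payload: 'counter' is mandatory, 'config' is an optional
    # mapping of filename -> file contents.
    payload = {
        'counter': 7,
        'config': {
            'agent.json': '...file contents...',
        },
    }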
    @@ -3408,6 +1457,9 @@ def __init__(self, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str] =
             self.recent_iteration_run_times: List[float] = [0.0, 0.0, 0.0]
             self.recent_iteration_index: int = 0
             self.cached_ls_values: Dict[str, Dict[str, str]] = {}
    +        self.ssl_ctx = ssl.create_default_context()
    +        self.ssl_ctx.check_hostname = True
    +        self.ssl_ctx.verify_mode = ssl.CERT_REQUIRED
     
         def validate(self, config: Dict[str, str] = {}) -> None:
             # check for the required files
    @@ -3437,19 +1489,19 @@ def deploy_daemon_unit(self, config: Dict[str, str] = {}) -> None:
             with write_new(meta_file_path) as f:
                 f.write(json.dumps(meta, indent=4) + '\n')
     
    -        unit_file_path = os.path.join(self.ctx.unit_dir, self.unit_name())
    +        unit_file_path = os.path.join(self.ctx.unit_dir, self._service_name())
             with write_new(unit_file_path) as f:
                 f.write(self.unit_file())
     
             call_throws(self.ctx, ['systemctl', 'daemon-reload'])
    -        call(self.ctx, ['systemctl', 'stop', self.unit_name()],
    +        call(self.ctx, ['systemctl', 'stop', self._service_name()],
                  verbosity=CallVerbosity.DEBUG)
    -        call(self.ctx, ['systemctl', 'reset-failed', self.unit_name()],
    +        call(self.ctx, ['systemctl', 'reset-failed', self._service_name()],
                  verbosity=CallVerbosity.DEBUG)
    -        call_throws(self.ctx, ['systemctl', 'enable', '--now', self.unit_name()])
    +        call_throws(self.ctx, ['systemctl', 'enable', '--now', self._service_name()])
     
    -    def unit_name(self) -> str:
    -        return '{}.service'.format(get_unit_name(self.fsid, self.daemon_type, self.daemon_id))
    +    def _service_name(self) -> str:
    +        return self.identity.service_name
     
         def unit_run(self) -> str:
             py3 = shutil.which('python3')
    @@ -3457,24 +1509,8 @@ def unit_run(self) -> str:
             return ('set -e\n' + f'{py3} {binary_path} agent --fsid {self.fsid} --daemon-id {self.daemon_id} &\n')
     
         def unit_file(self) -> str:
    -        return """#generated by cephadm
    -[Unit]
    -Description=cephadm agent for cluster {fsid}
    -
    -PartOf=ceph-{fsid}.target
    -Before=ceph-{fsid}.target
    -
    -[Service]
    -Type=forking
    -ExecStart=/bin/bash {data_dir}/unit.run
    -Restart=on-failure
    -RestartSec=10s
    -
    -[Install]
    -WantedBy=ceph-{fsid}.target
    -""".format(
    -            fsid=self.fsid,
    -            data_dir=self.daemon_dir
    +        return templating.render(
    +            self.ctx, templating.Templates.agent_service, agent=self
             )
     
         def shutdown(self) -> None:
    @@ -3519,6 +1555,7 @@ def pull_conf_settings(self) -> None:
     
         def run(self) -> None:
             self.pull_conf_settings()
    +        self.ssl_ctx.load_verify_locations(self.ca_path)
     
             try:
                 for _ in range(1001):
    @@ -3540,11 +1577,6 @@ def run(self) -> None:
             if not self.volume_gatherer.is_alive():
                 self.volume_gatherer.start()
     
    -        ssl_ctx = ssl.create_default_context()
    -        ssl_ctx.check_hostname = True
    -        ssl_ctx.verify_mode = ssl.CERT_REQUIRED
    -        ssl_ctx.load_verify_locations(self.ca_path)
    -
             while not self.stop:
                 start_time = time.monotonic()
                 ack = self.ack
    @@ -3570,15 +1602,19 @@ def run(self) -> None:
                                    'port': self.listener_port})
                 data = data.encode('ascii')
     
    -            url = f'https://{self.target_ip}:{self.target_port}/data/'
                 try:
    -                req = Request(url, data, {'Content-Type': 'application/json'})
                     send_time = time.monotonic()
    -                with urlopen(req, context=ssl_ctx) as response:
    -                    response_str = response.read()
    -                    response_json = json.loads(response_str)
    -                    total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
    -                    logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
    +                status, response = http_query(addr=self.target_ip,
    +                                              port=self.target_port,
    +                                              data=data,
    +                                              endpoint='/data',
    +                                              ssl_ctx=self.ssl_ctx)
    +                if status != 200:
    +                    logger.error(f'HTTP error {status} while querying agent endpoint: {response}')
    +                    raise RuntimeError(f'non-200 response <{status}> from agent endpoint: {response}')
    +                response_json = json.loads(response)
    +                total_request_time = datetime.timedelta(seconds=(time.monotonic() - send_time)).total_seconds()
    +                logger.info(f'Received mgr response: "{response_json["result"]}" {total_request_time} seconds after sending request.')
                 except Exception as e:
                     logger.error(f'Failed to send metadata to mgr: {e}')
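A minimal sketch of a helper matching the http_query() call above, assuming it wraps urllib the same way the removed inline code did; the real helper imported by cephadm may differ:

    import ssl
    from typing import Tuple
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen

    def http_query(addr: str, port: str, data: bytes, endpoint: str,
                   ssl_ctx: ssl.SSLContext) -> Tuple[int, str]:
        # POST the JSON payload over verified TLS and return (status, body)
        # instead of raising, so the caller can inspect non-200 responses.
        url = f'https://{addr}:{port}{endpoint}/'
        req = Request(url, data, {'Content-Type': 'application/json'})
        try:
            with urlopen(req, context=ssl_ctx) as resp:
                return resp.status, resp.read().decode()
        except HTTPError as e:
            return e.code, str(e.reason)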
     
    @@ -3632,7 +1668,7 @@ def _daemon_ls_subset(self) -> Dict[str, Dict[str, Any]]:
             )
             name_id_mapping: Dict[str, str] = self._parse_container_id_name(code, out)
             for i in os.listdir(data_dir):
    -            if i in ['mon', 'osd', 'mds', 'mgr']:
    +            if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']:
                     daemon_type = i
                     for j in os.listdir(os.path.join(data_dir, i)):
                         if '-' not in j:
    @@ -3806,21 +1842,73 @@ def command_agent(ctx: CephadmContext) -> None:
     def command_version(ctx):
         # type: (CephadmContext) -> int
         import importlib
    +    import zipimport
    +    import types
     
    +    vmod: Optional[types.ModuleType]
    +    zmod: Optional[types.ModuleType]
         try:
    -        vmod = importlib.import_module('_version')
    +        vmod = importlib.import_module('_cephadmmeta.version')
    +        zmod = vmod
         except ImportError:
    -        print('cephadm version UNKNOWN')
    -        return 1
    -    _unset = ''
    -    print('cephadm version {0} ({1}) {2} ({3})'.format(
    -        getattr(vmod, 'CEPH_GIT_NICE_VER', _unset),
    -        getattr(vmod, 'CEPH_GIT_VER', _unset),
    -        getattr(vmod, 'CEPH_RELEASE_NAME', _unset),
    -        getattr(vmod, 'CEPH_RELEASE_TYPE', _unset),
    -    ))
    +        vmod = zmod = None
    +    if vmod is None:
    +        # fallback to earlier location
    +        try:
    +            vmod = importlib.import_module('_version')
    +        except ImportError:
    +            pass
    +    if zmod is None:
    +        # fallback to outer package, for zip import module
    +        try:
    +            zmod = importlib.import_module('_cephadmmeta')
    +        except ImportError:
    +            zmod = None
    +
    +    if not ctx.verbose:
    +        if vmod is None:
    +            print('cephadm version UNKNOWN')
    +            return 1
    +        _unset = ''
    +        print(
    +            'cephadm version {0} ({1}) {2} ({3})'.format(
    +                getattr(vmod, 'CEPH_GIT_NICE_VER', _unset),
    +                getattr(vmod, 'CEPH_GIT_VER', _unset),
    +                getattr(vmod, 'CEPH_RELEASE_NAME', _unset),
    +                getattr(vmod, 'CEPH_RELEASE_TYPE', _unset),
    +            )
    +        )
    +        return 0
    +
    +    out: Dict[str, Any] = {'name': 'cephadm'}
    +    ceph_vars = [
    +        'CEPH_GIT_NICE_VER',
    +        'CEPH_GIT_VER',
    +        'CEPH_RELEASE_NAME',
    +        'CEPH_RELEASE_TYPE',
    +    ]
    +    for var in ceph_vars:
    +        value = getattr(vmod, var, None)
    +        if value is not None:
    +            out[var.lower()] = value
    +
    +    loader = getattr(zmod, '__loader__', None)
    +    if loader and isinstance(loader, zipimport.zipimporter):
    +        try:
    +            deps_info = json.loads(loader.get_data('_cephadmmeta/deps.json'))
    +            out['bundled_packages'] = deps_info
    +        except OSError:
    +            pass
    +        files = getattr(loader, '_files', {})
    +        out['zip_root_entries'] = sorted(
    +            {p.split('/')[0] for p in files.keys()}
    +        )
    +
    +    json.dump(out, sys.stdout, indent=2)
    +    print()
         return 0
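The bundled-dependency lookup in the verbose branch can be reproduced standalone against a built cephadm zipapp; a rough sketch, where the path is only an example:

    import json
    import zipimport

    # Point this at a compiled cephadm zipapp (illustrative path).
    loader = zipimport.zipimporter('/usr/sbin/cephadm')
    try:
        deps = json.loads(loader.get_data('_cephadmmeta/deps.json'))
        print(json.dumps(deps, indent=2))
    except OSError:
        print('no bundled dependency metadata found')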
     
    +
     ##################################
     
     
    @@ -3847,14 +1935,7 @@ def _pull_image(ctx, image, insecure=False):
             'Digest did not match, expected',
         ]
     
    -    cmd = [ctx.container_engine.path, 'pull', image]
    -    if isinstance(ctx.container_engine, Podman):
    -        if insecure:
    -            cmd.append('--tls-verify=false')
    -
    -        if os.path.exists('/etc/ceph/podman-auth.json'):
    -            cmd.append('--authfile=/etc/ceph/podman-auth.json')
    -    cmd_str = ' '.join(cmd)
    +    cmd = pull_command(ctx, image, insecure=insecure)
     
         for sleep_secs in [1, 4, 25]:
             out, err, ret = call(ctx, cmd, verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    @@ -3864,6 +1945,7 @@ def _pull_image(ctx, image, insecure=False):
             if 'unauthorized' in err:
                 raise UnauthorizedRegistryError()
     
    +        cmd_str = ' '.join(cmd)
             if not any(pattern in err for pattern in ignorelist):
                 raise Error('Failed command: %s' % cmd_str)
     
    @@ -3912,11 +1994,15 @@ def get_image_info_from_inspect(out, image):
     def get_public_net_from_cfg(ctx: CephadmContext) -> Optional[str]:
         """Get mon public network from configuration file."""
         cp = read_config(ctx.config)
    -    if not cp.has_option('global', 'public_network'):
    +    public_network = ''
    +    if cp.has_option('mon', 'public_network'):
    +        public_network = cp.get('mon', 'public_network').strip('"').strip("'")
    +    elif cp.has_option('global', 'public_network'):
    +        public_network = cp.get('global', 'public_network').strip('"').strip("'")
    +    else:
             return None
     
         # Ensure all public CIDR networks are valid
    -    public_network = cp.get('global', 'public_network').strip('"').strip("'")
         rc, _, err_msg = check_subnet(public_network)
         if rc:
             raise Error(f'Invalid public_network {public_network} parameter: {err_msg}')
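In other words, a [mon] section value now takes precedence over [global]; with a bootstrap config along these lines (purely illustrative), 10.1.0.0/24 would be returned:

    [global]
    public_network = 10.0.0.0/24

    [mon]
    public_network = 10.1.0.0/24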
    @@ -4142,7 +2228,7 @@ def prepare_create_mon(
                 '-c', '/dev/null',
                 '--monmap', '/tmp/monmap',
                 '--keyring', '/tmp/keyring',
    -        ] + get_daemon_args(ctx, ident),
    +        ] + Ceph.create(ctx, ident).get_daemon_args(),
             volume_mounts={
                 log_dir: '/var/log/ceph:z',
                 mon_dir: '/var/lib/ceph/mon/ceph-%s:z' % (mon_id),
    @@ -4336,6 +2422,12 @@ def enable_cephadm_mgr_module(
         logger.info('Enabling cephadm module...')
         cli(['mgr', 'module', 'enable', 'cephadm'])
         wait_for_mgr_restart()
    +    # https://tracker.ceph.com/issues/67969
+    # luckily `ceph mgr module enable` returns a zero rc when the module
+    # is already enabled, so this is not an issue even if it is unnecessary
    +    logger.info('Verifying orchestrator module is enabled...')
    +    cli(['mgr', 'module', 'enable', 'orchestrator'])
         logger.info('Setting orchestrator backend to cephadm...')
         cli(['orch', 'set', 'backend', 'cephadm'])
     
    @@ -4362,11 +2454,23 @@ def prepare_dashboard(
                 pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
                 pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
             }
    -        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
    -        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
         else:
    -        logger.info('Generating a dashboard self-signed certificate...')
    -        cli(['dashboard', 'create-self-signed-cert'])
    +        logger.info('Using certmgr to generate dashboard self-signed certificate...')
    +        cert_key = json_loads_retry(lambda: cli(['orch', 'certmgr', 'generate-certificates', 'dashboard'],
    +                                                verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
    +        mounts = {}
    +        if cert_key:
    +            cert_file = write_tmp(cert_key['cert'], uid, gid)
    +            key_file = write_tmp(cert_key['key'], uid, gid)
    +            mounts = {
    +                cert_file.name: '/tmp/dashboard.crt:z',
    +                key_file.name: '/tmp/dashboard.key:z'
    +            }
    +        else:
    +            logger.error('Cannot generate certificates for Ceph dashboard.')
    +
    +    cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
    +    cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
     
         logger.info('Creating initial admin user...')
         password = ctx.initial_dashboard_password or generate_password()
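The certmgr call above is expected to parse into a mapping with at least 'cert' and 'key' entries, roughly as below; the PEM bodies are elided and the exact headers are an assumption:

    cert_key = {
        'cert': '-----BEGIN CERTIFICATE-----\n...\n-----END CERTIFICATE-----\n',
        'key': '-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n',
    }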
    @@ -4415,6 +2519,14 @@ def prepare_bootstrap_config(
         ):
             cp.set('mon', 'auth_allow_insecure_global_id_reclaim', 'false')
     
    +    if not cp.has_section('osd'):
    +        cp.add_section('osd')
    +    if (
    +            not cp.has_option('osd', 'osd_memory_target_autotune')
    +            and not cp.has_option('osd', 'osd memory target autotune')
    +    ):
    +        cp.set('osd', 'osd_memory_target_autotune', 'true')
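The net effect on the generated bootstrap config is a default along these lines, unless the option was already set by the user:

    [osd]
    osd_memory_target_autotune = true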
    +
         if ctx.single_host_defaults:
             logger.info('Adjusting default settings to suit single-host cluster...')
             # replicate across osds, not hosts
    @@ -4495,7 +2607,7 @@ def finish_bootstrap_config(
     
         if mon_network:
             cp = read_config(ctx.config)
    -        cfg_section = 'global' if cp.has_option('global', 'public_network') else 'mon'
    +        cfg_section = 'mon' if cp.has_option('mon', 'public_network') else 'global'
             logger.info(f'Setting public_network to {mon_network} in {cfg_section} config section')
             cli(['config', 'set', cfg_section, 'public_network', mon_network])
     
    @@ -4506,6 +2618,12 @@ def finish_bootstrap_config(
         if ipv6 or ipv6_cluster_network:
             logger.info('Enabling IPv6 (ms_bind_ipv6) binding')
             cli(['config', 'set', 'global', 'ms_bind_ipv6', 'true'])
    +        # note: Ceph does not fully support dual stack.
    +        # kernel clients: https://tracker.ceph.com/issues/49581
    +        # if we do not disable ipv4 binding, daemons will bind
    +        # to 0.0.0.0 and clients will misbehave.
    +        logger.info('Disabling IPv4 (ms_bind_ipv4) binding')
    +        cli(['config', 'set', 'global', 'ms_bind_ipv4', 'false'])
     
         with open(ctx.output_config, 'w') as f:
             f.write(config)
    @@ -4513,88 +2631,6 @@ def finish_bootstrap_config(
         pass
     
     
    -def _extract_host_info_from_applied_spec(f: Iterable[str]) -> List[Dict[str, str]]:
    -    # overall goal of this function is to go through an applied spec and find
    -    # the hostname (and addr is provided) for each host spec in the applied spec.
    -    # Generally, we should be able to just pass the spec to the mgr module where
    -    # proper yaml parsing can happen, but for host specs in particular we want to
    -    # be able to distribute ssh keys, which requires finding the hostname (and addr
    -    # if possible) for each potential host spec in the applied spec.
    -
    -    specs: List[List[str]] = []
    -    current_spec: List[str] = []
    -    for line in f:
    -        if re.search(r'^---\s+', line):
    -            if current_spec:
    -                specs.append(current_spec)
    -            current_spec = []
    -        else:
    -            line = line.strip()
    -            if line:
    -                current_spec.append(line)
    -    if current_spec:
    -        specs.append(current_spec)
    -
    -    host_specs: List[List[str]] = []
    -    for spec in specs:
    -        for line in spec:
    -            if 'service_type' in line:
    -                try:
    -                    _, type = line.split(':')
    -                    type = type.strip()
    -                    if type == 'host':
    -                        host_specs.append(spec)
    -                except ValueError as e:
    -                    spec_str = '\n'.join(spec)
    -                    logger.error(f'Failed to pull service_type from spec:\n{spec_str}. Got error: {e}')
    -                break
    -            spec_str = '\n'.join(spec)
    -            logger.error(f'Failed to find service_type within spec:\n{spec_str}')
    -
    -    host_dicts = []
    -    for s in host_specs:
    -        host_dict = _extract_host_info_from_spec(s)
    -        # if host_dict is empty here, we failed to pull the hostname
    -        # for the host from the spec. This should have already been logged
    -        # so at this point we just don't want to include it in our output
    -        if host_dict:
    -            host_dicts.append(host_dict)
    -
    -    return host_dicts
    -
    -
    -def _extract_host_info_from_spec(host_spec: List[str]) -> Dict[str, str]:
    -    # note:for our purposes here, we only really want the hostname
    -    # and address of the host from each of these specs in order to
    -    # be able to distribute ssh keys. We will later apply the spec
    -    # through the mgr module where proper yaml parsing can be done
    -    # The returned dicts from this function should only contain
    -    # one or two entries, one (required) for hostname, one (optional) for addr
    -    # {
    -    #   hostname: 
    -    #   addr: 
    -    # }
    -    # if we fail to find the hostname, an empty dict is returned
    -
    -    host_dict = {}  # type: Dict[str, str]
    -    for line in host_spec:
    -        for field in ['hostname', 'addr']:
    -            if field in line:
    -                try:
    -                    _, field_value = line.split(':')
    -                    field_value = field_value.strip()
    -                    host_dict[field] = field_value
    -                except ValueError as e:
    -                    spec_str = '\n'.join(host_spec)
    -                    logger.error(f'Error trying to pull {field} from host spec:\n{spec_str}. Got error: {e}')
    -
    -    if 'hostname' not in host_dict:
    -        spec_str = '\n'.join(host_spec)
    -        logger.error(f'Could not find hostname in host spec:\n{spec_str}')
    -        return {}
    -    return host_dict
    -
    -
     def _distribute_ssh_keys(ctx: CephadmContext, host_info: Dict[str, str], bootstrap_hostname: str) -> int:
         # copy ssh key to hosts in host spec (used for apply spec)
         ssh_key = CEPH_DEFAULT_PUBKEY
    @@ -4645,27 +2681,31 @@ def _rollback(ctx: CephadmContext) -> Any:
                 # another cluster with the provided fsid already exists: don't remove.
                 raise
             except (KeyboardInterrupt, Exception) as e:
    -            logger.error(f'{type(e).__name__}: {e}')
    -            if ctx.cleanup_on_failure:
+            # If ctx.fsid is None, this would print a meaningless message suggesting
+            # that the user run "cephadm rm-cluster --force --fsid None"
    +            if ctx.no_cleanup_on_failure and ctx.fsid is not None:
                     logger.info('\n\n'
                                 '\t***************\n'
    -                            '\tCephadm hit an issue during cluster installation. Current cluster files will be deleted automatically,\n'
    -                            '\tto disable this behaviour you can pass the --no-cleanup-on-failure flag. In case of any previous\n'
    -                            '\tbroken installation user must use the following command to completely delete the broken cluster:\n\n'
    -                            '\t> cephadm rm-cluster --force --zap-osds --fsid \n\n'
    +                            '\tCephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically. To change\n'
+                            '\tthis behaviour, do not pass the --no-cleanup-on-failure flag. To remove this broken cluster manually, please run:\n\n'
    +                            f'\t   > cephadm rm-cluster --force --fsid {ctx.fsid}\n\n'
+                            '\tIn case of any previous broken installation, users must use the rm-cluster command to delete the broken cluster:\n\n'
    +                            '\t   > cephadm rm-cluster --force --zap-osds --fsid \n\n'
                                 '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
                                 '\t***************\n\n')
    -                _rm_cluster(ctx, keep_logs=False, zap_osds=False)
    -            else:
    +            if not ctx.no_cleanup_on_failure:
+                # The logger.error() call used to happen before these conditions, which resulted in the error being printed twice.
+                # It was moved inside this condition to ensure the error is printed if _rm_cluster() is called and also fails.
    +                logger.error(f'{type(e).__name__}: {e}')
                     logger.info('\n\n'
                                 '\t***************\n'
    -                            '\tCephadm hit an issue during cluster installation. Current cluster files will NOT BE DELETED automatically to change\n'
    -                            '\tthis behaviour you can pass the --cleanup-on-failure. To remove this broken cluster manually please run:\n\n'
    -                            f'\t   > cephadm rm-cluster --force --fsid {ctx.fsid}\n\n'
    -                            '\tin case of any previous broken installation user must use the rm-cluster command to delete the broken cluster:\n\n'
    -                            '\t   > cephadm rm-cluster --force --zap-osds --fsid \n\n'
    +                            '\tCephadm hit an issue during cluster installation. Current cluster files will be deleted automatically.\n'
+                            '\tTo disable this behaviour, you can pass the --no-cleanup-on-failure flag. In case of any previous\n'
    +                            '\tbroken installation, users must use the following command to completely delete the broken cluster:\n\n'
    +                            '\t> cephadm rm-cluster --force --zap-osds --fsid \n\n'
                                 '\tfor more information please refer to https://docs.ceph.com/en/latest/cephadm/operations/#purging-a-cluster\n'
                                 '\t***************\n\n')
    +                _rm_cluster(ctx, keep_logs=False, zap_osds=False)
                 raise
         return cast(FuncT, _rollback)
     
    @@ -4684,6 +2724,13 @@ def command_bootstrap(ctx):
         if not ctx.output_pub_ssh_key:
             ctx.output_pub_ssh_key = os.path.join(ctx.output_dir, CEPH_PUBKEY)
     
    +    if ctx.apply_spec and not os.path.exists(ctx.apply_spec):
+        # Given that nothing has been deployed at this point, set `ctx.no_cleanup_on_failure = True`,
+        # as there's no need to call _rm_cluster(), which would otherwise generate the message:
    +        # "ERROR: must select the cluster to delete by passing --fsid to proceed"
    +        ctx.no_cleanup_on_failure = True
    +        raise Error(f"--apply-spec has been specified but {ctx.apply_spec} doesn't exist.")
    +
         if (
             (bool(ctx.ssh_private_key) is not bool(ctx.ssh_public_key))
             and (bool(ctx.ssh_private_key) is not bool(ctx.ssh_signed_cert))
    @@ -4726,7 +2773,12 @@ def command_bootstrap(ctx):
                 except PermissionError:
                     raise Error(f'Unable to create {dirname} due to permissions failure. Retry with root, or sudo or preallocate the directory.')
     
    -    (user_conf, _) = get_config_and_keyring(ctx)
    +    if getattr(ctx, 'custom_prometheus_alerts', None):
    +        ctx.custom_prometheus_alerts = os.path.abspath(ctx.custom_prometheus_alerts)
    +        if not os.path.isfile(ctx.custom_prometheus_alerts):
    +            raise Error(f'No custom prometheus alerts file found at {ctx.custom_prometheus_alerts}')
    +
    +    _, _ = get_config_and_keyring(ctx)
     
         if ctx.ssh_user != 'root':
             check_ssh_connectivity(ctx)
    @@ -4798,6 +2850,8 @@ def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.V
                 admin_keyring.name: '/etc/ceph/ceph.client.admin.keyring:z',
                 tmp_config.name: '/etc/ceph/ceph.conf:z',
             }
    +        if getattr(ctx, 'custom_prometheus_alerts', None):
    +            mounts[ctx.custom_prometheus_alerts] = '/etc/ceph/custom_alerts.yml:z'
             for k, v in extra_mounts.items():
                 mounts[k] = v
             timeout = timeout or ctx.timeout
    @@ -4824,18 +2878,17 @@ def cli(cmd, extra_mounts={}, timeout=DEFAULT_TIMEOUT, verbosity=CallVerbosity.V
         # create mgr
         create_mgr(ctx, uid, gid, fsid, mgr_id, mgr_key, config, cli)
     
    -    if user_conf:
    -        # user given config settings were already assimilated earlier
    -        # but if the given settings contained any attributes in
    -        # the mgr (e.g. mgr/cephadm/container_image_prometheus)
    -        # they don't seem to be stored if there isn't a mgr yet.
    -        # Since re-assimilating the same conf settings should be
    -        # idempotent we can just do it again here.
    -        with tempfile.NamedTemporaryFile(buffering=0) as tmp:
    -            tmp.write(user_conf.encode('utf-8'))
    -            cli(['config', 'assimilate-conf',
    -                 '-i', '/var/lib/ceph/user.conf'],
    -                {tmp.name: '/var/lib/ceph/user.conf:z'})
    +    # user given config settings were already assimilated earlier
    +    # but if the given settings contained any attributes in
    +    # the mgr (e.g. mgr/cephadm/container_image_prometheus)
    +    # they don't seem to be stored if there isn't a mgr yet.
    +    # Since re-assimilating the same conf settings should be
    +    # idempotent we can just do it again here.
    +    with tempfile.NamedTemporaryFile(buffering=0) as tmp:
    +        tmp.write(config.encode('utf-8'))
    +        cli(['config', 'assimilate-conf',
    +             '-i', '/var/lib/ceph/user.conf'],
    +            {tmp.name: '/var/lib/ceph/user.conf:z'})
     
         if getattr(ctx, 'log_dest', None):
             ldkey = 'mgr/cephadm/cephadm_log_destination'
    @@ -4884,6 +2937,10 @@ def mgr_has_latest_epoch():
     
         cli(['config', 'set', 'mgr', 'mgr/cephadm/container_init', str(ctx.container_init), '--force'])
     
    +    if ctx.no_cgroups_split:
    +        logger.info('Setting mgr/cephadm/cgroups_split to false')
    +        cli(['config', 'set', 'mgr', 'mgr/cephadm/cgroups_split', 'false', '--force'])
    +
         if not ctx.skip_dashboard:
             prepare_dashboard(ctx, uid, gid, cli, wait_for_mgr_restart)
     
    @@ -4909,7 +2966,7 @@ def mgr_has_latest_epoch():
             mounts = {}
             mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro'
             try:
    -            out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
    +            out = cli(['orch', 'apply', '--continue-on-error', '-i', '/tmp/spec.yml'], extra_mounts=mounts)
                 logger.info(out)
             except Exception:
                 ctx.error_code = -errno.EINVAL
    @@ -4917,10 +2974,6 @@ def mgr_has_latest_epoch():
     
         save_cluster_config(ctx, uid, gid, fsid)
     
    -    # enable autotune for osd_memory_target
    -    logger.info('Enabling autotune for osd_memory_target')
    -    cli(['config', 'set', 'osd', 'osd_memory_target_autotune', 'true'])
    -
         # Notify the Dashboard to show the 'Expand cluster' page on first log in.
         cli(['config-key', 'set', 'mgr/dashboard/cluster/status', 'INSTALLED'])
     
    @@ -4938,6 +2991,13 @@ def mgr_has_latest_epoch():
                     'For more information see:\n\n'
                     '\thttps://docs.ceph.com/en/latest/mgr/telemetry/\n')
         logger.info('Bootstrap complete.')
    +
    +    if getattr(ctx, 'deploy_cephadm_agent', None):
    +        cli(['config', 'set', 'mgr', 'mgr/cephadm/use_agent', 'true'])
    +
    +    if getattr(ctx, 'custom_prometheus_alerts', None):
    +        cli(['orch', 'prometheus', 'set-custom-alerts', '-i', '/etc/ceph/custom_alerts.yml'])
    +
         return ctx.error_code
     
     ##################################
    @@ -4973,57 +3033,6 @@ def command_registry_login(ctx: CephadmContext) -> int:
     ##################################
     
     
    -def extract_uid_gid_monitoring(ctx, daemon_type):
    -    # type: (CephadmContext, str) -> Tuple[int, int]
    -
    -    if daemon_type == 'prometheus':
    -        uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
    -    elif daemon_type == 'node-exporter':
    -        uid, gid = 65534, 65534
    -    elif daemon_type == 'grafana':
    -        uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
    -    elif daemon_type == 'loki':
    -        uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
    -    elif daemon_type == 'promtail':
    -        uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
    -    elif daemon_type == 'alertmanager':
    -        uid, gid = extract_uid_gid(ctx, file_path=['/etc/alertmanager', '/etc/prometheus'])
    -    else:
    -        raise Error('{} not implemented yet'.format(daemon_type))
    -    return uid, gid
    -
    -
    -def get_deployment_container(
    -    ctx: CephadmContext,
    -    ident: 'DaemonIdentity',
    -    privileged: bool = False,
    -    ptrace: bool = False,
    -    container_args: Optional[List[str]] = None,
    -) -> 'CephContainer':
    -    # wrapper for get_container specifically for containers made during the `cephadm deploy`
    -    # command. Adds some extra things such as extra container args and custom config files
    -    c = get_container(ctx, ident, privileged, ptrace, container_args)
    -    if 'extra_container_args' in ctx and ctx.extra_container_args:
    -        c.container_args.extend(ctx.extra_container_args)
    -    if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
    -        c.args.extend(ctx.extra_entrypoint_args)
    -    ccfiles = fetch_custom_config_files(ctx)
    -    if ccfiles:
    -        mandatory_keys = ['mount_path', 'content']
    -        for conf in ccfiles:
    -            if all(k in conf for k in mandatory_keys):
    -                mount_path = conf['mount_path']
    -                file_path = os.path.join(
    -                    ctx.data_dir,
    -                    ident.fsid,
    -                    'custom_config_files',
    -                    ident.daemon_name,
    -                    os.path.basename(mount_path)
    -                )
    -                c.volume_mounts[file_path] = mount_path
    -    return c
    -
    -
     def get_deployment_type(
         ctx: CephadmContext, ident: 'DaemonIdentity',
     ) -> DeploymentType:
    @@ -5047,7 +3056,10 @@ def get_deployment_type(
     @deprecated_command
     def command_deploy(ctx):
         # type: (CephadmContext) -> None
    -    _common_deploy(ctx)
    +    try:
    +        _common_deploy(ctx)
    +    except DaemonStartException:
    +        sys.exit(DAEMON_FAILED_ERROR)
     
     
     def apply_deploy_config_to_ctx(
    @@ -5090,7 +3102,10 @@ def command_deploy_from(ctx: CephadmContext) -> None:
         config_data = read_configuration_source(ctx)
         logger.debug('Loaded deploy configuration: %r', config_data)
         apply_deploy_config_to_ctx(config_data, ctx)
    -    _common_deploy(ctx)
    +    try:
    +        _common_deploy(ctx)
    +    except DaemonStartException:
    +        sys.exit(DAEMON_FAILED_ERROR)
     
     
     def _common_deploy(ctx: CephadmContext) -> None:
    @@ -5108,145 +3123,8 @@ def _common_deploy(ctx: CephadmContext) -> None:
     
         # Get and check ports explicitly required to be opened
         endpoints = fetch_endpoints(ctx)
    -    _dispatch_deploy(ctx, ident, endpoints, deployment_type)
    -
    -
    -def _dispatch_deploy(
    -    ctx: CephadmContext,
    -    ident: 'DaemonIdentity',
    -    daemon_endpoints: List[EndPoint],
    -    deployment_type: DeploymentType,
    -) -> None:
    -    daemon_type = ident.daemon_type
    -    if daemon_type in Ceph.daemons:
    -        config, keyring = get_config_and_keyring(ctx)
    -        uid, gid = extract_uid_gid(ctx)
    -        make_var_run(ctx, ctx.fsid, uid, gid)
    -
    -        config_json = fetch_configs(ctx)
    -
    -        c = get_deployment_container(ctx, ident, ptrace=ctx.allow_ptrace)
    -
    -        if daemon_type == 'mon' and config_json is not None:
    -            if 'crush_location' in config_json:
    -                c_loc = config_json['crush_location']
    -                # was originally "c.args.extend(['--set-crush-location', c_loc])"
    -                # but that doesn't seem to persist in the object after it's passed
    -                # in further function calls
    -                c.args = c.args + ['--set-crush-location', c_loc]
    -
    -        deploy_daemon(
    -            ctx,
    -            ident,
    -            c,
    -            uid,
    -            gid,
    -            config=config,
    -            keyring=keyring,
    -            osd_fsid=ctx.osd_fsid,
    -            deployment_type=deployment_type,
    -            endpoints=daemon_endpoints,
    -        )
    -
    -    elif daemon_type in Monitoring.components:
    -        # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
    -        # Default Checks
    -        # make sure provided config-json is sufficient
    -        config = fetch_configs(ctx)  # type: ignore
    -        required_files = Monitoring.components[daemon_type].get('config-json-files', list())
    -        required_args = Monitoring.components[daemon_type].get('config-json-args', list())
    -        if required_files:
    -            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
    -                raise Error('{} deployment requires config-json which must '
    -                            'contain file content for {}'.format(daemon_type.capitalize(), ', '.join(required_files)))
    -        if required_args:
    -            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
    -                raise Error('{} deployment requires config-json which must '
    -                            'contain arg for {}'.format(daemon_type.capitalize(), ', '.join(required_args)))
    -
    -        uid, gid = extract_uid_gid_monitoring(ctx, daemon_type)
    -        c = get_deployment_container(ctx, ident)
    -        deploy_daemon(
    -            ctx,
    -            ident,
    -            c,
    -            uid,
    -            gid,
    -            deployment_type=deployment_type,
    -            endpoints=daemon_endpoints
    -        )
    -
    -    elif daemon_type == CephIscsi.daemon_type:
    -        config, keyring = get_config_and_keyring(ctx)
    -        uid, gid = extract_uid_gid(ctx)
    -        c = get_deployment_container(ctx, ident)
    -        deploy_daemon(
    -            ctx,
    -            ident,
    -            c,
    -            uid,
    -            gid,
    -            config=config,
    -            keyring=keyring,
    -            deployment_type=deployment_type,
    -            endpoints=daemon_endpoints
    -        )
    -    elif daemon_type == CephNvmeof.daemon_type:
    -        config, keyring = get_config_and_keyring(ctx)
    -        uid, gid = 167, 167  # TODO: need to get properly the uid/gid
    -        c = get_deployment_container(ctx, ident)
    -        deploy_daemon(
    -            ctx,
    -            ident,
    -            c,
    -            uid,
    -            gid,
    -            config=config,
    -            keyring=keyring,
    -            deployment_type=deployment_type,
    -            endpoints=daemon_endpoints,
    -        )
    -    elif daemon_type in Tracing.components:
    -        uid, gid = 65534, 65534
    -        c = get_container(ctx, ident)
    -        deploy_daemon(
    -            ctx,
    -            ident,
    -            c,
    -            uid,
    -            gid,
    -            deployment_type=deployment_type,
    -            endpoints=daemon_endpoints,
    -        )
    -    elif daemon_type == HAproxy.daemon_type:
    -        haproxy = HAproxy.init(ctx, ident.fsid, ident.daemon_id)
    -        uid, gid = haproxy.extract_uid_gid_haproxy()
    -        c = get_deployment_container(ctx, ident)
    -        deploy_daemon(
    -            ctx,
    -            ident,
    -            c,
    -            uid,
    -            gid,
    -            deployment_type=deployment_type,
    -            endpoints=daemon_endpoints,
    -        )
    -
    -    elif daemon_type == Keepalived.daemon_type:
    -        keepalived = Keepalived.init(ctx, ident.fsid, ident.daemon_id)
    -        uid, gid = keepalived.extract_uid_gid_keepalived()
    -        c = get_deployment_container(ctx, ident)
    -        deploy_daemon(
    -            ctx,
    -            ident,
    -            c,
    -            uid,
    -            gid,
    -            deployment_type=deployment_type,
    -            endpoints=daemon_endpoints,
    -        )
     
    -    elif daemon_type == CephadmAgent.daemon_type:
    +    if ident.daemon_type == CephadmAgent.daemon_type:
             # get current user gid and uid
             uid = os.getuid()
             gid = os.getgid()
    @@ -5257,17 +3135,15 @@ def _dispatch_deploy(
                 uid,
                 gid,
                 deployment_type=deployment_type,
    -            endpoints=daemon_endpoints,
    +            endpoints=endpoints,
             )
     
         else:
             try:
    -            _deploy_daemon_container(
    -                ctx, ident, daemon_endpoints, deployment_type
    -            )
    +            _deploy_daemon_container(ctx, ident, endpoints, deployment_type)
             except UnexpectedDaemonTypeError:
                 raise Error('daemon type {} not implemented in command_deploy function'
    -                        .format(daemon_type))
    +                        .format(ident.daemon_type))
     
     
     def _deploy_daemon_container(
    @@ -5281,6 +3157,7 @@ def _deploy_daemon_container(
         daemon.customize_container_endpoints(daemon_endpoints, deployment_type)
         ctr = daemon.container(ctx)
         ics = daemon.init_containers(ctx)
    +    sccs = daemon.sidecar_containers(ctx)
         config, keyring = daemon.config_and_keyring(ctx)
         uid, gid = daemon.uid_gid(ctx)
         deploy_daemon(
    @@ -5295,6 +3172,7 @@ def _deploy_daemon_container(
             endpoints=daemon_endpoints,
             osd_fsid=daemon.osd_fsid,
             init_containers=ics,
    +        sidecars=sccs,
         )
     
     ##################################
    @@ -5328,10 +3206,10 @@ def command_shell(ctx):
                 daemon_type = ctx.name
                 daemon_id = None
         else:
    -        daemon_type = 'osd'  # get the most mounts
    +        daemon_type = 'shell'  # get limited set of mounts
             daemon_id = None
     
    -    if ctx.fsid and daemon_type in Ceph.daemons:
    +    if ctx.fsid and daemon_type in ceph_daemons():
             make_log_dir(ctx, ctx.fsid)
     
         if daemon_id and not ctx.fsid:
    @@ -5411,6 +3289,10 @@ def command_shell(ctx):
             privileged=True)
         command = c.shell_cmd(command)
     
    +    if ctx.dry_run:
    +        print(' '.join(shlex.quote(arg) for arg in command))
    +        return 0
    +
         return call_timeout(ctx, command, ctx.timeout)
     
     ##################################
    @@ -5462,7 +3344,7 @@ def command_ceph_volume(ctx):
             lock.acquire()
     
         (uid, gid) = (0, 0)  # ceph-volume runs as root
    -    mounts = get_container_mounts_for_type(ctx, ctx.fsid, 'osd')
    +    mounts = get_container_mounts_for_type(ctx, ctx.fsid, 'ceph-volume')
     
         tmp_config = None
         tmp_keyring = None
    @@ -5493,13 +3375,26 @@ def command_ceph_volume(ctx):
     ##################################
     
     
    +@infer_fsid
    +def command_unit_install(ctx):
    +    # type: (CephadmContext) -> int
    +    if not getattr(ctx, 'fsid', None):
    +        raise Error('must pass --fsid to specify cluster')
    +    if not getattr(ctx, 'name', None):
    +        raise Error('daemon name required')
    +    ident = DaemonIdentity.from_context(ctx)
    +    systemd_unit.update_files(ctx, ident)
    +    call_throws(ctx, ['systemctl', 'daemon-reload'])
    +    return 0
    +
    +
     @infer_fsid
     def command_unit(ctx):
         # type: (CephadmContext) -> int
         if not ctx.fsid:
             raise Error('must pass --fsid to specify cluster')
     
    -    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
    +    unit_name = lookup_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
     
         _, _, code = call(
             ctx,
    @@ -5518,7 +3413,7 @@ def command_logs(ctx):
         if not ctx.fsid:
             raise Error('must pass --fsid to specify cluster')
     
    -    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
    +    unit_name = lookup_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
     
         cmd = [find_program('journalctl')]
         cmd.extend(['-u', unit_name])
    @@ -5548,12 +3443,17 @@ def serialize_sets(obj: Any) -> Any:
     def command_ls(ctx):
         # type: (CephadmContext) -> None
         ls = list_daemons(ctx, detail=not ctx.no_detail,
    -                      legacy_dir=ctx.legacy_dir)
    +                      legacy_dir=ctx.legacy_dir,
    +                      daemon_name=ctx.name)
         print(json.dumps(ls, indent=4))
     
     
    -def list_daemons(ctx, detail=True, legacy_dir=None):
    -    # type: (CephadmContext, bool, Optional[str]) -> List[Dict[str, str]]
    +def list_daemons(
    +    ctx: CephadmContext,
    +    detail: bool = True,
    +    legacy_dir: Optional[str] = None,
    +    daemon_name: Optional[str] = None,
    +) -> List[Dict[str, str]]:
         host_version: Optional[str] = None
         ls = []
         container_path = ctx.container_engine.path
    @@ -5588,7 +3488,7 @@ def list_daemons(ctx, detail=True, legacy_dir=None):
         # /var/lib/ceph
         if os.path.exists(data_dir):
             for i in os.listdir(data_dir):
    -            if i in ['mon', 'osd', 'mds', 'mgr']:
    +            if i in ['mon', 'osd', 'mds', 'mgr', 'rgw']:
                     daemon_type = i
                     for j in os.listdir(os.path.join(data_dir, i)):
                         if '-' not in j:
    @@ -5622,6 +3522,8 @@ def list_daemons(ctx, detail=True, legacy_dir=None):
                     for j in os.listdir(os.path.join(data_dir, i)):
                         if '.' in j and os.path.isdir(os.path.join(data_dir, fsid, j)):
                             name = j
    +                        if daemon_name and name != daemon_name:
    +                            continue
                             (daemon_type, daemon_id) = j.split('.', 1)
                             unit_name = get_unit_name(fsid,
                                                       daemon_type,
    @@ -5677,8 +3579,10 @@ def list_daemons(ctx, detail=True, legacy_dir=None):
                                     version = CephIscsi.get_version(ctx, container_id)
                                 if daemon_type == CephNvmeof.daemon_type:
                                     version = CephNvmeof.get_version(ctx, container_id)
    +                            if daemon_type == SMB.daemon_type:
    +                                version = SMB.get_version(ctx, container_id)
                                 elif not version:
    -                                if daemon_type in Ceph.daemons:
    +                                if daemon_type in ceph_daemons():
                                         out, err, code = call(ctx,
                                                               [container_path, 'exec', container_id,
                                                                'ceph', '-v'],
    @@ -5690,7 +3594,7 @@ def list_daemons(ctx, detail=True, legacy_dir=None):
                                     elif daemon_type == 'grafana':
                                         out, err, code = call(ctx,
                                                               [container_path, 'exec', container_id,
    -                                                           'grafana-server', '-v'],
    +                                                           'grafana', 'server', '-v'],
                                                               verbosity=CallVerbosity.QUIET)
                                         if not code and \
                                            out.startswith('Version '):
    @@ -5732,6 +3636,12 @@ def list_daemons(ctx, detail=True, legacy_dir=None):
                                     elif daemon_type == SNMPGateway.daemon_type:
                                         version = SNMPGateway.get_version(ctx, fsid, daemon_id)
                                         seen_versions[image_id] = version
    +                                elif daemon_type == MgmtGateway.daemon_type:
    +                                    version = MgmtGateway.get_version(ctx, container_id)
    +                                    seen_versions[image_id] = version
    +                                elif daemon_type == OAuth2Proxy.daemon_type:
    +                                    version = OAuth2Proxy.get_version(ctx, container_id)
    +                                    seen_versions[image_id] = version
                                     else:
                                         logger.warning('version for unknown daemon type %s' % daemon_type)
                             else:
    @@ -5819,6 +3729,7 @@ def get_daemon_description(ctx, fsid, name, detail=False, legacy_dir=None):
     
     
     def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, daemon_type: str, daemon_id: str) -> Tuple[str, str, int]:
    +    """returns container id, image name, image id, created time, and ceph version if available"""
         c = CephContainer.for_daemon(
             ctx, DaemonIdentity(fsid, daemon_type, daemon_id), 'bash'
         )
    @@ -5834,6 +3745,18 @@ def get_container_stats(ctx: CephadmContext, container_path: str, fsid: str, dae
                 break
         return out, err, code
     
    +
    +def get_container_stats_by_image_name(ctx: CephadmContext, container_path: str, image_name: str) -> Tuple[str, str, int]:
    +    """returns image id, created time, and ceph version if available"""
    +    out, err, code = '', '', -1
    +    cmd = [
    +        container_path, 'image', 'inspect',
    +        '--format', '{{.Id}},{{.Created}},{{index .Config.Labels "io.ceph.version"}}',
    +        image_name
    +    ]
    +    out, err, code = call(ctx, cmd, verbosity=CallVerbosity.QUIET)
    +    return out, err, code
    +
     ##################################
     
     
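Since the Go template in the hunk above packs three fields into a single comma-separated line, a caller would split the inspect output roughly as in the hedged sketch below; the image name is invented, and `ctx`/`container_path` are assumed to already be in scope as in the surrounding code.

    # Illustrative parsing of the 'out' value returned above, assuming the
    # inspect call succeeded for an image labelled with io.ceph.version.
    out, err, code = get_container_stats_by_image_name(
        ctx, container_path, 'quay.io/ceph/ceph:v19'
    )
    if code == 0:
        image_id, created, ceph_version = out.strip().split(',', 2)
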
    @@ -5867,7 +3790,7 @@ def command_adopt(ctx):
         lock.acquire()
     
         # call correct adoption
    -    if daemon_type in Ceph.daemons:
    +    if daemon_type in ceph_daemons():
             command_adopt_ceph(ctx, daemon_type, daemon_id, fsid)
         elif daemon_type == 'prometheus':
             command_adopt_prometheus(ctx, daemon_id, fsid)
    @@ -6124,7 +4047,7 @@ def command_adopt_ceph(ctx, daemon_type, daemon_id, fsid):
     def command_adopt_prometheus(ctx, daemon_id, fsid):
         # type: (CephadmContext, str, str) -> None
         daemon_type = 'prometheus'
    -    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
    +    (uid, gid) = Monitoring.extract_uid_gid(ctx, daemon_type)
         # should try to set the ports we know cephadm defaults
         # to for these services in the firewall.
         ports = Monitoring.port_map['prometheus']
    @@ -6171,13 +4094,13 @@ def command_adopt_grafana(ctx, daemon_id, fsid):
         # type: (CephadmContext, str, str) -> None
     
         daemon_type = 'grafana'
    -    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
    +    (uid, gid) = Monitoring.extract_uid_gid(ctx, daemon_type)
         # should try to set the ports we know cephadm defaults
         # to for these services in the firewall.
         ports = Monitoring.port_map['grafana']
         endpoints = [EndPoint('0.0.0.0', p) for p in ports]
     
    -    _stop_and_disable(ctx, 'grafana-server')
    +    _stop_and_disable(ctx, 'grafana server')
     
         ident = DaemonIdentity(fsid, daemon_type, daemon_id)
         data_dir_dst = make_data_dir(
    @@ -6242,7 +4165,7 @@ def command_adopt_alertmanager(ctx, daemon_id, fsid):
         # type: (CephadmContext, str, str) -> None
     
         daemon_type = 'alertmanager'
    -    (uid, gid) = extract_uid_gid_monitoring(ctx, daemon_type)
    +    (uid, gid) = Monitoring.extract_uid_gid(ctx, daemon_type)
         # should try to set the ports we know cephadm defaults
         # to for these services in the firewall.
         ports = Monitoring.port_map['alertmanager']
    @@ -6329,40 +4252,59 @@ def command_rm_daemon(ctx):
         lock = FileLock(ctx, ctx.fsid)
         lock.acquire()
     
    -    (daemon_type, daemon_id) = ctx.name.split('.', 1)
    -    unit_name = get_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
    -
    -    if daemon_type in ['mon', 'osd'] and not ctx.force:
    +    ident = DaemonIdentity.from_context(ctx)
    +    try:
    +        # attempt a fast-path conversion that maps the fsid+name to
    +        # the systemd service name, verifying that there is such a service
    +        call_throws(ctx, ['systemctl', 'status', ident.service_name])
    +        unit_name = ident.service_name
    +    except RuntimeError:
    +        # fall back to looking up all possible services that might match
    +        # (JJM) Preserved this operation in case there are backwards-compat
    +        # issues where the DaemonIdentity-derived name is not correct.
    +        unit_name = lookup_unit_name_by_daemon_name(ctx, ctx.fsid, ctx.name)
    +
    +    if ident.daemon_type in ['mon', 'osd'] and not ctx.force:
             raise Error('must pass --force to proceed: '
                         'this command may destroy precious data!')
     
    -    call(ctx, ['systemctl', 'stop', unit_name],
    -         verbosity=CallVerbosity.DEBUG)
    -    call(ctx, ['systemctl', 'reset-failed', unit_name],
    -         verbosity=CallVerbosity.DEBUG)
    -    call(ctx, ['systemctl', 'disable', unit_name],
    -         verbosity=CallVerbosity.DEBUG)
    +    terminate_service(ctx, unit_name)
    +
    +    # clean up any extra systemd unit files
    +    sd_path_info = systemd_unit.sidecars_from_dropin(
    +        systemd_unit.PathInfo(ctx.unit_dir, ident), missing_ok=True
    +    )
    +    for sc_unit in sd_path_info.sidecar_unit_files.values():
    +        terminate_service(ctx, sc_unit.name)
    +        unlink_file(sc_unit, missing_ok=True)
    +    terminate_service(ctx, sd_path_info.init_ctr_unit_file.name)
    +    unlink_file(sd_path_info.init_ctr_unit_file, missing_ok=True)
    +    unlink_file(sd_path_info.drop_in_file, missing_ok=True)
    +    try:
    +        sd_path_info.drop_in_file.parent.rmdir()
    +    except OSError:
    +        pass
     
         # force remove rgw admin socket file if leftover
    -    if daemon_type in ['rgw']:
    +    if ident.daemon_type in ['rgw']:
             rgw_asok_path = f'/var/run/ceph/{ctx.fsid}/ceph-client.{ctx.name}.*.asok'
             call(ctx, ['rm', '-rf', rgw_asok_path],
                  verbosity=CallVerbosity.DEBUG)
     
    -    ident = DaemonIdentity(ctx.fsid, daemon_type, daemon_id)
         data_dir = ident.data_dir(ctx.data_dir)
    -    if daemon_type in ['mon', 'osd', 'prometheus'] and \
    +    if ident.daemon_type in ['mon', 'osd', 'prometheus'] and \
            not ctx.force_delete_data:
             # rename it out of the way -- do not delete
             backup_dir = os.path.join(ctx.data_dir, ctx.fsid, 'removed')
             if not os.path.exists(backup_dir):
                 makedirs(backup_dir, 0, 0, DATA_DIR_MODE)
    -        dirname = '%s.%s_%s' % (daemon_type, daemon_id,
    -                                datetime.datetime.utcnow().strftime(DATEFMT))
    +        dirname = '%s_%s' % (
    +            ident.daemon_name, datetime.datetime.utcnow().strftime(DATEFMT)
    +        )
             os.rename(data_dir,
                       os.path.join(backup_dir, dirname))
         else:
    -        call_throws(ctx, ['rm', '-rf', data_dir])
    +        shutil.rmtree(data_dir, ignore_errors=True)
     
         endpoints = fetch_endpoints(ctx)
         ports: List[int] = [e.port for e in endpoints]
    @@ -6455,27 +4397,29 @@ def _rm_cluster(ctx: CephadmContext, keep_logs: bool, zap_osds: bool) -> None:
         if not ctx.fsid:
             raise Error('must select the cluster to delete by passing --fsid to proceed')
     
    -    def disable_systemd_service(unit_name: str) -> None:
    -        call(ctx, ['systemctl', 'stop', unit_name],
    -             verbosity=CallVerbosity.DEBUG)
    -        call(ctx, ['systemctl', 'reset-failed', unit_name],
    -             verbosity=CallVerbosity.DEBUG)
    -        call(ctx, ['systemctl', 'disable', unit_name],
    -             verbosity=CallVerbosity.DEBUG)
    -
         logger.info(f'Deleting cluster with fsid: {ctx.fsid}')
     
         # stop + disable individual daemon units
    +    sd_paths = []
         for d in list_daemons(ctx, detail=False):
             if d['fsid'] != ctx.fsid:
                 continue
             if d['style'] != 'cephadm:v1':
                 continue
    -        disable_systemd_service('ceph-%s@%s' % (ctx.fsid, d['name']))
    +        terminate_service(ctx, 'ceph-%s@%s' % (ctx.fsid, d['name']))
    +        # terminate sidecar & other supplemental services
    +        ident = DaemonIdentity.from_name(ctx.fsid, d['name'])
    +        sd_path_info = systemd_unit.sidecars_from_dropin(
    +            systemd_unit.PathInfo(ctx.unit_dir, ident), missing_ok=True
    +        )
    +        for sc_unit in sd_path_info.sidecar_unit_files.values():
    +            terminate_service(ctx, sc_unit.name)
    +        terminate_service(ctx, sd_path_info.init_ctr_unit_file.name)
    +        sd_paths.append(sd_path_info)
     
         # cluster units
         for unit_name in ['ceph-%s.target' % ctx.fsid]:
    -        disable_systemd_service(unit_name)
    +        terminate_service(ctx, unit_name)
     
         slice_name = 'system-ceph\\x2d{}.slice'.format(ctx.fsid.replace('-', '\\x2d'))
         call(ctx, ['systemctl', 'stop', slice_name],
    @@ -6486,40 +4430,49 @@ def disable_systemd_service(unit_name: str) -> None:
             _zap_osds(ctx)
     
         # rm units
    -    call_throws(ctx, ['rm', '-f', ctx.unit_dir
    -                      + '/ceph-%s@.service' % ctx.fsid])
    -    call_throws(ctx, ['rm', '-f', ctx.unit_dir
    -                      + '/ceph-%s.target' % ctx.fsid])
    -    call_throws(ctx, ['rm', '-rf',
    -                      ctx.unit_dir + '/ceph-%s.target.wants' % ctx.fsid])
    +    for sd_path_info in sd_paths:
    +        for sc_unit in sd_path_info.sidecar_unit_files.values():
    +            unlink_file(sc_unit, missing_ok=True)
    +        unlink_file(sd_path_info.init_ctr_unit_file, missing_ok=True)
    +        shutil.rmtree(sd_path_info.drop_in_file.parent, ignore_errors=True)
    +    unit_dir = Path(ctx.unit_dir)
    +    unlink_file(unit_dir / f'ceph-{ctx.fsid}@.service', missing_ok=True)
    +    unlink_file(unit_dir / f'ceph-{ctx.fsid}.target', missing_ok=True)
    +    shutil.rmtree(unit_dir / f'ceph-{ctx.fsid}.target.wants', ignore_errors=True)
    +
         # rm data
    -    call_throws(ctx, ['rm', '-rf', ctx.data_dir + '/' + ctx.fsid])
    +    shutil.rmtree(Path(ctx.data_dir) / ctx.fsid, ignore_errors=True)
     
         if not keep_logs:
             # rm logs
    -        call_throws(ctx, ['rm', '-rf', ctx.log_dir + '/' + ctx.fsid])
    -        call_throws(ctx, ['rm', '-rf', ctx.log_dir
    -                          + '/*.wants/ceph-%s@*' % ctx.fsid])
    +        shutil.rmtree(Path(ctx.log_dir) / ctx.fsid, ignore_errors=True)
     
         # rm logrotate config
    -    call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/ceph-%s' % ctx.fsid])
    +    unlink_file(
    +        Path(ctx.logrotate_dir) / ('ceph-%s' % ctx.fsid), ignore_errors=True
    +    )
     
         # if last cluster on host remove shared files
         if get_ceph_cluster_count(ctx) == 0:
    -        disable_systemd_service('ceph.target')
    +        terminate_service(ctx, 'ceph.target')
     
             # rm shared ceph target files
    -        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/multi-user.target.wants/ceph.target'])
    -        call_throws(ctx, ['rm', '-f', ctx.unit_dir + '/ceph.target'])
    +        unlink_file(
    +            Path(ctx.unit_dir) / 'multi-user.target.wants/ceph.target',
    +            ignore_errors=True
    +        )
    +        unlink_file(Path(ctx.unit_dir) / 'ceph.target', ignore_errors=True)
     
             # rm cephadm logrotate config
    -        call_throws(ctx, ['rm', '-f', ctx.logrotate_dir + '/cephadm'])
    +        unlink_file(Path(ctx.logrotate_dir) / 'cephadm', ignore_errors=True)
     
             if not keep_logs:
                 # remove all cephadm logs
                 for fname in glob(f'{ctx.log_dir}/cephadm.log*'):
                     os.remove(fname)
     
    +        unlink_file(Path('/etc/ceph/podman-auth.json'), missing_ok=True, ignore_errors=True)
    +
         # rm sysctl settings
         sysctl_dirs: List[Path] = [Path(ctx.sysctl_dir), Path('/usr/lib/sysctl.d')]
     
    @@ -6548,8 +4501,9 @@ def disable_systemd_service(unit_name: str) -> None:
     ##################################
     
     
    -def check_time_sync(ctx, enabler=None):
    -    # type: (CephadmContext, Optional[Packager]) -> bool
    +def check_time_sync(
    +    ctx: CephadmContext, enabler: Optional[Packager] = None
    +) -> bool:
         units = [
             'chrony.service',  # 18.04 (at least)
             'chronyd.service',  # el / opensuse
    @@ -6558,6 +4512,7 @@ def check_time_sync(ctx, enabler=None):
             'ntp.service',  # 18.04 (at least)
             'ntpsec.service',  # 20.04 (at least) / buster
             'openntpd.service',  # ubuntu / debian
    +        'timemaster.service',  # linuxptp on ubuntu/debian
         ]
         if not check_units(ctx, units, enabler):
             logger.warning('No time sync service is running; checked for %s' % units)
    @@ -6612,6 +4567,7 @@ def command_prepare_host(ctx: CephadmContext) -> None:
             if not pkg:
                 pkg = create_packager(ctx)
             pkg.install_podman()
    +        ctx.container_engine = find_container_engine(ctx)
     
         logger.info('Verifying lvm2 is present...')
         if not find_executable('lvcreate'):
    @@ -6742,6 +4698,13 @@ def probe_hba(scan_path: str) -> None:
         return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)'
     
     
    +def command_list_images(ctx: CephadmContext) -> None:
    +    """this function will list the default images used by different services"""
    +    cp_obj = ConfigParser()
    +    cp_obj['mgr'] = get_mgr_images()
    +    # print default images
    +    cp_obj.write(sys.stdout)
    +
     ##################################
     
     
    @@ -6770,7 +4733,17 @@ def target_exists(ctx: CephadmContext) -> bool:
     
     
     @infer_fsid
    -def command_maintenance(ctx: CephadmContext) -> str:
    +def command_maintenance(ctx: CephadmContext) -> int:
    +    msg = change_maintenance_mode(ctx)
    +    # mgr module reads the string emitted here from stderr
    +    sys.stderr.write(msg + '\n')
    +    sys.stderr.flush()
    +    if msg.startswith('fail'):
    +        return 1
    +    return 0
    +
    +
    +def change_maintenance_mode(ctx: CephadmContext) -> str:
         if not ctx.fsid:
             raise Error('failed - must pass --fsid to specify cluster')
     
    @@ -6990,9 +4963,16 @@ def _get_parser():
         parser_version = subparsers.add_parser(
             'version', help='get cephadm version')
         parser_version.set_defaults(func=command_version)
    +    parser_version.add_argument(
    +        '--verbose',
    +        action='store_true',
    +        help='Detailed version information',
    +    )
     
         parser_pull = subparsers.add_parser(
    -        'pull', help='pull the default container image')
    +        'pull',
    +        help='pull a ceph container image (will pull the default image if --image not provided)',
    +        usage='cephadm pull (for default image) | cephadm --image <image-name> pull (for custom ceph image)')
         parser_pull.set_defaults(func=command_pull)
         parser_pull.add_argument(
             '--insecure',
    @@ -7015,6 +4995,9 @@ def _get_parser():
             '--legacy-dir',
             default='/',
             help='base directory for legacy daemon data')
    +    parser_ls.add_argument(
    +        '--name', '-n',
    +        help='Only get data for a specific daemon. Format of daemon name: (type.id)')
     
         parser_list_networks = subparsers.add_parser(
             'list-networks', help='list IP networks')
    @@ -7059,6 +5042,11 @@ def _get_parser():
             action='store_true',
             default=CONTAINER_INIT,
             help=argparse.SUPPRESS)
    +    parser_adopt.add_argument(
    +        '--no-cgroups-split',
    +        action='store_true',
    +        default=False,
    +        help='Do not run containers with --cgroups=split (currently only relevant when using podman)')
     
         parser_rm_daemon = subparsers.add_parser(
             'rm-daemon', help='remove daemon instance')
    @@ -7151,7 +5139,7 @@ def _get_parser():
             '--volume', '-v',
             action='append',
             default=[],
    -        help='set environment variable')
    +        help='mount a volume')
         parser_shell.add_argument(
             'command', nargs=argparse.REMAINDER,
             help='command (optional)')
    @@ -7159,6 +5147,10 @@ def _get_parser():
             '--no-hosts',
             action='store_true',
             help='dont pass /etc/hosts through to the container')
    +    parser_shell.add_argument(
    +        '--dry-run',
    +        action='store_true',
    +        help='print, but do not execute, the container command to start the shell')
     
         parser_enter = subparsers.add_parser(
             'enter', help='run an interactive shell inside a running daemon container')
    @@ -7223,6 +5215,17 @@ def _get_parser():
             required=True,
             help='daemon name (type.id)')
     
    +    parser_unit_install = subparsers.add_parser(
    +        'unit-install', help="Install the daemon's systemd unit")
    +    parser_unit_install.set_defaults(func=command_unit_install)
    +    parser_unit_install.add_argument(
    +        '--fsid',
    +        help='cluster FSID')
    +    parser_unit_install.add_argument(
    +        '--name', '-n',
    +        required=True,
    +        help='daemon name (type.id)')
    +
         parser_logs = subparsers.add_parser(
             'logs', help='print journald logs for a daemon container')
         parser_logs.set_defaults(func=command_logs)
    @@ -7355,22 +5358,10 @@ def _get_parser():
             '--allow-overwrite',
             action='store_true',
             help='allow overwrite of existing --output-* config/keyring/ssh files')
    -    # following logic to have both '--cleanup-on-failure' and '--no-cleanup-on-failure'
    -    # has been included in argparse of python v3.9, however since we have to support
    -    # older python versions the following is more generic. Once python v3.9 becomes
    -    # the minium supported version we can implement the same by using the new option
    -    # argparse.BooleanOptionalAction
    -    group = parser_bootstrap.add_mutually_exclusive_group()
    -    group.add_argument(
    -        '--cleanup-on-failure',
    -        action='store_true',
    -        default=True,
    -        help='Delete cluster files in case of a failed installation')
    -    group.add_argument(
    +    parser_bootstrap.add_argument(
             '--no-cleanup-on-failure',
    -        action='store_const',
    -        const=False,
    -        dest='cleanup_on_failure',
    +        action='store_true',
    +        default=False,
             help='Do not delete cluster files in case of a failed installation')
         parser_bootstrap.add_argument(
             '--allow-fqdn-hostname',
    @@ -7432,6 +5423,13 @@ def _get_parser():
             '--log-to-file',
             action='store_true',
             help='configure cluster to log to traditional log files in /var/log/ceph/$fsid')
    +    parser_bootstrap.add_argument(
    +        '--deploy-cephadm-agent',
    +        action='store_true',
    +        help='deploy the cephadm-agent')
    +    parser_bootstrap.add_argument(
    +        '--custom-prometheus-alerts',
    +        help='provide a file with custom prometheus alerts')
     
         parser_deploy = subparsers.add_parser(
             'deploy', help='deploy a daemon')
    @@ -7570,6 +5568,9 @@ def _get_parser():
             'disk-rescan', help='rescan all HBAs to detect new/removed devices')
         parser_disk_rescan.set_defaults(func=command_rescan_disks)
     
    +    parser_list_images = subparsers.add_parser(
    +        'list-images', help='list all the default images')
    +    parser_list_images.set_defaults(func=command_list_images)
         return parser
     
     
    @@ -7636,7 +5637,8 @@ def main() -> None:
                         command_prepare_host,
                         command_add_repo,
                         command_rm_repo,
    -                    command_install
    +                    command_install,
    +                    command_bootstrap
                     ]:
                 check_container_engine(ctx)
             # command handler
    diff --git a/src/cephadm/cephadmlib/agent.py b/src/cephadm/cephadmlib/agent.py
    new file mode 100644
    index 000000000000..330ea6945f34
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/agent.py
    @@ -0,0 +1,34 @@
    +from urllib.error import HTTPError, URLError
    +from urllib.request import urlopen, Request
    +from typing import Optional, Any, Tuple
    +import logging
    +
    +logger = logging.getLogger()
    +
    +
    +def http_query(
    +    addr: str = '',
    +    port: str = '',
    +    data: Optional[bytes] = None,
    +    endpoint: str = '',
    +    ssl_ctx: Optional[Any] = None,
    +    timeout: Optional[int] = 10,
    +) -> Tuple[int, str]:
    +    url = f'https://{addr}:{port}{endpoint}'
    +    logger.debug(f'sending query to {url}')
    +    try:
    +        req = Request(url, data, {'Content-Type': 'application/json'})
    +        with urlopen(req, context=ssl_ctx, timeout=timeout) as response:
    +            response_str = response.read()
    +            response_status = response.status
    +    except HTTPError as e:
    +        logger.debug(f'{e.code} {e.reason}')
    +        response_status = e.code
    +        response_str = e.reason
    +    except URLError as e:
    +        logger.debug(f'{e.reason}')
    +        response_status = -1
    +        response_str = e.reason
    +    except Exception:
    +        raise
    +    return (response_status, response_str)
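
As a quick illustration of the new agent helper, the sketch below shows how a caller might drive http_query; the address, port, endpoint, and payload are invented for this example, and the TLS settings are only a stand-in for whatever certificate handling the agent actually performs.

    import ssl

    from cephadmlib.agent import http_query

    # Illustrative values only -- not a real agent endpoint.
    tls = ssl.create_default_context()
    tls.check_hostname = False
    tls.verify_mode = ssl.CERT_NONE

    status, body = http_query(
        addr='192.168.0.10',
        port='7150',
        data=b'{"host": "node1"}',  # passing data makes urllib issue a POST
        endpoint='/data',
        ssl_ctx=tls,
    )
    if status != 200:
        print(f'query failed: {status} {body!r}')
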
    diff --git a/src/cephadm/cephadmlib/call_wrappers.py b/src/cephadm/cephadmlib/call_wrappers.py
    index 3fe2171e99d5..d3d327c218c9 100644
    --- a/src/cephadm/cephadmlib/call_wrappers.py
    +++ b/src/cephadm/cephadmlib/call_wrappers.py
    @@ -311,14 +311,14 @@ def call_throws(
         return out, err, ret
     
     
    -def call_timeout(ctx, command, timeout):
    -    # type: (CephadmContext, List[str], int) -> int
    +def call_timeout(
    +    ctx: CephadmContext, command: List[str], timeout: int
    +) -> int:
         logger.debug(
             'Running command (timeout=%s): %s' % (timeout, ' '.join(command))
         )
     
    -    def raise_timeout(command, timeout):
    -        # type: (List[str], int) -> NoReturn
    +    def raise_timeout(command: List[str], timeout: int) -> NoReturn:
             msg = 'Command `%s` timed out after %s seconds' % (command, timeout)
             logger.debug(msg)
             raise TimeoutExpired(msg)
    diff --git a/src/cephadm/cephadmlib/constants.py b/src/cephadm/cephadmlib/constants.py
    index d1e0aa4425c5..1df46353fb30 100644
    --- a/src/cephadm/cephadmlib/constants.py
    +++ b/src/cephadm/cephadmlib/constants.py
    @@ -3,25 +3,11 @@
     # Default container images -----------------------------------------------------
     DEFAULT_IMAGE = 'quay.ceph.io/ceph-ci/ceph:main'
     DEFAULT_IMAGE_IS_MAIN = True
    -DEFAULT_IMAGE_RELEASE = 'reef'
    -DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0'
    -DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
    -DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
    -DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0'
    -DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'
    -DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.7'
    -DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
    -DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4'
    -DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.1'
    -DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
    -DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23'
    -DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'
    -DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29'
    -DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29'
    -DEFAULT_REGISTRY = 'docker.io'  # normalize unqualified digests to this
    +DEFAULT_IMAGE_RELEASE = 'squid'
    +DEFAULT_REGISTRY = 'quay.io'  # normalize unqualified digests to this
     # ------------------------------------------------------------------------------
     
    -LATEST_STABLE_RELEASE = 'reef'
    +LATEST_STABLE_RELEASE = 'squid'
     DATA_DIR = '/var/lib/ceph'
     LOG_DIR = '/var/log/ceph'
     LOCK_DIR = '/run/cephadm'
    @@ -48,3 +34,6 @@
     DATEFMT = '%Y-%m-%dT%H:%M:%S.%fZ'
     QUIET_LOG_LEVEL = 9  # DEBUG is 10, so using 9 to be lower level than DEBUG
     NO_DEPRECATED = False
    +UID_NOBODY = 65534
    +GID_NOGROUP = 65534
    +DAEMON_FAILED_ERROR = 17
    diff --git a/src/cephadm/cephadmlib/container_daemon_form.py b/src/cephadm/cephadmlib/container_daemon_form.py
    index 5aef951f37c7..8696c9cbd66f 100644
    --- a/src/cephadm/cephadmlib/container_daemon_form.py
    +++ b/src/cephadm/cephadmlib/container_daemon_form.py
    @@ -2,9 +2,10 @@
     
     import abc
     
    -from typing import List, Tuple, Optional
    +from typing import List, Tuple, Optional, Dict
     
    -from .container_types import CephContainer, InitContainer
    +from .container_engines import Podman
    +from .container_types import CephContainer, InitContainer, SidecarContainer
     from .context import CephadmContext
     from .daemon_form import DaemonForm
     from .deploy import DeploymentType
    @@ -39,25 +40,57 @@ def init_containers(self, ctx: CephadmContext) -> List[InitContainer]:
             """
             return []
     
    -    def customize_container_binds(self, binds: List[List[str]]) -> None:
    +    def sidecar_containers(
    +        self, ctx: CephadmContext
    +    ) -> List[SidecarContainer]:
    +        """Returns a list of sidecar containers that should be executed along
    +        with the primary service container.
    +        """
    +        return []
    +
    +    def customize_container_binds(
    +        self, ctx: CephadmContext, binds: List[List[str]]
    +    ) -> None:
             """Given a list of container binds this function can update, delete,
             or otherwise mutate the binds that the container will use.
             """
             pass
     
    -    def customize_container_mounts(self, mounts: List[str]) -> None:
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
             """Given a list of container mounts this function can update, delete,
             or otherwise mutate the mounts that the container will use.
             """
             pass
     
    -    def customize_container_args(self, args: List[str]) -> None:
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
             """Given a list of container arguments this function can update,
             delete, or otherwise mutate the arguments that the container engine
             will use.
             """
             pass
     
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        """Given a list of arguments for the containerized process, this
    +        function can update, delete, or otherwise mutate the arguments that the
    +        process will use.
    +        """
    +        pass
    +
    +    def customize_container_envs(
    +        self, ctx: CephadmContext, envs: List[str]
    +    ) -> None:
    +        """Given a list of environment vars this function can update, delete,
    +        or otherwise mutate the environment variables that are passed by the
    +        container engine to the processes it executes.
    +        """
    +        pass
    +
         def customize_container_endpoints(
             self, endpoints: List[EndPoint], deployment_type: DeploymentType
         ) -> None:
    @@ -80,3 +113,77 @@ def osd_fsid(self) -> Optional[str]:
             expected to understand this.
             """
             return None
    +
    +    def default_entrypoint(self) -> str:
    +        """Return the default entrypoint value when running a deamon process
    +        in a container.
    +        """
    +        return ''
    +
    +    def prepare_data_dir(self, data_dir: str, uid: int, gid: int) -> None:
    +        pass
    +
    +
    +def daemon_to_container(
    +    ctx: CephadmContext,
    +    daemon: ContainerDaemonForm,
    +    *,
    +    privileged: bool = False,
    +    ptrace: bool = False,
    +    host_network: bool = True,
    +    entrypoint: Optional[str] = None,
    +    container_args: Optional[List[str]] = None,
    +    container_mounts: Optional[Dict[str, str]] = None,
    +    container_binds: Optional[List[List[str]]] = None,
    +    envs: Optional[List[str]] = None,
    +    args: Optional[List[str]] = None,
    +    auto_podman_args: bool = True,
    +    auto_podman_mounts: bool = True,
    +) -> CephContainer:
    +    """daemon_to_container is a utility function that serves to create
    +    CephContainer instances from a container daemon form's customize and
    +    entrypoint methods.
    +    Most of the parameters (like mounts, container_args, etc.) can be passed
    +    in to "pre-customize" the values.
    +    The auto_podman_args argument enables adding default arguments expected on
    +    all podman daemons (true by default).
    +    The auto_podman_mounts argument enables adding mounts expected on all
    +    daemons running on podman (true by default).
    +    """
    +    container_args = container_args if container_args else []
    +    container_mounts = container_mounts if container_mounts else {}
    +    container_binds = container_binds if container_binds else []
    +    envs = envs if envs else []
    +    args = args if args else []
    +
    +    if entrypoint is None:
    +        entrypoint = daemon.default_entrypoint()
    +    daemon.customize_container_args(ctx, container_args)
    +    daemon.customize_container_mounts(ctx, container_mounts)
    +    daemon.customize_container_binds(ctx, container_binds)
    +    daemon.customize_container_envs(ctx, envs)
    +    daemon.customize_process_args(ctx, args)
    +
    +    _is_podman = isinstance(ctx.container_engine, Podman)
    +    if auto_podman_mounts and _is_podman:
    +        ctx.container_engine.update_mounts(ctx, container_mounts)
    +    if auto_podman_args and _is_podman:
    +        container_args.extend(
    +            ctx.container_engine.service_args(
    +                ctx, daemon.identity.service_name
    +            )
    +        )
    +
    +    return CephContainer.for_daemon(
    +        ctx,
    +        ident=daemon.identity,
    +        entrypoint=entrypoint,
    +        args=args,
    +        container_args=container_args,
    +        volume_mounts=container_mounts,
    +        bind_mounts=container_binds,
    +        envs=envs,
    +        privileged=privileged,
    +        ptrace=ptrace,
    +        host_network=host_network,
    +    )
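
A minimal sketch of how a daemon form plugs into daemon_to_container follows; the ToyDaemon class, its 'toy' daemon type, binary path, and mount are all invented, and the for_daemon_type/create/identity surface is assumed from the DaemonForm base class rather than shown in this hunk.

    from typing import Dict, List

    from cephadmlib.container_daemon_form import (
        ContainerDaemonForm,
        daemon_to_container,
    )
    from cephadmlib.context import CephadmContext
    from cephadmlib.daemon_identity import DaemonIdentity


    class ToyDaemon(ContainerDaemonForm):
        """Hypothetical daemon form used only to show the hook shapes."""

        daemon_type = 'toy'

        @classmethod
        def for_daemon_type(cls, daemon_type: str) -> bool:
            return daemon_type == cls.daemon_type

        @classmethod
        def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'ToyDaemon':
            return cls(ident)

        def __init__(self, ident: DaemonIdentity) -> None:
            self._identity = ident

        @property
        def identity(self) -> DaemonIdentity:
            return self._identity

        def default_entrypoint(self) -> str:
            return '/usr/bin/toyd'  # invented binary path

        def customize_container_mounts(
            self, ctx: CephadmContext, mounts: Dict[str, str]
        ) -> None:
            # mounts maps host paths to container paths (plus options)
            mounts['/var/lib/toy'] = '/var/lib/toy:z'

        def customize_process_args(
            self, ctx: CephadmContext, args: List[str]
        ) -> None:
            args.append('--foreground')


    # daemon_to_container(ctx, ToyDaemon(ident)) then folds the hooks above
    # into a single CephContainer ready to be deployed.
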
    diff --git a/src/cephadm/cephadmlib/container_engine_base.py b/src/cephadm/cephadmlib/container_engine_base.py
    index 135b2f4f3210..c8d4bfbcf290 100644
    --- a/src/cephadm/cephadmlib/container_engine_base.py
    +++ b/src/cephadm/cephadmlib/container_engine_base.py
    @@ -11,5 +11,12 @@ def __init__(self) -> None:
         def EXE(self) -> str:
             raise NotImplementedError()
     
    +    @property
    +    def unlimited_pids_option(self) -> str:
    +        """The option to pass to the container engine for allowing unlimited
    +        pids (processes).
    +        """
    +        return '--pids-limit=0'
    +
         def __str__(self) -> str:
             return f'{self.EXE} ({self.path})'
    diff --git a/src/cephadm/cephadmlib/container_engines.py b/src/cephadm/cephadmlib/container_engines.py
    index 396161906431..64ce7ae821ab 100644
    --- a/src/cephadm/cephadmlib/container_engines.py
    +++ b/src/cephadm/cephadmlib/container_engines.py
    @@ -2,12 +2,17 @@
     
     import os
     
    -from typing import Tuple, List, Optional
    +from typing import Tuple, List, Optional, Dict
     
     from .call_wrappers import call_throws, CallVerbosity
     from .context import CephadmContext
     from .container_engine_base import ContainerEngine
    -from .constants import DEFAULT_MODE, MIN_PODMAN_VERSION
    +from .constants import (
    +    CGROUPS_SPLIT_PODMAN_VERSION,
    +    DEFAULT_MODE,
    +    MIN_PODMAN_VERSION,
    +    PIDS_LIMIT_UNLIMITED_PODMAN_VERSION,
    +)
     from .exceptions import Error
     
     
    @@ -36,6 +41,68 @@ def __str__(self) -> str:
             version = '.'.join(map(str, self.version))
             return f'{self.EXE} ({self.path}) version {version}'
     
    +    @property
    +    def supports_split_cgroups(self) -> bool:
    +        """Return true if this version of podman supports split cgroups."""
    +        return self.version >= CGROUPS_SPLIT_PODMAN_VERSION
    +
    +    @property
    +    def unlimited_pids_option(self) -> str:
    +        """The option to pass to the container engine for allowing unlimited
    +        pids (processes).
    +        """
    +        if self.version >= PIDS_LIMIT_UNLIMITED_PODMAN_VERSION:
    +            return '--pids-limit=-1'
    +        return '--pids-limit=0'
    +
    +    def service_args(
    +        self, ctx: CephadmContext, service_name: str
    +    ) -> List[str]:
    +        """Return a list of arguments that should be added to the engine's run
    +        command when starting a long-term service (aka daemon) container.
    +        """
    +        args = []
    +        # if using podman, set -d, --conmon-pidfile & --cidfile flags
    +        # so service can have Type=Forking
    +        runtime_dir = '/run'
    +        args.extend(
    +            [
    +                '-d',
    +                '--log-driver',
    +                'journald',
    +                '--conmon-pidfile',
    +                f'{runtime_dir}/{service_name}-pid',
    +                '--cidfile',
    +                f'{runtime_dir}/{service_name}-cid',
    +            ]
    +        )
    +        if self.supports_split_cgroups and not ctx.no_cgroups_split:
    +            args.append('--cgroups=split')
    +        # if /etc/hosts doesn't exist, we can be confident
    +        # users aren't using it for host name resolution
    +        # and adding --no-hosts avoids bugs created in certain daemons
    +        # by modifications podman makes to /etc/hosts
    +        # https://tracker.ceph.com/issues/58532
    +        # https://tracker.ceph.com/issues/57018
    +        if not os.path.exists('/etc/hosts'):
    +            args.append('--no-hosts')
    +        return args
    +
    +    def update_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        """Update mounts adding entries that are specific to podman."""
    +        # Modifications podman makes to /etc/hosts cause issues with certain
    +        # daemons (specifically, the "host.containers.internal" entry being
    +        # added to /etc/hosts in this case). To avoid that, but still
    +        # allow users to use /etc/hosts for hostname resolution, we can mount
    +        # the host's /etc/hosts file.
    +        # https://tracker.ceph.com/issues/58532
    +        # https://tracker.ceph.com/issues/57018
    +        if os.path.exists('/etc/hosts'):
    +            if '/etc/hosts' not in mounts:
    +                mounts['/etc/hosts'] = '/etc/hosts:ro'
    +
     
     class Docker(ContainerEngine):
         EXE = 'docker'
    @@ -108,3 +175,17 @@ def registry_login(
                 'Failed to login to custom registry @ %s as %s with given password'
                 % (ctx.registry_url, ctx.registry_username)
             )
    +
    +
    +def pull_command(
    +    ctx: CephadmContext, image: str, insecure: bool = False
    +) -> List[str]:
    +    """Return a command that can be run to pull an image."""
    +    cmd = [ctx.container_engine.path, 'pull', image]
    +    if isinstance(ctx.container_engine, Podman):
    +        if insecure:
    +            cmd.append('--tls-verify=false')
    +
    +        if os.path.exists('/etc/ceph/podman-auth.json'):
    +            cmd.append('--authfile=/etc/ceph/podman-auth.json')
    +    return cmd
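
For reference, here is a hedged sketch of what the new pull_command helper produces; the podman path and image tag are placeholders, `ctx` is assumed to be an in-scope CephadmContext whose container_engine is Podman, and the argument order follows the function above (base command first, then --tls-verify, then the optional authfile).

    from cephadmlib.container_engines import pull_command

    # Assuming podman lives at /usr/bin/podman and no
    # /etc/ceph/podman-auth.json is present:
    cmd = pull_command(ctx, 'quay.io/ceph/ceph:v19', insecure=True)
    # cmd == ['/usr/bin/podman', 'pull', 'quay.io/ceph/ceph:v19',
    #         '--tls-verify=false']
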
    diff --git a/src/cephadm/cephadmlib/container_types.py b/src/cephadm/cephadmlib/container_types.py
    index 34c7ed29ada0..f1e829cbdf7b 100644
    --- a/src/cephadm/cephadmlib/container_types.py
    +++ b/src/cephadm/cephadmlib/container_types.py
    @@ -1,11 +1,15 @@
     # container_types.py - container instance wrapper types
     
    +import copy
    +import enum
    +import functools
     import os
     
    -from typing import Dict, List, Optional, Any
    +from typing import Dict, List, Optional, Any, Union, Tuple, Iterable, cast
     
     from .call_wrappers import call, call_throws, CallVerbosity
     from .constants import DEFAULT_TIMEOUT
    +import ceph.cephadm.images as default_images
     from .container_engines import Docker, Podman
     from .context import CephadmContext
     from .daemon_identity import DaemonIdentity, DaemonSubIdentity
    @@ -70,9 +74,8 @@ def cname(self) -> str:
             assert self.identity
             return self.identity.container_name
     
    -    def build_run_cmd(self) -> List[str]:
    -        cmd_args: List[str] = [self._container_engine]
    -        cmd_args.append('run')
    +    def build_engine_run_args(self) -> List[str]:
    +        cmd_args: List[str] = []
             if self.remove:
                 cmd_args.append('--rm')
             if self.ipc:
    @@ -147,14 +150,14 @@ def build_run_cmd(self) -> List[str]:
                 [],
             )
     
    +        return cmd_args + self.container_args + envs + vols + binds
    +
    +    def build_run_cmd(self) -> List[str]:
             return (
    -            cmd_args
    -            + self.container_args
    -            + envs
    -            + vols
    -            + binds
    +            [self._container_engine, 'run']
    +            + self.build_engine_run_args()
                 + [self.image]
    -            + self.args
    +            + list(self.args)
             )
     
         def build_rm_cmd(
    @@ -179,6 +182,33 @@ def build_stop_cmd(
             cmd.append(cname or self.cname)
             return cmd
     
    +    @classmethod
    +    def from_container(
    +        cls,
    +        other: 'BasicContainer',
    +        *,
    +        ident: Optional[DaemonIdentity] = None,
    +    ) -> 'BasicContainer':
    +        return cls(
    +            other.ctx,
    +            image=other.image,
    +            entrypoint=other.entrypoint,
    +            identity=(ident or other.identity),
    +            args=other.args,
    +            container_args=copy.copy(other.container_args),
    +            envs=copy.copy(other.envs),
    +            volume_mounts=copy.copy(other.volume_mounts),
    +            bind_mounts=copy.copy(other.bind_mounts),
    +            network=other.network,
    +            ipc=other.ipc,
    +            init=other.init,
    +            ptrace=other.ptrace,
    +            privileged=other.privileged,
    +            remove=other.remove,
    +            memory_request=other.memory_request,
    +            memory_limit=other.memory_limit,
    +        )
    +
     
     class CephContainer(BasicContainer):
         def __init__(
    @@ -459,6 +489,63 @@ def run_cmd(self) -> List[str]:
         def rm_cmd(self, storage: bool = False) -> List[str]:
             return self.build_rm_cmd(storage=storage)
     
    +    def stop_cmd(self, timeout: Optional[int] = None) -> List[str]:
    +        return self.build_stop_cmd(timeout=timeout)
    +
    +
    +class SidecarContainer(BasicContainer):
    +    @classmethod
    +    def from_primary_and_values(
    +        cls,
    +        ctx: CephadmContext,
    +        primary: BasicContainer,
    +        sidecar_name: str,
    +        *,
    +        image: str = '',
    +        entrypoint: str = '',
    +        args: Optional[List[str]] = None,
    +        init: Optional[bool] = None,
    +    ) -> 'SidecarContainer':
    +        assert primary.identity
    +        identity = DaemonSubIdentity.from_parent(
    +            primary.identity, sidecar_name
    +        )
    +        ctr = cast(
    +            SidecarContainer, cls.from_container(primary, ident=identity)
    +        )
    +        ctr.remove = True
    +        if image:
    +            ctr.image = image
    +        if entrypoint:
    +            ctr.entrypoint = entrypoint
    +        if args:
    +            ctr.args = args
    +        if init is not None:
    +            ctr.init = init
    +        return ctr
    +
    +    def build_engine_run_args(self) -> List[str]:
    +        assert isinstance(self.identity, DaemonSubIdentity)
    +        cmd_args = super().build_engine_run_args()
    +        if self._using_podman:
    +            # sidecar containers are always services, otherwise they
    +            # would not be sidecars
    +            cmd_args += self.ctx.container_engine.service_args(
    +                self.ctx, self.identity.sidecar_service_name
    +            )
    +        return cmd_args
    +
    +    def run_cmd(self) -> List[str]:
    +        if not (self.envs and self.envs[0].startswith('NODE_NAME=')):
    +            self.envs.insert(0, 'NODE_NAME=%s' % get_hostname())
    +        return self.build_run_cmd()
    +
    +    def rm_cmd(self, storage: bool = False) -> List[str]:
    +        return self.build_rm_cmd(storage=storage)
    +
    +    def stop_cmd(self, timeout: Optional[int] = None) -> List[str]:
    +        return self.build_stop_cmd(timeout=timeout)
    +
     
     def is_container_running(ctx: CephadmContext, c: 'CephContainer') -> bool:
         if ctx.name.split('.', 1)[0] in ['agent', 'cephadm-exporter']:
    @@ -485,3 +572,107 @@ def get_running_container_name(
             if out.strip() == 'running':
                 return name
         return None
    +
    +
    +def extract_uid_gid(
    +    ctx: CephadmContext,
    +    img: str = '',
    +    file_path: Union[str, List[str]] = '/var/lib/ceph',
    +) -> Tuple[int, int]:
    +    if not img:
    +        img = ctx.image
    +
    +    if isinstance(file_path, str):
    +        paths = [file_path]
    +    else:
    +        paths = file_path
    +
    +    ex: Optional[Tuple[str, RuntimeError]] = None
    +
    +    for fp in paths:
    +        try:
    +            out = CephContainer(
    +                ctx, image=img, entrypoint='stat', args=['-c', '%u %g', fp]
    +            ).run(verbosity=CallVerbosity.QUIET_UNLESS_ERROR)
    +            uid, gid = out.split(' ')
    +            return int(uid), int(gid)
    +        except RuntimeError as e:
    +            ex = (fp, e)
    +    if ex:
    +        raise Error(f'Failed to extract uid/gid for path {ex[0]}: {ex[1]}')
    +
    +    raise RuntimeError('uid/gid not found')
    +
    +
    +@functools.lru_cache()
    +def _opt_key(value: str) -> str:
    +    """Return a (long) option stripped of its value."""
    +    return value.split('=', 1)[0]
    +
    +
    +def _replace_container_arg(args: List[str], new_arg: str) -> None:
    +    """Remove and replace arguments that have the same `--xyz` part as
    +    the given `new_arg`. If new_arg is expected to have a value it
    +    must be part of the new_arg string following an equal sign (`=`).
    +    The existing arg may be one string or two strings in the input list.
    +    """
    +    key = _opt_key(new_arg)
    +    has_value = key != new_arg
    +    try:
    +        idx = [_opt_key(v) for v in args].index(key)
    +        if '=' in args[idx] or not has_value:
    +            del args[idx]
    +        else:
    +            del args[idx]
    +            del args[idx]
    +    except ValueError:
    +        pass
    +    args.append(new_arg)
    +
    +
    +class Namespace(enum.Enum):
    +    """General container namespace control options."""
    +
    +    cgroupns = 'cgroupns'
    +    cgroup = 'cgroupns'  # alias
    +    ipc = 'ipc'
    +    network = 'network'
    +    pid = 'pid'
    +    userns = 'userns'
    +    user = 'userns'  # alias
    +    uts = 'uts'
    +
    +    def to_option(self, value: str) -> str:
    +        return f'--{self}={value}'
    +
    +    def __str__(self) -> str:
    +        return self.value
    +
    +
    +def enable_shared_namespaces(
    +    args: List[str],
    +    name: str,
    +    ns: Iterable[Namespace],
    +) -> None:
    +    """Update the args list to contain options that enable container namespace
    +    sharing where name is the name/id of the target container and ns is a list
    +    or set of namespaces that should be shared.
    +    """
    +    cc = f'container:{name}'
    +    for n in ns:
    +        _replace_container_arg(args, n.to_option(cc))
    +
    +
    +def get_mgr_images() -> dict:
    +    """Return dict of default mgr images"""
    +    mgr_prefix = 'mgr/cephadm/container_image_'
    +    mgr_images = {}
    +    images = vars(default_images)
    +    for key, value in images.items():
    +        if key.startswith('DEFAULT_') and key.endswith('_IMAGE'):
    +            # flake8 and black disagree about spaces around ":" hence the noqa comment
    +            suffix = key[
    +                len('DEFAULT_') : -len('_IMAGE')  # noqa: E203
    +            ].lower()
    +            mgr_images[mgr_prefix + suffix] = value
    +    return mgr_images
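
The effect of enable_shared_namespaces is easiest to see on a concrete argument list; the container name below is invented, but the replace-then-append behaviour follows directly from _replace_container_arg above.

    from cephadmlib.container_types import Namespace, enable_shared_namespaces

    args = ['--pids-limit=0', '--ipc=host']
    enable_shared_namespaces(args, 'ceph-fsid-mgr-x', [Namespace.ipc, Namespace.pid])
    # The existing --ipc option is replaced, the missing --pid one is appended:
    # ['--pids-limit=0',
    #  '--ipc=container:ceph-fsid-mgr-x',
    #  '--pid=container:ceph-fsid-mgr-x']
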
    diff --git a/src/cephadm/cephadmlib/context_getters.py b/src/cephadm/cephadmlib/context_getters.py
    index a78c67a7812e..7b99abeaa5f5 100644
    --- a/src/cephadm/cephadmlib/context_getters.py
    +++ b/src/cephadm/cephadmlib/context_getters.py
    @@ -6,7 +6,6 @@
     
     from typing import Any, Dict, List, Optional, Tuple, Union
     
    -from .constants import CGROUPS_SPLIT_PODMAN_VERSION
     from .container_engines import Podman
     from .context import CephadmContext
     from .exceptions import Error
    @@ -137,6 +136,24 @@ def fetch_endpoints(ctx: CephadmContext) -> List[EndPoint]:
         return endpoints
     
     
    +def fetch_rank_info(ctx: CephadmContext) -> Optional[Tuple[int, int]]:
    +    """Return the daemon's rank and rank generation values as a tuple of ints
    +    if available. Return None if rank information is not available.
    +    """
    +    meta = getattr(ctx, 'meta_properties', None)
    +    if meta is None:
    +        return None
    +    # We must either return both rank *and* rank_generation together or
    +    # nothing at all.
    +    try:
    +        rank, gen = meta['rank'], meta['rank_generation']
    +    except KeyError:
    +        return None
    +    if rank is None or gen is None:
    +        return None
    +    return int(rank), int(gen)
    +
    +
     def get_config_and_keyring(ctx):
         # type: (CephadmContext) -> Tuple[Optional[str], Optional[str]]
         config = None
    @@ -186,5 +203,5 @@ def should_log_to_journald(ctx: CephadmContext) -> bool:
             return ctx.log_to_journald
         return (
             isinstance(ctx.container_engine, Podman)
    -        and ctx.container_engine.version >= CGROUPS_SPLIT_PODMAN_VERSION
    +        and ctx.container_engine.supports_split_cgroups
         )
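
A small sketch of the all-or-nothing contract of fetch_rank_info; the SimpleNamespace stands in for a CephadmContext whose meta_properties were populated from the deploy payload, and the values are invented.

    from types import SimpleNamespace

    from cephadmlib.context_getters import fetch_rank_info

    ctx = SimpleNamespace(meta_properties={'rank': 0, 'rank_generation': 3})
    assert fetch_rank_info(ctx) == (0, 3)

    # If either value is missing or None, no rank info is reported at all.
    ctx = SimpleNamespace(meta_properties={'rank': 0, 'rank_generation': None})
    assert fetch_rank_info(ctx) is None
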
    diff --git a/src/cephadm/cephadmlib/daemon_identity.py b/src/cephadm/cephadmlib/daemon_identity.py
    index 7fc4af1cb771..bfe1a855186d 100644
    --- a/src/cephadm/cephadmlib/daemon_identity.py
    +++ b/src/cephadm/cephadmlib/daemon_identity.py
    @@ -1,14 +1,23 @@
     # deamon_identity.py - classes for identifying daemons & services
     
    +import enum
     import os
     import pathlib
     import re
     
    -from typing import Union
    +from typing import Union, Optional, Tuple
     
     from .context import CephadmContext
     
     
    +class Categories(str, enum.Enum):
    +    SIDECAR = 'sidecar'
    +    INIT = 'init'
    +
    +    def __str__(self) -> str:
    +        return self.value
    +
    +
     class DaemonIdentity:
         def __init__(
             self,
    @@ -48,12 +57,45 @@ def container_name(self) -> str:
             name = f'ceph-{self.fsid}-{self.daemon_type}-{self.daemon_id}'
             return name.replace('.', '-')
     
    +    def _systemd_name(
    +        self,
    +        *,
    +        framework: str = 'ceph',
    +        category: str = '',
    +        suffix: str = '',
    +        extension: str = '',
    +    ) -> str:
    +        if category:
    +            # validate the category value
    +            category = Categories(category)
    +        template_terms = [framework, self.fsid, category]
    +        instance_terms = [self.daemon_type]
    +        instance_terms.append(
    +            f'{self.daemon_id}:{suffix}' if suffix else self.daemon_id
    +        )
    +        instance_terms.append(extension)
    +        # use a comprehension to filter out terms that are blank
    +        base = '-'.join(v for v in template_terms if v)
    +        svc = '.'.join(v for v in instance_terms if v)
    +        return f'{base}@{svc}'
    +
         @property
         def unit_name(self) -> str:
    -        return f'ceph-{self.fsid}@{self.daemon_type}.{self.daemon_id}'
    +        return self._systemd_name()
    +
    +    @property
    +    def service_name(self) -> str:
    +        return self._systemd_name(extension='service')
    +
    +    @property
    +    def init_service_name(self) -> str:
    +    # all init containers are run as a single systemd service
    +        return self._systemd_name(category='init', extension='service')
     
         def data_dir(self, base_data_dir: Union[str, os.PathLike]) -> str:
    -        return str(pathlib.Path(base_data_dir) / self.fsid / self.daemon_name)
    +        # do not use self.daemon_name as that may be overridden in subclasses
    +        dn = f'{self.daemon_type}.{self.daemon_id}'
    +        return str(pathlib.Path(base_data_dir) / self.fsid / dn)
     
         @classmethod
         def from_name(cls, fsid: str, name: str) -> 'DaemonIdentity':
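
    To make the naming scheme concrete, a small sketch of the unit and service names
    _systemd_name() composes (the fsid below is a hypothetical placeholder):

        from cephadmlib.daemon_identity import DaemonIdentity

        fsid = '00000000-0000-0000-0000-000000000000'  # hypothetical fsid
        mon = DaemonIdentity(fsid, 'mon', 'host1')
        mon.unit_name          # 'ceph-<fsid>@mon.host1'
        mon.service_name       # 'ceph-<fsid>@mon.host1.service'
        mon.init_service_name  # 'ceph-<fsid>-init@mon.host1.service'
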
    @@ -75,7 +117,7 @@ def __init__(
         ) -> None:
             super().__init__(fsid, daemon_type, daemon_id)
             self._subcomponent = subcomponent
    -        if not re.match('^[a-zA-Z0-9]{1,15}$', self._subcomponent):
    +        if not re.match('^[a-zA-Z0-9]{1,32}$', self._subcomponent):
                 raise ValueError(
                     f'invalid subcomponent; invalid characters: {subcomponent!r}'
                 )
    @@ -99,7 +141,24 @@ def unit_name(self) -> str:
             # of the same unit as the primary. However, to fix a bug with iscsi
             # this is a quick and dirty workaround for distinguishing the two types
             # when generating --cidfile and --conmon-pidfile values.
    -        return f'ceph-{self.fsid}@{self.daemon_type}.{self.daemon_id}.{self.subcomponent}'
    +        return self._systemd_name(suffix=self.subcomponent)
    +
    +    @property
    +    def service_name(self) -> str:
    +        # a DaemonSubIdentity has no primary service of its own: use the
    +        # parent identity's service_name for that, or a sub-identity
    +        # specific method (like sidecar_service_name) for sub-identity
    +        # based services
    +        raise ValueError('called service_name on DaemonSubIdentity')
    +
    +    @property
    +    def sidecar_service_name(self) -> str:
    +        return self._systemd_name(
    +            category='sidecar', suffix=self.subcomponent, extension='service'
    +        )
    +
    +    def sidecar_script(self, base_data_dir: Union[str, os.PathLike]) -> str:
    +        sname = f'sidecar-{self.subcomponent}.run'
    +        return str(pathlib.Path(self.data_dir(base_data_dir)) / sname)
     
         @property
         def legacy_container_name(self) -> str:
    @@ -117,3 +176,56 @@ def from_parent(
                 parent.daemon_id,
                 subcomponent,
             )
    +
    +    @classmethod
    +    def from_service_name(
    +        cls, service_name: str
    +    ) -> Tuple['DaemonSubIdentity', str]:
    +        """Return a DaemonSubIdentity and category value by parsing the
    +        contents of a systemd service name for a sidecar container.
    +        """
    +        # ceph services always have the template@instance form
    +        tpart, ipart = service_name.split('@', 1)
    +        # drop the .service if it exists
    +        if ipart.endswith('.service'):
    +            ipart = ipart[:-8]
    +        # verify the service name starts with 'ceph' -- our framework
    +        framework, tpart = tpart.split('-', 1)
    +        if framework != 'ceph':
    +            raise ValueError(f'Invalid framework value: {service_name}')
    +        # we're parsing only services for subcomponents. it must take the
    +        # form <fsid>-<category>, where the category is either sidecar or
    +        # init.
    +        fsid, category = tpart.rsplit('-', 1)
    +        try:
    +            Categories(category)
    +        except ValueError:
    +            raise ValueError(f'Invalid service category: {service_name}')
    +        # if it is a sidecar it will have a subcomponent name following a colon
    +        svcparts = ipart.split(':')
    +        if len(svcparts) == 1:
    +            subc = ''
    +        elif len(svcparts) == 2:
    +            subc = svcparts[1]
    +        else:
    +            raise ValueError(f'Unexpected instance value: {ipart}')
    +        # only services based on sidecars currently have named subcomponents
    +        # init subcomponents are all "hidden" within a single init service
    +        if subc and not category == Categories.SIDECAR:
    +            raise ValueError(
    +                f'Unexpected subcomponent {subc!r} for category {category}'
    +            )
    +        elif not subc:
    +            # because we return a DaemonSubIdentity we need some value for
    +            # the subcomponent on init services. Just repeat the category
    +            subc = str(category)
    +        daemon_type, daemon_id = svcparts[0].split('.', 1)
    +        return cls(fsid, daemon_type, daemon_id, subc), category
    +
    +    @classmethod
    +    def must(cls, value: Optional[DaemonIdentity]) -> 'DaemonSubIdentity':
    +        """Helper to assert value is of the correct type.  Mostly to make mypy
    +        happy.
    +        """
    +        if not isinstance(value, cls):
    +            raise TypeError(f'{value!r} is not a {cls}')
    +        return value
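
    A short sketch of the round trip this enables, again with a placeholder fsid: a name
    produced by sidecar_service_name can be parsed back into an identity and category:

        from cephadmlib.daemon_identity import DaemonSubIdentity

        fsid = '00000000-0000-0000-0000-000000000000'  # hypothetical fsid
        tcmu = DaemonSubIdentity(fsid, 'iscsi', 'gw1', 'tcmu')
        svc = tcmu.sidecar_service_name  # 'ceph-<fsid>-sidecar@iscsi.gw1:tcmu.service'

        ident, category = DaemonSubIdentity.from_service_name(svc)
        assert category == 'sidecar'
        assert (ident.daemon_type, ident.daemon_id, ident.subcomponent) == (
            'iscsi', 'gw1', 'tcmu'
        )
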
    diff --git a/src/cephadm/cephadmlib/daemons/__init__.py b/src/cephadm/cephadmlib/daemons/__init__.py
    new file mode 100644
    index 000000000000..bdf2c532e02d
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/__init__.py
    @@ -0,0 +1,32 @@
    +from .ceph import Ceph, OSD, CephExporter
    +from .custom import CustomContainer
    +from .ingress import HAproxy, Keepalived
    +from .iscsi import CephIscsi
    +from .monitoring import Monitoring
    +from .nfs import NFSGanesha
    +from .nvmeof import CephNvmeof
    +from .smb import SMB
    +from .snmp import SNMPGateway
    +from .tracing import Tracing
    +from .node_proxy import NodeProxy
    +from .mgmt_gateway import MgmtGateway
    +from .oauth2_proxy import OAuth2Proxy
    +
    +__all__ = [
    +    'Ceph',
    +    'CephExporter',
    +    'CephIscsi',
    +    'CephNvmeof',
    +    'CustomContainer',
    +    'HAproxy',
    +    'Keepalived',
    +    'Monitoring',
    +    'NFSGanesha',
    +    'OSD',
    +    'SMB',
    +    'SNMPGateway',
    +    'Tracing',
    +    'NodeProxy',
    +    'MgmtGateway',
    +    'OAuth2Proxy',
    +]
    diff --git a/src/cephadm/cephadmlib/daemons/ceph.py b/src/cephadm/cephadmlib/daemons/ceph.py
    new file mode 100644
    index 000000000000..cf26e0171648
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/ceph.py
    @@ -0,0 +1,520 @@
    +import logging
    +import os
    +
    +from typing import Any, Dict, List, Optional, Tuple, Union
    +
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer, extract_uid_gid
    +from ..context_getters import (
    +    fetch_configs,
    +    get_config_and_keyring,
    +    should_log_to_journald,
    +)
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..constants import DEFAULT_IMAGE
    +from ..context import CephadmContext
    +from ..deployment_utils import to_deployment_container
    +from ..exceptions import Error
    +from ..file_utils import (
    +    make_run_dir,
    +    pathify,
    +    populate_files,
    +    makedirs,
    +    recursive_chown,
    +)
    +from ..data_utils import dict_get
    +from ..host_facts import HostFacts
    +from ..logging import Highlight
    +from ..net_utils import get_hostname, get_ip_addresses
    +
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class Ceph(ContainerDaemonForm):
    +    _daemons = (
    +        'mon',
    +        'mgr',
    +        'osd',
    +        'mds',
    +        'rgw',
    +        'rbd-mirror',
    +        'crash',
    +        'cephfs-mirror',
    +    )
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        # TODO: figure out a way to un-special-case osd
    +        return daemon_type in cls._daemons and daemon_type != 'osd'
    +
    +    def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None:
    +        self.ctx = ctx
    +        self._identity = ident
    +        self.user_supplied_config = False
    +
    +    @classmethod
    +    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Ceph':
    +        return cls(ctx, ident)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return self._identity
    +
    +    def firewall_service_name(self) -> str:
    +        if self.identity.daemon_type == 'mon':
    +            return 'ceph-mon'
    +        elif self.identity.daemon_type in ['mgr', 'mds']:
    +            return 'ceph'
    +        return ''
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        # previous to being a ContainerDaemonForm, this call to create the
    +        # var-run directory was hard coded in the deploy path. Eventually, it
    +        # would be good to move this somewhere cleaner and avoid needing to know
    +        # the uid/gid here.
    +        uid, gid = self.uid_gid(ctx)
    +        make_run_dir(ctx.fsid, uid, gid)
    +
    +        # mon and osd need privileged in order for libudev to query devices
    +        privileged = self.identity.daemon_type in ['mon', 'osd']
    +        ctr = daemon_to_container(ctx, self, privileged=privileged)
    +        ctr = to_deployment_container(ctx, ctr)
    +        config_json = fetch_configs(ctx)
    +        if self.identity.daemon_type == 'mon' and config_json is not None:
    +            if 'crush_location' in config_json:
    +                c_loc = config_json['crush_location']
    +                # was originally "c.args.extend(['--set-crush-location', c_loc])"
    +                # but that doesn't seem to persist in the object after it's passed
    +                # in further function calls
    +                ctr.args = ctr.args + ['--set-crush-location', c_loc]
    +        return ctr
    +
    +    _uid_gid: Optional[Tuple[int, int]] = None
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        if self._uid_gid is None:
    +            self._uid_gid = extract_uid_gid(ctx)
    +        return self._uid_gid
    +
    +    def config_and_keyring(
    +        self, ctx: CephadmContext
    +    ) -> Tuple[Optional[str], Optional[str]]:
    +        return get_config_and_keyring(ctx)
    +
    +    def get_daemon_args(self) -> List[str]:
    +        if self.identity.daemon_type == 'crash':
    +            return []
    +        r = [
    +            '--setuser',
    +            'ceph',
    +            '--setgroup',
    +            'ceph',
    +            '--default-log-to-file=false',
    +        ]
    +        log_to_journald = should_log_to_journald(self.ctx)
    +        if log_to_journald:
    +            r += [
    +                '--default-log-to-journald=true',
    +                '--default-log-to-stderr=false',
    +            ]
    +        else:
    +            r += [
    +                '--default-log-to-stderr=true',
    +                '--default-log-stderr-prefix=debug ',
    +            ]
    +        if self.identity.daemon_type == 'mon':
    +            r += [
    +                '--default-mon-cluster-log-to-file=false',
    +            ]
    +            if log_to_journald:
    +                r += [
    +                    '--default-mon-cluster-log-to-journald=true',
    +                    '--default-mon-cluster-log-to-stderr=false',
    +                ]
    +            else:
    +                r += ['--default-mon-cluster-log-to-stderr=true']
    +        return r
    +
    +    @staticmethod
    +    def get_ceph_mounts(
    +        ctx: CephadmContext,
    +        ident: DaemonIdentity,
    +        no_config: bool = False,
    +    ) -> Dict[str, str]:
    +        # Warning: This is a hack done for more expedient refactoring
    +        mounts = get_ceph_mounts_for_type(ctx, ident.fsid, ident.daemon_type)
    +        data_dir = ident.data_dir(ctx.data_dir)
    +        if ident.daemon_type == 'rgw':
    +            cdata_dir = '/var/lib/ceph/radosgw/ceph-rgw.%s' % (
    +                ident.daemon_id
    +            )
    +        else:
    +            cdata_dir = '/var/lib/ceph/%s/ceph-%s' % (
    +                ident.daemon_type,
    +                ident.daemon_id,
    +            )
    +        if ident.daemon_type != 'crash':
    +            mounts[data_dir] = cdata_dir + ':z'
    +        if not no_config:
    +            mounts[data_dir + '/config'] = '/etc/ceph/ceph.conf:z'
    +        if ident.daemon_type in [
    +            'rbd-mirror',
    +            'cephfs-mirror',
    +            'crash',
    +            'ceph-exporter',
    +        ]:
    +            # these do not search for their keyrings in a data directory
    +            mounts[
    +                data_dir + '/keyring'
    +            ] = '/etc/ceph/ceph.client.%s.%s.keyring' % (
    +                ident.daemon_type,
    +                ident.daemon_id,
    +            )
    +        return mounts
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        no_config = bool(
    +            getattr(ctx, 'config', None) and self.user_supplied_config
    +        )
    +        cm = self.get_ceph_mounts(
    +            ctx,
    +            self.identity,
    +            no_config=no_config,
    +        )
    +        mounts.update(cm)
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.append(ctx.container_engine.unlimited_pids_option)
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        ident = self.identity
    +        if ident.daemon_type == 'rgw':
    +            name = 'client.rgw.%s' % ident.daemon_id
    +        elif ident.daemon_type == 'rbd-mirror':
    +            name = 'client.rbd-mirror.%s' % ident.daemon_id
    +        elif ident.daemon_type == 'cephfs-mirror':
    +            name = 'client.cephfs-mirror.%s' % ident.daemon_id
    +        elif ident.daemon_type == 'crash':
    +            name = 'client.crash.%s' % ident.daemon_id
    +        elif ident.daemon_type in ['mon', 'mgr', 'mds', 'osd']:
    +            name = ident.daemon_name
    +        else:
    +            raise ValueError(ident)
    +        args.extend(['-n', name])
    +        if ident.daemon_type != 'crash':
    +            args.append('-f')
    +        args.extend(self.get_daemon_args())
    +
    +    def customize_container_envs(
    +        self, ctx: CephadmContext, envs: List[str]
    +    ) -> None:
    +        envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
    +
    +    def default_entrypoint(self) -> str:
    +        ep = {
    +            'rgw': '/usr/bin/radosgw',
    +            'rbd-mirror': '/usr/bin/rbd-mirror',
    +            'cephfs-mirror': '/usr/bin/cephfs-mirror',
    +        }
    +        daemon_type = self.identity.daemon_type
    +        return ep.get(daemon_type) or f'/usr/bin/ceph-{daemon_type}'
    +
    +
    +@register_daemon_form
    +class OSD(Ceph):
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        # TODO: figure out a way to un-special-case osd
    +        return daemon_type == 'osd'
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        ident: DaemonIdentity,
    +        osd_fsid: Optional[str] = None,
    +    ) -> None:
    +        super().__init__(ctx, ident)
    +        self._osd_fsid = osd_fsid
    +
    +    @classmethod
    +    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'OSD':
    +        osd_fsid = getattr(ctx, 'osd_fsid', None)
    +        if osd_fsid is None:
    +            logger.info(
    +                'Creating an OSD daemon form without an OSD FSID value'
    +            )
    +        return cls(ctx, ident, osd_fsid)
    +
    +    @staticmethod
    +    def get_sysctl_settings() -> List[str]:
    +        return [
    +            '# allow a large number of OSDs',
    +            'fs.aio-max-nr = 1048576',
    +            'kernel.pid_max = 4194304',
    +        ]
    +
    +    def firewall_service_name(self) -> str:
    +        return 'ceph'
    +
    +    @property
    +    def osd_fsid(self) -> Optional[str]:
    +        return self._osd_fsid
    +
    +
    +@register_daemon_form
    +class CephExporter(ContainerDaemonForm):
    +    """Defines a Ceph exporter container"""
    +
    +    daemon_type = 'ceph-exporter'
    +    entrypoint = '/usr/bin/ceph-exporter'
    +    DEFAULT_PORT = 9926
    +    port_map = {
    +        'ceph-exporter': DEFAULT_PORT,
    +    }
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        fsid: str,
    +        daemon_id: Union[int, str],
    +        config_json: Dict[str, Any],
    +        image: str = DEFAULT_IMAGE,
    +    ) -> None:
    +        self.ctx = ctx
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image
    +
    +        self.sock_dir = config_json.get('sock-dir', '/var/run/ceph/')
    +        _, ipv6_addrs = get_ip_addresses(get_hostname())
    +        addrs = '::' if ipv6_addrs else '0.0.0.0'
    +        self.addrs = config_json.get('addrs', addrs)
    +        self.port = config_json.get('port', self.DEFAULT_PORT)
    +        self.prio_limit = config_json.get('prio-limit', 5)
    +        self.stats_period = config_json.get('stats-period', 5)
    +        self.https_enabled: bool = config_json.get('https_enabled', False)
    +        self.files = dict_get(config_json, 'files', {})
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
    +    ) -> 'CephExporter':
    +        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'CephExporter':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    def get_daemon_args(self) -> List[str]:
    +        args = [
    +            f'--sock-dir={self.sock_dir}',
    +            f'--addrs={self.addrs}',
    +            f'--port={self.port}',
    +            f'--prio-limit={self.prio_limit}',
    +            f'--stats-period={self.stats_period}',
    +        ]
    +        if self.https_enabled:
    +            args.extend(
    +                [
    +                    '--cert-file',
    +                    '/etc/certs/ceph-exporter.crt',
    +                    '--key-file',
    +                    '/etc/certs/ceph-exporter.key',
    +                ]
    +            )
    +        return args
    +
    +    def validate(self) -> None:
    +        if not os.path.isdir(self.sock_dir):
    +            raise Error(
    +                f'Desired sock dir for ceph-exporter is not a directory: {self.sock_dir}'
    +            )
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return extract_uid_gid(ctx)
    +
    +    def config_and_keyring(
    +        self, ctx: CephadmContext
    +    ) -> Tuple[Optional[str], Optional[str]]:
    +        return get_config_and_keyring(ctx)
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        cm = Ceph.get_ceph_mounts(ctx, self.identity)
    +        mounts.update(cm)
    +        if self.https_enabled:
    +            data_dir = self.identity.data_dir(ctx.data_dir)
    +            mounts.update({os.path.join(data_dir, 'etc/certs'): '/etc/certs'})
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        name = 'client.ceph-exporter.%s' % self.identity.daemon_id
    +        args.extend(['-n', name, '-f'])
    +        args.extend(self.get_daemon_args())
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.append(ctx.container_engine.unlimited_pids_option)
    +
    +    def customize_container_envs(
    +        self, ctx: CephadmContext, envs: List[str]
    +    ) -> None:
    +        envs.append('TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728')
    +
    +    def default_entrypoint(self) -> str:
    +        return self.entrypoint
    +
    +    def prepare_data_dir(self, data_dir: str, uid: int, gid: int) -> None:
    +        if not os.path.exists(self.sock_dir):
    +            os.mkdir(self.sock_dir)
    +        # part of validation is for the sock dir, so we postpone
    +        # it until now
    +        self.validate()
    +
    +    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +        logger.info('Writing ceph-exporter config...')
    +        config_dir = os.path.join(data_dir, 'etc/')
    +        ssl_dir = os.path.join(data_dir, 'etc/certs')
    +        for ddir in [config_dir, ssl_dir]:
    +            makedirs(ddir, uid, gid, 0o755)
    +            recursive_chown(ddir, uid, gid)
    +        cert_files = {
    +            fname: content
    +            for fname, content in self.files.items()
    +            if fname.endswith('.crt') or fname.endswith('.key')
    +        }
    +        populate_files(ssl_dir, cert_files, uid, gid)
    +
    +
    +def get_ceph_mounts_for_type(
    +    ctx: CephadmContext, fsid: str, daemon_type: str
    +) -> Dict[str, str]:
    +    """The main implementation of get_container_mounts_for_type minus the call
    +    to _update_podman_mounts so that this can be called from
    +    get_container_mounts.
    +    """
    +    mounts = dict()
    +
    +    if daemon_type in ceph_daemons() or daemon_type in [
    +        'ceph-volume',
    +        'shell',
    +    ]:
    +        if fsid:
    +            run_path = os.path.join('/var/run/ceph', fsid)
    +            if os.path.exists(run_path):
    +                mounts[run_path] = '/var/run/ceph:z'
    +            log_dir = os.path.join(ctx.log_dir, fsid)
    +            if not os.path.exists(log_dir):
    +                os.mkdir(log_dir)
    +            mounts[log_dir] = '/var/log/ceph:z'
    +            crash_dir = '/var/lib/ceph/%s/crash' % fsid
    +            if os.path.exists(crash_dir):
    +                mounts[crash_dir] = '/var/lib/ceph/crash:z'
    +            if daemon_type != 'crash' and should_log_to_journald(ctx):
    +                journald_sock_dir = '/run/systemd/journal'
    +                mounts[journald_sock_dir] = journald_sock_dir
    +
    +    if daemon_type in [
    +        'mon',
    +        'osd',
    +        'ceph-volume',
    +        'clusterless-ceph-volume',
    +    ]:
    +        mounts['/dev'] = '/dev'  # FIXME: narrow this down?
    +        mounts['/run/udev'] = '/run/udev'
    +    if daemon_type in ['osd', 'ceph-volume', 'clusterless-ceph-volume']:
    +        mounts['/sys'] = '/sys'  # for numa.cc, pick_address, cgroups, ...
    +        mounts['/run/lvm'] = '/run/lvm'
    +        mounts['/run/lock/lvm'] = '/run/lock/lvm'
    +    if daemon_type in ['osd', 'ceph-volume']:
    +        # selinux-policy in the container may not match the host.
    +        if HostFacts(ctx).selinux_enabled:
    +            cluster_dir = f'{ctx.data_dir}/{fsid}'
    +            selinux_folder = f'{cluster_dir}/selinux'
    +            if os.path.exists(cluster_dir):
    +                if not os.path.exists(selinux_folder):
    +                    os.makedirs(selinux_folder, mode=0o755)
    +                mounts[selinux_folder] = '/sys/fs/selinux:ro'
    +            else:
    +                logger.error(
    +                    f'Cluster directory {cluster_dir} does not exist.'
    +                )
    +    if daemon_type == 'osd':
    +        mounts['/'] = '/rootfs'
    +    elif daemon_type == 'ceph-volume':
    +        mounts['/'] = '/rootfs:rslave'
    +
    +    try:
    +        if (
    +            ctx.shared_ceph_folder
    +        ):  # eases mgr module and ceph-volume development
    +            ceph_folder = pathify(ctx.shared_ceph_folder)
    +            if os.path.exists(ceph_folder):
    +                cephadm_binary = ceph_folder + '/src/cephadm/cephadm'
    +                if not os.path.exists(pathify(cephadm_binary)):
    +                    raise Error(
    +                        "cephadm binary does not exist. Please run './build.sh cephadm' from ceph/src/cephadm/ directory."
    +                    )
    +                mounts[cephadm_binary] = '/usr/sbin/cephadm'
    +                mounts[
    +                    ceph_folder + '/src/ceph-volume/ceph_volume'
    +                ] = '/usr/lib/python3.9/site-packages/ceph_volume'
    +                mounts[
    +                    ceph_folder + '/src/pybind/mgr'
    +                ] = '/usr/share/ceph/mgr'
    +                mounts[
    +                    ceph_folder + '/src/python-common/ceph'
    +                ] = '/usr/lib/python3.9/site-packages/ceph'
    +                mounts[
    +                    ceph_folder + '/monitoring/ceph-mixin/dashboards_out'
    +                ] = '/etc/grafana/dashboards/ceph-dashboard'
    +                mounts[
    +                    ceph_folder
    +                    + '/monitoring/ceph-mixin/prometheus_alerts.yml'
    +                ] = '/etc/prometheus/ceph/ceph_default_alerts.yml'
    +            else:
    +                logger.error(
    +                    'Ceph shared source folder does not exist.',
    +                    extra=Highlight.FAILURE.extra(),
    +                )
    +    except AttributeError:
    +        pass
    +    return mounts
    +
    +
    +def ceph_daemons() -> List[str]:
    +    """A legacy method that returns a list of all daemon types considered ceph
    +    daemons.
    +    """
    +    cds = list(Ceph._daemons)
    +    cds.append(CephExporter.daemon_type)
    +    return cds
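
    A quick sketch of how the forms in this module are selected via the
    register_daemon_form registry and each class's for_daemon_type() hook:

        from cephadmlib.daemons.ceph import Ceph, OSD, CephExporter, ceph_daemons

        assert Ceph.for_daemon_type('mon')
        assert not Ceph.for_daemon_type('osd')        # osd remains special-cased
        assert OSD.for_daemon_type('osd')
        assert CephExporter.for_daemon_type('ceph-exporter')
        assert 'ceph-exporter' in ceph_daemons()      # legacy "all ceph daemons" list
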
    diff --git a/src/cephadm/cephadmlib/daemons/custom.py b/src/cephadm/cephadmlib/daemons/custom.py
    new file mode 100644
    index 000000000000..76b4162e2893
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/custom.py
    @@ -0,0 +1,223 @@
    +import logging
    +import os
    +import re
    +
    +from typing import Any, Dict, List, Optional, Tuple, Union
    +
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer, InitContainer
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..data_utils import dict_get, dict_get_join
    +from ..deploy import DeploymentType
    +from ..deployment_utils import to_deployment_container
    +from ..file_utils import write_new, makedirs
    +from ..net_utils import EndPoint
    +from ..constants import UID_NOBODY, GID_NOGROUP
    +
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class CustomContainer(ContainerDaemonForm):
    +    """Defines a custom container"""
    +
    +    daemon_type = 'container'
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        fsid: str,
    +        daemon_id: Union[int, str],
    +        config_json: Dict,
    +        image: str,
    +    ) -> None:
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image
    +
    +        # config-json options
    +        self.entrypoint = dict_get(config_json, 'entrypoint')
    +        self.uid = dict_get(config_json, 'uid', UID_NOBODY)
    +        self.gid = dict_get(config_json, 'gid', GID_NOGROUP)
    +        self.volume_mounts = dict_get(config_json, 'volume_mounts', {})
    +        self.args = dict_get(config_json, 'args', [])
    +        self.envs = dict_get(config_json, 'envs', [])
    +        self.privileged = dict_get(config_json, 'privileged', False)
    +        self.bind_mounts = dict_get(config_json, 'bind_mounts', [])
    +        self.ports = dict_get(config_json, 'ports', [])
    +        self.dirs = dict_get(config_json, 'dirs', [])
    +        self.files = dict_get(config_json, 'files', {})
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
    +    ) -> 'CustomContainer':
    +        return cls(fsid, daemon_id, fetch_configs(ctx), ctx.image)
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'CustomContainer':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    +        """
    +        Create dirs/files below the container data directory.
    +        """
    +        logger.info(
    +            'Creating custom container configuration '
    +            'dirs/files in {} ...'.format(data_dir)
    +        )
    +
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % data_dir)
    +
    +        for dir_path in self.dirs:
    +            logger.info('Creating directory: {}'.format(dir_path))
    +            dir_path = os.path.join(data_dir, dir_path.strip('/'))
    +            makedirs(dir_path, uid, gid, 0o755)
    +
    +        for file_path in self.files:
    +            logger.info('Creating file: {}'.format(file_path))
    +            content = dict_get_join(self.files, file_path)
    +            file_path = os.path.join(data_dir, file_path.strip('/'))
    +            with write_new(
    +                file_path, owner=(uid, gid), encoding='utf-8'
    +            ) as f:
    +                f.write(content)
    +
    +    def get_daemon_args(self) -> List[str]:
    +        return []
    +
    +    def get_container_args(self) -> List[str]:
    +        return self.args
    +
    +    def get_container_envs(self) -> List[str]:
    +        return self.envs
    +
    +    def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
    +        """
    +        Get the volume mounts. Relative source paths will be located below
    +        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
    +
    +        Example:
    +        {
    +            /foo/conf: /conf
    +            foo/conf: /conf
    +        }
    +        becomes
    +        {
    +            /foo/conf: /conf
    +            /var/lib/ceph/<cluster-fsid>/<daemon-name>/foo/conf: /conf
    +        }
    +        """
    +        mounts = {}
    +        for source, destination in self.volume_mounts.items():
    +            source = os.path.join(data_dir, source)
    +            mounts[source] = destination
    +        return mounts
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        mounts.update(self._get_container_mounts(data_dir))
    +
    +    def _get_container_binds(self, data_dir: str) -> List[List[str]]:
    +        """
    +        Get the bind mounts. Relative `source=...` paths will be located below
    +        `/var/lib/ceph/<cluster-fsid>/<daemon-name>`.
    +
    +        Example:
    +        [
    +            'type=bind',
    +            'source=lib/modules',
    +            'destination=/lib/modules',
    +            'ro=true'
    +        ]
    +        becomes
    +        [
    +            ...
    +            'source=/var/lib/ceph/<cluster-fsid>/<daemon-name>/lib/modules',
    +            ...
    +        ]
    +        """
    +        binds = self.bind_mounts.copy()
    +        for bind in binds:
    +            for index, value in enumerate(bind):
    +                match = re.match(r'^source=(.+)$', value)
    +                if match:
    +                    bind[index] = 'source={}'.format(
    +                        os.path.join(data_dir, match.group(1))
    +                    )
    +        return binds
    +
    +    def customize_container_binds(
    +        self, ctx: CephadmContext, binds: List[List[str]]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        binds.extend(self._get_container_binds(data_dir))
    +
    +    # Cache the container so we don't need to rebuild it again when calling
    +    # into init_containers
    +    _container: Optional[CephContainer] = None
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        if self._container is None:
    +            ctr = daemon_to_container(
    +                ctx,
    +                self,
    +                host_network=False,
    +                privileged=self.privileged,
    +                ptrace=ctx.allow_ptrace,
    +            )
    +            self._container = to_deployment_container(ctx, ctr)
    +        return self._container
    +
    +    def init_containers(self, ctx: CephadmContext) -> List[InitContainer]:
    +        primary = self.container(ctx)
    +        init_containers: List[Dict[str, Any]] = getattr(
    +            ctx, 'init_containers', []
    +        )
    +        return [
    +            InitContainer.from_primary_and_opts(ctx, primary, ic_opts)
    +            for ic_opts in init_containers
    +        ]
    +
    +    def customize_container_endpoints(
    +        self, endpoints: List[EndPoint], deployment_type: DeploymentType
    +    ) -> None:
    +        if deployment_type == DeploymentType.DEFAULT:
    +            endpoints.extend([EndPoint('0.0.0.0', p) for p in self.ports])
    +
    +    def customize_container_envs(
    +        self, ctx: CephadmContext, envs: List[str]
    +    ) -> None:
    +        envs.extend(self.get_container_envs())
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(self.get_container_args())
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(self.get_daemon_args())
    +
    +    def default_entrypoint(self) -> str:
    +        return self.entrypoint or ''
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return self.uid, self.gid
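
    A minimal sketch of how CustomContainer resolves relative mount sources beneath the
    daemon's data directory (the fsid, names, and paths below are illustrative only, and
    dict_get is assumed to fall back to its defaults for the omitted config-json keys):

        from cephadmlib.daemons.custom import CustomContainer

        cfg = {'volume_mounts': {'cfg/app.conf': '/etc/app.conf', '/host/data': '/data'}}
        cc = CustomContainer('00000000-0000-0000-0000-000000000000', 'foo', cfg,
                             'example.com/app:latest')
        cc._get_container_mounts('/var/lib/ceph/FSID/container.foo')
        # {'/var/lib/ceph/FSID/container.foo/cfg/app.conf': '/etc/app.conf',
        #  '/host/data': '/data'}
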
    diff --git a/src/cephadm/cephadmlib/daemons/ingress.py b/src/cephadm/cephadmlib/daemons/ingress.py
    new file mode 100644
    index 000000000000..c88e39ac0257
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/ingress.py
    @@ -0,0 +1,285 @@
    +import os
    +
    +from typing import Dict, List, Optional, Tuple, Union
    +
    +from ceph.cephadm.images import (
    +    DEFAULT_HAPROXY_IMAGE,
    +    DEFAULT_KEEPALIVED_IMAGE,
    +)
    +from ..constants import (
    +    DATA_DIR_MODE,
    +)
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer, extract_uid_gid
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..data_utils import dict_get, is_fsid
    +from ..deployment_utils import to_deployment_container
    +from ..exceptions import Error
    +from ..file_utils import makedirs, populate_files
    +
    +
    +@register_daemon_form
    +class HAproxy(ContainerDaemonForm):
    +    """Defines an HAproxy container"""
    +
    +    daemon_type = 'haproxy'
    +    required_files = ['haproxy.cfg']
    +    default_image = DEFAULT_HAPROXY_IMAGE
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        fsid: str,
    +        daemon_id: Union[int, str],
    +        config_json: Dict,
    +        image: str,
    +    ) -> None:
    +        self.ctx = ctx
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image
    +
    +        # config-json options
    +        self.files = dict_get(config_json, 'files', {})
    +
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
    +    ) -> 'HAproxy':
    +        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
    +
    +    @classmethod
    +    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'HAproxy':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +
    +        # create additional directories in data dir for HAproxy to use
    +        if not os.path.isdir(os.path.join(data_dir, 'haproxy')):
    +            makedirs(
    +                os.path.join(data_dir, 'haproxy'), uid, gid, DATA_DIR_MODE
    +            )
    +
    +        data_dir = os.path.join(data_dir, 'haproxy')
    +        populate_files(data_dir, self.files, uid, gid)
    +
    +    def get_daemon_args(self) -> List[str]:
    +        return ['haproxy', '-f', '/var/lib/haproxy/haproxy.cfg']
    +
    +    def validate(self) -> None:
    +        if not is_fsid(self.fsid):
    +            raise Error('not an fsid: %s' % self.fsid)
    +        if not self.daemon_id:
    +            raise Error('invalid daemon_id: %s' % self.daemon_id)
    +        if not self.image:
    +            raise Error('invalid image: %s' % self.image)
    +
    +        # check for the required files
    +        if self.required_files:
    +            for fname in self.required_files:
    +                if fname not in self.files:
    +                    raise Error(
    +                        'required file missing from config-json: %s' % fname
    +                    )
    +
    +    def get_daemon_name(self) -> str:
    +        return '%s.%s' % (self.daemon_type, self.daemon_id)
    +
    +    def get_container_name(self, desc: Optional[str] = None) -> str:
    +        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    +        if desc:
    +            cname = '%s-%s' % (cname, desc)
    +        return cname
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        # better directory for this?
    +        return extract_uid_gid(self.ctx, file_path='/var/lib')
    +
    +    @staticmethod
    +    def _get_container_mounts(data_dir: str) -> Dict[str, str]:
    +        mounts = dict()
    +        mounts[os.path.join(data_dir, 'haproxy')] = '/var/lib/haproxy'
    +        return mounts
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        mounts.update(self._get_container_mounts(data_dir))
    +
    +    @staticmethod
    +    def get_sysctl_settings() -> List[str]:
    +        return [
    +            '# IP forwarding and non-local bind',
    +            'net.ipv4.ip_forward = 1',
    +            'net.ipv4.ip_nonlocal_bind = 1',
    +        ]
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(
    +            ['--user=root']
    +        )  # haproxy 2.4 defaults to a different user
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(self.get_daemon_args())
    +
    +
    +@register_daemon_form
    +class Keepalived(ContainerDaemonForm):
    +    """Defines an Keepalived container"""
    +
    +    daemon_type = 'keepalived'
    +    required_files = ['keepalived.conf']
    +    default_image = DEFAULT_KEEPALIVED_IMAGE
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        fsid: str,
    +        daemon_id: Union[int, str],
    +        config_json: Dict,
    +        image: str,
    +    ) -> None:
    +        self.ctx = ctx
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image
    +
    +        # config-json options
    +        self.files = dict_get(config_json, 'files', {})
    +
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
    +    ) -> 'Keepalived':
    +        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'Keepalived':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +
    +        # create additional directories in data dir for keepalived to use
    +        if not os.path.isdir(os.path.join(data_dir, 'keepalived')):
    +            makedirs(
    +                os.path.join(data_dir, 'keepalived'), uid, gid, DATA_DIR_MODE
    +            )
    +
    +        # populate files from the config-json
    +        populate_files(data_dir, self.files, uid, gid)
    +
    +    def validate(self) -> None:
    +        if not is_fsid(self.fsid):
    +            raise Error('not an fsid: %s' % self.fsid)
    +        if not self.daemon_id:
    +            raise Error('invalid daemon_id: %s' % self.daemon_id)
    +        if not self.image:
    +            raise Error('invalid image: %s' % self.image)
    +
    +        # check for the required files
    +        if self.required_files:
    +            for fname in self.required_files:
    +                if fname not in self.files:
    +                    raise Error(
    +                        'required file missing from config-json: %s' % fname
    +                    )
    +
    +    def get_daemon_name(self) -> str:
    +        return '%s.%s' % (self.daemon_type, self.daemon_id)
    +
    +    def get_container_name(self, desc: Optional[str] = None) -> str:
    +        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    +        if desc:
    +            cname = '%s-%s' % (cname, desc)
    +        return cname
    +
    +    @staticmethod
    +    def get_container_envs() -> List[str]:
    +        envs = [
    +            'KEEPALIVED_AUTOCONF=false',
    +            'KEEPALIVED_CONF=/etc/keepalived/keepalived.conf',
    +            'KEEPALIVED_CMD=/usr/sbin/keepalived -n -l -f /etc/keepalived/keepalived.conf',
    +            'KEEPALIVED_DEBUG=false',
    +        ]
    +        return envs
    +
    +    @staticmethod
    +    def get_sysctl_settings() -> List[str]:
    +        return [
    +            '# IP forwarding and non-local bind',
    +            'net.ipv4.ip_forward = 1',
    +            'net.ipv4.ip_nonlocal_bind = 1',
    +        ]
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        # better directory for this?
    +        return extract_uid_gid(self.ctx, file_path='/var/lib')
    +
    +    @staticmethod
    +    def _get_container_mounts(data_dir: str) -> Dict[str, str]:
    +        mounts = dict()
    +        mounts[
    +            os.path.join(data_dir, 'keepalived.conf')
    +        ] = '/etc/keepalived/keepalived.conf'
    +        return mounts
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        mounts.update(self._get_container_mounts(data_dir))
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def customize_container_envs(
    +        self, ctx: CephadmContext, envs: List[str]
    +    ) -> None:
    +        envs.extend(self.get_container_envs())
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(['--cap-add=NET_ADMIN', '--cap-add=NET_RAW'])
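
    Both ingress forms validate their config-json up front; a small sketch of the failure
    mode when a required file is missing (ctx is only stored on the object, so a None
    stand-in is enough here, and the fsid is a placeholder assumed to pass is_fsid):

        from cephadmlib.daemons.ingress import HAproxy
        from cephadmlib.exceptions import Error

        fsid = '00000000-0000-0000-0000-000000000000'
        try:
            HAproxy(None, fsid, 'ingress0', {'files': {}}, 'example.com/haproxy:2.4')
        except Error as e:
            assert 'haproxy.cfg' in str(e)
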
    diff --git a/src/cephadm/cephadmlib/daemons/iscsi.py b/src/cephadm/cephadmlib/daemons/iscsi.py
    new file mode 100644
    index 000000000000..c4b60f4a7717
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/iscsi.py
    @@ -0,0 +1,288 @@
    +import logging
    +import os
    +import re
    +
    +from typing import Dict, List, Optional, Tuple
    +
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer, SidecarContainer, extract_uid_gid
    +from ..context_getters import fetch_configs, get_config_and_keyring
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..constants import DEFAULT_IMAGE
    +from ..context import CephadmContext
    +from ..data_utils import dict_get, is_fsid
    +from ..deployment_utils import to_deployment_container
    +from ..exceptions import Error
    +from ..file_utils import makedirs, populate_files
    +from ..call_wrappers import call, CallVerbosity
    +
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class CephIscsi(ContainerDaemonForm):
    +    """Defines a Ceph-Iscsi container"""
    +
    +    daemon_type = 'iscsi'
    +    entrypoint = '/usr/bin/rbd-target-api'
    +
    +    required_files = ['iscsi-gateway.cfg']
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        ident: DaemonIdentity,
    +        config_json: Dict,
    +        image: str = DEFAULT_IMAGE,
    +    ):
    +        self.ctx = ctx
    +        self._identity = ident
    +        self.image = image
    +
    +        # config-json options
    +        self.files = dict_get(config_json, 'files', {})
    +
    +        # validate the supplied args
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: str
    +    ) -> 'CephIscsi':
    +        return cls.create(
    +            ctx, DaemonIdentity(fsid, cls.daemon_type, daemon_id)
    +        )
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'CephIscsi':
    +        return cls(ctx, ident, fetch_configs(ctx), ctx.image)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return self._identity
    +
    +    @property
    +    def fsid(self) -> str:
    +        return self._identity.fsid
    +
    +    @property
    +    def daemon_id(self) -> str:
    +        return self._identity.daemon_id
    +
    +    @staticmethod
    +    def _get_container_mounts(data_dir, log_dir):
    +        # type: (str, str) -> Dict[str, str]
    +        mounts = dict()
    +        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
    +        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
    +        mounts[
    +            os.path.join(data_dir, 'iscsi-gateway.cfg')
    +        ] = '/etc/ceph/iscsi-gateway.cfg:z'
    +        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
    +        mounts[
    +            os.path.join(data_dir, 'tcmu-runner-entrypoint.sh')
    +        ] = '/usr/local/scripts/tcmu-runner-entrypoint.sh'
    +        mounts[log_dir] = '/var/log:z'
    +        mounts['/dev'] = '/dev'
    +        return mounts
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        # Remove the trailing ".tcmu" from data_dir since tcmu-runner uses
    +        # the same data_dir as rbd-target-api
    +        if data_dir.endswith('.tcmu'):
    +            data_dir = re.sub(r'\.tcmu$', '', data_dir)
    +        log_dir = os.path.join(ctx.log_dir, self.identity.fsid)
    +        mounts.update(CephIscsi._get_container_mounts(data_dir, log_dir))
    +
    +    def customize_container_binds(
    +        self, ctx: CephadmContext, binds: List[List[str]]
    +    ) -> None:
    +        lib_modules = [
    +            'type=bind',
    +            'source=/lib/modules',
    +            'destination=/lib/modules',
    +            'ro=true',
    +        ]
    +        binds.append(lib_modules)
    +
    +    @staticmethod
    +    def get_version(ctx, container_id):
    +        # type: (CephadmContext, str) -> Optional[str]
    +        def python(s: str) -> Tuple[str, str, int]:
    +            return call(
    +                ctx,
    +                [
    +                    ctx.container_engine.path,
    +                    'exec',
    +                    container_id,
    +                    '/usr/bin/python3',
    +                    '-c',
    +                    s,
    +                ],
    +                verbosity=CallVerbosity.QUIET,
    +            )
    +
    +        out, _, code = python(
    +            "from importlib.metadata import version; print(version('ceph_iscsi'))"
    +        )
    +        if code == 0:
    +            return out.strip()
    +        out, _, code = python(
    +            "import pkg_resources; print(pkg_resources.require('ceph_iscsi')[0].version)"
    +        )
    +        if code == 0:
    +            return out.strip()
    +        return None
    +
    +    def validate(self):
    +        # type: () -> None
    +        if not is_fsid(self.fsid):
    +            raise Error('not an fsid: %s' % self.fsid)
    +        if not self.daemon_id:
    +            raise Error('invalid daemon_id: %s' % self.daemon_id)
    +        if not self.image:
    +            raise Error('invalid image: %s' % self.image)
    +
    +        # check for the required files
    +        if self.required_files:
    +            for fname in self.required_files:
    +                if fname not in self.files:
    +                    raise Error(
    +                        'required file missing from config-json: %s' % fname
    +                    )
    +
    +    def get_daemon_name(self):
    +        # type: () -> str
    +        return '%s.%s' % (self.daemon_type, self.daemon_id)
    +
    +    def get_container_name(self, desc=None):
    +        # type: (Optional[str]) -> str
    +        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    +        if desc:
    +            cname = '%s-%s' % (cname, desc)
    +        return cname
    +
    +    def create_daemon_dirs(self, data_dir, uid, gid):
    +        # type: (str, int, int) -> None
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +
    +        logger.info('Creating ceph-iscsi config...')
    +        configfs_dir = os.path.join(data_dir, 'configfs')
    +        makedirs(configfs_dir, uid, gid, 0o755)
    +
    +        # set up the tcmu-runner entrypoint script
    +        # to be mounted into the container. For more info
    +        # on why we need this script, see the
    +        # tcmu_runner_entrypoint_script function
    +        self.files[
    +            'tcmu-runner-entrypoint.sh'
    +        ] = self.tcmu_runner_entrypoint_script()
    +
    +        # populate files from the config-json
    +        populate_files(data_dir, self.files, uid, gid)
    +
    +        # we want the tcmu runner entrypoint script to be executable
    +        # populate_files will give it 0o600 by default
    +        os.chmod(os.path.join(data_dir, 'tcmu-runner-entrypoint.sh'), 0o700)
    +
    +    @staticmethod
    +    def configfs_mount_umount(data_dir: str, mount: bool = True) -> str:
    +        mount_path = os.path.join(data_dir, 'configfs')
    +        if mount:
    +            cmd = (
    +                'if ! grep -qs {0} /proc/mounts; then '
    +                'mount -t configfs none {0}; fi'.format(mount_path)
    +            )
    +        else:
    +            cmd = (
    +                'if grep -qs {0} /proc/mounts; then '
    +                'umount {0}; fi'.format(mount_path)
    +            )
    +        return cmd
    +
    +    @staticmethod
    +    def tcmu_runner_entrypoint_script() -> str:
    +        # Since tcmu-runner runs as a background process in its systemd
    +        # unit (rbd-target-api being the main process), systemd will not
    +        # restart it when it fails. To work around that for now, we mount a
    +        # script into the container that attempts to do the restarting for
    +        # us. This script then becomes the entrypoint for the tcmu-runner
    +        # container.
    +
    +        # This is intended to be dropped in favor of a better solution for
    +        # at least the squid release onward.
    +        return """#!/bin/bash
    +RUN_DIR=/var/run/tcmu-runner
    +
    +if [ ! -d "${RUN_DIR}" ] ; then
    +    mkdir -p "${RUN_DIR}"
    +fi
    +
    +rm -rf "${RUN_DIR}"/*
    +
    +while true
    +do
    +    touch "${RUN_DIR}"/start-up-$(date -Ins)
    +    /usr/bin/tcmu-runner
    +
    +    # If we got around 3 kills/segfaults in the last minute,
    +    # don't start anymore
    +    if [ $(find "${RUN_DIR}" -type f -cmin -1 | wc -l) -ge 3 ] ; then
    +        exit 0
    +    fi
    +
    +    sleep 1
    +done
    +"""
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        # So the container can modprobe iscsi_target_mod and have write perms
    +        # to configfs we need to make this a privileged container.
    +        ctr = daemon_to_container(ctx, self, privileged=True)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def config_and_keyring(
    +        self, ctx: CephadmContext
    +    ) -> Tuple[Optional[str], Optional[str]]:
    +        return get_config_and_keyring(ctx)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return extract_uid_gid(ctx)
    +
    +    def default_entrypoint(self) -> str:
    +        return self.entrypoint
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.append(ctx.container_engine.unlimited_pids_option)
    +
    +    def sidecar_containers(
    +        self, ctx: CephadmContext
    +    ) -> List[SidecarContainer]:
    +        tcmu_sidecar = SidecarContainer.from_primary_and_values(
    +            ctx,
    +            self.container(ctx),
    +            'tcmu',
    +            # TODO: Eventually we don't want to run tcmu-runner through this
    +            # script.  It is intended as a workaround to be backported to
    +            # older releases and should be removed from at least the squid
    +            # release onward
    +            entrypoint='/usr/local/scripts/tcmu-runner-entrypoint.sh',
    +        )
    +        return [tcmu_sidecar]
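
    For reference, configfs_mount_umount above only builds a shell snippet; it does not execute it. A standalone restatement showing the strings it produces (the data dir path is hypothetical):

        import os

        data_dir = '/var/lib/ceph/FSID/iscsi.a'          # hypothetical data dir
        mount_path = os.path.join(data_dir, 'configfs')

        # what configfs_mount_umount(data_dir, mount=True) evaluates to:
        mount_cmd = ('if ! grep -qs {0} /proc/mounts; then '
                     'mount -t configfs none {0}; fi'.format(mount_path))
        # and with mount=False:
        umount_cmd = ('if grep -qs {0} /proc/mounts; then '
                      'umount {0}; fi'.format(mount_path))
        print(mount_cmd)
        print(umount_cmd)
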
    diff --git a/src/cephadm/cephadmlib/daemons/mgmt_gateway.py b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py
    new file mode 100644
    index 000000000000..85f724959097
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py
    @@ -0,0 +1,188 @@
    +import logging
    +import os
    +from typing import Dict, List, Tuple, Optional
    +import re
    +
    +from ..call_wrappers import call, CallVerbosity
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer, extract_uid_gid
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..deployment_utils import to_deployment_container
    +from ceph.cephadm.images import DEFAULT_NGINX_IMAGE
    +from ..data_utils import dict_get, is_fsid
    +from ..file_utils import populate_files, makedirs, recursive_chown
    +from ..exceptions import Error
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class MgmtGateway(ContainerDaemonForm):
    +    """Defines an MgmtGateway container"""
    +
    +    daemon_type = 'mgmt-gateway'
    +    required_files = [
    +        'nginx.conf',
    +        'nginx_external_server.conf',
    +        'nginx_internal_server.conf',
    +        'nginx_internal.crt',
    +        'nginx_internal.key',
    +    ]
    +
    +    default_image = DEFAULT_NGINX_IMAGE
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        fsid: str,
    +        daemon_id: str,
    +        config_json: Dict,
    +        image: str = DEFAULT_NGINX_IMAGE,
    +    ):
    +        self.ctx = ctx
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image
    +        self.files = dict_get(config_json, 'files', {})
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: str
    +    ) -> 'MgmtGateway':
    +        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'MgmtGateway':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    def validate(self) -> None:
    +        if not is_fsid(self.fsid):
    +            raise Error(f'not an fsid: {self.fsid}')
    +        if not self.daemon_id:
    +            raise Error(f'invalid daemon_id: {self.daemon_id}')
    +        if not self.image:
    +            raise Error(f'invalid image: {self.image}')
    +
    +        # check for the required files
    +        if self.required_files:
    +            for fname in self.required_files:
    +                if fname not in self.files:
    +                    raise Error(
    +                        'required file missing from config-json: %s' % fname
    +                    )
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return extract_uid_gid(ctx, file_path='/etc/nginx/')
    +
    +    def get_daemon_args(self) -> List[str]:
    +        return []
    +
    +    def default_entrypoint(self) -> str:
    +        return ''
    +
    +    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +        logger.info('Writing mgmt-gateway config...')
    +        config_dir = os.path.join(data_dir, 'etc/')
    +        ssl_dir = os.path.join(data_dir, 'etc/ssl')
    +        for ddir in [config_dir, ssl_dir]:
    +            makedirs(ddir, uid, gid, 0o755)
    +            recursive_chown(ddir, uid, gid)
    +        conf_files = {
    +            fname: content
    +            for fname, content in self.files.items()
    +            if fname.endswith('.conf')
    +        }
    +        cert_files = {
    +            fname: content
    +            for fname, content in self.files.items()
    +            if fname.endswith('.crt') or fname.endswith('.key')
    +        }
    +        populate_files(config_dir, conf_files, uid, gid)
    +        populate_files(ssl_dir, cert_files, uid, gid)
    +
    +    def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
    +        mounts: Dict[str, str] = {}
    +        mounts[
    +            os.path.join(data_dir, 'nginx.conf')
    +        ] = '/etc/nginx/nginx.conf:Z'
    +        return mounts
    +
    +    @staticmethod
    +    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
    +        """Return the version of the Nginx container"""
    +        version = None
    +        out, err, code = call(
    +            ctx,
    +            [
    +                ctx.container_engine.path,
    +                'exec',
    +                container_id,
    +                'nginx',
    +                '-v',
    +            ],
    +            verbosity=CallVerbosity.QUIET,
    +        )
    +        if code == 0:
    +            # nginx prints its version to stderr
    +            match = re.search(r'nginx version:\s*nginx\/(.+)', err)
    +            if match:
    +                version = match.group(1)
    +        return version
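
    The version above is scraped from the stderr of `nginx -v`; a self-contained check of that regex against a representative output line (the sample string is illustrative):

        import re

        sample_stderr = 'nginx version: nginx/1.25.3'    # typical `nginx -v` output (on stderr)
        match = re.search(r'nginx version:\s*nginx\/(.+)', sample_stderr)
        print(match.group(1) if match else None)         # -> 1.25.3
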
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        uid, _ = self.uid_gid(ctx)
    +        extra_args = [
    +            '--user',
    +            str(uid),
    +        ]
    +        args.extend(extra_args)
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        # The noqa comment below intentionally suppresses the warning about using double
    +        # quotes instead of single quotes. Double quotes are used here so that single
    +        # quotes end up in the final parsed output: nginx -g 'daemon off;'
    +        args.extend(['nginx', '-g', "daemon off;"])  # noqa
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        mounts.update(
    +            {
    +                os.path.join(
    +                    data_dir, 'etc/nginx.conf'
    +                ): '/etc/nginx/nginx.conf:Z',
    +                os.path.join(
    +                    data_dir, 'etc/nginx_internal_server.conf'
    +                ): '/etc/nginx_internal_server.conf:Z',
    +                os.path.join(
    +                    data_dir, 'etc/nginx_external_server.conf'
    +                ): '/etc/nginx_external_server.conf:Z',
    +                os.path.join(data_dir, 'etc/ssl'): '/etc/nginx/ssl/',
    +            }
    +        )
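
    create_daemon_dirs above splits the config-json files by extension: *.conf files go under etc/ and *.crt/*.key files under etc/ssl. A hedged sketch of that split with placeholder contents:

        files = {                         # hypothetical config-json 'files' payload
            'nginx.conf': '...',
            'nginx_external_server.conf': '...',
            'nginx_internal_server.conf': '...',
            'nginx_internal.crt': '-----BEGIN CERTIFICATE-----...',
            'nginx_internal.key': '-----BEGIN PRIVATE KEY-----...',
        }
        conf_files = {n: c for n, c in files.items() if n.endswith('.conf')}
        cert_files = {n: c for n, c in files.items()
                      if n.endswith('.crt') or n.endswith('.key')}
        print(sorted(conf_files))   # written under <data_dir>/etc/
        print(sorted(cert_files))   # written under <data_dir>/etc/ssl/
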
    diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py
    new file mode 100644
    index 000000000000..710093f0f467
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/monitoring.py
    @@ -0,0 +1,397 @@
    +import os
    +
    +from typing import Dict, List, Tuple
    +
    +from ..call_wrappers import call, CallVerbosity
    +from ceph.cephadm.images import (
    +    DEFAULT_ALERTMANAGER_IMAGE,
    +    DEFAULT_GRAFANA_IMAGE,
    +    DEFAULT_LOKI_IMAGE,
    +    DEFAULT_NODE_EXPORTER_IMAGE,
    +    DEFAULT_PROMETHEUS_IMAGE,
    +    DEFAULT_PROMTAIL_IMAGE,
    +)
    +from ..constants import (
    +    UID_NOBODY,
    +    GID_NOGROUP,
    +)
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer, extract_uid_gid
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs, fetch_meta
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..deployment_utils import to_deployment_container
    +from ..exceptions import Error
    +from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6
    +
    +
    +@register_daemon_form
    +class Monitoring(ContainerDaemonForm):
    +    """Define the configs for the monitoring containers"""
    +
    +    port_map = {
    +        'prometheus': [
    +            9095
    +        ],  # Avoid default 9090 due to a conflict with the cockpit UI
    +        'node-exporter': [9100],
    +        'grafana': [3000],
    +        'alertmanager': [9093, 9094],
    +        'loki': [3100],
    +        'promtail': [9080],
    +    }
    +
    +    components = {
    +        'prometheus': {
    +            'image': DEFAULT_PROMETHEUS_IMAGE,
    +            'cpus': '2',
    +            'memory': '4GB',
    +            'args': [
    +                '--config.file=/etc/prometheus/prometheus.yml',
    +                '--storage.tsdb.path=/prometheus',
    +            ],
    +            'config-json-files': [
    +                'prometheus.yml',
    +            ],
    +        },
    +        'loki': {
    +            'image': DEFAULT_LOKI_IMAGE,
    +            'cpus': '1',
    +            'memory': '1GB',
    +            'args': [
    +                '--config.file=/etc/loki/loki.yml',
    +            ],
    +            'config-json-files': ['loki.yml'],
    +        },
    +        'promtail': {
    +            'image': DEFAULT_PROMTAIL_IMAGE,
    +            'cpus': '1',
    +            'memory': '1GB',
    +            'args': [
    +                '--config.file=/etc/promtail/promtail.yml',
    +            ],
    +            'config-json-files': [
    +                'promtail.yml',
    +            ],
    +        },
    +        'node-exporter': {
    +            'image': DEFAULT_NODE_EXPORTER_IMAGE,
    +            'cpus': '1',
    +            'memory': '1GB',
    +            'args': ['--no-collector.timex'],
    +        },
    +        'grafana': {
    +            'image': DEFAULT_GRAFANA_IMAGE,
    +            'cpus': '2',
    +            'memory': '4GB',
    +            'args': [],
    +            'config-json-files': [
    +                'grafana.ini',
    +                'provisioning/datasources/ceph-dashboard.yml',
    +                'certs/cert_file',
    +                'certs/cert_key',
    +            ],
    +        },
    +        'alertmanager': {
    +            'image': DEFAULT_ALERTMANAGER_IMAGE,
    +            'cpus': '2',
    +            'memory': '2GB',
    +            'args': [
    +                '--cluster.listen-address=:{}'.format(
    +                    port_map['alertmanager'][1]
    +                ),
    +            ],
    +            'config-json-files': [
    +                'alertmanager.yml',
    +            ],
    +            'config-json-args': [
    +                'peers',
    +            ],
    +        },
    +    }  # type: ignore
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return daemon_type in cls.components
    +
    +    @staticmethod
    +    def get_version(ctx, container_id, daemon_type):
    +        # type: (CephadmContext, str, str) -> str
    +        """
    +        :param daemon_type: either "prometheus", "alertmanager", "loki", "promtail" or "node-exporter"
    +        """
    +        assert daemon_type in (
    +            'prometheus',
    +            'alertmanager',
    +            'node-exporter',
    +            'loki',
    +            'promtail',
    +        )
    +        cmd = daemon_type.replace('-', '_')
    +        code = -1
    +        err = ''
    +        out = ''
    +        version = ''
    +        if daemon_type == 'alertmanager':
    +            for cmd in ['alertmanager', 'prometheus-alertmanager']:
    +                out, err, code = call(
    +                    ctx,
    +                    [
    +                        ctx.container_engine.path,
    +                        'exec',
    +                        container_id,
    +                        cmd,
    +                        '--version',
    +                    ],
    +                    verbosity=CallVerbosity.QUIET,
    +                )
    +                if code == 0:
    +                    break
    +            cmd = 'alertmanager'  # reset cmd for version extraction
    +        else:
    +            out, err, code = call(
    +                ctx,
    +                [
    +                    ctx.container_engine.path,
    +                    'exec',
    +                    container_id,
    +                    cmd,
    +                    '--version',
    +                ],
    +                verbosity=CallVerbosity.QUIET,
    +            )
    +        if code == 0:
    +            if err.startswith('%s, version ' % cmd):
    +                version = err.split(' ')[2]
    +            elif out.startswith('%s, version ' % cmd):
    +                version = out.split(' ')[2]
    +        return version
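
    The --version parsing above takes the third whitespace-separated token of output that begins with "<cmd>, version ". A standalone illustration with a representative (made-up) output line:

        cmd = 'prometheus'
        sample = 'prometheus, version 2.43.0 (branch: HEAD, revision: abcdef)'  # illustrative
        version = ''
        if sample.startswith('%s, version ' % cmd):
            version = sample.split(' ')[2]
        print(version)   # -> 2.43.0
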
    +
    +    @staticmethod
    +    def extract_uid_gid(
    +        ctx: CephadmContext, daemon_type: str
    +    ) -> Tuple[int, int]:
    +        if daemon_type == 'prometheus':
    +            uid, gid = extract_uid_gid(ctx, file_path='/etc/prometheus')
    +        elif daemon_type == 'node-exporter':
    +            uid, gid = UID_NOBODY, GID_NOGROUP
    +        elif daemon_type == 'grafana':
    +            uid, gid = extract_uid_gid(ctx, file_path='/var/lib/grafana')
    +        elif daemon_type == 'loki':
    +            uid, gid = extract_uid_gid(ctx, file_path='/etc/loki')
    +        elif daemon_type == 'promtail':
    +            uid, gid = extract_uid_gid(ctx, file_path='/etc/promtail')
    +        elif daemon_type == 'alertmanager':
    +            uid, gid = extract_uid_gid(
    +                ctx, file_path=['/etc/alertmanager', '/etc/prometheus']
    +            )
    +        else:
    +            raise Error('{} not implemented yet'.format(daemon_type))
    +        return uid, gid
    +
    +    def __init__(self, ctx: CephadmContext, ident: DaemonIdentity) -> None:
    +        self.ctx = ctx
    +        self._identity = ident
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'Monitoring':
    +        return cls(ctx, ident)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return self._identity
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        self._prevalidate(ctx)
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return self.extract_uid_gid(ctx, self.identity.daemon_type)
    +
    +    def _prevalidate(self, ctx: CephadmContext) -> None:
    +        # before being refactored into a ContainerDaemonForm these checks were
    +        # done inside the deploy function. This was the only "family" of daemons
    +        # that performed these checks in that location
    +        daemon_type = self.identity.daemon_type
    +        config = fetch_configs(ctx)  # type: ignore
    +        required_files = self.components[daemon_type].get(
    +            'config-json-files', list()
    +        )
    +        required_args = self.components[daemon_type].get(
    +            'config-json-args', list()
    +        )
    +        if required_files:
    +            if not config or not all(c in config.get('files', {}).keys() for c in required_files):  # type: ignore
    +                raise Error(
    +                    '{} deployment requires config-json which must '
    +                    'contain file content for {}'.format(
    +                        daemon_type.capitalize(), ', '.join(required_files)
    +                    )
    +                )
    +        if required_args:
    +            if not config or not all(c in config.keys() for c in required_args):  # type: ignore
    +                raise Error(
    +                    '{} deployment requires config-json which must '
    +                    'contain arg for {}'.format(
    +                        daemon_type.capitalize(), ', '.join(required_args)
    +                    )
    +                )
    +
    +    def get_daemon_args(self) -> List[str]:
    +        ctx = self.ctx
    +        daemon_type = self.identity.daemon_type
    +        metadata = self.components[daemon_type]
    +        r = list(metadata.get('args', []))
    +        # set the ip and port to bind to for node-exporter, alertmanager and prometheus
    +        if daemon_type not in ['grafana', 'loki', 'promtail']:
    +            ip = ''
    +            port = self.port_map[daemon_type][0]
    +            meta = fetch_meta(ctx)
    +            if meta:
    +                if 'ip' in meta and meta['ip']:
    +                    ip = meta['ip']
    +                if 'ports' in meta and meta['ports']:
    +                    port = meta['ports'][0]
    +            if daemon_type == 'prometheus':
    +                config = fetch_configs(ctx)
    +                ip_to_bind_to = config.get('ip_to_bind_to', '')
    +                if ip_to_bind_to:
    +                    ip = ip_to_bind_to
    +                retention_time = config.get('retention_time', '15d')
    +                retention_size = config.get(
    +                    'retention_size', '0'
    +                )  # default to disabled
    +                use_url_prefix = config.get('use_url_prefix', False)
    +                r += [f'--storage.tsdb.retention.time={retention_time}']
    +                r += [f'--storage.tsdb.retention.size={retention_size}']
    +                scheme = 'http'
    +                host = get_fqdn()
    +                # if the host is not an fqdn, use the IP instead to
    +                # avoid producing a broken web.external-url link
    +                if '.' not in host:
    +                    ipv4_addrs, ipv6_addrs = get_ip_addresses(get_hostname())
    +                    # use the first ipv4 (if any) otherwise use the first ipv6
    +                    addr = next(iter(ipv4_addrs or ipv6_addrs), None)
    +                    host = wrap_ipv6(addr) if addr else host
    +                if use_url_prefix:
    +                    r += [
    +                        f'--web.external-url={scheme}://{host}:{port}/prometheus'
    +                    ]
    +                    r += ['--web.route-prefix=/prometheus/']
    +                else:
    +                    r += [f'--web.external-url={scheme}://{host}:{port}']
    +            r += [f'--web.listen-address={ip}:{port}']
    +        if daemon_type == 'alertmanager':
    +            config = fetch_configs(ctx)
    +            use_url_prefix = config.get('use_url_prefix', False)
    +            peers = config.get('peers', list())  # type: ignore
    +            for peer in peers:
    +                r += ['--cluster.peer={}'.format(peer)]
    +            try:
    +                r += [f'--web.config.file={config["web_config"]}']
    +            except KeyError:
    +                pass
    +            # some alertmanager versions look elsewhere for their config by default
    +            r += ['--config.file=/etc/alertmanager/alertmanager.yml']
    +            if use_url_prefix:
    +                r += ['--web.route-prefix=/alertmanager']
    +        if daemon_type == 'promtail':
    +            r += ['--config.expand-env']
    +        if daemon_type == 'prometheus':
    +            config = fetch_configs(ctx)
    +            try:
    +                r += [f'--web.config.file={config["web_config"]}']
    +            except KeyError:
    +                pass
    +        if daemon_type == 'node-exporter':
    +            config = fetch_configs(ctx)
    +            try:
    +                r += [f'--web.config.file={config["web_config"]}']
    +            except KeyError:
    +                pass
    +            r += [
    +                '--path.procfs=/host/proc',
    +                '--path.sysfs=/host/sys',
    +                '--path.rootfs=/rootfs',
    +            ]
    +        return r
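
    To make the prometheus URL handling above concrete, a minimal sketch of how the external URL, route prefix and listen address are assembled (the host name, port and prefix flag are assumed values):

        def prometheus_url_args(host='ceph-node-01.example.com', port=9095,
                                use_url_prefix=False, ip=''):
            # mirrors the branch above that builds --web.external-url / --web.route-prefix
            args = []
            if use_url_prefix:
                args.append(f'--web.external-url=http://{host}:{port}/prometheus')
                args.append('--web.route-prefix=/prometheus/')
            else:
                args.append(f'--web.external-url=http://{host}:{port}')
            args.append(f'--web.listen-address={ip}:{port}')
            return args

        print(prometheus_url_args(use_url_prefix=True))
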
    +
    +    def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
    +        ctx = self.ctx
    +        daemon_type = self.identity.daemon_type
    +        mounts: Dict[str, str] = {}
    +        log_dir = os.path.join(ctx.log_dir, self.identity.fsid)
    +        if daemon_type == 'prometheus':
    +            mounts[
    +                os.path.join(data_dir, 'etc/prometheus')
    +            ] = '/etc/prometheus:Z'
    +            mounts[os.path.join(data_dir, 'data')] = '/prometheus:Z'
    +        elif daemon_type == 'loki':
    +            mounts[os.path.join(data_dir, 'etc/loki')] = '/etc/loki:Z'
    +            mounts[os.path.join(data_dir, 'data')] = '/loki:Z'
    +        elif daemon_type == 'promtail':
    +            mounts[os.path.join(data_dir, 'etc/promtail')] = '/etc/promtail:Z'
    +            mounts[log_dir] = '/var/log/ceph:z'
    +            mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
    +        elif daemon_type == 'node-exporter':
    +            mounts[
    +                os.path.join(data_dir, 'etc/node-exporter')
    +            ] = '/etc/node-exporter:Z'
    +            mounts['/proc'] = '/host/proc:ro'
    +            mounts['/sys'] = '/host/sys:ro'
    +            mounts['/'] = '/rootfs:ro'
    +        elif daemon_type == 'grafana':
    +            mounts[
    +                os.path.join(data_dir, 'etc/grafana/grafana.ini')
    +            ] = '/etc/grafana/grafana.ini:Z'
    +            mounts[
    +                os.path.join(data_dir, 'etc/grafana/provisioning/datasources')
    +            ] = '/etc/grafana/provisioning/datasources:Z'
    +            mounts[
    +                os.path.join(data_dir, 'etc/grafana/provisioning/dashboards')
    +            ] = '/etc/grafana/provisioning/dashboards:Z'
    +            mounts[
    +                os.path.join(data_dir, 'etc/grafana/certs')
    +            ] = '/etc/grafana/certs:Z'
    +            mounts[
    +                os.path.join(data_dir, 'data/grafana.db')
    +            ] = '/var/lib/grafana/grafana.db:Z'
    +        elif daemon_type == 'alertmanager':
    +            mounts[
    +                os.path.join(data_dir, 'etc/alertmanager')
    +            ] = '/etc/alertmanager:Z'
    +        return mounts
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        mounts.update(self._get_container_mounts(data_dir))
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        uid, _ = self.uid_gid(ctx)
    +        monitoring_args = [
    +            '--user',
    +            str(uid),
    +            # FIXME: disable cpu/memory limits for the time being (not supported
    +            # by ubuntu 18.04 kernel!)
    +        ]
    +        args.extend(monitoring_args)
    +        if self.identity.daemon_type == 'node-exporter':
    +            # in order to support setting '--path.procfs=/host/proc','--path.sysfs=/host/sys',
    +            # '--path.rootfs=/rootfs' for node-exporter we need to disable selinux separation
    +            # between the node-exporter container and the host to avoid selinux denials
    +            args.extend(['--security-opt', 'label=disable'])
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(self.get_daemon_args())
    +
    +    def default_entrypoint(self) -> str:
    +        return ''
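
    Since the components table drives both _prevalidate and get_daemon_args, here is a small sketch of the required-files/args check against a hypothetical config-json (the table entries are abbreviated):

        components = {
            'prometheus': {'config-json-files': ['prometheus.yml']},
            'alertmanager': {'config-json-files': ['alertmanager.yml'],
                             'config-json-args': ['peers']},
        }
        config = {'files': {'alertmanager.yml': '...'}, 'peers': []}   # hypothetical config-json

        daemon_type = 'alertmanager'
        required_files = components[daemon_type].get('config-json-files', [])
        required_args = components[daemon_type].get('config-json-args', [])
        ok_files = all(f in config.get('files', {}) for f in required_files)
        ok_args = all(a in config for a in required_args)
        print(ok_files and ok_args)   # -> True
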
    diff --git a/src/cephadm/cephadmlib/daemons/nfs.py b/src/cephadm/cephadmlib/daemons/nfs.py
    new file mode 100644
    index 000000000000..70ccea65b5b4
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/nfs.py
    @@ -0,0 +1,230 @@
    +import logging
    +import os
    +import re
    +
    +from typing import Dict, List, Optional, Tuple, Union
    +
    +from ..call_wrappers import call, CallVerbosity
    +from ..constants import DEFAULT_IMAGE, CEPH_DEFAULT_CONF
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer, extract_uid_gid
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs, get_config_and_keyring
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..data_utils import dict_get, is_fsid
    +from ..deploy import DeploymentType
    +from ..deployment_utils import to_deployment_container
    +from ..exceptions import Error
    +from ..file_utils import makedirs, populate_files, write_new
    +from ..net_utils import EndPoint
    +
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class NFSGanesha(ContainerDaemonForm):
    +    """Defines a NFS-Ganesha container"""
    +
    +    daemon_type = 'nfs'
    +    entrypoint = '/usr/bin/ganesha.nfsd'
    +    daemon_args = ['-F', '-L', 'STDERR']
    +
    +    required_files = ['ganesha.conf', 'idmap.conf']
    +
    +    port_map = {
    +        'nfs': 2049,
    +    }
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        fsid: str,
    +        daemon_id: Union[int, str],
    +        config_json: Dict,
    +        image: str = DEFAULT_IMAGE,
    +    ) -> None:
    +        self.ctx = ctx
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image
    +
    +        # config-json options
    +        self.pool = dict_get(config_json, 'pool', require=True)
    +        self.namespace = dict_get(config_json, 'namespace')
    +        self.userid = dict_get(config_json, 'userid')
    +        self.extra_args = dict_get(config_json, 'extra_args', [])
    +        self.files = dict_get(config_json, 'files', {})
    +        self.rgw = dict_get(config_json, 'rgw', {})
    +
    +        # validate the supplied args
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
    +    ) -> 'NFSGanesha':
    +        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'NFSGanesha':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    def _get_container_mounts(self, data_dir):
    +        # type: (str) -> Dict[str, str]
    +        mounts = dict()
    +        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
    +        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
    +        mounts[os.path.join(data_dir, 'etc/ganesha')] = '/etc/ganesha:z'
    +        if self.rgw:
    +            cluster = self.rgw.get('cluster', 'ceph')
    +            rgw_user = self.rgw.get('user', 'admin')
    +            mounts[
    +                os.path.join(data_dir, 'keyring.rgw')
    +            ] = '/var/lib/ceph/radosgw/%s-%s/keyring:z' % (cluster, rgw_user)
    +        return mounts
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        mounts.update(self._get_container_mounts(data_dir))
    +
    +    @staticmethod
    +    def get_container_envs():
    +        # type: () -> List[str]
    +        envs = ['CEPH_CONF=%s' % (CEPH_DEFAULT_CONF)]
    +        return envs
    +
    +    @staticmethod
    +    def get_version(ctx, container_id):
    +        # type: (CephadmContext, str) -> Optional[str]
    +        version = None
    +        out, err, code = call(
    +            ctx,
    +            [
    +                ctx.container_engine.path,
    +                'exec',
    +                container_id,
    +                NFSGanesha.entrypoint,
    +                '-v',
    +            ],
    +            verbosity=CallVerbosity.QUIET,
    +        )
    +        if code == 0:
    +            match = re.search(r'NFS-Ganesha Release\s*=\s*[V]*([\d.]+)', out)
    +            if match:
    +                version = match.group(1)
    +        return version
    +
    +    def validate(self):
    +        # type: () -> None
    +        if not is_fsid(self.fsid):
    +            raise Error('not an fsid: %s' % self.fsid)
    +        if not self.daemon_id:
    +            raise Error('invalid daemon_id: %s' % self.daemon_id)
    +        if not self.image:
    +            raise Error('invalid image: %s' % self.image)
    +
    +        # check for the required files
    +        if self.required_files:
    +            for fname in self.required_files:
    +                if fname not in self.files:
    +                    raise Error(
    +                        'required file missing from config-json: %s' % fname
    +                    )
    +
    +        # check for an RGW config
    +        if self.rgw:
    +            if not self.rgw.get('keyring'):
    +                raise Error('RGW keyring is missing')
    +            if not self.rgw.get('user'):
    +                raise Error('RGW user is missing')
    +
    +    def get_daemon_name(self):
    +        # type: () -> str
    +        return '%s.%s' % (self.daemon_type, self.daemon_id)
    +
    +    def get_container_name(self, desc=None):
    +        # type: (Optional[str]) -> str
    +        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    +        if desc:
    +            cname = '%s-%s' % (cname, desc)
    +        return cname
    +
    +    def get_daemon_args(self):
    +        # type: () -> List[str]
    +        return self.daemon_args + self.extra_args
    +
    +    def create_daemon_dirs(self, data_dir, uid, gid):
    +        # type: (str, int, int) -> None
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +
    +        logger.info('Creating ganesha config...')
    +
    +        # create the ganesha conf dir
    +        config_dir = os.path.join(data_dir, 'etc/ganesha')
    +        makedirs(config_dir, uid, gid, 0o755)
    +
    +        # populate files from the config-json
    +        populate_files(config_dir, self.files, uid, gid)
    +
    +        # write the RGW keyring
    +        if self.rgw:
    +            keyring_path = os.path.join(data_dir, 'keyring.rgw')
    +            with write_new(keyring_path, owner=(uid, gid)) as f:
    +                f.write(self.rgw.get('keyring', ''))
    +
    +    def firewall_service_name(self) -> str:
    +        return 'nfs'
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def customize_container_endpoints(
    +        self, endpoints: List[EndPoint], deployment_type: DeploymentType
    +    ) -> None:
    +        if deployment_type == DeploymentType.DEFAULT and not endpoints:
    +            nfs_ports = list(NFSGanesha.port_map.values())
    +            endpoints.extend([EndPoint('0.0.0.0', p) for p in nfs_ports])
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        # TODO: extract ganesha uid/gid (997, 994) ?
    +        return extract_uid_gid(ctx)
    +
    +    def config_and_keyring(
    +        self, ctx: CephadmContext
    +    ) -> Tuple[Optional[str], Optional[str]]:
    +        return get_config_and_keyring(ctx)
    +
    +    def customize_container_envs(
    +        self, ctx: CephadmContext, envs: List[str]
    +    ) -> None:
    +        envs.extend(self.get_container_envs())
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(self.get_daemon_args())
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.append(ctx.container_engine.unlimited_pids_option)
    +
    +    def default_entrypoint(self) -> str:
    +        return self.entrypoint
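
    Putting the validate() requirements above together, a hypothetical config-json for an NFS-Ganesha daemon could look like the following (every value is a placeholder):

        import json

        config_json = {
            'pool': 'nfs-ganesha',                    # required (dict_get(..., require=True))
            'namespace': 'ns1',                       # optional
            'files': {
                'ganesha.conf': '# ganesha config ...',
                'idmap.conf': '# idmap config ...',
            },
            'rgw': {                                  # optional; if present, user and keyring are required
                'cluster': 'ceph',
                'user': 'admin',
                'keyring': '[client.rgw]\n\tkey = ...\n',
            },
        }
        print(json.dumps(config_json, indent=2))
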
    diff --git a/src/cephadm/cephadmlib/daemons/node_proxy.py b/src/cephadm/cephadmlib/daemons/node_proxy.py
    new file mode 100644
    index 000000000000..f7f0097e7b85
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/node_proxy.py
    @@ -0,0 +1,153 @@
    +import logging
    +import os
    +
    +from typing import Dict, List, Optional, Tuple
    +
    +from ..constants import DEFAULT_IMAGE
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer, extract_uid_gid
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs, get_config_and_keyring
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..data_utils import dict_get, is_fsid
    +from ..deployment_utils import to_deployment_container
    +from ..exceptions import Error
    +from ..file_utils import populate_files
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class NodeProxy(ContainerDaemonForm):
    +    """Defines a node-proxy container"""
    +
    +    daemon_type = 'node-proxy'
    +    # TODO: update this if we make node-proxy an executable
    +    entrypoint = '/usr/sbin/ceph-node-proxy'
    +    required_files = ['node-proxy.json']
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        ident: DaemonIdentity,
    +        config_json: Dict,
    +        image: str = DEFAULT_IMAGE,
    +    ):
    +        self.ctx = ctx
    +        self._identity = ident
    +        self.image = image
    +
    +        # config-json options
    +        config = dict_get(config_json, 'node-proxy.json', {})
    +        self.files = {'node-proxy.json': config}
    +
    +        # validate the supplied args
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: str
    +    ) -> 'NodeProxy':
    +        return cls.create(
    +            ctx, DaemonIdentity(fsid, cls.daemon_type, daemon_id)
    +        )
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'NodeProxy':
    +        return cls(ctx, ident, fetch_configs(ctx), ctx.image)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return self._identity
    +
    +    @property
    +    def fsid(self) -> str:
    +        return self._identity.fsid
    +
    +    @property
    +    def daemon_id(self) -> str:
    +        return self._identity.daemon_id
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        # TODO: update this when we know the actual location in the
    +        # ceph container where node-proxy will be kept
    +        mounts.update(
    +            {
    +                os.path.join(
    +                    data_dir, 'node-proxy.json'
    +                ): '/usr/share/ceph/node-proxy.json:z'
    +            }
    +        )
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        # TODO: this corresponds to the mount location of the config
    +        # in customize_container_mounts above. Both will need to be
    +        # updated when we have a proper location in the container
    +        # for node-proxy
    +        args.extend(['--config', '/usr/share/ceph/node-proxy.json'])
    +
    +    def validate(self):
    +        # type: () -> None
    +        if not is_fsid(self.fsid):
    +            raise Error('not an fsid: %s' % self.fsid)
    +        if not self.daemon_id:
    +            raise Error('invalid daemon_id: %s' % self.daemon_id)
    +        if not self.image:
    +            raise Error('invalid image: %s' % self.image)
    +        # check for the required files
    +        if self.required_files:
    +            for fname in self.required_files:
    +                if fname not in self.files:
    +                    raise Error(
    +                        'required file missing from config-json: %s' % fname
    +                    )
    +
    +    def get_daemon_name(self):
    +        # type: () -> str
    +        return '%s.%s' % (self.daemon_type, self.daemon_id)
    +
    +    def get_container_name(self, desc=None):
    +        # type: (Optional[str]) -> str
    +        cname = 'ceph-%s-%s' % (self.fsid, self.get_daemon_name())
    +        if desc:
    +            cname = '%s-%s' % (cname, desc)
    +        return cname
    +
    +    def create_daemon_dirs(self, data_dir, uid, gid):
    +        # type: (str, int, int) -> None
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +
    +        logger.info('Writing node-proxy config...')
    +        # populate files from the config-json
    +        populate_files(data_dir, self.files, uid, gid)
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        # node-proxy needs elevated privileges on the host, so run it
    +        # as a privileged container.
    +        ctr = daemon_to_container(ctx, self, privileged=True)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def config_and_keyring(
    +        self, ctx: CephadmContext
    +    ) -> Tuple[Optional[str], Optional[str]]:
    +        return get_config_and_keyring(ctx)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return extract_uid_gid(ctx)
    +
    +    def default_entrypoint(self) -> str:
    +        return self.entrypoint
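
    The node-proxy daemon receives its entire configuration as a single 'node-proxy.json' entry in config-json, which __init__ above repackages into the files dict; a tiny sketch (the payload content is made up):

        config_json = {'node-proxy.json': '{"host": "10.0.0.1"}'}   # hypothetical payload
        config = config_json.get('node-proxy.json', {})             # what dict_get(...) returns here
        files = {'node-proxy.json': config}
        print('node-proxy.json' in files)   # required_files check in validate() passes -> True
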
    diff --git a/src/cephadm/cephadmlib/daemons/nvmeof.py b/src/cephadm/cephadmlib/daemons/nvmeof.py
    new file mode 100644
    index 000000000000..d916c7e63917
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/nvmeof.py
    @@ -0,0 +1,240 @@
    +import logging
    +import os
    +
    +from typing import Dict, List, Optional, Tuple, Union
    +
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer
    +from ..context_getters import fetch_configs, get_config_and_keyring
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ceph.cephadm.images import DEFAULT_NVMEOF_IMAGE
    +from ..context import CephadmContext
    +from ..data_utils import dict_get, is_fsid
    +from ..deployment_utils import to_deployment_container
    +from ..exceptions import Error
    +from ..file_utils import makedirs, populate_files
    +from ..call_wrappers import call
    +
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class CephNvmeof(ContainerDaemonForm):
    +    """Defines a Ceph-Nvmeof container"""
    +
    +    daemon_type = 'nvmeof'
    +    required_files = ['ceph-nvmeof.conf']
    +    default_image = DEFAULT_NVMEOF_IMAGE
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        fsid: str,
    +        daemon_id: Union[int, str],
    +        config_json: Dict,
    +        image: str = DEFAULT_NVMEOF_IMAGE,
    +    ) -> None:
    +        self.ctx = ctx
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image
    +
    +        # config-json options
    +        self.files = dict_get(config_json, 'files', {})
    +
    +        # validate the supplied args
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
    +    ) -> 'CephNvmeof':
    +        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'CephNvmeof':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    @staticmethod
    +    def _get_container_mounts(
    +        data_dir: str, log_dir: str, mtls_dir: Optional[str] = None
    +    ) -> Dict[str, str]:
    +        mounts = dict()
    +        mounts[os.path.join(data_dir, 'config')] = '/etc/ceph/ceph.conf:z'
    +        mounts[os.path.join(data_dir, 'keyring')] = '/etc/ceph/keyring:z'
    +        mounts[
    +            os.path.join(data_dir, 'ceph-nvmeof.conf')
    +        ] = '/src/ceph-nvmeof.conf:z'
    +        mounts[os.path.join(data_dir, 'configfs')] = '/sys/kernel/config'
    +        mounts[log_dir] = '/var/log/ceph:z'
    +        if mtls_dir:
    +            mounts[mtls_dir] = '/src/mtls:z'
    +        return mounts
    +
    +    def _get_huge_pages_mounts(self, files: Dict[str, str]) -> Dict[str, str]:
    +        mounts = dict()
    +        if 'spdk_mem_size' not in files:
    +            mounts['/dev/hugepages'] = '/dev/hugepages'
    +            mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
    +        return mounts
    +
    +    def _get_tls_cert_key_mounts(
    +        self, data_dir: str, files: Dict[str, str]
    +    ) -> Dict[str, str]:
    +        mounts = dict()
    +        for fn in [
    +            'server_cert',
    +            'server_key',
    +            'client_cert',
    +            'client_key',
    +            'root_ca_cert',
    +        ]:
    +            if fn in files:
    +                mounts[
    +                    os.path.join(data_dir, fn)
    +                ] = f'/{fn.replace("_", ".")}'
    +        return mounts
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        log_dir = os.path.join(ctx.log_dir, self.identity.fsid)
    +        mtls_dir = os.path.join(ctx.data_dir, self.identity.fsid, 'mtls')
    +        if os.path.exists(mtls_dir):
    +            mounts.update(
    +                self._get_container_mounts(
    +                    data_dir, log_dir, mtls_dir=mtls_dir
    +                )
    +            )
    +        else:
    +            mounts.update(self._get_container_mounts(data_dir, log_dir))
    +        mounts.update(self._get_huge_pages_mounts(self.files))
    +        mounts.update(self._get_tls_cert_key_mounts(data_dir, self.files))
    +
    +    def customize_container_binds(
    +        self, ctx: CephadmContext, binds: List[List[str]]
    +    ) -> None:
    +        lib_modules = [
    +            'type=bind',
    +            'source=/lib/modules',
    +            'destination=/lib/modules',
    +            'ro=true',
    +        ]
    +        binds.append(lib_modules)
    +
    +    @staticmethod
    +    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
    +        out, err, ret = call(
    +            ctx,
    +            [
    +                ctx.container_engine.path,
    +                'inspect',
    +                '--format',
    +                '{{index .Config.Labels "io.ceph.version"}}',
    +                container_id,
    +            ],
    +        )
    +        version = None
    +        if ret == 0:
    +            version = out.strip()
    +        return version
    +
    +    def validate(self):
    +        # type: () -> None
    +        if not is_fsid(self.fsid):
    +            raise Error('not an fsid: %s' % self.fsid)
    +        if not self.daemon_id:
    +            raise Error('invalid daemon_id: %s' % self.daemon_id)
    +        if not self.image:
    +            raise Error('invalid image: %s' % self.image)
    +
    +        # check for the required files
    +        if self.required_files:
    +            for fname in self.required_files:
    +                if fname not in self.files:
    +                    raise Error(
    +                        'required file missing from config-json: %s' % fname
    +                    )
    +
    +    def get_daemon_name(self):
    +        # type: () -> str
    +        return '%s.%s' % (self.daemon_type, self.daemon_id)
    +
    +    def get_container_name(self, desc=None):
    +        # type: (Optional[str]) -> str
    +        cname = '%s-%s' % (self.fsid, self.get_daemon_name())
    +        if desc:
    +            cname = '%s-%s' % (cname, desc)
    +        return cname
    +
    +    def create_daemon_dirs(self, data_dir, uid, gid):
    +        # type: (str, int, int) -> None
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +
    +        logger.info('Creating ceph-nvmeof config...')
    +        configfs_dir = os.path.join(data_dir, 'configfs')
    +        makedirs(configfs_dir, uid, gid, 0o755)
    +
    +        # populate files from the config-json
    +        populate_files(data_dir, self.files, uid, gid)
    +
    +    @staticmethod
    +    def configfs_mount_umount(data_dir, mount=True):
    +        # type: (str, bool) -> List[str]
    +        mount_path = os.path.join(data_dir, 'configfs')
    +        if mount:
    +            cmd = (
    +                'if ! grep -qs {0} /proc/mounts; then '
    +                'mount -t configfs none {0}; fi'.format(mount_path)
    +            )
    +        else:
    +            cmd = (
    +                'if grep -qs {0} /proc/mounts; then '
    +                'umount {0}; fi'.format(mount_path)
    +            )
    +        return cmd.split()
    +
    +    def get_sysctl_settings(self) -> List[str]:
    +        if 'spdk_mem_size' not in self.files:
    +            return [
    +                'vm.nr_hugepages = 4096',
    +            ]
    +        else:
    +            return []
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return 167, 167  # TODO: need to determine the uid/gid properly
    +
    +    def config_and_keyring(
    +        self, ctx: CephadmContext
    +    ) -> Tuple[Optional[str], Optional[str]]:
    +        return get_config_and_keyring(ctx)
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.append(ctx.container_engine.unlimited_pids_option)
    +        args.extend(['--ulimit', 'memlock=-1:-1'])
    +        args.extend(['--ulimit', 'nofile=10240'])
    +        args.extend(['--cap-add=CAP_SYS_NICE'])
    +        if 'spdk_mem_size' not in self.files:
    +            args.extend(['--cap-add=SYS_ADMIN'])
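
    Several knobs above key off whether 'spdk_mem_size' is present in the config-json files: the hugepage/vfio mounts, the vm.nr_hugepages sysctl and the SYS_ADMIN capability. A compact restatement of that gating, with a made-up files dict:

        def nvmeof_extras(files):
            # mirrors _get_huge_pages_mounts / get_sysctl_settings / customize_container_args above
            mounts, sysctls, caps = {}, [], ['CAP_SYS_NICE']
            if 'spdk_mem_size' not in files:
                mounts['/dev/hugepages'] = '/dev/hugepages'
                mounts['/dev/vfio/vfio'] = '/dev/vfio/vfio'
                sysctls.append('vm.nr_hugepages = 4096')
                caps.append('SYS_ADMIN')
            return mounts, sysctls, caps

        print(nvmeof_extras({'ceph-nvmeof.conf': '...'}))                            # hugepages enabled
        print(nvmeof_extras({'ceph-nvmeof.conf': '...', 'spdk_mem_size': '2048'}))   # hugepages skipped
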
    diff --git a/src/cephadm/cephadmlib/daemons/oauth2_proxy.py b/src/cephadm/cephadmlib/daemons/oauth2_proxy.py
    new file mode 100644
    index 000000000000..14202111c14e
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/oauth2_proxy.py
    @@ -0,0 +1,166 @@
    +import logging
    +import os
    +from typing import Dict, List, Tuple, Optional
    +import re
    +
    +from ..call_wrappers import call, CallVerbosity
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..deployment_utils import to_deployment_container
    +from ceph.cephadm.images import DEFAULT_OAUTH2_PROXY_IMAGE
    +from ..constants import UID_NOBODY, GID_NOGROUP
    +from ..data_utils import dict_get, is_fsid
    +from ..file_utils import populate_files, makedirs, recursive_chown
    +from ..exceptions import Error
    +
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class OAuth2Proxy(ContainerDaemonForm):
    +    """Define the configs for the jaeger tracing containers"""
    +
    +    default_image = DEFAULT_OAUTH2_PROXY_IMAGE
    +    daemon_type = 'oauth2-proxy'
    +    required_files = [
    +        'oauth2-proxy.conf',
    +        'oauth2-proxy.crt',
    +        'oauth2-proxy.key',
    +    ]
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        fsid: str,
    +        daemon_id: str,
    +        config_json: Dict,
    +        image: str = DEFAULT_OAUTH2_PROXY_IMAGE,
    +    ):
    +        self.ctx = ctx
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image
    +        self.files = dict_get(config_json, 'files', {})
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: str
    +    ) -> 'OAuth2Proxy':
    +        return cls(ctx, fsid, daemon_id, fetch_configs(ctx), ctx.image)
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'OAuth2Proxy':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return UID_NOBODY, GID_NOGROUP
    +
    +    def get_daemon_args(self) -> List[str]:
    +        return [
    +            '--config=/etc/oauth2-proxy.conf',
    +            '--tls-cert-file=/etc/oauth2-proxy.crt',
    +            '--tls-key-file=/etc/oauth2-proxy.key',
    +        ]
    +
    +    def default_entrypoint(self) -> str:
    +        return ''
    +
    +    def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
    +        """Create files under the container data dir"""
    +        if not os.path.isdir(data_dir):
    +            raise OSError('data_dir is not a directory: %s' % (data_dir))
    +        logger.info('Writing oauth2-proxy config...')
    +        config_dir = os.path.join(data_dir, 'etc/')
    +        makedirs(config_dir, uid, gid, 0o755)
    +        recursive_chown(config_dir, uid, gid)
    +        populate_files(config_dir, self.files, uid, gid)
    +
    +    def validate(self) -> None:
    +        if not is_fsid(self.fsid):
    +            raise Error(f'not an fsid: {self.fsid}')
    +        if not self.daemon_id:
    +            raise Error(f'invalid daemon_id: {self.daemon_id}')
    +        if not self.image:
    +            raise Error(f'invalid image: {self.image}')
    +
    +        # check for the required files
    +        if self.required_files:
    +            for fname in self.required_files:
    +                if fname not in self.files:
    +                    raise Error(
    +                        'required file missing from config-json: %s' % fname
    +                    )
    +
    +    @staticmethod
    +    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
    +        """Return the version of the oauth2-proxy container"""
    +        version = None
    +        out, err, code = call(
    +            ctx,
    +            [
    +                ctx.container_engine.path,
    +                'exec',
    +                container_id,
    +                'oauth2-proxy',
    +                '--version',
    +            ],
    +            verbosity=CallVerbosity.QUIET,
    +        )
    +        if code == 0:
    +            match = re.search(r'oauth2-proxy (v\d+\.\d+\.\d+)', out)
    +            if match:
    +                version = match.group(1)
    +        return version
    +
    +    def customize_container_mounts(
    +        self, ctx: CephadmContext, mounts: Dict[str, str]
    +    ) -> None:
    +        data_dir = self.identity.data_dir(ctx.data_dir)
    +        mounts.update(
    +            {
    +                os.path.join(
    +                    data_dir, 'etc/oauth2-proxy.conf'
    +                ): '/etc/oauth2-proxy.conf:Z',
    +                os.path.join(
    +                    data_dir, 'etc/oauth2-proxy.crt'
    +                ): '/etc/oauth2-proxy.crt:Z',
    +                os.path.join(
    +                    data_dir, 'etc/oauth2-proxy.key'
    +                ): '/etc/oauth2-proxy.key:Z',
    +            }
    +        )
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        uid, _ = self.uid_gid(ctx)
    +        other_args = [
    +            '--user',
    +            str(uid),
    +        ]
    +        args.extend(other_args)
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(self.get_daemon_args())
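    +    # Illustrative note (paths beyond those shown above are hypothetical):
    +    # each mount added in customize_container_mounts() pairs a file written
    +    # into the daemon's data dir by create_daemon_dirs() with the
    +    # in-container path that get_daemon_args() references, e.g.
    +    #   <data_dir>/etc/oauth2-proxy.conf -> /etc/oauth2-proxy.conf (--config=)
    +    #   <data_dir>/etc/oauth2-proxy.crt  -> /etc/oauth2-proxy.crt (--tls-cert-file=)
    +    #   <data_dir>/etc/oauth2-proxy.key  -> /etc/oauth2-proxy.key (--tls-key-file=)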
    diff --git a/src/cephadm/cephadmlib/daemons/smb.py b/src/cephadm/cephadmlib/daemons/smb.py
    new file mode 100644
    index 000000000000..33d43cbe6cea
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/smb.py
    @@ -0,0 +1,784 @@
    +import dataclasses
    +import enum
    +import json
    +import logging
    +import pathlib
    +import re
    +import socket
    +
    +from typing import List, Dict, Tuple, Optional, Any, NamedTuple
    +
    +from .. import context_getters
    +from .. import daemon_form
    +from .. import data_utils
    +from .. import deployment_utils
    +from .. import file_utils
    +from ..call_wrappers import call, CallVerbosity
    +from ceph.cephadm.images import DEFAULT_SAMBA_IMAGE
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_engines import Podman
    +from ..container_types import (
    +    CephContainer,
    +    InitContainer,
    +    Namespace,
    +    SidecarContainer,
    +    enable_shared_namespaces,
    +)
    +from ..context import CephadmContext
    +from ..daemon_identity import DaemonIdentity, DaemonSubIdentity
    +from ..deploy import DeploymentType
    +from ..exceptions import Error
    +from ..host_facts import list_networks
    +from ..net_utils import EndPoint
    +
    +
    +logger = logging.getLogger()
    +
    +# sambacc provided commands we will need (when clustered)
    +_SCC = '/usr/bin/samba-container'
    +_NODES_SUBCMD = [_SCC, 'ctdb-list-nodes']
    +_MUTEX_SUBCMD = [_SCC, 'ctdb-rados-mutex']  # requires rados uri
    +
    +
    +class Features(enum.Enum):
    +    DOMAIN = 'domain'
    +    CLUSTERED = 'clustered'
    +
    +    @classmethod
    +    def valid(cls, value: str) -> bool:
    +        # workaround for older python versions
    +        try:
    +            cls(value)
    +            return True
    +        except ValueError:
    +            return False
    +
    +
    +class ClusterPublicIP(NamedTuple):
    +    address: str
    +    destinations: List[str]
    +
    +    @classmethod
    +    def convert(cls, item: Dict[str, Any]) -> 'ClusterPublicIP':
    +        assert isinstance(item, dict)
    +        address = item['address']
    +        assert isinstance(address, str)
    +        destinations = item['destinations']
    +        assert isinstance(destinations, list)
    +        return cls(address, destinations)
    +
    +
    +@dataclasses.dataclass(frozen=True)
    +class Config:
    +    identity: DaemonIdentity
    +    instance_id: str
    +    source_config: str
    +    domain_member: bool
    +    clustered: bool
    +    samba_debug_level: int = 0
    +    ctdb_log_level: str = ''
    +    debug_delay: int = 0
    +    join_sources: List[str] = dataclasses.field(default_factory=list)
    +    user_sources: List[str] = dataclasses.field(default_factory=list)
    +    custom_dns: List[str] = dataclasses.field(default_factory=list)
    +    smb_port: int = 0
    +    ceph_config_entity: str = 'client.admin'
    +    vhostname: str = ''
    +    metrics_image: str = ''
    +    metrics_port: int = 0
    +    # clustering related values
    +    rank: int = -1
    +    rank_generation: int = -1
    +    cluster_meta_uri: str = ''
    +    cluster_lock_uri: str = ''
    +    cluster_public_addrs: List[ClusterPublicIP] = dataclasses.field(
    +        default_factory=list
    +    )
    +
    +    def config_uris(self) -> List[str]:
    +        uris = [self.source_config]
    +        uris.extend(self.user_sources or [])
    +        if self.clustered:
    +            # When clustered, we inject certain clustering related config vars
    +            # via a config file generated by cephadm (elsewhere in this file)
    +            uris.append('/etc/samba/container/ctdb.json')
    +        return uris
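    +    # Illustrative sketch (URIs hypothetical): for a clustered instance with
    +    #   source_config='rados://.smb/c1/config.json'
    +    #   user_sources=['rados://.smb/c1/users.json']
    +    # config_uris() yields
    +    #   ['rados://.smb/c1/config.json',
    +    #    'rados://.smb/c1/users.json',
    +    #    '/etc/samba/container/ctdb.json']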
    +
    +
    +def _container_dns_args(cfg: Config) -> List[str]:
    +    cargs = []
    +    for dns in cfg.custom_dns:
    +        cargs.append(f'--dns={dns}')
    +    if cfg.vhostname:
    +        cargs.append(f'--hostname={cfg.vhostname}')
    +    return cargs
    +
    +
    +class ContainerCommon:
    +    def __init__(self, cfg: Config, image: str = '') -> None:
    +        self.cfg = cfg
    +        self.image = image
    +
    +    def name(self) -> str:
    +        raise NotImplementedError('container name')
    +
    +    def envs(self) -> Dict[str, str]:
    +        return {}
    +
    +    def envs_list(self) -> List[str]:
    +        return []
    +
    +    def args(self) -> List[str]:
    +        return []
    +
    +    def container_args(self) -> List[str]:
    +        return []
    +
    +    def container_image(self) -> str:
    +        return self.image
    +
    +
    +class SambaContainerCommon(ContainerCommon):
    +    def __init__(self, cfg: Config, image: str = '') -> None:
    +        self.cfg = cfg
    +        self.image = image
    +
    +    def envs(self) -> Dict[str, str]:
    +        environ = {
    +            'SAMBA_CONTAINER_ID': self.cfg.instance_id,
    +            'SAMBACC_CONFIG': json.dumps(self.cfg.config_uris()),
    +        }
    +        # The CTDB support in sambacc project is considered experimental
    +        # and it refuses to run without setting the following environment
    +        # variable. This can be dropped once sambacc no longer needs it,
    +        # possibly after the next sambacc release.
    +        environ['SAMBACC_CTDB'] = 'ctdb-is-experimental'
    +        if self.cfg.ceph_config_entity:
    +            environ['SAMBACC_CEPH_ID'] = f'name={self.cfg.ceph_config_entity}'
    +        if self.cfg.rank >= 0:
    +            # how the values are known to ceph (for debugging purposes...)
    +            environ['RANK'] = str(self.cfg.rank)
    +            environ['RANK_GENERATION'] = str(self.cfg.rank_generation)
    +            # samba container specific variant
    +            environ['NODE_NUMBER'] = environ['RANK']
    +        return environ
    +
    +    def envs_list(self) -> List[str]:
    +        return [f'{k}={v}' for (k, v) in self.envs().items()]
    +
    +    def args(self) -> List[str]:
    +        args = []
    +        if self.cfg.samba_debug_level:
    +            args.append(f'--samba-debug-level={self.cfg.samba_debug_level}')
    +        if self.cfg.debug_delay:
    +            args.append(f'--debug-delay={self.cfg.debug_delay}')
    +        return args
    +
    +
    +class SambaNetworkedInitContainer(SambaContainerCommon):
    +    """SambaContainerCommon subclass that enables additional networking
    +    params for an init container by default.
    +    NB: By networked we mean it needs to use public network resources outside
    +    the ceph cluster.
    +    """
    +
    +    def container_args(self) -> List[str]:
    +        cargs = _container_dns_args(self.cfg)
    +        if self.cfg.clustered:
    +            cargs.append('--network=host')
    +        return cargs
    +
    +
    +class SMBDContainer(SambaContainerCommon):
    +    def name(self) -> str:
    +        return 'smbd'
    +
    +    def args(self) -> List[str]:
    +        args = super().args()
    +        args.append('run')
    +        if self.cfg.clustered:
    +            auth_kind = 'nsswitch' if self.cfg.domain_member else 'users'
    +            args.append(f'--setup={auth_kind}')
    +            args.append('--setup=smb_ctdb')
    +            args.append('--wait-for=ctdb')
    +        args.append('smbd')
    +        return args
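    +    # Illustrative sketch: for a clustered domain-member configuration the
    +    # resulting sambacc invocation for the primary container is roughly
    +    #   run --setup=nsswitch --setup=smb_ctdb --wait-for=ctdb smbd
    +    # (preceded by any --samba-debug-level/--debug-delay args from the base
    +    # class).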
    +
    +    def container_args(self) -> List[str]:
    +        cargs = []
    +        if self.cfg.smb_port:
    +            cargs.append(f'--publish={self.cfg.smb_port}:{self.cfg.smb_port}')
    +        if self.cfg.metrics_port:
    +            metrics_port = self.cfg.metrics_port
    +            cargs.append(f'--publish={metrics_port}:{metrics_port}')
    +        cargs.extend(_container_dns_args(self.cfg))
    +        return cargs
    +
    +
    +class WinbindContainer(SambaContainerCommon):
    +    def name(self) -> str:
    +        return 'winbindd'
    +
    +    def args(self) -> List[str]:
    +        args = super().args()
    +        args.append('run')
    +        if self.cfg.clustered:
    +            args.append('--setup=smb_ctdb')
    +            args.append('--wait-for=ctdb')
    +        args.append('winbindd')
    +        return args
    +
    +
    +class ConfigInitContainer(SambaContainerCommon):
    +    def name(self) -> str:
    +        return 'config'
    +
    +    def args(self) -> List[str]:
    +        return super().args() + ['init']
    +
    +
    +class MustJoinContainer(SambaNetworkedInitContainer):
    +    def name(self) -> str:
    +        return 'mustjoin'
    +
    +    def args(self) -> List[str]:
    +        args = super().args()
    +        if self.cfg.clustered:
    +            # TODO: not only do we want to only do this on node 0, we only
    +            # want to do it exactly ONCE per cluster even on pnn 0. This needs
    +            # additional work to get that right.
    +            args.append('--skip-if=env:NODE_NUMBER!=0')
    +        args.append('must-join')
    +        for join_src in self.cfg.join_sources:
    +            args.append(f'-j{join_src}')
    +        return args
    +
    +
    +class ConfigWatchContainer(SambaContainerCommon):
    +    def name(self) -> str:
    +        return 'configwatch'
    +
    +    def args(self) -> List[str]:
    +        return super().args() + ['update-config', '--watch']
    +
    +
    +class SMBMetricsContainer(ContainerCommon):
    +    def name(self) -> str:
    +        return 'smbmetrics'
    +
    +    def args(self) -> List[str]:
    +        args = []
    +        if self.cfg.metrics_port > 0:
    +            args.append(f'--port={self.cfg.metrics_port}')
    +        return args
    +
    +
    +class CTDBMigrateInitContainer(SambaContainerCommon):
    +    def name(self) -> str:
    +        return 'ctdbMigrate'
    +
    +    def args(self) -> List[str]:
    +        # TODO: not only do we want to only do this on node 0, we only
    +        # want to do it exactly ONCE per cluster even on pnn 0. This needs
    +        # additional work to get that right.
    +        return super().args() + [
    +            '--skip-if=env:NODE_NUMBER!=0',
    +            'ctdb-migrate',
    +            '--dest-dir=/var/lib/ctdb/persistent',
    +            '--archive=/var/lib/samba/.migrated',
    +        ]
    +
    +
    +class CTDBMustHaveNodeInitContainer(SambaContainerCommon):
    +    def name(self) -> str:
    +        return 'ctdbMustHaveNode'
    +
    +    def args(self) -> List[str]:
    +        args = super().args()
    +        unique_name = self.cfg.identity.daemon_name
    +        args += [
    +            'ctdb-must-have-node',
    +            # hostname is a misnomer (todo: fix in sambacc)
    +            f'--hostname={unique_name}',
    +            '--take-node-number-from-env',
    +        ]
    +        return args
    +
    +
    +class CTDBDaemonContainer(SambaContainerCommon):
    +    def name(self) -> str:
    +        return 'ctdbd'
    +
    +    def args(self) -> List[str]:
    +        return super().args() + [
    +            'run',
    +            'ctdbd',
    +            '--setup=smb_ctdb',
    +            '--setup=ctdb_config',
    +            '--setup=ctdb_etc',
    +        ]
    +
    +    def container_args(self) -> List[str]:
    +        cargs = super().container_args()
    +        # make conditional?
    +        # CAP_NET_ADMIN is needed for event script to add public ips to iface
    +        cargs.append('--cap-add=NET_ADMIN')
    +        # CAP_NET_RAW allows sending gratuitous ARPs/tickle ACKs via raw sockets
    +        cargs.append('--cap-add=NET_RAW')
    +        return cargs
    +
    +
    +class CTDBNodeMonitorContainer(SambaContainerCommon):
    +    def name(self) -> str:
    +        return 'ctdbNodes'
    +
    +    def args(self) -> List[str]:
    +        args = super().args()
    +        unique_name = self.cfg.identity.daemon_name
    +        args += [
    +            '--debug',
    +            'ctdb-monitor-nodes',
    +            # hostname is a misnomer (todo: fix in sambacc)
    +            f'--hostname={unique_name}',
    +            '--take-node-number-from-env',
    +            '--reload=all',
    +        ]
    +        return args
    +
    +
    +class ContainerLayout:
    +    init_containers: List[SambaContainerCommon]
    +    primary: SambaContainerCommon
    +    supplemental: List[ContainerCommon]
    +
    +    def __init__(
    +        self,
    +        init_containers: List[SambaContainerCommon],
    +        primary: SambaContainerCommon,
    +        supplemental: List[ContainerCommon],
    +    ) -> None:
    +        self.init_containers = init_containers
    +        self.primary = primary
    +        self.supplemental = supplemental
    +
    +
    +@daemon_form.register
    +class SMB(ContainerDaemonForm):
    +    """Provides a form for SMB containers."""
    +
    +    daemon_type = 'smb'
    +    daemon_base = '/usr/sbin/smbd'
    +    default_image = DEFAULT_SAMBA_IMAGE
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(self, ctx: CephadmContext, ident: DaemonIdentity):
    +        assert ident.daemon_type == self.daemon_type
    +        self._identity = ident
    +        self._instance_cfg: Optional[Config] = None
    +        self._files: Dict[str, str] = {}
    +        self._raw_configs: Dict[str, Any] = context_getters.fetch_configs(ctx)
    +        self._config_keyring = context_getters.get_config_and_keyring(ctx)
    +        self._cached_layout: Optional[ContainerLayout] = None
    +        self._rank_info = context_getters.fetch_rank_info(ctx) or (-1, -1)
    +        self.smb_port = 445
    +        self.metrics_port = 9922
    +        self._network_mapper = _NetworkMapper(ctx)
    +        logger.debug('Created SMB ContainerDaemonForm instance')
    +
    +    @staticmethod
    +    def get_version(ctx: CephadmContext, container_id: str) -> Optional[str]:
    +        version = None
    +        out, _, ret = call(
    +            ctx,
    +            [
    +                ctx.container_engine.path,
    +                'exec',
    +                container_id,
    +                SMB.daemon_base,
    +                '-V',
    +            ],
    +            verbosity=CallVerbosity.QUIET,
    +        )
    +
    +        if ret == 0:
    +            match = re.search(r'Version\s*([\d.]+)', out)
    +            if match:
    +                version = match.group(1)
    +        return version
    +
    +    def validate(self) -> None:
    +        if self._instance_cfg is not None:
    +            return
    +
    +        configs = self._raw_configs
    +        instance_id = configs.get('cluster_id', '')
    +        source_config = configs.get('config_uri', '')
    +        join_sources = configs.get('join_sources', [])
    +        user_sources = configs.get('user_sources', [])
    +        custom_dns = configs.get('custom_dns', [])
    +        instance_features = configs.get('features', [])
    +        files = data_utils.dict_get(configs, 'files', {})
    +        ceph_config_entity = configs.get('config_auth_entity', '')
    +        vhostname = configs.get('virtual_hostname', '')
    +        metrics_image = configs.get('metrics_image', '')
    +        metrics_port = int(configs.get('metrics_port', '0'))
    +        cluster_meta_uri = configs.get('cluster_meta_uri', '')
    +        cluster_lock_uri = configs.get('cluster_lock_uri', '')
    +        cluster_public_addrs = configs.get('cluster_public_addrs', [])
    +
    +        if not instance_id:
    +            raise Error('invalid instance (cluster) id')
    +        if not source_config:
    +            raise Error('invalid configuration source uri')
    +        invalid_features = {
    +            f for f in instance_features if not Features.valid(f)
    +        }
    +        if invalid_features:
    +            raise Error(
    +                f'invalid instance features: {", ".join(invalid_features)}'
    +            )
    +        if not vhostname:
    +            # if a virtual hostname is not provided, generate one by prefixing
    +            # the cluster/instance id to the system hostname
    +            hname = socket.getfqdn()
    +            vhostname = f'{instance_id}-{hname}'
    +        _public_addrs = [
    +            ClusterPublicIP.convert(v) for v in cluster_public_addrs
    +        ]
    +        if _public_addrs:
    +            # cache the cephadm networks->devices mapping for later
    +            self._network_mapper.load()
    +
    +        rank, rank_gen = self._rank_info
    +        self._instance_cfg = Config(
    +            identity=self._identity,
    +            instance_id=instance_id,
    +            source_config=source_config,
    +            join_sources=join_sources,
    +            user_sources=user_sources,
    +            custom_dns=custom_dns,
    +            domain_member=Features.DOMAIN.value in instance_features,
    +            clustered=Features.CLUSTERED.value in instance_features,
    +            smb_port=self.smb_port,
    +            ceph_config_entity=ceph_config_entity,
    +            vhostname=vhostname,
    +            metrics_image=metrics_image,
    +            metrics_port=metrics_port,
    +            rank=rank,
    +            rank_generation=rank_gen,
    +            cluster_meta_uri=cluster_meta_uri,
    +            cluster_lock_uri=cluster_lock_uri,
    +            cluster_public_addrs=_public_addrs,
    +        )
    +        self._files = files
    +        logger.debug('SMB Instance Config: %s', self._instance_cfg)
    +        logger.debug('Configured files: %s', self._files)
    +
    +    @property
    +    def _cfg(self) -> Config:
    +        self.validate()
    +        assert self._instance_cfg
    +        return self._instance_cfg
    +
    +    @property
    +    def instance_id(self) -> str:
    +        return self._cfg.instance_id
    +
    +    @property
    +    def source_config(self) -> str:
    +        return self._cfg.source_config
    +
    +    @classmethod
    +    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'SMB':
    +        return cls(ctx, ident)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return self._identity
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return 0, 0
    +
    +    def config_and_keyring(
    +        self, ctx: CephadmContext
    +    ) -> Tuple[Optional[str], Optional[str]]:
    +        return self._config_keyring
    +
    +    def _layout(self) -> ContainerLayout:
    +        if self._cached_layout:
    +            return self._cached_layout
    +        init_ctrs: List[SambaContainerCommon] = []
    +        ctrs: List[ContainerCommon] = []
    +
    +        init_ctrs.append(ConfigInitContainer(self._cfg))
    +        ctrs.append(ConfigWatchContainer(self._cfg))
    +
    +        if self._cfg.domain_member:
    +            init_ctrs.append(MustJoinContainer(self._cfg))
    +            ctrs.append(WinbindContainer(self._cfg))
    +
    +        metrics_image = self._cfg.metrics_image.strip()
    +        metrics_port = self._cfg.metrics_port
    +        if metrics_image and metrics_port > 0:
    +            ctrs.append(SMBMetricsContainer(self._cfg, metrics_image))
    +
    +        if self._cfg.clustered:
    +            init_ctrs += [
    +                CTDBMigrateInitContainer(self._cfg),
    +                CTDBMustHaveNodeInitContainer(self._cfg),
    +            ]
    +            ctrs += [
    +                CTDBDaemonContainer(self._cfg),
    +                CTDBNodeMonitorContainer(self._cfg),
    +            ]
    +
    +        smbd = SMBDContainer(self._cfg)
    +        self._cached_layout = ContainerLayout(init_ctrs, smbd, ctrs)
    +        return self._cached_layout
    +
    +    def _to_init_container(
    +        self, ctx: CephadmContext, smb_ctr: SambaContainerCommon
    +    ) -> InitContainer:
    +        volume_mounts: Dict[str, str] = {}
    +        container_args: List[str] = smb_ctr.container_args()
    +        self.customize_container_mounts(ctx, volume_mounts)
    +        # XXX: is this needed? if so, can this be simplified
    +        if isinstance(ctx.container_engine, Podman):
    +            ctx.container_engine.update_mounts(ctx, volume_mounts)
    +        identity = DaemonSubIdentity.from_parent(
    +            self.identity, smb_ctr.name()
    +        )
    +        return InitContainer(
    +            ctx,
    +            entrypoint='',
    +            image=ctx.image or self.default_image,
    +            identity=identity,
    +            args=smb_ctr.args(),
    +            container_args=container_args,
    +            envs=smb_ctr.envs_list(),
    +            volume_mounts=volume_mounts,
    +        )
    +
    +    def _to_sidecar_container(
    +        self, ctx: CephadmContext, smb_ctr: ContainerCommon
    +    ) -> SidecarContainer:
    +        volume_mounts: Dict[str, str] = {}
    +        container_args: List[str] = smb_ctr.container_args()
    +        self.customize_container_mounts(ctx, volume_mounts)
    +        shared_ns = {
    +            Namespace.ipc,
    +            Namespace.network,
    +            Namespace.pid,
    +        }
    +        if isinstance(ctx.container_engine, Podman):
    +            # XXX: is this needed? if so, can this be simplified
    +            ctx.container_engine.update_mounts(ctx, volume_mounts)
    +            # docker doesn't support sharing the uts namespace with other
    +            # containers. It may not be entirely needed on podman but it gives
    +            # me warm fuzzies to make sure it gets shared.
    +            shared_ns.add(Namespace.uts)
    +        enable_shared_namespaces(
    +            container_args, self.identity.container_name, shared_ns
    +        )
    +        identity = DaemonSubIdentity.from_parent(
    +            self.identity, smb_ctr.name()
    +        )
    +        img = smb_ctr.container_image() or ctx.image or self.default_image
    +        return SidecarContainer(
    +            ctx,
    +            entrypoint='',
    +            image=img,
    +            identity=identity,
    +            container_args=container_args,
    +            args=smb_ctr.args(),
    +            envs=smb_ctr.envs_list(),
    +            volume_mounts=volume_mounts,
    +            init=False,
    +            remove=True,
    +        )
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self, host_network=self._cfg.clustered)
    +        # We want to share the IPC ns between the samba containers for one
    +        # instance.  Cephadm's default, host ipc, is not what we want.
    +        # Unsetting it works fine for podman but docker (on ubuntu 22.04) needs
    +        # to be explicitly told that ipc of the primary container must be
    +        # shareable.
    +        ctr.ipc = 'shareable'
    +        return deployment_utils.to_deployment_container(ctx, ctr)
    +
    +    def init_containers(self, ctx: CephadmContext) -> List[InitContainer]:
    +        return [
    +            self._to_init_container(ctx, smb_ctr)
    +            for smb_ctr in self._layout().init_containers
    +        ]
    +
    +    def sidecar_containers(
    +        self, ctx: CephadmContext
    +    ) -> List[SidecarContainer]:
    +        return [
    +            self._to_sidecar_container(ctx, smb_ctr)
    +            for smb_ctr in self._layout().supplemental
    +        ]
    +
    +    def customize_container_envs(
    +        self, ctx: CephadmContext, envs: List[str]
    +    ) -> None:
    +        clayout = self._layout()
    +        envs.extend(clayout.primary.envs_list())
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        clayout = self._layout()
    +        args.extend(clayout.primary.args())
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(self._layout().primary.container_args())
    +
    +    def customize_container_mounts(
    +        self,
    +        ctx: CephadmContext,
    +        mounts: Dict[str, str],
    +    ) -> None:
    +        self.validate()
    +        data_dir = pathlib.Path(self.identity.data_dir(ctx.data_dir))
    +        etc_samba_ctr = str(data_dir / 'etc-samba-container')
    +        lib_samba = str(data_dir / 'lib-samba')
    +        run_samba = str(data_dir / 'run')
    +        config = str(data_dir / 'config')
    +        keyring = str(data_dir / 'keyring')
    +        mounts[etc_samba_ctr] = '/etc/samba/container:z'
    +        mounts[lib_samba] = '/var/lib/samba:z'
    +        mounts[run_samba] = '/run:z'  # TODO: make this a shared tmpfs
    +        mounts[config] = '/etc/ceph/ceph.conf:z'
    +        mounts[keyring] = '/etc/ceph/keyring:z'
    +        if self._cfg.clustered:
    +            ctdb_persistent = str(data_dir / 'ctdb/persistent')
    +            ctdb_run = str(data_dir / 'ctdb/run')  # TODO: tmpfs too!
    +            ctdb_volatile = str(data_dir / 'ctdb/volatile')
    +            ctdb_etc = str(data_dir / 'ctdb/etc')
    +            mounts[ctdb_persistent] = '/var/lib/ctdb/persistent:z'
    +            mounts[ctdb_run] = '/var/run/ctdb:z'
    +            mounts[ctdb_volatile] = '/var/lib/ctdb/volatile:z'
    +            mounts[ctdb_etc] = '/etc/ctdb:z'
    +            # create a shared smb.conf file for our clustered instances.
    +            # This is a HACK that substitutes for a bunch of architectural
    +            # changes to sambacc *and* smbmetrics (container). In short,
    +            # sambacc can set up the correct cluster enabled conf file for
    +            # samba daemons (smbd, winbindd, etc) but not its own long-running
    +            # tasks.  Similarly, the smbmetrics container always uses the
    +            # registry conf (non-clustered). Having cephadm create a stub
    +            # config that will share the file across all containers is a
    +            # stopgap that resolves the problem for now, but should eventually
    +            # be replaced by a less "leaky" approach in the managed containers.
    +            ctdb_smb_conf = str(data_dir / 'ctdb/smb.conf')
    +            mounts[ctdb_smb_conf] = '/etc/samba/smb.conf:z'
    +
    +    def customize_container_endpoints(
    +        self, endpoints: List[EndPoint], deployment_type: DeploymentType
    +    ) -> None:
    +        if not any(ep.port == self.smb_port for ep in endpoints):
    +            endpoints.append(EndPoint('0.0.0.0', self.smb_port))
    +        if self.metrics_port > 0:
    +            if not any(ep.port == self.metrics_port for ep in endpoints):
    +                endpoints.append(EndPoint('0.0.0.0', self.metrics_port))
    +
    +    def prepare_data_dir(self, data_dir: str, uid: int, gid: int) -> None:
    +        self.validate()
    +        ddir = pathlib.Path(data_dir)
    +        etc_samba_ctr = ddir / 'etc-samba-container'
    +        file_utils.makedirs(etc_samba_ctr, uid, gid, 0o770)
    +        file_utils.makedirs(ddir / 'lib-samba', uid, gid, 0o770)
    +        file_utils.makedirs(ddir / 'run', uid, gid, 0o770)
    +        if self._files:
    +            file_utils.populate_files(data_dir, self._files, uid, gid)
    +        if self._cfg.clustered:
    +            file_utils.makedirs(ddir / 'ctdb/persistent', uid, gid, 0o770)
    +            file_utils.makedirs(ddir / 'ctdb/run', uid, gid, 0o770)
    +            file_utils.makedirs(ddir / 'ctdb/volatile', uid, gid, 0o770)
    +            file_utils.makedirs(ddir / 'ctdb/etc', uid, gid, 0o770)
    +            self._write_ctdb_stub_config(etc_samba_ctr / 'ctdb.json')
    +            self._write_smb_conf_stub(ddir / 'ctdb/smb.conf')
    +
    +    def _write_ctdb_stub_config(self, path: pathlib.Path) -> None:
    +        reclock_cmd = ' '.join(_MUTEX_SUBCMD + [self._cfg.cluster_lock_uri])
    +        nodes_cmd = ' '.join(_NODES_SUBCMD)
    +        stub_config: Dict[str, Any] = {
    +            'samba-container-config': 'v0',
    +            'ctdb': {
    +                # recovery_lock is passed directly to ctdb: needs '!' prefix
    +                'recovery_lock': f'!{reclock_cmd}',
    +                'cluster_meta_uri': self._cfg.cluster_meta_uri,
    +                'nodes_cmd': nodes_cmd,
    +                'public_addresses': self._network_mapper.for_sambacc(
    +                    self._cfg
    +                ),
    +            },
    +        }
    +        if self._cfg.ctdb_log_level:
    +            stub_config['ctdb']['log_level'] = self._cfg.ctdb_log_level
    +        with file_utils.write_new(path) as fh:
    +            json.dump(stub_config, fh)
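    +    # Illustrative sketch (URIs hypothetical): with
    +    #   cluster_lock_uri='rados://.smb/c1/cluster.meta.lock'
    +    #   cluster_meta_uri='rados://.smb/c1/cluster.meta'
    +    # and no public addresses, the stub written above resembles:
    +    #   {"samba-container-config": "v0",
    +    #    "ctdb": {
    +    #      "recovery_lock": "!/usr/bin/samba-container ctdb-rados-mutex rados://.smb/c1/cluster.meta.lock",
    +    #      "cluster_meta_uri": "rados://.smb/c1/cluster.meta",
    +    #      "nodes_cmd": "/usr/bin/samba-container ctdb-list-nodes",
    +    #      "public_addresses": []}}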
    +
    +    def _write_smb_conf_stub(self, path: pathlib.Path) -> None:
    +        """Initialize a stub smb conf that will be shared by the primary
    +        and sidecar containers. This is expected to be overwritten by
    +        sambacc.
    +        """
    +        _lines = [
    +            '[global]',
    +            'config backend = registry',
    +        ]
    +        with file_utils.write_new(path) as fh:
    +            for line in _lines:
    +                fh.write(f'{line}\n')
    +
    +
    +class _NetworkMapper:
    +    """Helper class that maps between cephadm-friendly address-networks
    +    groupings to ctdb-friendly address-device groupings.
    +    """
    +
    +    def __init__(self, ctx: CephadmContext):
    +        self._ctx = ctx
    +        self._networks: Dict = {}
    +
    +    def load(self) -> None:
    +        logger.debug('fetching networks')
    +        self._networks = list_networks(self._ctx)
    +
    +    def _convert(self, addr: ClusterPublicIP) -> ClusterPublicIP:
    +        devs = []
    +        for net in addr.destinations:
    +            if net not in self._networks:
    +                # ignore mappings that can't exist on this host
    +                logger.warning(
    +                    'destination network %r not found in %r',
    +                    net,
    +                    self._networks.keys(),
    +                )
    +                continue
    +            for dev in self._networks[net]:
    +                logger.debug(
    +                    'adding device %s from network %r for public ip %s',
    +                    dev,
    +                    net,
    +                    addr.address,
    +                )
    +                devs.append(dev)
    +        return ClusterPublicIP(addr.address, devs)
    +
    +    def for_sambacc(self, cfg: Config) -> List[Dict[str, Any]]:
    +        if not cfg.cluster_public_addrs:
    +            return []
    +        addrs = (self._convert(a) for a in (cfg.cluster_public_addrs or []))
    +        return [
    +            {'address': a.address, 'interfaces': a.destinations}
    +            for a in addrs
    +        ]
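    +# Illustrative sketch (hypothetical values): if list_networks() reported
    +#   {'192.168.76.0/24': {'eth0': ['192.168.76.10']}}
    +# then a public address entry of
    +#   ClusterPublicIP(address='192.168.76.200/24',
    +#                   destinations=['192.168.76.0/24'])
    +# is rendered by _NetworkMapper.for_sambacc() as
    +#   [{'address': '192.168.76.200/24', 'interfaces': ['eth0']}]
    +# Networks that do not exist on the host are skipped with a warning.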
    diff --git a/src/cephadm/cephadmlib/daemons/snmp.py b/src/cephadm/cephadmlib/daemons/snmp.py
    new file mode 100644
    index 000000000000..ab84a302f2c9
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/snmp.py
    @@ -0,0 +1,226 @@
    +import json
    +import os
    +
    +from typing import Any, Dict, List, Optional, Tuple, Union
    +from urllib.error import HTTPError, URLError
    +from urllib.request import urlopen
    +
    +from ceph.cephadm.images import DEFAULT_SNMP_GATEWAY_IMAGE
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs, fetch_endpoints
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..data_utils import is_fsid
    +from ..deployment_utils import to_deployment_container
    +from ..exceptions import Error
    +from ..file_utils import write_new
    +
    +
    +@register_daemon_form
    +class SNMPGateway(ContainerDaemonForm):
    +    """Defines an SNMP gateway between Prometheus and SNMP monitoring Frameworks"""
    +
    +    daemon_type = 'snmp-gateway'
    +    SUPPORTED_VERSIONS = ['V2c', 'V3']
    +    default_image = DEFAULT_SNMP_GATEWAY_IMAGE
    +    DEFAULT_PORT = 9464
    +    env_filename = 'snmp-gateway.conf'
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return cls.daemon_type == daemon_type
    +
    +    def __init__(
    +        self,
    +        ctx: CephadmContext,
    +        fsid: str,
    +        daemon_id: Union[int, str],
    +        config_json: Dict[str, Any],
    +        image: Optional[str] = None,
    +    ) -> None:
    +        self.ctx = ctx
    +        self.fsid = fsid
    +        self.daemon_id = daemon_id
    +        self.image = image or SNMPGateway.default_image
    +
    +        self.uid = config_json.get('uid', 0)
    +        self.gid = config_json.get('gid', 0)
    +
    +        self.destination = config_json.get('destination', '')
    +        self.snmp_version = config_json.get('snmp_version', 'V2c')
    +        self.snmp_community = config_json.get('snmp_community', 'public')
    +        self.log_level = config_json.get('log_level', 'info')
    +        self.snmp_v3_auth_username = config_json.get(
    +            'snmp_v3_auth_username', ''
    +        )
    +        self.snmp_v3_auth_password = config_json.get(
    +            'snmp_v3_auth_password', ''
    +        )
    +        self.snmp_v3_auth_protocol = config_json.get(
    +            'snmp_v3_auth_protocol', ''
    +        )
    +        self.snmp_v3_priv_protocol = config_json.get(
    +            'snmp_v3_priv_protocol', ''
    +        )
    +        self.snmp_v3_priv_password = config_json.get(
    +            'snmp_v3_priv_password', ''
    +        )
    +        self.snmp_v3_engine_id = config_json.get('snmp_v3_engine_id', '')
    +
    +        self.validate()
    +
    +    @classmethod
    +    def init(
    +        cls, ctx: CephadmContext, fsid: str, daemon_id: Union[int, str]
    +    ) -> 'SNMPGateway':
    +        cfgs = fetch_configs(ctx)
    +        assert cfgs  # assert some config data was found
    +        return cls(ctx, fsid, daemon_id, cfgs, ctx.image)
    +
    +    @classmethod
    +    def create(
    +        cls, ctx: CephadmContext, ident: DaemonIdentity
    +    ) -> 'SNMPGateway':
    +        return cls.init(ctx, ident.fsid, ident.daemon_id)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return DaemonIdentity(self.fsid, self.daemon_type, self.daemon_id)
    +
    +    @staticmethod
    +    def get_version(
    +        ctx: CephadmContext, fsid: str, daemon_id: str
    +    ) -> Optional[str]:
    +        """Return the version of the notifier from it's http endpoint"""
    +        path = os.path.join(
    +            ctx.data_dir, fsid, f'snmp-gateway.{daemon_id}', 'unit.meta'
    +        )
    +        try:
    +            with open(path, 'r') as env:
    +                metadata = json.loads(env.read())
    +        except (OSError, json.JSONDecodeError):
    +            return None
    +
    +        ports = metadata.get('ports', [])
    +        if not ports:
    +            return None
    +
    +        try:
    +            with urlopen(f'http://127.0.0.1:{ports[0]}/') as r:
    +                html = r.read().decode('utf-8').split('\n')
    +        except (HTTPError, URLError):
    +            return None
    +
    +        for h in html:
    +            stripped = h.strip()
    +            if stripped.startswith(('<pre>', '<PRE>')) and stripped.endswith(
    +                ('</pre>', '</PRE>')
    +            ):
    +                # <pre>(version=1.2.1, branch=HEAD, revision=7...
    +                return stripped.split(',')[0].split('version=')[1]
    +
    +        return None
    +
    +    @property
    +    def port(self) -> int:
    +        endpoints = fetch_endpoints(self.ctx)
    +        if not endpoints:
    +            return self.DEFAULT_PORT
    +        return endpoints[0].port
    +
    +    def get_daemon_args(self) -> List[str]:
    +        v3_args = []
    +        base_args = [
    +            f'--web.listen-address=:{self.port}',
    +            f'--snmp.destination={self.destination}',
    +            f'--snmp.version={self.snmp_version}',
    +            f'--log.level={self.log_level}',
    +            '--snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl',
    +        ]
    +
    +        if self.snmp_version == 'V3':
    +            # common auth settings
    +            v3_args.extend(
    +                [
    +                    '--snmp.authentication-enabled',
    +                    f'--snmp.authentication-protocol={self.snmp_v3_auth_protocol}',
    +                    f'--snmp.security-engine-id={self.snmp_v3_engine_id}',
    +                ]
    +            )
    +            # authPriv setting is applied if we have a privacy protocol setting
    +            if self.snmp_v3_priv_protocol:
    +                v3_args.extend(
    +                    [
    +                        '--snmp.private-enabled',
    +                        f'--snmp.private-protocol={self.snmp_v3_priv_protocol}',
    +                    ]
    +                )
    +
    +        return base_args + v3_args
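    +    # Illustrative sketch (values hypothetical): an SNMP V3 gateway with a
    +    # privacy protocol configured appends, on top of the base args,
    +    #   --snmp.authentication-enabled
    +    #   --snmp.authentication-protocol=SHA
    +    #   --snmp.security-engine-id=8000C53F00000000
    +    #   --snmp.private-enabled
    +    #   --snmp.private-protocol=AES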
    +
    +    @property
    +    def data_dir(self) -> str:
    +        return os.path.join(
    +            self.ctx.data_dir,
    +            self.ctx.fsid,
    +            f'{self.daemon_type}.{self.daemon_id}',
    +        )
    +
    +    @property
    +    def conf_file_path(self) -> str:
    +        return os.path.join(self.data_dir, self.env_filename)
    +
    +    def create_daemon_conf(self) -> None:
    +        """Creates the environment file holding 'secrets' passed to the snmp-notifier daemon"""
    +        with write_new(self.conf_file_path) as f:
    +            if self.snmp_version == 'V2c':
    +                f.write(f'SNMP_NOTIFIER_COMMUNITY={self.snmp_community}\n')
    +            else:
    +                f.write(
    +                    f'SNMP_NOTIFIER_AUTH_USERNAME={self.snmp_v3_auth_username}\n'
    +                )
    +                f.write(
    +                    f'SNMP_NOTIFIER_AUTH_PASSWORD={self.snmp_v3_auth_password}\n'
    +                )
    +                if self.snmp_v3_priv_password:
    +                    f.write(
    +                        f'SNMP_NOTIFIER_PRIV_PASSWORD={self.snmp_v3_priv_password}\n'
    +                    )
    +
    +    def validate(self) -> None:
    +        """Validate the settings
    +
    +        Raises:
    +            Error: if the fsid doesn't look like an fsid
    +            Error: if the snmp version is not supported
    +            Error: destination IP and port address missing
    +        """
    +        if not is_fsid(self.fsid):
    +            raise Error(f'not a valid fsid: {self.fsid}')
    +
    +        if self.snmp_version not in SNMPGateway.SUPPORTED_VERSIONS:
    +            raise Error(f'not a valid snmp version: {self.snmp_version}')
    +
    +        if not self.destination:
    +            raise Error(
    +                'config is missing destination attribute(<ip>:<port>) of the target SNMP listener'
    +            )
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return self.uid, self.gid
    +
    +    def customize_container_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.append(f'--env-file={self.conf_file_path}')
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        args.extend(self.get_daemon_args())
    diff --git a/src/cephadm/cephadmlib/daemons/tracing.py b/src/cephadm/cephadmlib/daemons/tracing.py
    new file mode 100644
    index 000000000000..4cf743394556
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/daemons/tracing.py
    @@ -0,0 +1,117 @@
    +import logging
    +
    +from typing import Any, Dict, List, Tuple
    +
    +from ceph.cephadm.images import (
    +    DEFAULT_ELASTICSEARCH_IMAGE,
    +    DEFAULT_JAEGER_AGENT_IMAGE,
    +    DEFAULT_JAEGER_COLLECTOR_IMAGE,
    +    DEFAULT_JAEGER_QUERY_IMAGE,
    +)
    +from ..container_daemon_form import ContainerDaemonForm, daemon_to_container
    +from ..container_types import CephContainer
    +from ..context import CephadmContext
    +from ..context_getters import fetch_configs
    +from ..daemon_form import register as register_daemon_form
    +from ..daemon_identity import DaemonIdentity
    +from ..deployment_utils import to_deployment_container
    +from ..constants import UID_NOBODY, GID_NOGROUP
    +
    +
    +logger = logging.getLogger()
    +
    +
    +@register_daemon_form
    +class Tracing(ContainerDaemonForm):
    +    """Define the configs for the jaeger tracing containers"""
    +
    +    components: Dict[str, Dict[str, Any]] = {
    +        'elasticsearch': {
    +            'image': DEFAULT_ELASTICSEARCH_IMAGE,
    +            'envs': ['discovery.type=single-node'],
    +        },
    +        'jaeger-agent': {
    +            'image': DEFAULT_JAEGER_AGENT_IMAGE,
    +        },
    +        'jaeger-collector': {
    +            'image': DEFAULT_JAEGER_COLLECTOR_IMAGE,
    +        },
    +        'jaeger-query': {
    +            'image': DEFAULT_JAEGER_QUERY_IMAGE,
    +        },
    +    }  # type: ignore
    +
    +    @classmethod
    +    def for_daemon_type(cls, daemon_type: str) -> bool:
    +        return daemon_type in cls.components
    +
    +    @staticmethod
    +    def set_configuration(config: Dict[str, str], daemon_type: str) -> None:
    +        if daemon_type in ['jaeger-collector', 'jaeger-query']:
    +            assert 'elasticsearch_nodes' in config
    +            Tracing.components[daemon_type]['envs'] = [
    +                'SPAN_STORAGE_TYPE=elasticsearch',
    +                f'ES_SERVER_URLS={config["elasticsearch_nodes"]}',
    +            ]
    +        if daemon_type == 'jaeger-agent':
    +            assert 'collector_nodes' in config
    +            Tracing.components[daemon_type]['daemon_args'] = [
    +                f'--reporter.grpc.host-port={config["collector_nodes"]}',
    +                '--processor.jaeger-compact.server-host-port=6799',
    +            ]
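    +    # Illustrative sketch (endpoint hypothetical): for a jaeger-agent daemon
    +    # the fetched config is expected to carry the collector endpoint, e.g.
    +    #   {'collector_nodes': 'host1:14250'}
    +    # which yields the daemon args
    +    #   ['--reporter.grpc.host-port=host1:14250',
    +    #    '--processor.jaeger-compact.server-host-port=6799']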
    +
    +    def __init__(self, ident: DaemonIdentity) -> None:
    +        self._identity = ident
    +        self._configured = False
    +
    +    def _configure(self, ctx: CephadmContext) -> None:
    +        if self._configured:
    +            return
    +        config = fetch_configs(ctx)
    +        # Currently, this method side-effects the class attribute, and that
    +        # is unpleasant. In the future it would be nice to move all of
    +        # set_configuration into _configure and only modify each class's data
    +        # independently
    +        self.set_configuration(config, self.identity.daemon_type)
    +        self._configured = True
    +
    +    @classmethod
    +    def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'Tracing':
    +        return cls(ident)
    +
    +    @property
    +    def identity(self) -> DaemonIdentity:
    +        return self._identity
    +
    +    def container(self, ctx: CephadmContext) -> CephContainer:
    +        ctr = daemon_to_container(ctx, self)
    +        return to_deployment_container(ctx, ctr)
    +
    +    def uid_gid(self, ctx: CephadmContext) -> Tuple[int, int]:
    +        return UID_NOBODY, GID_NOGROUP
    +
    +    def get_daemon_args(self) -> List[str]:
    +        return self.components[self.identity.daemon_type].get(
    +            'daemon_args', []
    +        )
    +
    +    def customize_process_args(
    +        self, ctx: CephadmContext, args: List[str]
    +    ) -> None:
    +        self._configure(ctx)
    +        # earlier code did an explicit check if the daemon type was jaeger-agent
    +        # and would only call get_daemon_args if that was true. However, since
    +        # the function only returns a non-empty list in the case of jaeger-agent
    +        # that check is unnecessary and is not brought over.
    +        args.extend(self.get_daemon_args())
    +
    +    def customize_container_envs(
    +        self, ctx: CephadmContext, envs: List[str]
    +    ) -> None:
    +        self._configure(ctx)
    +        envs.extend(
    +            self.components[self.identity.daemon_type].get('envs', [])
    +        )
    +
    +    def default_entrypoint(self) -> str:
    +        return ''
    diff --git a/src/cephadm/cephadmlib/data_utils.py b/src/cephadm/cephadmlib/data_utils.py
    index 9493a37d00f2..9caef3f72e5f 100644
    --- a/src/cephadm/cephadmlib/data_utils.py
    +++ b/src/cephadm/cephadmlib/data_utils.py
    @@ -4,15 +4,20 @@
     import os
     import re
     import uuid
    +import yaml
    +import logging
     
     from configparser import ConfigParser
     
    -from typing import Dict, Any, Optional
    +from typing import Dict, Any, Optional, Iterable, List
     
     from .constants import DATEFMT, DEFAULT_REGISTRY
     from .exceptions import Error
     
     
    +logger = logging.getLogger()
    +
    +
     def dict_get(
         d: Dict, key: str, default: Any = None, require: bool = False
     ) -> Any:
    @@ -160,17 +165,17 @@ def is_fsid(s):
     def normalize_image_digest(digest: str) -> str:
         """
         Normal case:
    -    >>> normalize_image_digest('ceph/ceph', 'docker.io')
    -    'docker.io/ceph/ceph'
    +    >>> normalize_image_digest('ceph/ceph', 'quay.io')
    +    'quay.io/ceph/ceph'
     
         No change:
    -    >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io')
    +    >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'quay.io')
         'quay.ceph.io/ceph/ceph'
     
    -    >>> normalize_image_digest('docker.io/ubuntu', 'docker.io')
    -    'docker.io/ubuntu'
    +    >>> normalize_image_digest('quay.io/ubuntu', 'quay.io')
    +    'quay.io/ubuntu'
     
    -    >>> normalize_image_digest('localhost/ceph', 'docker.io')
    +    >>> normalize_image_digest('localhost/ceph', 'quay.io')
         'localhost/ceph'
         """
         known_shortnames = [
    @@ -184,8 +189,9 @@ def normalize_image_digest(digest: str) -> str:
         return digest
     
     
    -def get_legacy_config_fsid(cluster, legacy_dir=None):
    -    # type: (str, Optional[str]) -> Optional[str]
    +def get_legacy_config_fsid(
    +    cluster: str, legacy_dir: Optional[str] = None
    +) -> Optional[str]:
         config_file = '/etc/ceph/%s.conf' % cluster
         if legacy_dir is not None:
             config_file = os.path.abspath(legacy_dir + config_file)
    @@ -197,3 +203,78 @@ def get_legacy_config_fsid(cluster, legacy_dir=None):
             ):
                 return config.get('global', 'fsid')
         return None
    +
    +
    +def _extract_host_info_from_applied_spec(
    +    f: Iterable[str],
    +) -> List[Dict[str, str]]:
    +    # overall goal of this function is to go through an applied spec and find
    +    # the hostname (and addr if provided) for each host spec in the applied spec.
    +    # Generally, we should be able to just pass the spec to the mgr module where
    +    # proper yaml parsing can happen, but for host specs in particular we want to
    +    # be able to distribute ssh keys, which requires finding the hostname (and addr
    +    # if possible) for each potential host spec in the applied spec.
    +
    +    specs: List[str] = []
    +    current_spec: str = ''
    +    for line in f:
    +        if re.search(r'^---\s+', line):
    +            if current_spec:
    +                specs.append(current_spec)
    +            current_spec = ''
    +        else:
    +            if line:
    +                current_spec += line
    +    if current_spec:
    +        specs.append(current_spec)
    +
    +    host_specs: List[Dict[str, Any]] = []
    +    for spec in specs:
    +        yaml_data = yaml.safe_load(spec)
    +        if 'service_type' in yaml_data.keys():
    +            if yaml_data['service_type'] == 'host':
    +                host_specs.append(yaml_data)
    +        else:
    +            spec_str = yaml.safe_dump(yaml_data)
    +            logger.error(
    +                f'Failed to pull service_type from spec:\n{spec_str}.'
    +            )
    +
    +    host_dicts = []
    +    for s in host_specs:
    +        host_dict = _extract_host_info_from_spec(s)
    +        # if host_dict is empty here, we failed to pull the hostname
    +        # for the host from the spec. This should have already been logged
    +        # so at this point we just don't want to include it in our output
    +        if host_dict:
    +            host_dicts.append(host_dict)
    +
    +    return host_dicts
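    +# Illustrative sketch (hostnames/addresses hypothetical): given an applied
    +# spec containing
    +#   service_type: host
    +#   hostname: node1
    +#   addr: 10.0.0.1
    +#   ---
    +#   service_type: mon
    +# _extract_host_info_from_applied_spec() returns
    +#   [{'hostname': 'node1', 'addr': '10.0.0.1'}]
    +# since only 'host' specs are kept and non-host specs are ignored.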
    +
    +
    +def _extract_host_info_from_spec(host_spec: Dict[str, Any]) -> Dict[str, str]:
    +    # note:for our purposes here, we only really want the hostname
    +    # and address of the host from each of these specs in order to
    +    # be able to distribute ssh keys. We will later apply the spec
    +    # through the mgr module where proper yaml parsing can be done
    +    # The returned dicts from this function should only contain
    +    # one or two entries, one (required) for hostname, one (optional) for addr
    +    # {
    +    #   hostname: <hostname>
    +    #   addr: <addr>
    +    # }
    +    # if we fail to find the hostname, an empty dict is returned
    +
    +    host_dict = {}  # type: Dict[str, str]
    +    for field in ['hostname', 'addr']:
    +        try:
    +            host_dict[field] = host_spec[field]
    +        except KeyError as e:
    +            logger.error(
    +                f'Error trying to pull {field} from host spec:\n{host_spec}. Got error: {e}'
    +            )
    +
    +    if 'hostname' not in host_dict:
    +        logger.error(f'Could not find hostname in host spec:\n{host_spec}')
    +        return {}
    +    return host_dict
    diff --git a/src/cephadm/cephadmlib/deployment_utils.py b/src/cephadm/cephadmlib/deployment_utils.py
    new file mode 100644
    index 000000000000..908fa979f1a5
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/deployment_utils.py
    @@ -0,0 +1,35 @@
    +import os
    +
    +from .container_types import CephContainer
    +from .context import CephadmContext
    +from cephadmlib.context_getters import fetch_custom_config_files
    +
    +
    +def to_deployment_container(
    +    ctx: CephadmContext, ctr: CephContainer
    +) -> CephContainer:
    +    """Given a standard ceph container instance return a CephContainer
    +    prepared for a deployment as a daemon, having the extra args and
    +    custom configurations added.
    +    NOTE: The `ctr` object is mutated before being returned.
    +    """
    +    if 'extra_container_args' in ctx and ctx.extra_container_args:
    +        ctr.container_args.extend(ctx.extra_container_args)
    +    if 'extra_entrypoint_args' in ctx and ctx.extra_entrypoint_args:
    +        ctr.args.extend(ctx.extra_entrypoint_args)
    +    ccfiles = fetch_custom_config_files(ctx)
    +    if ccfiles:
    +        mandatory_keys = ['mount_path', 'content']
    +        for conf in ccfiles:
    +            if all(k in conf for k in mandatory_keys):
    +                mount_path = conf['mount_path']
    +                assert ctr.identity
    +                file_path = os.path.join(
    +                    ctx.data_dir,
    +                    ctr.identity.fsid,
    +                    'custom_config_files',
    +                    ctr.identity.daemon_name,
    +                    os.path.basename(mount_path),
    +                )
    +                ctr.volume_mounts[file_path] = mount_path
    +    return ctr
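    +# Illustrative sketch (names hypothetical): a custom config entry like
    +#   {'mount_path': '/etc/grafana/certs/cert_file', 'content': '...'}
    +# for daemon 'grafana.host1' results in the bind mount
    +#   <data_dir>/<fsid>/custom_config_files/grafana.host1/cert_file
    +#     -> /etc/grafana/certs/cert_file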
    diff --git a/src/cephadm/cephadmlib/exceptions.py b/src/cephadm/cephadmlib/exceptions.py
    index 0d215fdd3325..762ce7821271 100644
    --- a/src/cephadm/cephadmlib/exceptions.py
    +++ b/src/cephadm/cephadmlib/exceptions.py
    @@ -19,3 +19,16 @@ class UnauthorizedRegistryError(Error):
     
     class PortOccupiedError(Error):
         pass
    +
    +
    +class DaemonStartException(Exception):
    +    """
    +    Special exception type we raise when the
    +    systemctl start command fails during daemon
    +    deployment. Necessary because the cephadm mgr module
    +    needs to handle this case differently than a failure
    +    earlier in the deploy process where no attempt was made
    +    to actually start the daemon
    +    """
    +
    +    pass
    diff --git a/src/cephadm/cephadmlib/file_utils.py b/src/cephadm/cephadmlib/file_utils.py
    index 7c9e6f69e434..27e70e317563 100644
    --- a/src/cephadm/cephadmlib/file_utils.py
    +++ b/src/cephadm/cephadmlib/file_utils.py
    @@ -52,8 +52,9 @@ def write_new(
         os.rename(tempname, destination)
     
     
    -def populate_files(config_dir, config_files, uid, gid):
    -    # type: (str, Dict, int, int) -> None
    +def populate_files(
    +    config_dir: str, config_files: Dict, uid: int, gid: int
    +) -> None:
         """create config files for different services"""
         for fname in config_files:
             config_file = os.path.join(config_dir, fname)
    @@ -71,8 +72,7 @@ def touch(
             os.chown(file_path, uid, gid)
     
     
    -def write_tmp(s, uid, gid):
    -    # type: (str, int, int) -> IO[str]
    +def write_tmp(s: str, uid: int, gid: int) -> IO[str]:
         tmp_f = tempfile.NamedTemporaryFile(mode='w', prefix='ceph-tmp')
         os.fchown(tmp_f.fileno(), uid, gid)
         tmp_f.write(s)
    @@ -81,14 +81,13 @@ def write_tmp(s, uid, gid):
         return tmp_f
     
     
    -def makedirs(dir, uid, gid, mode):
    -    # type: (str, int, int, int) -> None
    -    if not os.path.exists(dir):
    -        os.makedirs(dir, mode=mode)
    +def makedirs(dest: Union[Path, str], uid: int, gid: int, mode: int) -> None:
    +    if not os.path.exists(dest):
    +        os.makedirs(dest, mode=mode)
         else:
    -        os.chmod(dir, mode)
    -    os.chown(dir, uid, gid)
    -    os.chmod(dir, mode)  # the above is masked by umask...
    +        os.chmod(dest, mode)
    +    os.chown(dest, uid, gid)
    +    os.chmod(dest, mode)  # the above is masked by umask...
     
     
     def recursive_chown(path: str, uid: int, gid: int) -> None:
    @@ -98,8 +97,7 @@ def recursive_chown(path: str, uid: int, gid: int) -> None:
                 os.chown(os.path.join(dirpath, filename), uid, gid)
     
     
    -def read_file(path_list, file_name=''):
    -    # type: (List[str], str) -> str
    +def read_file(path_list: List[str], file_name: str = '') -> str:
         """Returns the content of the first file found within the `path_list`
     
         :param path_list: list of file paths to search
    @@ -124,14 +122,12 @@ def read_file(path_list, file_name=''):
         return 'Unknown'
     
     
    -def pathify(p):
    -    # type: (str) -> str
    +def pathify(p: str) -> str:
         p = os.path.expanduser(p)
         return os.path.abspath(p)
     
     
    -def get_file_timestamp(fn):
    -    # type: (str) -> Optional[str]
    +def get_file_timestamp(fn: str) -> Optional[str]:
         try:
             mt = os.path.getmtime(fn)
             return datetime.datetime.fromtimestamp(
    @@ -139,3 +135,25 @@ def get_file_timestamp(fn):
             ).strftime(DATEFMT)
         except Exception:
             return None
    +
    +
    +def make_run_dir(fsid: str, uid: int, gid: int) -> None:
    +    makedirs(f'/var/run/ceph/{fsid}', uid, gid, 0o770)
    +
    +
    +def unlink_file(
    +    path: Union[str, Path],
    +    missing_ok: bool = False,
    +    ignore_errors: bool = False,
    +) -> None:
    +    """Wrapper around unlink that can either ignore missing files or all
    +    errors.
    +    """
    +    try:
    +        Path(path).unlink()
    +    except FileNotFoundError:
    +        if not missing_ok and not ignore_errors:
    +            raise
    +    except Exception:
    +        if not ignore_errors:
    +            raise
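    +
    +
    +# A minimal usage sketch (the path below is illustrative only):
    +#
    +#   unlink_file('/var/run/ceph/cephadm.lock', missing_ok=True)
    +#
    +# removes the file if it exists and silently succeeds if it was never created.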
    diff --git a/src/cephadm/cephadmlib/firewalld.py b/src/cephadm/cephadmlib/firewalld.py
    index f47e7e71d4dc..ea035790d652 100644
    --- a/src/cephadm/cephadmlib/firewalld.py
    +++ b/src/cephadm/cephadmlib/firewalld.py
    @@ -14,7 +14,6 @@
     
     
     class Firewalld(object):
    -
         # for specifying ports we should always open when opening
         # ports for a daemon of that type. Main use case is for ports
         # that we should open when deploying the daemon type but that
    @@ -51,26 +50,44 @@ def check(self):
         def enable_service_for(self, svc: str) -> None:
             assert svc, 'service name not provided'
             if not self.available:
    -            logger.debug('Not possible to enable service <%s>. firewalld.service is not available' % svc)
    +            logger.debug(
    +                'Not possible to enable service <%s>. firewalld.service is not available'
    +                % svc
    +            )
                 return
     
             if not self.cmd:
                 raise RuntimeError('command not defined')
     
    -        out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-service', svc], verbosity=CallVerbosity.DEBUG)
    +        out, err, ret = call(
    +            self.ctx,
    +            [self.cmd, '--permanent', '--query-service', svc],
    +            verbosity=CallVerbosity.DEBUG,
    +        )
             if ret:
    -            logger.info('Enabling firewalld service %s in current zone...' % svc)
    -            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-service', svc])
    +            logger.info(
    +                'Enabling firewalld service %s in current zone...' % svc
    +            )
    +            out, err, ret = call(
    +                self.ctx, [self.cmd, '--permanent', '--add-service', svc]
    +            )
                 if ret:
                     raise RuntimeError(
    -                    'unable to add service %s to current zone: %s' % (svc, err))
    +                    'unable to add service %s to current zone: %s'
    +                    % (svc, err)
    +                )
             else:
    -            logger.debug('firewalld service %s is enabled in current zone' % svc)
    +            logger.debug(
    +                'firewalld service %s is enabled in current zone' % svc
    +            )
     
         def open_ports(self, fw_ports):
             # type: (List[int]) -> None
             if not self.available:
    -            logger.debug('Not possible to open ports <%s>. firewalld.service is not available' % fw_ports)
    +            logger.debug(
    +                'Not possible to open ports <%s>. firewalld.service is not available'
    +                % fw_ports
    +            )
                 return
     
             if not self.cmd:
    @@ -78,20 +95,36 @@ def open_ports(self, fw_ports):
     
             for port in fw_ports:
                 tcp_port = str(port) + '/tcp'
    -            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
    +            out, err, ret = call(
    +                self.ctx,
    +                [self.cmd, '--permanent', '--query-port', tcp_port],
    +                verbosity=CallVerbosity.DEBUG,
    +            )
                 if ret:
    -                logger.info('Enabling firewalld port %s in current zone...' % tcp_port)
    -                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--add-port', tcp_port])
    +                logger.info(
    +                    'Enabling firewalld port %s in current zone...' % tcp_port
    +                )
    +                out, err, ret = call(
    +                    self.ctx,
    +                    [self.cmd, '--permanent', '--add-port', tcp_port],
    +                )
                     if ret:
    -                    raise RuntimeError('unable to add port %s to current zone: %s' %
    -                                       (tcp_port, err))
    +                    raise RuntimeError(
    +                        'unable to add port %s to current zone: %s'
    +                        % (tcp_port, err)
    +                    )
                 else:
    -                logger.debug('firewalld port %s is enabled in current zone' % tcp_port)
    +                logger.debug(
    +                    'firewalld port %s is enabled in current zone' % tcp_port
    +                )
     
         def close_ports(self, fw_ports):
             # type: (List[int]) -> None
             if not self.available:
    -            logger.debug('Not possible to close ports <%s>. firewalld.service is not available' % fw_ports)
    +            logger.debug(
    +                'Not possible to close ports <%s>. firewalld.service is not available'
    +                % fw_ports
    +            )
                 return
     
             if not self.cmd:
    @@ -99,13 +132,22 @@ def close_ports(self, fw_ports):
     
             for port in fw_ports:
                 tcp_port = str(port) + '/tcp'
    -            out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--query-port', tcp_port], verbosity=CallVerbosity.DEBUG)
    +            out, err, ret = call(
    +                self.ctx,
    +                [self.cmd, '--permanent', '--query-port', tcp_port],
    +                verbosity=CallVerbosity.DEBUG,
    +            )
                 if not ret:
                     logger.info('Disabling port %s in current zone...' % tcp_port)
    -                out, err, ret = call(self.ctx, [self.cmd, '--permanent', '--remove-port', tcp_port])
    +                out, err, ret = call(
    +                    self.ctx,
    +                    [self.cmd, '--permanent', '--remove-port', tcp_port],
    +                )
                     if ret:
    -                    raise RuntimeError('unable to remove port %s from current zone: %s' %
    -                                       (tcp_port, err))
    +                    raise RuntimeError(
    +                        'unable to remove port %s from current zone: %s'
    +                        % (tcp_port, err)
    +                    )
                     else:
                         logger.info(f'Port {tcp_port} disabled')
                 else:
    diff --git a/src/cephadm/cephadmlib/host_facts.py b/src/cephadm/cephadmlib/host_facts.py
    index 1cfb2ac84d92..387a4a3cb0a2 100644
    --- a/src/cephadm/cephadmlib/host_facts.py
    +++ b/src/cephadm/cephadmlib/host_facts.py
    @@ -719,8 +719,9 @@ def _fetch_apparmor() -> Dict[str, str]:
                         else:
                             summary = {}  # type: Dict[str, int]
                             for line in profiles.split('\n'):
    -                            item, mode = line.split(' ')
    -                            mode = mode.strip('()')
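    +                            # apparmor profile names may contain spaces, so
    +                            # only split off the trailing '(mode)' field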
    +                            mode = line.rsplit(' ', 1)[-1]
    +                            assert mode[0] == '(' and mode[-1] == ')'
    +                            mode = mode[1:-1]
                                 if mode in summary:
                                     summary[mode] += 1
                                 else:
    diff --git a/src/cephadm/cephadmlib/logging.py b/src/cephadm/cephadmlib/logging.py
    index 5e306484b98d..f5893d3a51d1 100644
    --- a/src/cephadm/cephadmlib/logging.py
    +++ b/src/cephadm/cephadmlib/logging.py
    @@ -12,6 +12,10 @@
     from .context import CephadmContext
     from .constants import QUIET_LOG_LEVEL, LOG_DIR
     
    +from cephadmlib.file_utils import write_new
    +
    +from cephadmlib import templating
    +
     
     class _ExcludeErrorsFilter(logging.Filter):
         def filter(self, record: logging.LogRecord) -> bool:
    @@ -145,18 +149,6 @@ def format(self, record: Any) -> str:
     }
     
     
    -_logrotate_data = """# created by cephadm
    -/var/log/ceph/cephadm.log {
    -    rotate 7
    -    daily
    -    compress
    -    missingok
    -    notifempty
    -    su root root
    -}
    -"""
    -
    -
     _VERBOSE_HANDLERS = [
         'console',
         'console_stdout',
    @@ -222,9 +214,7 @@ def cephadm_init_logging(
     
         logger.setLevel(QUIET_LOG_LEVEL)
     
    -    if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
    -        with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
    -            f.write(_logrotate_data)
    +    write_cephadm_logrotate_config(ctx)
     
         for handler in logger.handlers:
             # the following little hack ensures that no matter how cephadm is named
    @@ -239,3 +229,48 @@ def cephadm_init_logging(
             if ctx.verbose and handler.name in _VERBOSE_HANDLERS:
                 handler.setLevel(QUIET_LOG_LEVEL)
         logger.debug('%s\ncephadm %s' % ('-' * 80, args))
    +
    +
    +def write_cephadm_logrotate_config(ctx: CephadmContext) -> None:
    +    if not os.path.exists(ctx.logrotate_dir + '/cephadm'):
    +        with open(ctx.logrotate_dir + '/cephadm', 'w') as f:
    +            cephadm_logrotate_config = templating.render(
    +                ctx, templating.Templates.cephadm_logrotate_config
    +            )
    +            f.write(cephadm_logrotate_config)
    +
    +
    +def write_cluster_logrotate_config(ctx: CephadmContext, fsid: str) -> None:
    +    # logrotate for the cluster
    +    with write_new(ctx.logrotate_dir + f'/ceph-{fsid}', perms=None) as f:
    +        """
    +        See cephadm/cephadmlib/templates/cluster.logrotate.config.j2 to
    +        get a better idea of what this comment is referring to.
    +
    +        This is a bit sloppy in that the killall/pkill will touch all ceph daemons
    +        in all containers, but I don't see an elegant way to send SIGHUP *just* to
    +        the daemons for this cluster.  (1) systemd kill -s will get the signal to
    +        podman, but podman will exit.  (2) podman kill will get the signal to the
    +        first child (bash), but that isn't the ceph daemon.  This is simpler and
    +        should be harmless.
    +        """
    +        targets: List[str] = [
    +            'ceph-mon',
    +            'ceph-mgr',
    +            'ceph-mds',
    +            'ceph-osd',
    +            'ceph-fuse',
    +            'radosgw',
    +            'rbd-mirror',
    +            'cephfs-mirror',
    +            'tcmu-runner',
    +        ]
    +
    +        logrotate_config = templating.render(
    +            ctx,
    +            templating.Templates.cluster_logrotate_config,
    +            fsid=fsid,
    +            targets=targets,
    +        )
    +
    +        f.write(logrotate_config)
    diff --git a/src/cephadm/cephadmlib/runscripts.py b/src/cephadm/cephadmlib/runscripts.py
    new file mode 100644
    index 000000000000..b4f83ab3077f
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/runscripts.py
    @@ -0,0 +1,255 @@
    +import contextlib
    +import json
    +import pathlib
    +import shlex
    +
    +from typing import Any, Dict, Union, List, IO, TextIO, Optional, cast
    +
    +from . import templating
    +from .container_engines import Podman
    +from .container_types import CephContainer, InitContainer, SidecarContainer
    +from .context import CephadmContext
    +from .context_getters import fetch_meta
    +from .daemon_identity import DaemonIdentity, DaemonSubIdentity
    +from .file_utils import write_new
    +from .net_utils import EndPoint
    +
    +
    +# Ideally, all ContainerCommands would be converted to init containers. Until
    +# that is done, one can wrap a CephContainer in a ContainerCommand object and
    +# pass it as a pre- or post-command to run arbitrary container-based
    +# commands in the script.
    +class ContainerCommand:
    +    def __init__(
    +        self,
    +        container: CephContainer,
    +        comment: str = '',
    +        background: bool = False,
    +    ):
    +        self.container = container
    +        self.comment = comment
    +        self.background = background
    +
    +
    +Command = Union[List[str], str, ContainerCommand]
    +
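    +# A usage sketch (the variable names below are illustrative only): wrap an
    +# existing CephContainer in a ContainerCommand and hand it to
    +# write_service_scripts() as a pre-start command, e.g.
    +#
    +#   prime = ContainerCommand(config_ctr, comment='prime config')
    +#   write_service_scripts(ctx, ident, container=main_ctr,
    +#                         pre_start_commands=[prime])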
    +
    +def write_service_scripts(
    +    ctx: CephadmContext,
    +    ident: DaemonIdentity,
    +    *,
    +    container: CephContainer,
    +    init_containers: Optional[List[InitContainer]] = None,
    +    sidecars: Optional[List[SidecarContainer]] = None,
    +    endpoints: Optional[List[EndPoint]] = None,
    +    pre_start_commands: Optional[List[Command]] = None,
    +    post_stop_commands: Optional[List[Command]] = None,
    +    timeout: Optional[int] = None,
    +) -> None:
    +    """Write the scripts that systemd services will call in order to
    +    start/stop/etc components of a cephadm managed daemon. Also writes some
    +    metadata about the service getting deployed.
    +    """
    +    data_dir = pathlib.Path(ident.data_dir(ctx.data_dir))
    +    run_file_path = data_dir / 'unit.run'
    +    meta_file_path = data_dir / 'unit.meta'
    +    post_stop_file_path = data_dir / 'unit.poststop'
    +    stop_file_path = data_dir / 'unit.stop'
    +    image_file_path = data_dir / 'unit.image'
    +    initctr_file_path = data_dir / 'init_containers.run'
    +    # use an ExitStack to make writing the files an all-or-nothing affair. If
    +    # any file fails to write then the write_new'd file will not get renamed
    +    # into place
    +    with contextlib.ExitStack() as estack:
    +        # write out the main file to run (start) a service
    +        runf = estack.enter_context(write_new(run_file_path))
    +        runf.write('set -e\n')
    +        for command in pre_start_commands or []:
    +            _write_command(ctx, runf, command)
    +        _write_container_cmd_to_bash(ctx, runf, container, ident.daemon_name)
    +
    +        # some metadata about the deploy
    +        metaf = estack.enter_context(write_new(meta_file_path))
    +        meta: Dict[str, Any] = fetch_meta(ctx)
    +        meta.update(
    +            {
    +                'memory_request': int(ctx.memory_request)
    +                if ctx.memory_request
    +                else None,
    +                'memory_limit': int(ctx.memory_limit)
    +                if ctx.memory_limit
    +                else None,
    +            }
    +        )
    +        if not meta.get('ports'):
    +            if endpoints:
    +                meta['ports'] = [e.port for e in endpoints]
    +            else:
    +                meta['ports'] = []
    +        metaf.write(json.dumps(meta, indent=4) + '\n')
    +
    +        # init-container commands
    +        if init_containers:
    +            initf = estack.enter_context(write_new(initctr_file_path))
    +            _write_init_containers_script(ctx, initf, init_containers)
    +
    +        # sidecar container scripts
    +        for sidecar in sidecars or []:
    +            assert isinstance(sidecar.identity, DaemonSubIdentity)
    +            script_path = sidecar.identity.sidecar_script(ctx.data_dir)
    +            scsf = estack.enter_context(write_new(script_path))
    +            _write_sidecar_script(
    +                ctx,
    +                scsf,
    +                sidecar,
    +                f'sidecar: {sidecar.identity.subcomponent}',
    +            )
    +
    +        # post-stop command(s)
    +        pstopf = estack.enter_context(write_new(post_stop_file_path))
    +        # this is a fallback to eventually stop any underlying container that
    +        # was not stopped properly by unit.stop; this could happen in very slow
    +        # setups as described in the issue
    +        # https://tracker.ceph.com/issues/58242.
    +        _write_stop_actions(ctx, cast(TextIO, pstopf), container, timeout)
    +        for command in post_stop_commands or []:
    +            _write_command(ctx, pstopf, command)
    +
    +        # stop command(s)
    +        stopf = estack.enter_context(write_new(stop_file_path))
    +        _write_stop_actions(ctx, cast(TextIO, stopf), container, timeout)
    +
    +        if container:
    +            imgf = estack.enter_context(write_new(image_file_path))
    +            imgf.write(container.image + '\n')
    +
    +
    +def _write_container_cmd_to_bash(
    +    ctx: CephadmContext,
    +    file_obj: IO[str],
    +    container: 'CephContainer',
    +    comment: Optional[str] = None,
    +    background: Optional[bool] = False,
    +) -> None:
    +    if comment:
    +        # Sometimes adding a comment, especially if there are multiple containers in one
    +        # unit file, makes it easier to read and grok.
    +        assert '\n' not in comment
    +        file_obj.write(f'# {comment}\n')
    +    # Sometimes, adding `--rm` to a run_cmd doesn't work. Let's remove the container manually
    +    _bash_cmd(
    +        file_obj, container.rm_cmd(old_cname=True), check=False, stderr=False
    +    )
    +    _bash_cmd(file_obj, container.rm_cmd(), check=False, stderr=False)
    +
    +    # Sometimes, `podman rm` doesn't find the container. Then you'll have to add `--storage`
    +    if isinstance(ctx.container_engine, Podman):
    +        _bash_cmd(
    +            file_obj,
    +            container.rm_cmd(storage=True),
    +            check=False,
    +            stderr=False,
    +        )
    +        _bash_cmd(
    +            file_obj,
    +            container.rm_cmd(old_cname=True, storage=True),
    +            check=False,
    +            stderr=False,
    +        )
    +
    +    # container run command
    +    _bash_cmd(file_obj, container.run_cmd(), background=bool(background))
    +
    +
    +def _write_stop_actions(
    +    ctx: CephadmContext,
    +    f: TextIO,
    +    container: 'CephContainer',
    +    timeout: Optional[int],
    +) -> None:
    +    # The following generated script checks whether the container exists
    +    # before stopping it. The exit code indicates success either if the
    +    # container doesn't exist or if it exists and is stopped successfully.
    +    container_exists = f'{ctx.container_engine.path} inspect %s &>/dev/null'
    +    f.write(
    +        f'! {container_exists % container.old_cname} || {" ".join(container.stop_cmd(old_cname=True, timeout=timeout))} \n'
    +    )
    +    f.write(
    +        f'! {container_exists % container.cname} || {" ".join(container.stop_cmd(timeout=timeout))} \n'
    +    )
    +
    +
    +def _write_init_containers_script(
    +    ctx: CephadmContext,
    +    file_obj: IO[str],
    +    init_containers: List[InitContainer],
    +    comment: str = 'start and stop init containers',
    +) -> None:
    +    has_podman_engine = isinstance(ctx.container_engine, Podman)
    +    templating.render_to_file(
    +        file_obj,
    +        ctx,
    +        templating.Templates.init_ctr_run,
    +        init_containers=init_containers,
    +        comment=comment,
    +        has_podman_engine=has_podman_engine,
    +    )
    +
    +
    +def _write_sidecar_script(
    +    ctx: CephadmContext,
    +    file_obj: IO[str],
    +    sidecar: SidecarContainer,
    +    comment: str = '',
    +) -> None:
    +    has_podman_engine = isinstance(ctx.container_engine, Podman)
    +    templating.render_to_file(
    +        file_obj,
    +        ctx,
    +        templating.Templates.sidecar_run,
    +        sidecar=sidecar,
    +        comment=comment,
    +        has_podman_engine=has_podman_engine,
    +    )
    +
    +
    +def _bash_cmd(
    +    fh: IO[str],
    +    cmd: List[str],
    +    check: bool = True,
    +    background: bool = False,
    +    stderr: bool = True,
    +) -> None:
    +    line = ' '.join(shlex.quote(arg) for arg in cmd)
    +    if not check:
    +        line = f'! {line}'
    +    if not stderr:
    +        line = f'{line} 2> /dev/null'
    +    if background:
    +        line = f'{line} &'
    +    fh.write(line)
    +    fh.write('\n')
    +
    +
    +def _write_command(
    +    ctx: CephadmContext,
    +    fh: IO[str],
    +    cmd: Command,
    +) -> None:
    +    """Wrapper func for turning a command list or string into something suitable
    +    for appending to a run script.
    +    """
    +    if isinstance(cmd, list):
    +        _bash_cmd(fh, cmd)
    +    elif isinstance(cmd, ContainerCommand):
    +        _write_container_cmd_to_bash(
    +            ctx,
    +            fh,
    +            cmd.container,
    +            comment=cmd.comment,
    +            background=cmd.background,
    +        )
    +    else:
    +        fh.write(cmd)
    +        if not cmd.endswith('\n'):
    +            fh.write('\n')
    diff --git a/src/cephadm/cephadmlib/sysctl.py b/src/cephadm/cephadmlib/sysctl.py
    index 66a8b0c5ff3e..6c9693ee96ac 100644
    --- a/src/cephadm/cephadmlib/sysctl.py
    +++ b/src/cephadm/cephadmlib/sysctl.py
    @@ -16,10 +16,13 @@
     logger = logging.getLogger()
     
     
    -def install_sysctl(ctx: CephadmContext, fsid: str, daemon: DaemonForm) -> None:
    +def install_sysctl(
    +    ctx: CephadmContext, fsid: str, daemon: DaemonForm
    +) -> None:
         """
         Set up sysctl settings
         """
    +
         def _write(conf: Path, lines: List[str]) -> None:
             lines = [
                 '# created by cephadm',
    @@ -54,11 +57,14 @@ def sysctl_get(ctx: CephadmContext, variable: str) -> Union[str, None]:
         return out or None
     
     
    -def filter_sysctl_settings(ctx: CephadmContext, lines: List[str]) -> List[str]:
    +def filter_sysctl_settings(
    +    ctx: CephadmContext, lines: List[str]
    +) -> List[str]:
         """
         Given a list of sysctl settings, examine the system's current configuration
         and return those which are not currently set as described.
         """
    +
         def test_setting(desired_line: str) -> bool:
             # Remove any comments
             comment_start = desired_line.find('#')
    @@ -67,11 +73,14 @@ def test_setting(desired_line: str) -> bool:
             desired_line = desired_line.strip()
             if not desired_line or desired_line.isspace():
                 return False
    -        setting, desired_value = map(lambda s: s.strip(), desired_line.split('='))
    +        setting, desired_value = map(
    +            lambda s: s.strip(), desired_line.split('=')
    +        )
             if not setting or not desired_value:
                 return False
             actual_value = sysctl_get(ctx, setting)
             return desired_value != actual_value
    +
         return list(filter(test_setting, lines))
     
     
    @@ -81,36 +90,50 @@ def migrate_sysctl_dir(ctx: CephadmContext, fsid: str) -> None:
         This moves it to '/etc/sysctl.d'.
         """
         deprecated_location: str = '/usr/lib/sysctl.d'
    -    deprecated_confs: List[str] = glob(f'{deprecated_location}/90-ceph-{fsid}-*.conf')
    +    deprecated_confs: List[str] = glob(
    +        f'{deprecated_location}/90-ceph-{fsid}-*.conf'
    +    )
         if not deprecated_confs:
             return
     
         file_count: int = len(deprecated_confs)
    -    logger.info(f'Found sysctl {file_count} files in deprecated location {deprecated_location}. Starting Migration.')
    +    logger.info(
    +        f'Found {file_count} sysctl files in deprecated location {deprecated_location}. Starting migration.'
    +    )
         for conf in deprecated_confs:
             try:
                 shutil.move(conf, ctx.sysctl_dir)
                 file_count -= 1
             except shutil.Error as err:
                 if str(err).endswith('already exists'):
    -                logger.warning(f'Destination file already exists. Deleting {conf}.')
    +                logger.warning(
    +                    f'Destination file already exists. Deleting {conf}.'
    +                )
                     try:
                         os.unlink(conf)
                         file_count -= 1
                     except OSError as del_err:
                         logger.warning(f'Could not remove {conf}: {del_err}.')
                 else:
    -                logger.warning(f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}')
    +                logger.warning(
    +                    f'Could not move {conf} from {deprecated_location} to {ctx.sysctl_dir}: {err}'
    +                )
     
         # Log successful migration
         if file_count == 0:
    -        logger.info(f'Successfully migrated sysctl config to {ctx.sysctl_dir}.')
    +        logger.info(
    +            f'Successfully migrated sysctl config to {ctx.sysctl_dir}.'
    +        )
             return
     
         # Log partially successful / unsuccessful migration
         files_processed: int = len(deprecated_confs)
         if file_count < files_processed:
    -        status: str = f'partially successful (failed {file_count}/{files_processed})'
    +        status: str = (
    +            f'partially successful (failed {file_count}/{files_processed})'
    +        )
         elif file_count == files_processed:
             status = 'unsuccessful'
    -    logger.warning(f'Migration of sysctl configuration {status}. You may want to perform a migration manually.')
    +    logger.warning(
    +        f'Migration of sysctl configuration {status}. You may want to perform a migration manually.'
    +    )
    diff --git a/src/cephadm/cephadmlib/systemd.py b/src/cephadm/cephadmlib/systemd.py
    index 69fc8b740868..1956957d457b 100644
    --- a/src/cephadm/cephadmlib/systemd.py
    +++ b/src/cephadm/cephadmlib/systemd.py
    @@ -11,8 +11,7 @@
     logger = logging.getLogger()
     
     
    -def check_unit(ctx, unit_name):
    -    # type: (CephadmContext, str) -> Tuple[bool, str, bool]
    +def check_unit(ctx: CephadmContext, unit_name: str) -> Tuple[bool, str, bool]:
         # NOTE: we ignore the exit code here because systemctl outputs
         # various exit codes based on the state of the service, but the
         # string result is more explicit (and sufficient).
    @@ -56,8 +55,9 @@ def check_unit(ctx, unit_name):
         return (enabled, state, installed)
     
     
    -def check_units(ctx, units, enabler=None):
    -    # type: (CephadmContext, List[str], Optional[Packager]) -> bool
    +def check_units(
    +    ctx: CephadmContext, units: List[str], enabler: Optional[Packager] = None
    +) -> bool:
         for u in units:
             (enabled, state, installed) = check_unit(ctx, u)
             if enabled and state == 'running':
    @@ -68,3 +68,21 @@ def check_units(ctx, units, enabler=None):
                     logger.info('Enabling unit %s' % u)
                     enabler.enable_service(u)
         return False
    +
    +
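    +# Stop a unit, clear any 'failed' state, and disable it so it does not come
    +# back after a reboot. Each systemctl call goes through call() at DEBUG
    +# verbosity, so non-zero exit codes are logged rather than raised.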
    +def terminate_service(ctx: CephadmContext, service_name: str) -> None:
    +    call(
    +        ctx,
    +        ['systemctl', 'stop', service_name],
    +        verbosity=CallVerbosity.DEBUG,
    +    )
    +    call(
    +        ctx,
    +        ['systemctl', 'reset-failed', service_name],
    +        verbosity=CallVerbosity.DEBUG,
    +    )
    +    call(
    +        ctx,
    +        ['systemctl', 'disable', service_name],
    +        verbosity=CallVerbosity.DEBUG,
    +    )
    diff --git a/src/cephadm/cephadmlib/systemd_unit.py b/src/cephadm/cephadmlib/systemd_unit.py
    new file mode 100644
    index 000000000000..d3543174a8df
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/systemd_unit.py
    @@ -0,0 +1,241 @@
    +# systemd_unit.py - creating/managing systemd unit files
    +
    +import contextlib
    +import os
    +import pathlib
    +
    +from typing import IO, List, Optional, Union
    +
    +from . import templating
    +from .call_wrappers import call_throws
    +from .container_engines import Docker, Podman
    +from .context import CephadmContext
    +from .daemon_identity import DaemonIdentity, DaemonSubIdentity
    +from .file_utils import write_new
    +from .logging import write_cluster_logrotate_config
    +
    +
    +_DROP_IN_FILENAME = '99-cephadm.conf'
    +
    +
    +class PathInfo:
    +    """Utility class to map basic service identities, to the paths used by
    +    their corresponding systemd unit files.
    +    """
    +
    +    def __init__(
    +        self,
    +        unit_dir: Union[str, pathlib.Path],
    +        identity: DaemonIdentity,
    +        sidecar_ids: Optional[List[DaemonSubIdentity]] = None,
    +    ) -> None:
    +        self.identity = identity
    +        self.sidecar_ids = sidecar_ids or []
    +
    +        unit_dir = pathlib.Path(unit_dir)
    +        self.default_unit_file = unit_dir / f'ceph-{identity.fsid}@.service'
    +        self.init_ctr_unit_file = unit_dir / identity.init_service_name
    +        self.sidecar_unit_files = {
    +            si: unit_dir / si.sidecar_service_name for si in self.sidecar_ids
    +        }
    +        dname = f'{identity.service_name}.d'
    +        self.drop_in_file = unit_dir / dname / _DROP_IN_FILENAME
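    +        # e.g. (illustrative): default_unit_file -> <unit_dir>/ceph-<fsid>@.service,
    +        # drop_in_file -> <unit_dir>/<identity.service_name>.d/99-cephadm.conf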
    +
    +
    +def _write_drop_in(
    +    dest: IO,
    +    ctx: CephadmContext,
    +    identity: DaemonIdentity,
    +    enable_init_containers: bool,
    +    sidecar_ids: List[DaemonSubIdentity],
    +) -> None:
    +    templating.render_to_file(
    +        dest,
    +        ctx,
    +        templating.Templates.dropin_service,
    +        identity=identity,
    +        enable_init_containers=enable_init_containers,
    +        sidecar_ids=sidecar_ids,
    +    )
    +
    +
    +def _write_init_containers_unit_file(
    +    dest: IO, ctx: CephadmContext, identity: DaemonIdentity
    +) -> None:
    +    has_docker_engine = isinstance(ctx.container_engine, Docker)
    +    has_podman_engine = isinstance(ctx.container_engine, Podman)
    +    templating.render_to_file(
    +        dest,
    +        ctx,
    +        templating.Templates.init_ctr_service,
    +        identity=identity,
    +        has_docker_engine=has_docker_engine,
    +        has_podman_engine=has_podman_engine,
    +        has_podman_split_version=(
    +            has_podman_engine and ctx.container_engine.supports_split_cgroups
    +        ),
    +    )
    +
    +
    +def _write_sidecar_unit_file(
    +    dest: IO,
    +    ctx: CephadmContext,
    +    primary: DaemonIdentity,
    +    sidecar: DaemonSubIdentity,
    +) -> None:
    +    has_docker_engine = isinstance(ctx.container_engine, Docker)
    +    has_podman_engine = isinstance(ctx.container_engine, Podman)
    +    templating.render_to_file(
    +        dest,
    +        ctx,
    +        templating.Templates.sidecar_service,
    +        primary=primary,
    +        sidecar=sidecar,
    +        sidecar_script=sidecar.sidecar_script(ctx.data_dir),
    +        has_docker_engine=has_docker_engine,
    +        has_podman_engine=has_podman_engine,
    +        has_podman_split_version=(
    +            has_podman_engine and ctx.container_engine.supports_split_cgroups
    +        ),
    +    )
    +
    +
    +def _install_extended_systemd_services(
    +    ctx: CephadmContext,
    +    pinfo: PathInfo,
    +    identity: DaemonIdentity,
    +    enable_init_containers: bool = False,
    +) -> None:
    +    """Install the systemd unit files needed for more complex services
    +    that have init containers and/or sidecars.
    +    """
    +    with contextlib.ExitStack() as estack:
    +        # install the unit file to handle running init containers
    +        if enable_init_containers:
    +            icfh = estack.enter_context(
    +                write_new(pinfo.init_ctr_unit_file, perms=None)
    +            )
    +            _write_init_containers_unit_file(icfh, ctx, identity)
    +
    +        # install the unit files to handle running sidecars
    +        sids = []
    +        for si, sup in pinfo.sidecar_unit_files.items():
    +            sufh = estack.enter_context(write_new(sup, perms=None))
    +            _write_sidecar_unit_file(sufh, ctx, identity, si)
    +            sids.append(si)
    +
    +        # create a drop-in to create a relationship between the primary
    +        # service and the init- and sidecar-based services.
    +        if enable_init_containers or sids:
    +            pinfo.drop_in_file.parent.mkdir(parents=True, exist_ok=True)
    +            difh = estack.enter_context(
    +                write_new(pinfo.drop_in_file, perms=None)
    +            )
    +            _write_drop_in(difh, ctx, identity, enable_init_containers, sids)
    +
    +
    +def _get_unit_file(ctx: CephadmContext, fsid: str) -> str:
    +    has_docker_engine = isinstance(ctx.container_engine, Docker)
    +    has_podman_engine = isinstance(ctx.container_engine, Podman)
    +    has_podman_split_version = (
    +        has_podman_engine and ctx.container_engine.supports_split_cgroups
    +    )
    +    return templating.render(
    +        ctx,
    +        templating.Templates.ceph_service,
    +        fsid=fsid,
    +        has_docker_engine=has_docker_engine,
    +        has_podman_engine=has_podman_engine,
    +        has_podman_split_version=has_podman_split_version,
    +    )
    +
    +
    +def _install_base_units(ctx: CephadmContext, fsid: str) -> None:
    +    """
    +    Set up ceph.target and ceph-$fsid.target units.
    +    """
    +    # global unit
    +    existed = os.path.exists(ctx.unit_dir + '/ceph.target')
    +    with write_new(ctx.unit_dir + '/ceph.target', perms=None) as f:
    +        f.write(
    +            '[Unit]\n'
    +            'Description=All Ceph clusters and services\n'
    +            '\n'
    +            '[Install]\n'
    +            'WantedBy=multi-user.target\n'
    +        )
    +    if not existed:
    +        # we disable before enable in case a different ceph.target
    +        # (from the traditional package) is present; while newer
    +        # systemd is smart enough to disable the old
    +        # (/lib/systemd/...) and enable the new (/etc/systemd/...),
    +        # some older versions of systemd error out with EEXIST.
    +        call_throws(ctx, ['systemctl', 'disable', 'ceph.target'])
    +        call_throws(ctx, ['systemctl', 'enable', 'ceph.target'])
    +        call_throws(ctx, ['systemctl', 'start', 'ceph.target'])
    +
    +    # cluster unit
    +    existed = os.path.exists(ctx.unit_dir + '/ceph-%s.target' % fsid)
    +    with write_new(ctx.unit_dir + f'/ceph-{fsid}.target', perms=None) as f:
    +        f.write(
    +            '[Unit]\n'
    +            'Description=Ceph cluster {fsid}\n'
    +            'PartOf=ceph.target\n'
    +            'Before=ceph.target\n'
    +            '\n'
    +            '[Install]\n'
    +            'WantedBy=multi-user.target ceph.target\n'.format(fsid=fsid)
    +        )
    +    if not existed:
    +        call_throws(ctx, ['systemctl', 'enable', 'ceph-%s.target' % fsid])
    +        call_throws(ctx, ['systemctl', 'start', 'ceph-%s.target' % fsid])
    +
    +    # don't overwrite file in order to allow users to manipulate it
    +    if os.path.exists(ctx.logrotate_dir + f'/ceph-{fsid}'):
    +        return
    +
    +    write_cluster_logrotate_config(ctx, fsid)
    +
    +
    +def update_files(
    +    ctx: CephadmContext,
    +    ident: DaemonIdentity,
    +    *,
    +    init_container_ids: Optional[List[DaemonSubIdentity]] = None,
    +    sidecar_ids: Optional[List[DaemonSubIdentity]] = None,
    +) -> None:
    +    _install_base_units(ctx, ident.fsid)
    +    unit = _get_unit_file(ctx, ident.fsid)
    +    pathinfo = PathInfo(ctx.unit_dir, ident, sidecar_ids=sidecar_ids)
    +    with write_new(pathinfo.default_unit_file, perms=None) as f:
    +        f.write(unit)
    +    _install_extended_systemd_services(
    +        ctx, pathinfo, ident, bool(init_container_ids)
    +    )
    +
    +
    +def sidecars_from_dropin(
    +    pathinfo: PathInfo, missing_ok: bool = False
    +) -> PathInfo:
    +    """Read the list of sidecars for a service from the service's drop in file."""
    +    # This is useful in the cases where the sidecars would be determined from
    +    # input data (deployment) but we lack the original deployment data (rm
    +    # daemon).
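    +    # The drop-in itself is generated from templates/dropin.service.j2, which
    +    # emits one 'Wants=<sidecar_service_name>' line per sidecar; this function
    +    # simply reverses that mapping.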
    +    sidecars = []
    +    try:
    +        with open(pathinfo.drop_in_file) as fh:
    +            lines = fh.readlines()
    +    except FileNotFoundError:
    +        if missing_ok:
    +            return pathinfo
    +        raise
    +    for line in lines:
    +        if not line.startswith('Wants='):
    +            continue
    +        for item in line[6:].strip().split():
    +            si, category = DaemonSubIdentity.from_service_name(item)
    +            if category == 'sidecar':
    +                sidecars.append(si)
    +    return PathInfo(
    +        pathinfo.default_unit_file.parent, pathinfo.identity, sidecars
    +    )
    diff --git a/src/cephadm/cephadmlib/templates/agent.service.j2 b/src/cephadm/cephadmlib/templates/agent.service.j2
    new file mode 100644
    index 000000000000..4a494e10d624
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/agent.service.j2
    @@ -0,0 +1,15 @@
    +# generated by cephadm
    +[Unit]
    +Description=cephadm agent for cluster {{agent.fsid}}
    +
    +PartOf=ceph-{{agent.fsid}}.target
    +Before=ceph-{{agent.fsid}}.target
    +
    +[Service]
    +Type=forking
    +ExecStart=/bin/bash {{agent.daemon_dir}}/unit.run
    +Restart=on-failure
    +RestartSec=10s
    +
    +[Install]
    +WantedBy=ceph-{{agent.fsid}}.target
    diff --git a/src/cephadm/cephadmlib/templates/ceph.service.j2 b/src/cephadm/cephadmlib/templates/ceph.service.j2
    new file mode 100644
    index 000000000000..c2c4c778be63
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/ceph.service.j2
    @@ -0,0 +1,43 @@
    +# generated by cephadm
    +[Unit]
    +Description=Ceph %i for {{fsid}}
    +
    +# According to:
    +#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget
    +# these can be removed once ceph-mon will dynamically change network
    +# configuration.
    +After=network-online.target local-fs.target time-sync.target{% if has_docker_engine %} docker.service{% endif %}
    +Wants=network-online.target local-fs.target time-sync.target
    +{%- if has_docker_engine %}
    +Wants=docker.service
    +{%- endif %}
    +
    +PartOf=ceph-{{fsid}}.target
    +Before=ceph-{{fsid}}.target
    +
    +[Service]
    +LimitNOFILE=1048576
    +LimitNPROC=1048576
    +EnvironmentFile=-/etc/environment
    +ExecStart=/bin/bash {{ctx.data_dir}}/{{fsid}}/%i/unit.run
    +ExecStop=-/bin/bash -c 'bash {{ctx.data_dir}}/{{fsid}}/%i/unit.stop'
    +ExecStopPost=-/bin/bash {{ctx.data_dir}}/{{fsid}}/%i/unit.poststop
    +KillMode=none
    +Restart=on-failure
    +RestartSec=10s
    +TimeoutStartSec=200
    +TimeoutStopSec=120
    +StartLimitInterval=30min
    +StartLimitBurst=5
    +{%- if has_podman_engine %}
    +ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid
    +ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid
    +Type=forking
    +PIDFile=%t/%n-pid
    +{%- if has_podman_split_version %}
    +Delegate=yes
    +{%- endif %}
    +{%- endif %}
    +
    +[Install]
    +WantedBy=ceph-{{fsid}}.target
    diff --git a/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2 b/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2
    new file mode 100644
    index 000000000000..b18aaff2196e
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/cephadm.logrotate.config.j2
    @@ -0,0 +1,9 @@
    +# created by cephadm
    +/var/log/ceph/cephadm.log {
    +    rotate 7
    +    daily
    +    compress
    +    missingok
    +    notifempty
    +    su root root
    +}
    diff --git a/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2 b/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2
    new file mode 100644
    index 000000000000..9af2f955d905
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/cluster.logrotate.config.j2
    @@ -0,0 +1,13 @@
    +# created by cephadm
    +/var/log/ceph/{{ fsid }}/*.log {
    +    rotate 7
    +    daily
    +    compress
    +    sharedscripts
    +    postrotate
    +        killall -q -1 {{ targets|join(' ') }} || pkill -1 -x '{{ targets|join('|') }}' || true
    +    endscript
    +    missingok
    +    notifempty
    +    su root root
    +}
    diff --git a/src/cephadm/cephadmlib/templates/dropin.service.j2 b/src/cephadm/cephadmlib/templates/dropin.service.j2
    new file mode 100644
    index 000000000000..6e261f33beaf
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/dropin.service.j2
    @@ -0,0 +1,9 @@
    +# generated by cephadm
    +[Unit]
    +{%- if enable_init_containers %}
    +Wants={{ identity.init_service_name }}
    +{%- endif %}
    +{%- for sidecar in sidecar_ids %}
    +Wants={{ sidecar.sidecar_service_name }}
    +{%- endfor %}
    +
    diff --git a/src/cephadm/cephadmlib/templates/init_containers.run.j2 b/src/cephadm/cephadmlib/templates/init_containers.run.j2
    new file mode 100644
    index 000000000000..b93b7ac1aef5
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/init_containers.run.j2
    @@ -0,0 +1,57 @@
    +#!/bin/sh
    +# {{ comment }}
    +
    +set -e
    +
    +stop_all_init_containers() {
    +    {%- for ic in init_containers %}
    +    # stop init container {{ loop.index0 }}: {{ ic.cname }}
    +    ! {{ ic.stop_cmd() | map('shellquote') | join(' ') }}
    +    {%- endfor %}
    +    return 0
    +}
    +
    +rm_all_init_containers() {
    +    {%- for ic in init_containers %}
    +    # remove init container {{ loop.index0 }}: {{ ic.cname }}
    +    ! {{ ic.rm_cmd() | map('shellquote') | join(' ') }} 2> /dev/null
    +    {%- if has_podman_engine %}
    +    ! {{ ic.rm_cmd(storage=True) | map('shellquote') | join(' ') }} 2> /dev/null
    +    {%- endif %}
    +    {%- endfor %}
    +    return 0
    +}
    +
    +has_running_init_container() {
    +    {%- for ic in init_containers %}
    +    if {{ ctx.container_engine.path }} inspect {{ ic.cname | shellquote }} &>/dev/null; then return 0; fi
    +    {%- endfor %}
    +    return 1
    +}
    +
    +run_init_containers() {
    +    {%- for ic in init_containers %}
    +    # run init container {{ loop.index0 }}: {{ ic.cname }}
    +    {{ ic.run_cmd() | map('shellquote') | join(' ') }}
    +    # clean up {{ ic.cname }}
    +    ! {{ ic.rm_cmd() | map('shellquote') | join(' ') }} 2> /dev/null
    +    {%- if has_podman_engine %}
    +    ! {{ ic.rm_cmd(storage=True) | map('shellquote') | join(' ') }} 2> /dev/null
    +    {%- endif %}
    +    {%- endfor %}
    +    return 0
    +}
    +
    +if [ "$1" = stop ] || [ "$1" = poststop ]; then
    +    stop_all_init_containers
    +    if has_running_init_container; then
    +        exit 1
    +    fi
    +    exit 0
    +fi
    +
    +# init container cleanup
    +rm_all_init_containers
    +
    +run_init_containers
    +exit 0
    diff --git a/src/cephadm/cephadmlib/templates/init_ctr.service.j2 b/src/cephadm/cephadmlib/templates/init_ctr.service.j2
    new file mode 100644
    index 000000000000..6bf4304e38c0
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/init_ctr.service.j2
    @@ -0,0 +1,39 @@
    +# generated by cephadm
    +[Unit]
    +Description=Ceph Init Containers for %i on {{ identity.fsid }}
    +After=network-online.target local-fs.target time-sync.target
    +Wants=network-online.target local-fs.target time-sync.target
    +{%- if has_docker_engine %}
    +After=docker.service
    +Wants=docker.service
    +{%- endif %}
    +Before=ceph-{{ identity.fsid }}@%i.service
    +
    +PartOf=ceph-{{ identity.fsid }}.target
    +Before=ceph-{{ identity.fsid }}.target
    +
    +[Service]
    +LimitNOFILE=1048576
    +LimitNPROC=1048576
    +EnvironmentFile=-/etc/environment
    +ExecStart=/bin/bash {{ ctx.data_dir }}/{{ identity.fsid }}/%i/init_containers.run
    +ExecStop=/bin/bash {{ ctx.data_dir }}/{{ identity.fsid }}/%i/init_containers.run stop
    +ExecStopPost=-/bin/bash {{ ctx.data_dir }}/{{ identity.fsid }}/%i/init_containers.run  poststop
    +# FIXME: Disable Restart on oneshot service. systemd versions before v224
    +# did not allow Restart=on-failure with a oneshot service. Having it set
    +# prevents the service from starting on centos8. Disable it for now and
    +# revisit this at a later time.
    +#Restart=on-failure
    +#RestartSec=10s
    +TimeoutStopSec=120
    +StartLimitInterval=30min
    +StartLimitBurst=5
    +Type=oneshot
    +RemainAfterExit=yes
    +{%- if has_podman_split_version %}
    +Delegate=yes
    +{%- endif %}
    +
    +[Install]
    +WantedBy=ceph-{{ identity.fsid }}.target
    +
    diff --git a/src/cephadm/cephadmlib/templates/sidecar.run.j2 b/src/cephadm/cephadmlib/templates/sidecar.run.j2
    new file mode 100644
    index 000000000000..b3e4ecdaba3b
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/sidecar.run.j2
    @@ -0,0 +1,17 @@
    +#!/bin/sh
    +# {{ comment }}
    +
    +set -e
    +if [ "$1" = stop ] || [ "$1" = poststop ]; then
    +    ! {{ sidecar.stop_cmd() | map('shellquote') | join(' ') }}
    +    ! {{ ctx.container_engine.path }} inspect {{ sidecar.cname | shellquote }} &>/dev/null
    +    exit $?
    +fi
    +
    +! {{ sidecar.rm_cmd() | map('shellquote') | join(' ') }} 2> /dev/null
    +{%- if has_podman_engine %}
    +! {{ sidecar.rm_cmd(storage=True) | map('shellquote') | join(' ') }} 2> /dev/null
    +{%- endif %}
    +
    +exec {{ sidecar.run_cmd() | map('shellquote') | join(' ') }}
    +
    diff --git a/src/cephadm/cephadmlib/templates/sidecar.service.j2 b/src/cephadm/cephadmlib/templates/sidecar.service.j2
    new file mode 100644
    index 000000000000..62d7337be8c1
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templates/sidecar.service.j2
    @@ -0,0 +1,41 @@
    +# generated by cephadm
    +[Unit]
    +Description=Ceph sidecar %i for {{ sidecar.fsid }}
    +After=network-online.target local-fs.target time-sync.target
    +Wants=network-online.target local-fs.target time-sync.target
    +{%- if has_docker_engine %}
    +After=docker.service
    +Wants=docker.service
    +{%- endif %}
    +After={{ primary.service_name }}
    +
    +PartOf=ceph-{{ sidecar.fsid }}.target
    +Before=ceph-{{ sidecar.fsid }}.target
    +
    +[Service]
    +LimitNOFILE=1048576
    +LimitNPROC=1048576
    +EnvironmentFile=-/etc/environment
    +ExecStart=/bin/bash {{ sidecar_script }} start
    +ExecStop=/bin/bash {{ sidecar_script }} stop
    +ExecStopPost=-/bin/bash {{ sidecar_script }} poststop
    +KillMode=none
    +Restart=on-failure
    +RestartSec=10s
    +TimeoutStartSec=200
    +TimeoutStopSec=120
    +StartLimitInterval=30min
    +StartLimitBurst=5
    +{%- if has_podman_engine %}
    +ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid
    +ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid
    +Type=forking
    +PIDFile=%t/%n-pid
    +{%- if has_podman_split_version %}
    +Delegate=yes
    +{%- endif %}
    +{%- endif %}
    +
    +[Install]
    +WantedBy=ceph-{{ sidecar.fsid }}.target
    +
    diff --git a/src/cephadm/cephadmlib/templating.py b/src/cephadm/cephadmlib/templating.py
    new file mode 100644
    index 000000000000..04a40cf0afd5
    --- /dev/null
    +++ b/src/cephadm/cephadmlib/templating.py
    @@ -0,0 +1,180 @@
    +# templating.py - functions to wrap string/file templating libs
    +
    +import enum
    +import os
    +import posixpath
    +import shlex
    +import zipimport
    +
    +from typing import Any, Optional, IO, Tuple, Callable, cast
    +
    +import jinja2
    +import jinja2.loaders
    +
    +from .context import CephadmContext
    +
    +_PKG = __name__.rsplit('.', 1)[0]
    +_DIR = 'templates'
    +
    +
    +class Templates(str, enum.Enum):
    +    """Known template files."""
    +
    +    ceph_service = 'ceph.service.j2'
    +    agent_service = 'agent.service.j2'
    +    dropin_service = 'dropin.service.j2'
    +    init_ctr_service = 'init_ctr.service.j2'
    +    sidecar_service = 'sidecar.service.j2'
    +    cluster_logrotate_config = 'cluster.logrotate.config.j2'
    +    cephadm_logrotate_config = 'cephadm.logrotate.config.j2'
    +    sidecar_run = 'sidecar.run.j2'
    +    init_ctr_run = 'init_containers.run.j2'
    +
    +    def __str__(self) -> str:
    +        return self.value
    +
    +    def __repr__(self) -> str:
    +        return repr(self.value)
    +
    +
    +class TemplateNotFoundInZipApp(jinja2.TemplateNotFound):
    +    def __init__(
    +        self,
    +        template: str,
    +        *,
    +        relative_path: str = '',
    +        archive_path: str = '',
    +    ) -> None:
    +        super().__init__(template)
    +        self.relative_path = relative_path
    +        self.archive_path = archive_path
    +
    +    def __str__(self) -> str:
    +        return (
    +            f'{self.message}: path {self.relative_path!r}'
    +            f' not found in {self.archive_path!r}'
    +        )
    +
    +
    +class _PackageLoader(jinja2.PackageLoader):
    +    """Workaround for PackageLoader when using cephadm with relative paths.
    +
    +    It was found that running the cephadm zipapp from a local dir (like:
    +    `./cephadm`) instead of an absolute path (like: `/usr/sbin/cephadm`) caused
    +    the PackageLoader to fail to load the template.  After investigation it was
    +    found to relate to how the PackageLoader tries to normalize paths and yet
    +    the zipimporter type did not have a normalized path (/home/foo/./cephadm
    +    and /home/foo/cephadm respectively).  When a full absolute path is passed
    +    to zipimporter's get_data method it uses the (non-normalized) .archive
    +    property to strip the prefix from the argument. When the argument is a
    +    normalized path, the prefix fails to match and is not stripped, and the
    +    full path then fails to match any value in the archive.
    +
    +    This shim subclass of jinja2.PackageLoader customizes the code path used to
    +    load files from the zipimporter so that we do the prefix handling entirely
    +    with normalized paths and pass only relative paths to the zipimporter
    +    function.
    +    """
    +
    +    def __init__(self, pkg: str, dir: str) -> None:
    +        super().__init__(pkg, dir)
    +        # see the comment in the get_source function below about
    +        # the _loader attribute. This _original_package_name
    +        # attribute is being set up for dealing with the same
    +        # old jinja2 version that comment references.
    +        self._original_package_name = pkg
    +
    +    def get_source(
    +        self, environment: jinja2.Environment, template: str
    +    ) -> Tuple[str, str, Optional[Callable[[], bool]]]:
    +        if not hasattr(self, '_loader'):
    +            # This if-block is intended to only be run when we are using an old
    +            # enough version of jinja2 that there is no `_loader` attribute
    +            # on the jinja2.PackageLoader class. Specifically the one within
    +            # the current rhel 9 RPM for jinja2. In versions that old
    +            # there is instead a "provider" attribute pointing to an
    +            # IResourceProvider object that seems to itself have a loader
    +            # that we can use. See the changes in
    +            # https://github.com/pallets/jinja/pull/1082 to get a feel for
    +            # the before and after we're expecting from the PackageLoader.
    +            # Because of this special case, mypy will complain about
    +            # accessing the provider attribute when run with newer versions
    +            # of Jinja2 that no longer have the attribute. As we generally expect
    +            # to be running unit tests on versions where this is true, this
    +            # additional assertion is needed to make mypy happy.
    +            assert hasattr(self, 'provider')
    +            self._loader = self.provider.loader
    +        if isinstance(self._loader, zipimport.zipimporter):
    +            return self._get_archive_source(template)
    +        return super().get_source(environment, template)
    +
    +    def _get_archive_source(self, template: str) -> Tuple[str, str, None]:
    +        assert isinstance(self._loader, zipimport.zipimporter)
    +        if not hasattr(self, 'package_name'):
    +            self.package_name = self._original_package_name
    +        arelpath = posixpath.join(
    +            self.package_name, self.package_path, template
    +        )
    +        if any(p == '.' or p == '..' for p in arelpath.split(posixpath.sep)):
    +            raise ValueError('template path contains invalid components')
    +        try:
    +            source = cast(bytes, self._loader.get_data(arelpath))
    +        except OSError as e:
    +            not_found = TemplateNotFoundInZipApp(
    +                template,
    +                relative_path=arelpath,
    +                archive_path=self._loader.archive,
    +            )
    +            raise not_found from e
    +        path = os.path.normpath(
    +            posixpath.join(self._loader.archive, arelpath)
    +        )
    +        return source.decode(self.encoding), path, None
    +
    +
    +class Templater:
    +    """Cephadm's generic templater class. Based on jinja2."""
    +
    +    # defaults that can be overridden for testing purposes
    +    # and are lazily acquired
    +    _jinja2_loader: Optional[jinja2.BaseLoader] = None
    +    _jinja2_env: Optional[jinja2.Environment] = None
    +    _pkg = _PKG
    +    _dir = _DIR
    +
    +    @property
    +    def _env(self) -> jinja2.Environment:
    +        if self._jinja2_env is None:
    +            self._jinja2_env = jinja2.Environment(loader=self._loader)
    +            self._jinja2_env.filters['shellquote'] = shlex.quote
    +        return self._jinja2_env
    +
    +    @property
    +    def _loader(self) -> jinja2.BaseLoader:
    +        if self._jinja2_loader is None:
    +            self._jinja2_loader = _PackageLoader(self._pkg, self._dir)
    +        return self._jinja2_loader
    +
    +    def render_str(
    +        self, ctx: CephadmContext, template: str, **kwargs: Any
    +    ) -> str:
    +        return self._env.from_string(template).render(ctx=ctx, **kwargs)
    +
    +    def render(self, ctx: CephadmContext, name: str, **kwargs: Any) -> str:
    +        return self._env.get_template(str(name)).render(ctx=ctx, **kwargs)
    +
    +    def render_to_file(
    +        self, fp: IO, ctx: CephadmContext, name: str, **kwargs: Any
    +    ) -> None:
    +        self._env.get_template(str(name)).stream(ctx=ctx, **kwargs).dump(fp)
    +
    +
    +# create a defaultTemplater instance of the Templater class that will
    +# be used to provide a simple set of methods
    +defaultTemplater = Templater()
    +
    +# alias methods as module-level functions for convenience. Most callers do
    +# not need to care that these are implemented via a class.
    +render_str = defaultTemplater.render_str
    +render = defaultTemplater.render
    +render_to_file = defaultTemplater.render_to_file
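    +
    +
    +# A minimal usage sketch (assumes a valid CephadmContext `ctx`; the fsid and
    +# target list are illustrative only):
    +#
    +#   text = render(ctx, Templates.cluster_logrotate_config,
    +#                 fsid=fsid, targets=['ceph-mon', 'ceph-osd'])
    +#   with open(dest, 'w') as fh:
    +#       render_to_file(fh, ctx, Templates.cephadm_logrotate_config)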
    diff --git a/src/cephadm/samples/custom_container.json b/src/cephadm/samples/custom_container.json
    index 194a44d2abbf..210cf1e3e552 100644
    --- a/src/cephadm/samples/custom_container.json
    +++ b/src/cephadm/samples/custom_container.json
    @@ -1,5 +1,5 @@
     {
    -    "image": "docker.io/prom/alertmanager:v0.20.0",
    +    "image": "quay.io/prometheus/alertmanager:v0.20.0",
         "ports": [9093, 9094],
         "args": [
             "-p", "9093:9093",
    diff --git a/src/cephadm/samples/nfs.json b/src/cephadm/samples/nfs.json
    index 2e6625101dac..876c8e69ef1d 100644
    --- a/src/cephadm/samples/nfs.json
    +++ b/src/cephadm/samples/nfs.json
    @@ -9,6 +9,7 @@
                 "",
                 "%url    rados://nfs-ganesha/nfs-ns/conf-nfs.a",
                 ""
    -        ]
    +        ],
    +        "idmap.conf": ""
         }
     }
    diff --git a/src/cephadm/tests/build/__init__.py b/src/cephadm/tests/build/__init__.py
    new file mode 100644
    index 000000000000..e69de29bb2d1
    diff --git a/src/cephadm/tests/build/test_cephadm_build.py b/src/cephadm/tests/build/test_cephadm_build.py
    new file mode 100644
    index 000000000000..c2995a76d4b1
    --- /dev/null
    +++ b/src/cephadm/tests/build/test_cephadm_build.py
    @@ -0,0 +1,192 @@
    +# tests for building cephadm into a zipapp using build.py
    +#
    +# these should not be run automatically as they require the use of podman,
    +# which should not be assumed to exist on a typical test node
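    +#
    +# a possible manual invocation (assumed, not wired into any automated job):
    +#   python3 -m pytest -v src/cephadm/tests/build/test_cephadm_build.py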
    +
    +import json
    +import os
    +import pathlib
    +import pytest
    +import subprocess
    +import sys
    +
    +
    +CONTAINERS = {
    +    'centos-8': {
    +        'name': 'cephadm-build-test:centos8-py36',
    +        'base_image': 'quay.io/centos/centos:stream8',
    +        'script': 'dnf install -y python36',
    +    },
    +    'centos-9': {
    +        'name': 'cephadm-build-test:centos9-py3',
    +        'base_image': 'quay.io/centos/centos:stream9',
    +        'script': 'dnf install -y python3',
    +    },
    +    'centos-8-plusdeps': {
    +        'name': 'cephadm-build-test:centos8-py36-deps',
    +        'base_image': 'quay.io/centos/centos:stream8',
    +        'script': 'dnf install -y python36 python3-jinja2 python3-pyyaml',
    +    },
    +    'centos-9-plusdeps': {
    +        'name': 'cephadm-build-test:centos9-py3-deps',
    +        'base_image': 'quay.io/centos/centos:stream9',
    +        'script': 'dnf install -y python3 python3-jinja2 python3-pyyaml',
    +    },
    +    'ubuntu-20.04': {
    +        'name': 'cephadm-build-test:ubuntu-20-04-py3',
    +        'base_image': 'quay.io/library/ubuntu:20.04',
    +        'script': 'apt update && apt install -y python3-venv',
    +    },
    +    'ubuntu-22.04': {
    +        'name': 'cephadm-build-test:ubuntu-22-04-py3',
    +        'base_image': 'quay.io/library/ubuntu:22.04',
    +        'script': 'apt update && apt install -y python3-venv',
    +    },
    +}
    +
    +BUILD_PY = 'src/cephadm/build.py'
    +
    +
    +def _print(*args):
    +    """Print with a highlight prefix."""
    +    print('----->', *args)
    +    sys.stdout.flush()
    +
    +
    +def container_cmd(image, cmd, ceph_dir, out_dir):
    +    return [
    +        'podman',
    +        'run',
    +        '--rm',
    +        f'--volume={ceph_dir}:/ceph:ro',
    +        f'--volume={out_dir}:/out',
    +        image,
    +    ] + list(cmd)
    +
    +
    +def run_container_cmd(image, cmd, ceph_dir, out_dir):
    +    full_cmd = container_cmd(image, cmd, ceph_dir, out_dir)
    +    _print("CMD", full_cmd)
    +    return subprocess.run(full_cmd)
    +
    +
    +def build_container(src_image, dst_image, build_script, workdir):
    +    cfile = pathlib.Path(workdir) / 'Dockerfile'
    +    with open(cfile, 'w') as fh:
    +        fh.write(f'FROM {src_image}\n')
    +        fh.write(f'RUN {build_script}\n')
    +    cmd = ['podman', 'build', '-t', str(dst_image), '-f', str(cfile)]
    +    _print("BUILD CMD", cmd)
    +    subprocess.run(cmd, check=True)
    +
    +
    +def build_in(alias, ceph_dir, out_dir, args):
    +    ctr = CONTAINERS[alias]
    +    build_container(ctr['base_image'], ctr['name'], ctr['script'], out_dir)
    +    cmd = ['/ceph/' + BUILD_PY] + list(args or []) + ['/out/cephadm']
    +    return run_container_cmd(ctr['name'], cmd, ceph_dir, out_dir)
    +
    +
    +@pytest.fixture
    +def source_dir():
    +    return pathlib.Path(__file__).parents[4].absolute()
    +
    +
    +@pytest.mark.parametrize(
    +    'env',
    +    [
    +        'centos-8',
    +        'centos-9',
    +        'ubuntu-20.04',
    +        'ubuntu-22.04',
    +    ],
    +)
    +def test_cephadm_build(env, source_dir, tmp_path):
    +    build_in(env, source_dir, tmp_path, [])
    +    binary = tmp_path / 'cephadm'
    +    assert binary.is_file()
    +    res = subprocess.run(
    +        [sys.executable, str(binary), 'version'],
    +        stdout=subprocess.PIPE,
    +    )
    +    out = res.stdout.decode('utf8')
    +    assert 'version' in out
    +    assert 'UNKNOWN' in out
    +    assert res.returncode != 0
    +    res = subprocess.run(
    +        [sys.executable, str(binary), 'version', '--verbose'],
    +        stdout=subprocess.PIPE,
    +    )
    +    data = json.loads(res.stdout)
    +    assert isinstance(data, dict)
    +    assert 'bundled_packages' in data
    +    assert all(v['package_source'] == 'pip' for v in data['bundled_packages'])
    +    assert all(
    +        v['name'] in ('Jinja2', 'MarkupSafe', 'PyYAML')
    +        for v in data['bundled_packages']
    +    )
    +    assert all('requirements_entry' in v for v in data['bundled_packages'])
    +    assert 'zip_root_entries' in data
    +    zre = data['zip_root_entries']
    +    assert any(e.startswith('Jinja2') for e in zre)
    +    assert any(e.startswith('MarkupSafe') for e in zre)
    +    assert any(e.startswith('jinja2') for e in zre)
    +    assert any(e.startswith('markupsafe') for e in zre)
    +    assert any(e.startswith('cephadmlib') for e in zre)
    +    assert any(e.startswith('_cephadmmeta') for e in zre)
    +
    +
    +@pytest.mark.parametrize(
    +    'env',
    +    [
    +        'centos-8-plusdeps',
    +        'centos-9-plusdeps',
    +        'centos-9',
    +    ],
    +)
    +def test_cephadm_build_from_rpms(env, source_dir, tmp_path):
    +    res = build_in(
    +        env,
    +        source_dir,
    +        tmp_path,
    +        ['-Brpm', '-SCEPH_GIT_VER=0', '-SCEPH_GIT_NICE_VER=foobar'],
    +    )
    +    if 'plusdeps' not in env:
    +        assert res.returncode != 0
    +        return
    +    binary = tmp_path / 'cephadm'
    +    if 'centos-8' in env and sys.version_info[:2] >= (3, 10):
    +        # The version of markupsafe in centos 8 is incompatible with
    +        # python>=3.10 due to changes in the stdlib, so we can't execute
    +        # the cephadm binary and must quit the test early.
    +        return
    +    assert binary.is_file()
    +    res = subprocess.run(
    +        [sys.executable, str(binary), 'version'],
    +        stdout=subprocess.PIPE,
    +    )
    +    out = res.stdout.decode('utf8')
    +    assert 'version' in out
    +    assert 'foobar' in out
    +    assert res.returncode == 0
    +    res = subprocess.run(
    +        [sys.executable, str(binary), 'version', '--verbose'],
    +        stdout=subprocess.PIPE,
    +    )
    +    data = json.loads(res.stdout)
    +    assert isinstance(data, dict)
    +    assert 'bundled_packages' in data
    +    assert all(v['package_source'] == 'rpm' for v in data['bundled_packages'])
    +    assert all(
    +        v['name'] in ('Jinja2', 'MarkupSafe', 'PyYAML')
    +        for v in data['bundled_packages']
    +    )
    +    assert all('requirements_entry' in v for v in data['bundled_packages'])
    +    assert 'zip_root_entries' in data
    +    zre = data['zip_root_entries']
    +    assert any(e.startswith('Jinja2') for e in zre)
    +    assert any(e.startswith('MarkupSafe') for e in zre)
    +    assert any(e.startswith('jinja2') for e in zre)
    +    assert any(e.startswith('markupsafe') for e in zre)
    +    assert any(e.startswith('cephadmlib') for e in zre)
    +    assert any(e.startswith('_cephadmmeta') for e in zre)
    diff --git a/src/cephadm/tests/fixtures.py b/src/cephadm/tests/fixtures.py
    index ebed7b3d21ca..572c1f9969d6 100644
    --- a/src/cephadm/tests/fixtures.py
    +++ b/src/cephadm/tests/fixtures.py
    @@ -6,7 +6,7 @@
     from contextlib import contextmanager
     from pyfakefs import fake_filesystem
     
    -from typing import Dict, List, Optional
    +from typing import Dict, List, Optional, Any, Tuple
     
     
     def import_cephadm():
    @@ -17,17 +17,28 @@ def import_cephadm():
     
     
     def mock_docker():
    -    _cephadm = import_cephadm()
    -    docker = mock.Mock(_cephadm.Docker)
    +    from cephadmlib.container_engines import Docker
    +
    +    docker = mock.Mock(Docker)
         docker.path = '/usr/bin/docker'
    +    type(docker).unlimited_pids_option = Docker.unlimited_pids_option
         return docker
     
     
     def mock_podman():
    -    _cephadm = import_cephadm()
    -    podman = mock.Mock(_cephadm.Podman)
    +    from cephadmlib.container_engines import Podman
    +
    +    podman = mock.Mock(Podman)
         podman.path = '/usr/bin/podman'
         podman.version = (2, 1, 0)
    +    # This next little bit of black magic was adapted from the mock docs for
    +    # PropertyMock. We don't use a PropertyMock but the suggestion to call
    +    # type(...) from the doc allows us to "borrow" the real
    +    # supports_split_cgroups attribute:
    +    # https://docs.python.org/3/library/unittest.mock.html#unittest.mock.Mock
    +    type(podman).supports_split_cgroups = Podman.supports_split_cgroups
    +    type(podman).service_args = Podman.service_args
    +    type(podman).unlimited_pids_option = Podman.unlimited_pids_option
         return podman
     
     
    @@ -69,6 +80,13 @@ def cephadm_fs(
         """
         from cephadmlib import constants
     
    +    # the following is a workaround for the fakefs interfering with jinja2's
    +    # package loader when running in the pytest suite with this fixture.
    +    # it effectively maps the real `src/cephadm` directory into the fake fs.
    +    # See: https://pytest-pyfakefs.readthedocs.io/en/stable/usage.html#access-to-files-in-the-real-file-system
    +    srcdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    +    fs.add_real_directory(srcdir)
    +
         uid = os.getuid()
         gid = os.getgid()
     
    @@ -165,3 +183,83 @@ def with_cephadm_ctx(
             else:
                 yield ctx
     
    +
    +@pytest.fixture()
    +def funkypatch(monkeypatch):
    +    """Defines the funkypatch fixtures that acts like a mixture between
    +    mock.patch and pytest's monkeypatch fixture.
    +    """
    +    fp = FunkyPatcher(monkeypatch)
    +    yield fp
    +
    +
    +class FunkyPatcher:
    +    """FunkyPatcher monkeypatches all imported instances of an object.
    +
    +    Use `patch` to patch the canonical location of an object and FunkyPatcher
    +    will automatically replace other imports of that object.
    +    """
    +
    +    def __init__(self, monkeypatcher):
    +        self._mp = monkeypatcher
    +        # keep track of objects we've already patched. this dictionary
    +        # maps a (module-name, object-name) tuple to the original object
    +        # before patching. This could be used to determine if a name has
    +        # already been patched or compare a patched object to the original.
    +        self._originals: Dict[Tuple[str, str], Any] = {}
    +
    +    def patch(
    +        self,
    +        mod: str,
    +        name: str = '',
    +        *,
    +        dest: Any = None,
    +        force: bool = False,
    +    ) -> Any:
    +        """Patch an object and all existing imports of that object.
    +        Specify mod as `my.mod.name.obj` where obj is name of the object to be
    +        patched or as `my.mod.name` and specify `name` as the name of the
    +        object to be patched.
    +        If another module imports the object under a different name than it
    +        has in `mod`, it will *not* be automatically patched there. In other
    +        words, `from my.mod.name import foo` will work, but `from
    +        my.mod.name import foo as _foo` will not.
    +        Use the keyword-only argument `dest` to specify the new object to be
    +        used. A MagicMock will be created and used if dest is None.
    +        Use the keyword-only argument `force` to override checks that mocked
    +        objects are the same across modules. This can be used in the case that
    +        some other code already patched an object and you want funkypatch to
    +        override that patch (use with caution).
    +        Returns the patched object (the MagicMock or supplied dest).
    +        """
    +        import sys
    +        import importlib
    +
    +        if not name:
    +            mod, name = mod.rsplit('.', 1)
    +        modname = (mod, name)
    +        # We don't strictly need the check but patching already patched objs is
    +        # confusing to think about. It's better to block it for now and perhaps
    +        # later we can relax these restrictions or be clever in some way.
    +        if modname in self._originals:
    +            raise KeyError(f'{modname} already patched')
    +
    +        if dest is None:
    +            dest = mock.MagicMock()
    +
    +        imod = importlib.import_module(mod)
    +        self._originals[modname] = getattr(imod, name)
    +
    +        for mname, imod in sys.modules.items():
    +            try:
    +                obj = getattr(imod, name)
    +            except AttributeError:
    +                # no matching name in module
    +                continue
    +            # make sure that the module imported the same object as the
    +            # one we want to patch out (the original object and the one in
    +            # the module must be the very same object), not just something
    +            # with a colliding name
    +            if obj is self._originals[modname] or force:
    +                self._mp.setattr(imod, name, dest)
    +        return dest
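    +
    +
    +# A rough sketch of how a test might use the fixture (test body hypothetical;
    +# the patched target below mirrors how later tests patch call wrappers):
    +#
    +#   def test_something(funkypatch):
    +#       _call = funkypatch.patch('cephadmlib.call_wrappers.call')
    +#       _call.return_value = ('', '', 0)
    +#       # ... exercise the code under test; every module that imported `call`
    +#       # under that name now sees the same MagicMock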
    diff --git a/src/cephadm/tests/test_agent.py b/src/cephadm/tests/test_agent.py
    index 38c35e355830..8e453e3ac3c0 100644
    --- a/src/cephadm/tests/test_agent.py
    +++ b/src/cephadm/tests/test_agent.py
    @@ -69,17 +69,18 @@ def test_agent_deploy_daemon_unit(_call_throws, cephadm_fs):
             _check_file(f'{AGENT_DIR}/unit.meta', json.dumps({'meta': 'data'}, indent=4) + '\n')
     
             # check unit file was created correctly
    -        _check_file(f'{ctx.unit_dir}/{agent.unit_name()}', agent.unit_file())
    +        svcname = agent._service_name()
    +        _check_file(f'{ctx.unit_dir}/{svcname}', agent.unit_file())
     
             expected_call_throws_calls = [
                 mock.call(ctx, ['systemctl', 'daemon-reload']),
    -            mock.call(ctx, ['systemctl', 'enable', '--now', agent.unit_name()]),
    +            mock.call(ctx, ['systemctl', 'enable', '--now', svcname]),
             ]
             _call_throws.assert_has_calls(expected_call_throws_calls)
     
             expected_call_calls = [
    -            mock.call(ctx, ['systemctl', 'stop', agent.unit_name()], verbosity=_cephadm.CallVerbosity.DEBUG),
    -            mock.call(ctx, ['systemctl', 'reset-failed', agent.unit_name()], verbosity=_cephadm.CallVerbosity.DEBUG),
    +            mock.call(ctx, ['systemctl', 'stop', svcname], verbosity=_cephadm.CallVerbosity.DEBUG),
    +            mock.call(ctx, ['systemctl', 'reset-failed', svcname], verbosity=_cephadm.CallVerbosity.DEBUG),
             ]
             _cephadm.call.assert_has_calls(expected_call_calls)
     
    @@ -415,7 +416,7 @@ def test_agent_get_ls(_ls_subset, _ls, cephadm_fs):
     @mock.patch("threading.Event.clear")
     @mock.patch("threading.Event.wait")
     @mock.patch("urllib.request.Request.__init__")
    -@mock.patch("cephadm.urlopen")
    +@mock.patch("cephadmlib.agent.urlopen")
     @mock.patch("cephadm.list_networks")
     @mock.patch("cephadm.HostFacts.dump")
     @mock.patch("cephadm.HostFacts.__init__", lambda _, __: None)
    @@ -530,7 +531,7 @@ class EventCleared(Exception):
                'port': str(open_listener_port)
             }
             _RQ_init.assert_called_with(
    -            f'https://{target_ip}:{target_port}/data/',
    +            f'https://{target_ip}:{target_port}/data',
                 json.dumps(expected_data).encode('ascii'),
                 {'Content-Type': 'application/json'}
             )
    @@ -667,7 +668,7 @@ def recv(self, len: Optional[int] = None):
             agent.mgr_listener.run()
     
             # verify payload was correctly extracted
    -        assert _handle_json_payload.called_with(json.loads(payload))
    +        _handle_json_payload.assert_called_with(json.loads(payload))
             FakeConn.send.assert_called_once_with(b'ACK')
     
             # second run, with bad json data received
    diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py
    index ff474c23ccd9..bbaaf2d39f87 100644
    --- a/src/cephadm/tests/test_cephadm.py
    +++ b/src/cephadm/tests/test_cephadm.py
    @@ -1,5 +1,7 @@
     # type: ignore
     
    +import contextlib
    +import copy
     import errno
     import json
     import mock
    @@ -16,6 +18,7 @@
         with_cephadm_ctx,
         mock_bad_firewalld,
         import_cephadm,
    +    funkypatch,
     )
     
     from pyfakefs import fake_filesystem
    @@ -36,16 +39,14 @@ def get_ceph_conf(
             mon_host = {mon_host}
     '''
     
    -class TestCephAdm(object):
    +@contextlib.contextmanager
    +def bootstrap_test_ctx(*args, **kwargs):
    +    with with_cephadm_ctx(*args, **kwargs) as ctx:
    +        ctx.no_cleanup_on_failure = True
    +        yield ctx
     
    -    def test_docker_unit_file(self):
    -        ctx = _cephadm.CephadmContext()
    -        ctx.container_engine = mock_docker()
    -        r = _cephadm.get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
    -        assert 'Requires=docker.service' in r
    -        ctx.container_engine = mock_podman()
    -        r = _cephadm.get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
    -        assert 'Requires=docker.service' not in r
    +
    +class TestCephAdm(object):
     
         @mock.patch('cephadm.logger')
         def test_attempt_bind(self, _logger):
    @@ -289,7 +290,8 @@ def wrap_test(address, expected):
         @mock.patch('cephadmlib.firewalld.Firewalld', mock_bad_firewalld)
         @mock.patch('cephadm.Firewalld', mock_bad_firewalld)
         @mock.patch('cephadm.logger')
    -    def test_skip_firewalld(self, _logger, cephadm_fs):
    +    @mock.patch('cephadm.json_loads_retry', return_value=None)
    +    def test_skip_firewalld(self, _logger, _jlr, cephadm_fs):
             """
             test --skip-firewalld actually skips changing firewall
             """
    @@ -326,13 +328,17 @@ def test_skip_firewalld(self, _logger, cephadm_fs):
             with pytest.raises(Exception):
                 _cephadm.prepare_dashboard(ctx, 0, 0, lambda _, extra_mounts=None, ___=None : '5', lambda : None)
     
    -    @mock.patch('cephadm.logger')
    -    @mock.patch('cephadm.fetch_custom_config_files')
    -    @mock.patch('cephadm.get_container')
    -    def test_get_deployment_container(self, _get_container, _get_config, _logger):
    +    def test_to_deployment_container(self, funkypatch):
             """
    -        test get_deployment_container properly makes use of extra container args and custom conf files
    +        test to_deployment_container properly makes use of extra container args and custom conf files
             """
    +        from cephadmlib.deployment_utils import to_deployment_container
    +
    +        funkypatch.patch('cephadm.logger')
    +        _get_config = funkypatch.patch(
    +            'cephadmlib.deployment_utils.fetch_custom_config_files'
    +        )
    +        _get_container = funkypatch.patch('cephadm.get_container')
     
             ctx = _cephadm.CephadmContext()
             ctx.config_json = '-'
    @@ -365,31 +371,47 @@ def test_get_deployment_container(self, _get_container, _get_config, _logger):
                 ptrace=False,
                 host_network=True,
             )
    -        c = _cephadm.get_deployment_container(ctx, ident)
    +        c = _cephadm.get_container(ctx, ident)
    +        c = to_deployment_container(ctx, c)
     
             assert '--pids-limit=12345' in c.container_args
             assert '--something' in c.container_args
             assert os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str') in c.volume_mounts
             assert c.volume_mounts[os.path.join('data', '9b9d7609-f4d5-4aba-94c8-effa764d96c9', 'custom_config_files', 'grafana.host1', 'testing.str')] == '/etc/testing.str'
     
    -    @mock.patch('cephadm.logger')
    -    @mock.patch('cephadm.FileLock')
    -    @mock.patch('cephadm.deploy_daemon')
    -    @mock.patch('cephadm.make_var_run')
    -    @mock.patch('cephadm.migrate_sysctl_dir')
    -    @mock.patch('cephadm.check_unit', lambda *args, **kwargs: (None, 'running', None))
    -    @mock.patch('cephadm.get_unit_name', lambda *args, **kwargs: 'mon-unit-name')
    -    @mock.patch('cephadm.extract_uid_gid', lambda *args, **kwargs: (0, 0))
    -    @mock.patch('cephadm.get_deployment_container')
    -    @mock.patch('cephadm.apply_deploy_config_to_ctx', lambda d, c: None)
    -    def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _make_var_run, _deploy_daemon, _file_lock, _logger, monkeypatch):
    +    def test_mon_crush_location(self, funkypatch):
             """
             test that crush location for mon is set if it is included in config_json
             """
    -        _fetch_configs = mock.MagicMock()
    -        monkeypatch.setattr('cephadmlib.context_getters.fetch_configs', _fetch_configs)
    -        monkeypatch.setattr('cephadm.fetch_configs', _fetch_configs)
    -        monkeypatch.setattr('cephadm.read_configuration_source', lambda c: {})
    +        funkypatch.patch('cephadm.logger')
    +        funkypatch.patch('cephadm.FileLock')
    +        _deploy_daemon = funkypatch.patch('cephadm.deploy_daemon')
    +        funkypatch.patch('cephadm.make_var_run')
    +        funkypatch.patch('cephadmlib.file_utils.make_run_dir')
    +        funkypatch.patch('os.mkdir')
    +        _migrate_sysctl = funkypatch.patch('cephadm.migrate_sysctl_dir')
    +        funkypatch.patch(
    +            'cephadm.check_unit',
    +            dest=lambda *args, **kwargs: (None, 'running', None),
    +        )
    +        funkypatch.patch(
    +            'cephadm.get_unit_name',
    +            dest=lambda *args, **kwargs: 'mon-unit-name',
    +        )
    +        funkypatch.patch(
    +            'cephadm.extract_uid_gid', dest=lambda *args, **kwargs: (0, 0)
    +        )
    +        _get_container = funkypatch.patch('cephadm.get_container')
    +        funkypatch.patch(
    +            'cephadm.apply_deploy_config_to_ctx', dest=lambda d, c: None
    +        )
    +        _fetch_configs = funkypatch.patch(
    +            'cephadmlib.context_getters.fetch_configs'
    +        )
    +        funkypatch.patch(
    +            'cephadm.read_configuration_source', dest=lambda c: {}
    +        )
    +        funkypatch.patch('cephadm.fetch_custom_config_files')
     
             ctx = _cephadm.CephadmContext()
             ctx.name = 'mon.test'
    @@ -404,7 +426,7 @@ def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _m
                 'crush_location': 'database=a'
             }
     
    -        _get_deployment_container.return_value = _cephadm.CephContainer.for_daemon(
    +        _get_container.return_value = _cephadm.CephContainer.for_daemon(
                 ctx,
                 ident=_cephadm.DaemonIdentity(
                     fsid='9b9d7609-f4d5-4aba-94c8-effa764d96c9',
    @@ -423,13 +445,12 @@ def test_mon_crush_location(self, _get_deployment_container, _migrate_sysctl, _m
             )
     
             def _crush_location_checker(ctx, ident, container, uid, gid, **kwargs):
    -            print(container.args)
    -            raise Exception(' '.join(container.args))
    +            argval = ' '.join(container.args)
    +            assert '--set-crush-location database=a' in argval
     
             _deploy_daemon.side_effect = _crush_location_checker
    -
    -        with pytest.raises(Exception, match='--set-crush-location database=a'):
    -            _cephadm.command_deploy_from(ctx)
    +        _cephadm.command_deploy_from(ctx)
    +        _deploy_daemon.assert_called()
     
         @mock.patch('cephadm.logger')
         @mock.patch('cephadm.fetch_custom_config_files')
    @@ -520,12 +541,12 @@ def test_registry_login(self, _logger, _get_parm, _call_throws):
     
         def test_get_image_info_from_inspect(self):
             # podman
    -        out = """204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1,[docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992]"""
    +        out = """204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1,[quay.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992]"""
             r = _cephadm.get_image_info_from_inspect(out, 'registry/ceph/ceph:latest')
             print(r)
             assert r == {
                 'image_id': '204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1',
    -            'repo_digests': ['docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992']
    +            'repo_digests': ['quay.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992']
             }
     
             # docker
    @@ -537,37 +558,43 @@ def test_get_image_info_from_inspect(self):
             }
     
             # multiple digests (podman)
    -        out = """e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42,[docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4 docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a]"""
    +        out = """e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42,[quay.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4 quay.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a]"""
             r = _cephadm.get_image_info_from_inspect(out, 'registry/prom/prometheus:latest')
             assert r == {
                 'image_id': 'e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42',
                 'repo_digests': [
    -                'docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4',
    -                'docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a',
    +                'quay.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4',
    +                'quay.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a',
                 ]
             }
     
     
         def test_dict_get(self):
    -        result = _cephadm.dict_get({'a': 1}, 'a', require=True)
    +        from cephadmlib.data_utils import dict_get
    +
    +        result = dict_get({'a': 1}, 'a', require=True)
             assert result == 1
    -        result = _cephadm.dict_get({'a': 1}, 'b')
    +        result = dict_get({'a': 1}, 'b')
             assert result is None
    -        result = _cephadm.dict_get({'a': 1}, 'b', default=2)
    +        result = dict_get({'a': 1}, 'b', default=2)
             assert result == 2
     
         def test_dict_get_error(self):
    +        from cephadmlib.data_utils import dict_get
    +
             with pytest.raises(_cephadm.Error):
    -            _cephadm.dict_get({'a': 1}, 'b', require=True)
    +            dict_get({'a': 1}, 'b', require=True)
     
         def test_dict_get_join(self):
    -        result = _cephadm.dict_get_join({'foo': ['a', 'b']}, 'foo')
    +        from cephadmlib.data_utils import dict_get_join
    +
    +        result = dict_get_join({'foo': ['a', 'b']}, 'foo')
             assert result == 'a\nb'
    -        result = _cephadm.dict_get_join({'foo': [1, 2]}, 'foo')
    +        result = dict_get_join({'foo': [1, 2]}, 'foo')
             assert result == '1\n2'
    -        result = _cephadm.dict_get_join({'bar': 'a'}, 'bar')
    +        result = dict_get_join({'bar': 'a'}, 'bar')
             assert result == 'a'
    -        result = _cephadm.dict_get_join({'a': 1}, 'a')
    +        result = dict_get_join({'a': 1}, 'a')
             assert result == 1
     
         @mock.patch('os.listdir', return_value=[])
    @@ -585,7 +612,7 @@ def test_infer_local_ceph_image(self, _logger, _listdir):
                                      '')
             out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC
             quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC
    -        docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
    +        quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
             with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
                 with mock.patch('cephadm.get_container_info', return_value=cinfo):
                     image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
    @@ -594,7 +621,7 @@ def test_infer_local_ceph_image(self, _logger, _listdir):
             # make sure first valid image is used when no container_info is found
             out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC
             quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC
    -        docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
    +        quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
             with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
                 with mock.patch('cephadm.get_container_info', return_value=None):
                     image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
    @@ -602,12 +629,12 @@ def test_infer_local_ceph_image(self, _logger, _listdir):
     
             # make sure images without digest are discarded (no container_info is found)
             out = '''quay.ceph.io/ceph-ci/ceph@|||
    -        docker.io/ceph/ceph@|||
    -        docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
    +        quay.io/ceph/ceph@|||
    +        quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC'''
             with mock.patch('cephadm.call_throws', return_value=(out, '', '')):
                 with mock.patch('cephadm.get_container_info', return_value=None):
                     image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine)
    -                assert image == 'docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508'
    +                assert image == 'quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508'
     
     
     
    @@ -768,25 +795,101 @@ def test_get_container_info(self, _logger, daemon_filter, by_name, daemon_list,
                 with mock.patch('cephadm.get_container_stats', return_value=container_stats):
                     assert _cephadm.get_container_info(ctx, daemon_filter, by_name) == output
     
    +    @mock.patch('cephadm.list_daemons')
    +    @mock.patch('cephadm.get_container_stats')
    +    @mock.patch('cephadm.get_container_stats_by_image_name')
    +    def test_get_container_info_daemon_down(self, _get_stats_by_name, _get_stats, _list_daemons):
    +        ctx = _cephadm.CephadmContext()
    +        ctx.fsid = '5e39c134-dfc5-11ee-a344-5254000ee071'
    +        ctx.container_engine = mock_podman()
    +
    +        # list_daemons output taken from cephadm ls of an
    +        # OSD that was stopped, with substitutions
    +        # true -> True
    +        # null -> None
    +        down_osd_json = {
    +                "style": "cephadm:v1",
    +                "name": "osd.2",
    +                "fsid": "5e39c134-dfc5-11ee-a344-5254000ee071",
    +                "systemd_unit": "ceph-5e39c134-dfc5-11ee-a344-5254000ee071@osd.2",
    +                "enabled": True,
    +                "state": "stopped",
    +                "service_name": "osd.foo",
    +                "ports": [],
    +                "ip": None,
    +                "deployed_by": [
    +                    "quay.io/adk3798/ceph@sha256:7da0af22ce45aac97dff00125af590506d8e36ab97d78e5175149643562bfb0b"
    +                ],
    +                "rank": None,
    +                "rank_generation": None,
    +                "extra_container_args": None,
    +                "extra_entrypoint_args": None,
    +                "memory_request": None,
    +                "memory_limit": None,
    +                "container_id": None,
    +                "container_image_name": "quay.io/adk3798/ceph@sha256:7da0af22ce45aac97dff00125af590506d8e36ab97d78e5175149643562bfb0b",
    +                "container_image_id": None,
    +                "container_image_digests": None,
    +                "version": None,
    +                "started": None,
    +                "created": "2024-03-11T17:17:49.533757Z",
    +                "deployed": "2024-03-11T17:37:23.520061Z",
    +                "configured": "2024-03-11T17:37:28.494075Z"
    +        }
    +        _list_daemons.return_value = [down_osd_json]
    +        _get_stats_by_name.return_value = (('a03c201ff4080204949932f367545cd381c4acee0d48dbc15f2eac1e35f22318,'
    +                                   '2023-11-28 21:34:38.045413692 +0000 UTC,'),
    +                                   '', 0)
    +
    +        expected_container_info = _cephadm.ContainerInfo(
    +            container_id='',
    +            image_name='quay.io/adk3798/ceph@sha256:7da0af22ce45aac97dff00125af590506d8e36ab97d78e5175149643562bfb0b',
    +            image_id='a03c201ff4080204949932f367545cd381c4acee0d48dbc15f2eac1e35f22318',
    +            start='2023-11-28 21:34:38.045413692 +0000 UTC',
    +            version='')
    +
    +        assert _cephadm.get_container_info(ctx, 'osd.2', by_name=True) == expected_container_info
    +        assert not _get_stats.called, 'only get_container_stats_by_image_name should have been called'
    +
    +        # If there is one down and one up daemon of the same name, it should use the up one
    +        # In this case, we would be using the running container to get the image, so
    +        # all the info will come from the return value of get_container_stats, rather
    +        # than being partially taken from the list_daemons output
    +        up_osd_json = copy.deepcopy(down_osd_json)
    +        up_osd_json['state'] = 'running'
    +        _get_stats.return_value = (('container_id,image_name,image_id,the_past,'), '', 0)
    +        _list_daemons.return_value = [down_osd_json, up_osd_json]
    +
    +        expected_container_info = _cephadm.ContainerInfo(
    +            container_id='container_id',
    +            image_name='image_name',
    +            image_id='image_id',
    +            start='the_past',
    +            version='')
    +
    +        assert _cephadm.get_container_info(ctx, 'osd.2', by_name=True) == expected_container_info
    +
         def test_should_log_to_journald(self):
    +        from cephadmlib import context_getters
    +
             ctx = _cephadm.CephadmContext()
             # explicit
             ctx.log_to_journald = True
    -        assert _cephadm.should_log_to_journald(ctx)
    +        assert context_getters.should_log_to_journald(ctx)
     
             ctx.log_to_journald = None
             # enable if podman support --cgroup=split
             ctx.container_engine = mock_podman()
             ctx.container_engine.version = (2, 1, 0)
    -        assert _cephadm.should_log_to_journald(ctx)
    +        assert context_getters.should_log_to_journald(ctx)
     
             # disable on old podman
             ctx.container_engine.version = (2, 0, 0)
    -        assert not _cephadm.should_log_to_journald(ctx)
    +        assert not context_getters.should_log_to_journald(ctx)
     
             # disable on docker
             ctx.container_engine = mock_docker()
    -        assert not _cephadm.should_log_to_journald(ctx)
    +        assert not context_getters.should_log_to_journald(ctx)
     
         def test_normalize_image_digest(self):
             s = 'myhostname:5000/ceph/ceph@sha256:753886ad9049004395ae990fbb9b096923b5a518b819283141ee8716ddf55ad1'
    @@ -1146,7 +1249,7 @@ def test_enter_failure_1(self, _target_state, _logger, _call, _listdir):
             ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
                 ['host-maintenance', 'enter', '--fsid', TestMaintenance.fsid])
             ctx.container_engine = mock_podman()
    -        retval = _cephadm.command_maintenance(ctx)
    +        retval = _cephadm.change_maintenance_mode(ctx)
             assert retval.startswith('failed')
     
         @mock.patch('os.listdir', return_value=[])
    @@ -1159,7 +1262,7 @@ def test_enter_failure_2(self, _target_state, _logger, _call, _listdir):
             ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
                 ['host-maintenance', 'enter', '--fsid', TestMaintenance.fsid])
             ctx.container_engine = mock_podman()
    -        retval = _cephadm.command_maintenance(ctx)
    +        retval = _cephadm.change_maintenance_mode(ctx)
             assert retval.startswith('failed')
     
         @mock.patch('os.listdir', return_value=[])
    @@ -1174,7 +1277,7 @@ def test_exit_failure_1(self, _target_exists, _target_state, _logger, _call, _li
             ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
                 ['host-maintenance', 'exit', '--fsid', TestMaintenance.fsid])
             ctx.container_engine = mock_podman()
    -        retval = _cephadm.command_maintenance(ctx)
    +        retval = _cephadm.change_maintenance_mode(ctx)
             assert retval.startswith('failed')
     
         @mock.patch('os.listdir', return_value=[])
    @@ -1189,20 +1292,22 @@ def test_exit_failure_2(self, _target_exists, _target_state, _logger, _call, _li
             ctx: _cephadm.CephadmContext = _cephadm.cephadm_init_ctx(
                 ['host-maintenance', 'exit', '--fsid', TestMaintenance.fsid])
             ctx.container_engine = mock_podman()
    -        retval = _cephadm.command_maintenance(ctx)
    +        retval = _cephadm.change_maintenance_mode(ctx)
             assert retval.startswith('failed')
     
     
     class TestMonitoring(object):
    -    @mock.patch('cephadm.call')
    +    @mock.patch('cephadmlib.daemons.monitoring.call')
         def test_get_version_alertmanager(self, _call):
    +        from cephadmlib.daemons import monitoring
    +
             ctx = _cephadm.CephadmContext()
             ctx.container_engine = mock_podman()
             daemon_type = 'alertmanager'
     
             # binary `prometheus`
             _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0
    -        version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
    +        version = monitoring.Monitoring.get_version(ctx, 'container_id', daemon_type)
             assert version == '0.16.1'
     
             # binary `prometheus-alertmanager`
    @@ -1213,13 +1318,15 @@ def test_get_version_alertmanager(self, _call):
             version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
             assert version == '0.16.1'
     
    -    @mock.patch('cephadm.call')
    +    @mock.patch('cephadmlib.daemons.monitoring.call')
         def test_get_version_prometheus(self, _call):
    +        from cephadmlib.daemons import monitoring
    +
             ctx = _cephadm.CephadmContext()
             ctx.container_engine = mock_podman()
             daemon_type = 'prometheus'
             _call.return_value = '', '{}, version 0.16.1'.format(daemon_type), 0
    -        version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
    +        version = monitoring.Monitoring.get_version(ctx, 'container_id', daemon_type)
             assert version == '0.16.1'
     
         def test_prometheus_external_url(self):
    @@ -1228,18 +1335,20 @@ def test_prometheus_external_url(self):
             daemon_type = 'prometheus'
             daemon_id = 'home'
             fsid = 'aaf5a720-13fe-4a3b-82b9-2d99b7fd9704'
    -        args = _cephadm.get_daemon_args(
    +        args = _cephadm.Monitoring.create(
                 ctx, _cephadm.DaemonIdentity(fsid, daemon_type, daemon_id)
    -        )
    +        ).get_daemon_args()
             assert any([x.startswith('--web.external-url=http://') for x in args])
     
    -    @mock.patch('cephadm.call')
    +    @mock.patch('cephadmlib.daemons.monitoring.call')
         def test_get_version_node_exporter(self, _call):
    +        from cephadmlib.daemons import monitoring
    +
             ctx = _cephadm.CephadmContext()
             ctx.container_engine = mock_podman()
             daemon_type = 'node-exporter'
             _call.return_value = '', '{}, version 0.16.1'.format(daemon_type.replace('-', '_')), 0
    -        version = _cephadm.Monitoring.get_version(ctx, 'container_id', daemon_type)
    +        version = monitoring.Monitoring.get_version(ctx, 'container_id', daemon_type)
             assert version == '0.16.1'
     
         def test_create_daemon_dirs_prometheus(self, cephadm_fs):
    @@ -1321,7 +1430,9 @@ def _get_cmd(*args):
     
     ###############################################3
     
    -    def test_config(self, cephadm_fs):
    +    def test_config(self, cephadm_fs, funkypatch):
    +        funkypatch.patch('cephadmlib.systemd.call')
    +
             conf_file = 'foo'
             cmd = self._get_cmd(
                 '--mon-ip', '192.168.1.1',
    @@ -1329,33 +1440,36 @@ def test_config(self, cephadm_fs):
                 '--config', conf_file,
             )
     
    -        with with_cephadm_ctx(cmd) as ctx:
    +        with bootstrap_test_ctx(cmd) as ctx:
                 msg = r'No such file or directory'
                 with pytest.raises(_cephadm.Error, match=msg):
                     _cephadm.command_bootstrap(ctx)
     
             cephadm_fs.create_file(conf_file)
    -        with with_cephadm_ctx(cmd) as ctx:
    +        with bootstrap_test_ctx(cmd) as ctx:
                 retval = _cephadm.command_bootstrap(ctx)
                 assert retval == 0
     
    -    def test_no_mon_addr(self, cephadm_fs):
    +    def test_no_mon_addr(self, cephadm_fs, funkypatch):
    +        funkypatch.patch('cephadmlib.systemd.call')
    +
             cmd = self._get_cmd()
    -        with with_cephadm_ctx(cmd) as ctx:
    +        with bootstrap_test_ctx(cmd) as ctx:
                 msg = r'must specify --mon-ip or --mon-addrv'
                 with pytest.raises(_cephadm.Error, match=msg):
                     _cephadm.command_bootstrap(ctx)
     
    -    def test_skip_mon_network(self, cephadm_fs):
    +    def test_skip_mon_network(self, cephadm_fs, funkypatch):
    +        funkypatch.patch('cephadmlib.systemd.call')
             cmd = self._get_cmd('--mon-ip', '192.168.1.1')
     
    -        with with_cephadm_ctx(cmd, list_networks={}) as ctx:
    +        with bootstrap_test_ctx(cmd, list_networks={}) as ctx:
                 msg = r'--skip-mon-network'
                 with pytest.raises(_cephadm.Error, match=msg):
                     _cephadm.command_bootstrap(ctx)
     
             cmd += ['--skip-mon-network']
    -        with with_cephadm_ctx(cmd, list_networks={}) as ctx:
    +        with bootstrap_test_ctx(cmd, list_networks={}) as ctx:
                 retval = _cephadm.command_bootstrap(ctx)
                 assert retval == 0
     
    @@ -1429,15 +1543,17 @@ def test_skip_mon_network(self, cephadm_fs):
                     True,
                 ),
             ])
    -    def test_mon_ip(self, mon_ip, list_networks, result, cephadm_fs):
    +    def test_mon_ip(self, mon_ip, list_networks, result, cephadm_fs, funkypatch):
    +        funkypatch.patch('cephadmlib.systemd.call')
    +
             cmd = self._get_cmd('--mon-ip', mon_ip)
             if not result:
    -            with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
    +            with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx:
                     msg = r'--skip-mon-network'
                     with pytest.raises(_cephadm.Error, match=msg):
                         _cephadm.command_bootstrap(ctx)
             else:
    -            with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
    +            with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx:
                     retval = _cephadm.command_bootstrap(ctx)
                     assert retval == 0
     
    @@ -1491,31 +1607,35 @@ def test_mon_ip(self, mon_ip, list_networks, result, cephadm_fs):
                     None,
                 ),
             ])
    -    def test_mon_addrv(self, mon_addrv, list_networks, err, cephadm_fs):
    +    def test_mon_addrv(self, mon_addrv, list_networks, err, cephadm_fs, funkypatch):
    +        funkypatch.patch('cephadmlib.systemd.call')
    +
             cmd = self._get_cmd('--mon-addrv', mon_addrv)
             if err:
    -            with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
    +            with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx:
                     with pytest.raises(_cephadm.Error, match=err):
                         _cephadm.command_bootstrap(ctx)
             else:
    -            with with_cephadm_ctx(cmd, list_networks=list_networks) as ctx:
    +            with bootstrap_test_ctx(cmd, list_networks=list_networks) as ctx:
                     retval = _cephadm.command_bootstrap(ctx)
                     assert retval == 0
     
    -    def test_allow_fqdn_hostname(self, cephadm_fs):
    +    def test_allow_fqdn_hostname(self, cephadm_fs, funkypatch):
    +        funkypatch.patch('cephadmlib.systemd.call')
    +
             hostname = 'foo.bar'
             cmd = self._get_cmd(
                 '--mon-ip', '192.168.1.1',
                 '--skip-mon-network',
             )
     
    -        with with_cephadm_ctx(cmd, hostname=hostname) as ctx:
    +        with bootstrap_test_ctx(cmd, hostname=hostname) as ctx:
                 msg = r'--allow-fqdn-hostname'
                 with pytest.raises(_cephadm.Error, match=msg):
                     _cephadm.command_bootstrap(ctx)
     
             cmd += ['--allow-fqdn-hostname']
    -        with with_cephadm_ctx(cmd, hostname=hostname) as ctx:
    +        with bootstrap_test_ctx(cmd, hostname=hostname) as ctx:
                 retval = _cephadm.command_bootstrap(ctx)
                 assert retval == 0
     
    @@ -1525,14 +1645,16 @@ def test_allow_fqdn_hostname(self, cephadm_fs):
                 ('00000000-0000-0000-0000-0000deadbeef', None),
                 ('00000000-0000-0000-0000-0000deadbeez', 'not an fsid'),
             ])
    -    def test_fsid(self, fsid, err, cephadm_fs):
    +    def test_fsid(self, fsid, err, cephadm_fs, funkypatch):
    +        funkypatch.patch('cephadmlib.systemd.call')
    +
             cmd = self._get_cmd(
                 '--mon-ip', '192.168.1.1',
                 '--skip-mon-network',
                 '--fsid', fsid,
             )
     
    -        with with_cephadm_ctx(cmd) as ctx:
    +        with bootstrap_test_ctx(cmd) as ctx:
                 if err:
                     with pytest.raises(_cephadm.Error, match=err):
                         _cephadm.command_bootstrap(ctx)
    @@ -1547,7 +1669,7 @@ def test_fsid(self, cephadm_fs):
             fsid = '00000000-0000-0000-0000-0000deadbeef'
     
             cmd = ['shell', '--fsid', fsid]
    -        with with_cephadm_ctx(cmd) as ctx:
    +        with bootstrap_test_ctx(cmd) as ctx:
                 retval = _cephadm.command_shell(ctx)
                 assert retval == 0
                 assert ctx.fsid == fsid
    @@ -1583,7 +1705,7 @@ def test_name(self, cephadm_fs):
                 retval = _cephadm.command_shell(ctx)
                 assert retval == 0
     
    -        cmd = ['shell', '--name', 'foo.bar']
    +        cmd = ['shell', '--name', 'mgr.bar']
             with with_cephadm_ctx(cmd) as ctx:
                 err = r'must pass --fsid'
                 with pytest.raises(_cephadm.Error, match=err):
    @@ -1591,7 +1713,7 @@ def test_name(self, cephadm_fs):
                     assert retval == 1
     
             fsid = '00000000-0000-0000-0000-0000deadbeef'
    -        cmd = ['shell', '--name', 'foo.bar', '--fsid', fsid]
    +        cmd = ['shell', '--name', 'mgr.bar', '--fsid', fsid]
             with with_cephadm_ctx(cmd) as ctx:
                 retval = _cephadm.command_shell(ctx)
                 assert retval == 0
    @@ -1736,7 +1858,11 @@ def test_keyring(self, cephadm_fs):
     
     
     class TestIscsi:
    -    def test_unit_run(self, cephadm_fs):
    +    def test_unit_run(self, cephadm_fs, funkypatch):
    +        funkypatch.patch(
    +            'cephadmlib.daemons.iscsi.extract_uid_gid'
    +        ).return_value = (123, 123)
    +
             fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
             config_json = {
                     'files': {'iscsi-gateway.cfg': ''}
    @@ -1749,27 +1875,35 @@ def test_unit_run(self, cephadm_fs):
                 _cephadm.get_parm.return_value = config_json
     
                 ident = _cephadm.DaemonIdentity(fsid, 'iscsi', 'daemon_id')
    -            c = _cephadm.get_container(ctx, ident)
    -            _cephadm.make_data_dir(ctx, ident)
    -            _cephadm.deploy_daemon_units(
    -                ctx,
    -                ident,
    -                0, 0,
    -                c,
    -                True, True
    +
    +            _cephadm._deploy_daemon_container(
    +                ctx, ident, [], _cephadm.DeploymentType.DEFAULT
                 )
     
                 with open('/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/unit.run') as f:
    -                assert f.read() == """set -e
    +                contents = f.read()
    +                assert contents == """set -e
     if ! grep -qs /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs /proc/mounts; then mount -t configfs none /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs; fi
    -# iscsi tcmu-runner container
    -! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id-tcmu 2> /dev/null
    -! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu 2> /dev/null
    -/usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/local/scripts/tcmu-runner-entrypoint.sh --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph &
     # iscsi.daemon_id
     ! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi.daemon_id 2> /dev/null
     ! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id 2> /dev/null
     /usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/bin/rbd-target-api --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph
    +"""
    +            with open('/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/sidecar-tcmu.run') as f:
    +                contents = f.read()
    +            assert contents == """#!/bin/sh
    +# sidecar: tcmu
    +
    +set -e
    +if [ "$1" = stop ] || [ "$1" = poststop ]; then
    +    ! /usr/bin/docker stop ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu
    +    ! /usr/bin/docker inspect ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu &>/dev/null
    +    exit $?
    +fi
    +
    +! /usr/bin/docker rm -f ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu 2> /dev/null
    +
    +exec /usr/bin/docker run --rm --ipc=host --stop-signal=SIGTERM --ulimit nofile=1048576 --net=host --entrypoint /usr/local/scripts/tcmu-runner-entrypoint.sh --privileged --group-add=disk --init --name ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9-iscsi-daemon_id-tcmu --pids-limit=0 -e CONTAINER_IMAGE=ceph/ceph -e NODE_NAME=host1 -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/config:/etc/ceph/ceph.conf:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/keyring:/etc/ceph/keyring:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/configfs:/sys/kernel/config -v /var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/iscsi.daemon_id/tcmu-runner-entrypoint.sh:/usr/local/scripts/tcmu-runner-entrypoint.sh -v /var/log/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9:/var/log:z -v /dev:/dev --mount type=bind,source=/lib/modules,destination=/lib/modules,ro=true ceph/ceph
     """
     
         def test_get_container(self):
    @@ -1781,6 +1915,11 @@ def test_get_container(self):
             """
             fsid = '9b9d7609-f4d5-4aba-94c8-effa764d96c9'
             with with_cephadm_ctx(['--image=ceph/ceph'], list_networks={}) as ctx:
    +            ctx.config_blobs = {
    +                'files': {
    +                    'iscsi-gateway.cfg': 'portal',
    +                },
    +            }
                 ctx.fsid = fsid
                 c = _cephadm.get_container(
                     ctx, _cephadm.DaemonIdentity(fsid, 'iscsi', 'something')
    @@ -2116,16 +2255,12 @@ def test_http_validation(self, _logger, _find_executable, values, cephadm_fs):
     
     
     class TestPull:
    -
    -    @mock.patch('time.sleep')
    -    @mock.patch('cephadm.get_image_info_from_inspect', return_value={})
    -    @mock.patch('cephadm.logger')
    -    def test_error(self, _logger, _get_image_info_from_inspect, _sleep, monkeypatch):
    -        # manually create a mock and use pytest's monkeypatch fixture to set
    -        # multiple targets to the *same* mock
    -        _call = mock.MagicMock()
    -        monkeypatch.setattr('cephadm.call', _call)
    -        monkeypatch.setattr('cephadmlib.call_wrappers.call', _call)
    +    def test_error(self, funkypatch):
    +        funkypatch.patch('time.sleep')
    +        funkypatch.patch('cephadm.logger')
    +        _giifi = funkypatch.patch('cephadm.get_image_info_from_inspect')
    +        _giifi.return_value = {}
    +        _call = funkypatch.patch('cephadmlib.call_wrappers.call')
             ctx = _cephadm.CephadmContext()
             ctx.container_engine = mock_podman()
             ctx.insecure = False
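For context, the funkypatch fixture used in the rewritten test replaces the monkeypatch boilerplate that the removed comment describes: setting several import sites to the *same* mock. A minimal sketch of that idea, with illustrative names (the real fixture lives in tests/fixtures.py and may differ):

import sys
from unittest import mock


def patch_everywhere(monkeypatch, dest):
    # Sketch only: replace `dest` ('pkg.module.attr') with one MagicMock in
    # every already-imported module whose same-named attribute is bound to
    # the same object, so code that did `from x import call` sees the mock.
    modname, attr = dest.rsplit('.', 1)
    original = getattr(sys.modules[modname], attr)
    fake = mock.MagicMock()
    for mod in list(sys.modules.values()):
        if mod is not None and getattr(mod, attr, None) is original:
            monkeypatch.setattr(mod, attr, fake)
    return fake

With a helper like this, a single `patch_everywhere(monkeypatch, 'cephadmlib.call_wrappers.call')` covers both cephadm.call and cephadmlib.call_wrappers.call, which is what the removed test body was doing by hand.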
    @@ -2282,7 +2417,7 @@ class TestSNMPGateway:
     
         def test_unit_run_V2c(self, cephadm_fs):
             fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
    -        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
    +        with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
                 import json
                 ctx.config_json = json.dumps(self.V2c_config)
                 ctx.fsid = fsid
    @@ -2307,11 +2442,11 @@ def test_unit_run_V2c(self, cephadm_fs):
                 )
                 with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
                     run_cmd = f.readlines()[-1].rstrip()
    -                assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V2c --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl')
    +                assert run_cmd.endswith('quay.io/ceph/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V2c --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl')
     
         def test_unit_run_V3_noPriv(self, cephadm_fs):
             fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
    -        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
    +        with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
                 import json
                 ctx.config_json = json.dumps(self.V3_no_priv_config)
                 ctx.fsid = fsid
    @@ -2336,11 +2471,11 @@ def test_unit_run_V3_noPriv(self, cephadm_fs):
                 )
                 with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
                     run_cmd = f.readlines()[-1].rstrip()
    -                assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9465 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000')
    +                assert run_cmd.endswith('quay.io/ceph/snmp-notifier:v1.2.1 --web.listen-address=:9465 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000')
     
         def test_unit_run_V3_Priv(self, cephadm_fs):
             fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
    -        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
    +        with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
                 import json
                 ctx.config_json = json.dumps(self.V3_priv_config)
                 ctx.fsid = fsid
    @@ -2365,11 +2500,11 @@ def test_unit_run_V3_Priv(self, cephadm_fs):
                 )
                 with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f:
                     run_cmd = f.readlines()[-1].rstrip()
    -                assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000 --snmp.private-enabled --snmp.private-protocol=DES')
    +                assert run_cmd.endswith('quay.io/ceph/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000 --snmp.private-enabled --snmp.private-protocol=DES')
     
         def test_unit_run_no_dest(self, cephadm_fs):
             fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
    -        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
    +        with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
                 import json
                 ctx.config_json = json.dumps(self.no_destination_config)
                 ctx.fsid = fsid
    @@ -2385,7 +2520,7 @@ def test_unit_run_no_dest(self, cephadm_fs):
     
         def test_unit_run_bad_version(self, cephadm_fs):
             fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6'
    -        with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
    +        with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx:
                 import json
                 ctx.config_json = json.dumps(self.bad_version_config)
                 ctx.fsid = fsid
    diff --git a/src/cephadm/tests/test_container_engine.py b/src/cephadm/tests/test_container_engine.py
    index 7c5ef5131271..49f9f9a2e50a 100644
    --- a/src/cephadm/tests/test_container_engine.py
    +++ b/src/cephadm/tests/test_container_engine.py
    @@ -4,7 +4,7 @@
     
     from tests.fixtures import with_cephadm_ctx, import_cephadm
     
    -_cephadm = import_cephadm()
    +from cephadmlib import container_engines
     
     
     _find_program_loc = 'cephadmlib.container_engine_base.find_program'
    @@ -29,7 +29,7 @@ class PhonyContainerEngine(ContainerEngine):
     def test_podman():
         with mock.patch(_find_program_loc) as find_program:
             find_program.return_value = "/usr/bin/podman"
    -        pm = _cephadm.Podman()
    +        pm = container_engines.Podman()
             find_program.assert_called()
             with pytest.raises(RuntimeError):
                 pm.version
    @@ -44,7 +44,7 @@ def test_podman():
     def test_podman_badversion():
         with mock.patch(_find_program_loc) as find_program:
             find_program.return_value = "/usr/bin/podman"
    -        pm = _cephadm.Podman()
    +        pm = container_engines.Podman()
             find_program.assert_called()
             with mock.patch(_call_throws_loc) as call_throws:
                 call_throws.return_value = ("4.10.beta2", None, None)
    @@ -56,5 +56,5 @@ def test_podman_badversion():
     def test_docker():
         with mock.patch(_find_program_loc) as find_program:
             find_program.return_value = "/usr/bin/docker"
    -        docker = _cephadm.Docker()
    +        docker = container_engines.Docker()
             assert str(docker) == "docker (/usr/bin/docker)"
    diff --git a/src/cephadm/tests/test_custom_container.py b/src/cephadm/tests/test_custom_container.py
    index cff217a84044..197ed38dca3b 100644
    --- a/src/cephadm/tests/test_custom_container.py
    +++ b/src/cephadm/tests/test_custom_container.py
    @@ -47,7 +47,7 @@ def setUp(self):
                         ]
                     ]
                 },
    -            image='docker.io/library/hello-world:latest'
    +            image='quay.io/hello-world/hello-world:latest'
             )
     
         def test_entrypoint(self):
    @@ -72,14 +72,20 @@ def test_get_container_envs(self):
             self.assertEqual(result, ['SECRET=password'])
     
         def test_get_container_mounts(self):
    -        result = self.cc.get_container_mounts('/xyz')
    +        # TODO: get_container_mounts was made private. test the private func for
    +        # now. in the future update to test base class func
    +        # customize_container_mounts
    +        result = self.cc._get_container_mounts('/xyz')
             self.assertDictEqual(result, {
                 '/CONFIG_DIR': '/foo/conf',
                 '/xyz/bar/config': '/bar:ro'
             })
     
         def test_get_container_binds(self):
    -        result = self.cc.get_container_binds('/xyz')
    +        # TODO: get_container_binds was made private. test the private func for
+        # now. in the future update to test base class func
    +        # customize_container_binds
    +        result = self.cc._get_container_binds('/xyz')
             self.assertEqual(result, [
                 [
                     'type=bind',
    @@ -115,6 +121,9 @@ def test_deploy_custom_container(cephadm_fs):
                 '--servers',
                 '192.168.8.42,192.168.8.43,192.168.12.11',
             ]
    +        ctx.config_blobs = {
    +            'envs': ['FOO=1', 'BAR=77'],
    +        }
     
             _cephadm._common_deploy(ctx)
     
    @@ -132,6 +141,8 @@ def test_deploy_custom_container(cephadm_fs):
                 ' --cgroups=split --no-hosts'
                 ' -e CONTAINER_IMAGE=quay.io/foobar/quux:latest'
                 ' -e NODE_NAME=host1'
    +            ' -e FOO=1'
    +            ' -e BAR=77'
                 ' quay.io/foobar/quux:latest'
                 ' --label frobnicationist --servers 192.168.8.42,192.168.8.43,192.168.12.11'
             )
    @@ -210,14 +221,20 @@ def test_deploy_custom_container_and_inits(cephadm_fs):
                 if not l.startswith(('#', 'set', '/usr/bin/podman run'))
             ]), 'remaining commands should be "rms"'
     
    -        idx = runfile_lines.index('# init container cleanup')
    -        assert idx > 0
    -        assert runfile_lines[idx + 1].startswith('! /usr/bin/podman rm')
    -        assert runfile_lines[idx + 2].startswith('! /usr/bin/podman rm')
    +        with open(f'/var/lib/ceph/{fsid}/container.tdccai/init_containers.run') as f:
    +            icfile_lines = f.read().splitlines()
    +
    +        idx = icfile_lines.index('# init container cleanup')
    +        assert idx >= 0
    +        assert any(
    +            l.strip().startswith('! /usr/bin/podman rm')
    +            for l in icfile_lines
    +        )
     
    -        idx = runfile_lines.index('# init container 0: ceph-b01dbeef-701d-9abe-0000-e1e5a47004a7-container-tdccai-init')
    +        slines = [l.strip() for l in icfile_lines]
    +        idx = slines.index('# run init container 0: ceph-b01dbeef-701d-9abe-0000-e1e5a47004a7-container-tdccai-init')
             assert idx > 0
    -        assert runfile_lines[idx + 1] == (
    +        assert slines[idx + 1] == (
                 '/usr/bin/podman run'
                 ' --stop-signal=SIGTERM'
                 ' --entrypoint /usr/local/bin/prepare.sh'
    @@ -226,12 +243,12 @@ def test_deploy_custom_container_and_inits(cephadm_fs):
                 ' -v /var/lib/ceph/b01dbeef-701d-9abe-0000-e1e5a47004a7/container.tdccai/data1:/var/lib/myapp'
                 ' quay.io/foobar/quux:latest'
             )
    -        assert runfile_lines[idx + 2].startswith('! /usr/bin/podman rm')
    -        assert runfile_lines[idx + 3].startswith('! /usr/bin/podman rm')
    +        assert slines[idx + 3].startswith('! /usr/bin/podman rm')
    +        assert slines[idx + 4].startswith('! /usr/bin/podman rm')
     
    -        idx = runfile_lines.index('# init container 1: ceph-b01dbeef-701d-9abe-0000-e1e5a47004a7-container-tdccai-init')
    +        idx = slines.index('# run init container 1: ceph-b01dbeef-701d-9abe-0000-e1e5a47004a7-container-tdccai-init')
             assert idx > 0
    -        assert runfile_lines[idx + 1] == (
    +        assert slines[idx + 1] == (
                 '/usr/bin/podman run'
                 ' --stop-signal=SIGTERM'
                 ' --entrypoint /usr/local/bin/populate.sh'
    @@ -242,5 +259,5 @@ def test_deploy_custom_container_and_inits(cephadm_fs):
                 ' quay.io/foobar/quux:latest'
                 ' --source=https://my.cool.example.com/samples/geo.1.txt'
             )
    -        assert runfile_lines[idx + 2].startswith('! /usr/bin/podman rm')
    -        assert runfile_lines[idx + 3].startswith('! /usr/bin/podman rm')
    +        assert slines[idx + 3].startswith('! /usr/bin/podman rm')
    +        assert slines[idx + 4].startswith('! /usr/bin/podman rm')
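The rewritten assertions above read the new init_containers.run file, locate a '# run init container N: <name>' marker, and check the command that follows it. A small helper in the same spirit (a sketch for illustration, not part of the test suite):

def command_after_marker(lines, marker):
    # Return the stripped line immediately after `marker`, or None if the
    # marker is missing or is the last line of the file.
    stripped = [l.strip() for l in lines]
    if marker not in stripped:
        return None
    idx = stripped.index(marker)
    return stripped[idx + 1] if idx + 1 < len(stripped) else None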
    diff --git a/src/cephadm/tests/test_daemon_form.py b/src/cephadm/tests/test_daemon_form.py
    index 428183aaa3e1..a2d1773f1c84 100644
    --- a/src/cephadm/tests/test_daemon_form.py
    +++ b/src/cephadm/tests/test_daemon_form.py
    @@ -6,6 +6,7 @@
     
     from cephadmlib import daemon_form
     from cephadmlib import daemon_identity
    +from cephadmlib import daemons
     
     _cephadm = import_cephadm()
     
    @@ -22,7 +23,7 @@
             ('mon', _cephadm.Ceph),
             ('nfs', _cephadm.NFSGanesha),
             ('nvmeof', _cephadm.CephNvmeof),
    -        ('osd', _cephadm.OSD),
    +        ('osd', daemons.OSD),
             ('prometheus', _cephadm.Monitoring),
             ('snmp-gateway', _cephadm.SNMPGateway),
         ],
    @@ -61,7 +62,7 @@ def test_is_sysctl_daemon_form(dt, is_sdf):
         assert isinstance(inst, daemon_form.SysctlDaemonForm) == is_sdf
     
     
    -def test_can_create_all_daemon_forms():
    +def test_can_create_all_daemon_forms(monkeypatch):
         uuid = 'daeb985e-58c7-11ee-a536-201e8814f771'
         ctx = mock.MagicMock()
         ctx.config_blobs = {
    @@ -69,6 +70,8 @@ def test_can_create_all_daemon_forms():
             'pool': 'swimming',
             'destination': 'earth',
         }
    +    _os_path_isdir = mock.MagicMock(return_value=True)
    +    monkeypatch.setattr('os.path.isdir', _os_path_isdir)
         dtypes = _cephadm.get_supported_daemons()
         for daemon_type in dtypes:
             if daemon_type == 'agent':
    diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py
    index 6fd36cc6eb18..c5094db335fd 100644
    --- a/src/cephadm/tests/test_deploy.py
    +++ b/src/cephadm/tests/test_deploy.py
    @@ -1,3 +1,4 @@
    +import os
     import pathlib
     import unittest
     from unittest import mock
    @@ -7,18 +8,36 @@
         import_cephadm,
         mock_podman,
         with_cephadm_ctx,
    +    FunkyPatcher,
    +    funkypatch,
     )
     
     
     _cephadm = import_cephadm()
     
     
    -def test_deploy_nfs_container(cephadm_fs, monkeypatch):
    -    _call = mock.MagicMock(return_value=('', '', 0))
    -    monkeypatch.setattr('cephadmlib.container_types.call', _call)
    -    _firewalld = mock.MagicMock()
    +def _common_patches(funkypatch):
    +    mocks = {}
    +    _call = funkypatch.patch('cephadmlib.container_types.call')
    +    _call.return_value = ('', '', 0)
    +    mocks['call'] = _call
    +    _call_throws = funkypatch.patch('cephadmlib.container_types.call_throws')
    +    _call_throws.return_value = ('', '', 0)
    +    mocks['call_throws'] = _call_throws
    +    _firewalld = funkypatch.patch('cephadm.Firewalld')
         _firewalld().external_ports.get.return_value = []
    -    monkeypatch.setattr('cephadm.Firewalld', _firewalld)
    +    mocks['Firewalld'] = _firewalld
    +    _extract_uid_gid = funkypatch.patch('cephadm.extract_uid_gid', force=True)
    +    _extract_uid_gid.return_value = (8765, 8765)
    +    mocks['extract_uid_gid'] = _extract_uid_gid
    +    _install_sysctl = funkypatch.patch('cephadm.install_sysctl')
    +    mocks['install_sysctl'] = _install_sysctl
    +    return mocks
    +
    +
    +def test_deploy_nfs_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
         fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
         with with_cephadm_ctx([]) as ctx:
             ctx.container_engine = mock_podman()
    @@ -30,6 +49,7 @@ def test_deploy_nfs_container(cephadm_fs, monkeypatch):
                 'pool': 'foo',
                 'files': {
                     'ganesha.conf': 'FAKE',
    +                'idmap.conf': 'FAKE',
                 },
                 'config': 'BALONEY',
                 'keyring': 'BUNKUS',
    @@ -40,6 +60,10 @@ def test_deploy_nfs_container(cephadm_fs, monkeypatch):
             runfile_lines = f.read().splitlines()
         assert 'podman' in runfile_lines[-1]
         assert runfile_lines[-1].endswith('quay.io/ceph/ceph:latest -F -L STDERR')
    +    assert '-e TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES' not in runfile_lines[-1]
    +    assert '--pids-limit' in runfile_lines[-1]
    +    assert '-e CEPH_CONF=' in runfile_lines[-1]
    +    assert f'-v /var/lib/ceph/{fsid}/nfs.fun/etc/ganesha:/etc/ganesha:z' in runfile_lines[-1]
         _firewalld().open_ports.assert_called_with([2049])
         with open(f'/var/lib/ceph/{fsid}/nfs.fun/config') as f:
             assert f.read() == 'BALONEY'
    @@ -49,16 +73,9 @@ def test_deploy_nfs_container(cephadm_fs, monkeypatch):
             assert f.read() == 'FAKE'
     
     
    -def test_deploy_snmp_container(cephadm_fs, monkeypatch):
    -    _call = mock.MagicMock(return_value=('', '', 0))
    -    monkeypatch.setattr('cephadmlib.container_types.call', _call)
    -    _call_throws = mock.MagicMock(return_value=0)
    -    monkeypatch.setattr(
    -        'cephadmlib.container_types.call_throws', _call_throws
    -    )
    -    _firewalld = mock.MagicMock()
    -    _firewalld().external_ports.get.return_value = []
    -    monkeypatch.setattr('cephadm.Firewalld', _firewalld)
    +def test_deploy_snmp_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
         fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
         with with_cephadm_ctx([]) as ctx:
             ctx.container_engine = mock_podman()
    @@ -79,8 +96,472 @@ def test_deploy_snmp_container(cephadm_fs, monkeypatch):
         assert runfile_lines[-1].endswith(
             'quay.io/aaabbb/snmp:latest --web.listen-address=:9464 --snmp.destination=192.168.100.10:8899 --snmp.version=V2c --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl'
         )
    +    assert '--pids-limit' not in runfile_lines[-1]
    +    assert f'--env-file=/var/lib/ceph/{fsid}/snmp-gateway.sunmop/snmp-gateway.conf' in runfile_lines[-1]
         _firewalld().open_ports.assert_not_called()
         basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/snmp-gateway.sunmop')
         assert basedir.is_dir()
         assert not (basedir / 'config').exists()
         assert not (basedir / 'keyring').exists()
    +
    +
    +def test_deploy_keepalived_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    _install_sysctl = mocks['install_sysctl']
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'keepalived.uiop'
    +        ctx.image = 'quay.io/eeranimated/keepalived:latest'
    +        ctx.reconfig = False
    +        ctx.config_blobs = {
    +            'destination': '192.168.100.10:8899',
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +            'files': {
    +                'keepalived.conf': 'neversayneveragain',
    +            },
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/keepalived.uiop')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith('quay.io/eeranimated/keepalived:latest')
    +    assert '-e KEEPALIVED_AUTOCONF=false' in runfile_lines[-1]
    +    assert '-e KEEPALIVED_DEBUG=false' in runfile_lines[-1]
    +    assert '--cap-add=NET_ADMIN' in runfile_lines[-1]
    +    assert '--cap-add=NET_RAW' in runfile_lines[-1]
    +    assert f'-v {basedir}/keepalived.conf:/etc/keepalived/keepalived.conf' in runfile_lines[-1]
    +    _firewalld().open_ports.assert_not_called()
    +    assert not (basedir / 'config').exists()
    +    assert not (basedir / 'keyring').exists()
    +    with open(basedir / 'keepalived.conf') as f:
    +        assert f.read() == 'neversayneveragain'
    +    with open(basedir / 'keepalived.conf') as f:
    +        assert f.read() == 'neversayneveragain'
    +        si = os.fstat(f.fileno())
    +        assert (si.st_uid, si.st_gid) == (8765, 8765)
    +    assert (basedir / 'keepalived').is_dir()
    +    si = (basedir / 'keepalived').stat()
    +    assert (si.st_uid, si.st_gid) == (8765, 8765)
    +    assert _install_sysctl.call_count == 1
    +    assert len(_install_sysctl.call_args[0][-1].get_sysctl_settings()) > 1
    +
    +
    +def test_deploy_haproxy_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    _install_sysctl = mocks['install_sysctl']
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'haproxy.yyz'
    +        ctx.image = 'quay.io/lfeuwbo/haproxy:latest'
    +        ctx.reconfig = False
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +            'files': {
    +                'haproxy.cfg': 'bifrost',
    +            },
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/haproxy.yyz')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith(
    +        'quay.io/lfeuwbo/haproxy:latest haproxy -f /var/lib/haproxy/haproxy.cfg'
    +    )
    +    assert '--pids-limit' not in runfile_lines[-1]
    +    assert '--user=root' in runfile_lines[-1]
    +    assert f'-v {basedir}/haproxy:/var/lib/haproxy' in runfile_lines[-1]
    +    _firewalld().open_ports.assert_not_called()
    +    assert not (basedir / 'config').exists()
    +    assert not (basedir / 'keyring').exists()
    +    assert (basedir / 'haproxy').is_dir()
    +    si = (basedir / 'haproxy').stat()
    +    assert (si.st_uid, si.st_gid) == (8765, 8765)
    +    with open(basedir / 'haproxy/haproxy.cfg') as f:
    +        assert f.read() == 'bifrost'
    +        si = os.fstat(f.fileno())
    +        assert (si.st_uid, si.st_gid) == (8765, 8765)
    +    assert _install_sysctl.call_count == 1
    +    assert len(_install_sysctl.call_args[0][-1].get_sysctl_settings()) > 1
    +
    +
    +def test_deploy_iscsi_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'iscsi.wuzzy'
    +        ctx.image = 'quay.io/ayeaye/iscsi:latest'
    +        ctx.reconfig = False
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +            'files': {
    +                'iscsi-gateway.cfg': 'portal',
    +            },
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/iscsi.wuzzy')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith('quay.io/ayeaye/iscsi:latest')
    +    assert '-e TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES' not in runfile_lines[-1]
    +    assert '--pids-limit' in runfile_lines[-1]
    +    assert '--privileged' in runfile_lines[-1]
    +    assert f'-v {basedir}/iscsi-gateway.cfg:/etc/ceph/iscsi-gateway.cfg:z' in runfile_lines[-1]
    +    assert '--mount type=bind,source=/lib/modules,destination=/lib/modules' in runfile_lines[-1]
    +    _firewalld().open_ports.assert_not_called()
    +    with open(basedir / 'config') as f:
    +        assert f.read() == 'XXXXXXX'
    +    with open(basedir / 'keyring') as f:
    +        assert f.read() == 'YYYYYY'
    +    assert (basedir / 'configfs').is_dir()
    +    si = (basedir / 'configfs').stat()
    +    assert (si.st_uid, si.st_gid) == (8765, 8765)
    +    with open(basedir / 'iscsi-gateway.cfg') as f:
    +        assert f.read() == 'portal'
    +        si = os.fstat(f.fileno())
    +        assert (si.st_uid, si.st_gid) == (8765, 8765)
    +
    +
    +def test_deploy_nvmeof_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'nvmeof.andu'
    +        ctx.image = 'quay.io/ownf/nmve:latest'
    +        ctx.reconfig = False
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +            'files': {
    +                'ceph-nvmeof.conf': 'icantbeliveitsnotiscsi',
    +            },
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/nvmeof.andu')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith('quay.io/ownf/nmve:latest')
    +    assert '-e TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES' not in runfile_lines[-1]
    +    assert '--pids-limit' in runfile_lines[-1]
    +    assert '--ulimit memlock=-1:-1' in runfile_lines[-1]
    +    assert '--cap-add=SYS_ADMIN' in runfile_lines[-1]
    +    assert '--cap-add=CAP_SYS_NICE' in runfile_lines[-1]
    +    assert f'-v {basedir}/ceph-nvmeof.conf:/src/ceph-nvmeof.conf:z' in runfile_lines[-1]
    +    assert '--mount type=bind,source=/lib/modules,destination=/lib/modules' in runfile_lines[-1]
    +    _firewalld().open_ports.assert_not_called()
    +    with open(basedir / 'config') as f:
    +        assert f.read() == 'XXXXXXX'
    +    with open(basedir / 'keyring') as f:
    +        assert f.read() == 'YYYYYY'
    +    assert (basedir / 'configfs').is_dir()
    +    si = (basedir / 'configfs').stat()
    +    assert (si.st_uid, si.st_gid) == (167, 167)
    +    with open(basedir / 'ceph-nvmeof.conf') as f:
    +        assert f.read() == 'icantbeliveitsnotiscsi'
    +        si = os.fstat(f.fileno())
    +        assert (si.st_uid, si.st_gid) == (167, 167)
    +
    +
    +def test_deploy_a_monitoring_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    _get_ip_addresses = funkypatch.patch('cephadmlib.net_utils.get_ip_addresses')
    +    _get_ip_addresses.return_value = (['10.10.10.10'], [])
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'prometheus.fire'
    +        ctx.image = 'quay.io/titans/prometheus:latest'
    +        ctx.reconfig = False
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +            'files': {
    +                'prometheus.yml': 'bettercallherc',
    +            },
    +            'ip_to_bind_to': '1.2.3.4'
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/prometheus.fire')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith(
    +        'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095 --web.listen-address=1.2.3.4:9095'
    +    )
    +    assert '--user 8765' in runfile_lines[-1]
    +    assert f'-v /var/lib/ceph/{fsid}/prometheus.fire/etc/prometheus:/etc/prometheus:Z' in runfile_lines[-1]
    +    _firewalld().open_ports.assert_not_called()
    +    assert not (basedir / 'config').exists()
    +    assert not (basedir / 'keyring').exists()
    +    with open(basedir / 'etc/prometheus/prometheus.yml') as f:
    +        assert f.read() == 'bettercallherc'
    +        si = os.fstat(f.fileno())
    +        assert (si.st_uid, si.st_gid) == (8765, 8765)
    +
    +
    +def test_deploy_a_tracing_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'elasticsearch.band'
    +        ctx.image = 'quay.io/rubber/elasticsearch:latest'
    +        ctx.reconfig = False
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +            'files': {
    +                'prometheus.yml': 'bettercallherc',
    +            },
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/elasticsearch.band')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert '-e discovery.type=single-node' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith('quay.io/rubber/elasticsearch:latest')
    +    _firewalld().open_ports.assert_not_called()
    +    assert not (basedir / 'config').exists()
    +    assert not (basedir / 'keyring').exists()
    +
    +
    +def test_deploy_ceph_mgr_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir')
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'mgr.foo'
    +        ctx.image = 'quay.io/ceph/ceph:latest'
    +        ctx.reconfig = False
    +        ctx.allow_ptrace = False
    +        ctx.osd_fsid = '00000000-0000-0000-0000-000000000000'
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/mgr.foo')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith(
    +        'quay.io/ceph/ceph:latest -n mgr.foo -f --setuser ceph --setgroup ceph --default-log-to-file=false --default-log-to-journald=true --default-log-to-stderr=false'
    +    )
    +    assert '-e TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES' in runfile_lines[-1]
    +    assert '--pids-limit' in runfile_lines[-1]
    +    assert '--entrypoint /usr/bin/ceph-mgr' in runfile_lines[-1]
    +    assert f'-v /var/lib/ceph/{fsid}/mgr.foo:/var/lib/ceph/mgr/ceph-foo:z' in runfile_lines[-1]
    +    assert f'-v /var/log/ceph/{fsid}:/var/log/ceph:z' in runfile_lines[-1]
    +    _firewalld().open_ports.assert_not_called()
    +    with open(basedir / 'config') as f:
    +        assert f.read() == 'XXXXXXX'
    +    with open(basedir / 'keyring') as f:
    +        assert f.read() == 'YYYYYY'
    +    assert _make_run_dir.call_count == 1
    +    assert _make_run_dir.call_args[0][1] == 8765
    +    assert _make_run_dir.call_args[0][2] == 8765
    +
    +
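The _make_run_dir checks above index into call_args to recover the uid/gid that cephadm passed. For reference, a generic illustration of how unittest.mock exposes positional arguments (not cephadm-specific code):

from unittest import mock

m = mock.MagicMock()
m('/var/run/ceph/some-fsid', 8765, 8765)
args, kwargs = m.call_args        # call_args unpacks to (args, kwargs)
assert args[1] == 8765            # same value that call_args[0][1] yields
assert m.call_args[0][2] == 8765  # third positional argument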
    +def test_deploy_ceph_osd_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir')
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'osd.quux'
    +        ctx.image = 'quay.io/ceph/ceph:latest'
    +        ctx.reconfig = False
    +        ctx.allow_ptrace = False
    +        ctx.osd_fsid = '00000000-0000-0000-0000-000000000000'
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/osd.quux')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith(
    +        'quay.io/ceph/ceph:latest -n osd.quux -f --setuser ceph --setgroup ceph --default-log-to-file=false --default-log-to-journald=true --default-log-to-stderr=false'
    +    )
    +    assert '-e TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES' in runfile_lines[-1]
    +    assert '--privileged' in runfile_lines[-1]
    +    assert '--pids-limit' in runfile_lines[-1]
    +    assert '--entrypoint /usr/bin/ceph-osd' in runfile_lines[-1]
    +    assert f'-v /var/lib/ceph/{fsid}/osd.quux:/var/lib/ceph/osd/ceph-quux:z' in runfile_lines[-1]
    +    assert f'-v /var/log/ceph/{fsid}:/var/log/ceph:z' in runfile_lines[-1]
    +    assert '-v /dev:/dev' in runfile_lines[-1]
    +    _firewalld().open_ports.assert_not_called()
    +    with open(basedir / 'config') as f:
    +        assert f.read() == 'XXXXXXX'
    +    with open(basedir / 'keyring') as f:
    +        assert f.read() == 'YYYYYY'
    +    assert _make_run_dir.call_count == 1
    +    assert _make_run_dir.call_args[0][1] == 8765
    +    assert _make_run_dir.call_args[0][2] == 8765
    +
    +
    +def test_deploy_ceph_exporter_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    _get_ip_addresses = funkypatch.patch('cephadmlib.net_utils.get_ip_addresses')
    +    _get_ip_addresses.return_value = (['10.10.10.10'], [])
    +    _make_run_dir = funkypatch.patch('cephadmlib.file_utils.make_run_dir')
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'ceph-exporter.zaq'
    +        ctx.image = 'quay.io/ceph/ceph:latest'
    +        ctx.reconfig = False
    +        ctx.allow_ptrace = False
    +        ctx.osd_fsid = '00000000-0000-0000-0000-000000000000'
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +            'prio-limit': 12,
    +        }
    +
+        # ceph-exporter is a special case: it requires the "sock dir"
+        # to already exist; that dir defaults to /var/run/ceph
    +        vrc = pathlib.Path('/var/run/ceph')
    +        (vrc / fsid).mkdir(parents=True)
    +
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/ceph-exporter.zaq')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith(
    +        'quay.io/ceph/ceph:latest -n client.ceph-exporter.zaq -f --sock-dir=/var/run/ceph/ --addrs=0.0.0.0 --port=9926 --prio-limit=12 --stats-period=5'
    +    )
    +    assert '--entrypoint /usr/bin/ceph-exporter' in runfile_lines[-1]
    +    assert '-e TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES' in runfile_lines[-1]
    +    assert '--pids-limit' in runfile_lines[-1]
    +    _firewalld().open_ports.assert_not_called()
    +    with open(basedir / 'config') as f:
    +        assert f.read() == 'XXXXXXX'
    +    with open(basedir / 'keyring') as f:
    +        assert f.read() == 'YYYYYY'
    +
    +
    +def test_deploy_and_rm_iscsi(cephadm_fs, funkypatch):
+    # Test that the deploy and remove paths for iscsi (which has a sidecar container)
    +    # create and remove the correct unit files.
    +    funkypatch.patch('shutil.rmtree')  # fakefs + shutil.rmtree breaks on py3.12
    +    mocks = _common_patches(funkypatch)
    +    _firewalld = mocks['Firewalld']
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'iscsi.wuzzy'
    +        ctx.image = 'quay.io/ayeaye/iscsi:latest'
    +        ctx.reconfig = False
    +        ctx.config_blobs = {
    +            'config': 'XXXXXXX',
    +            'keyring': 'YYYYYY',
    +            'files': {
    +                'iscsi-gateway.cfg': 'portal',
    +            },
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    unit_dir = pathlib.Path('/etc/systemd/system')
    +    assert unit_dir.is_dir()
    +    assert (unit_dir / f'ceph-{fsid}@.service').exists()
    +    drop_in = unit_dir / f'ceph-{fsid}@iscsi.wuzzy.service.d/99-cephadm.conf'
    +    assert drop_in.parent.is_dir()
    +    assert drop_in.exists()
    +    assert 'tcmu' in drop_in.read_text()
    +    tcmu_sidecar = unit_dir / f'ceph-{fsid}-sidecar@iscsi.wuzzy:tcmu.service'
    +    assert tcmu_sidecar.exists()
    +    assert 'sidecar-tcmu.run' in tcmu_sidecar.read_text()
    +
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'iscsi.wuzzy'
    +        ctx.image = 'quay.io/ayeaye/iscsi:latest'
    +        _cephadm.command_rm_daemon(ctx)
    +
    +    assert not drop_in.exists()
    +    assert not drop_in.parent.exists()
    +    assert not tcmu_sidecar.exists()
    +
    +
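The systemd paths asserted in test_deploy_and_rm_iscsi follow a fixed naming scheme. A small sketch that derives them from the fsid and daemon name (an illustrative helper, not the cephadm API):

import pathlib


def expected_sidecar_units(fsid, daemon_name, sidecar='tcmu'):
    # Mirrors the names checked above: the shared template unit, the
    # per-daemon drop-in, and the sidecar service.
    unit_dir = pathlib.Path('/etc/systemd/system')
    return {
        'template': unit_dir / f'ceph-{fsid}@.service',
        'drop_in': unit_dir / f'ceph-{fsid}@{daemon_name}.service.d' / '99-cephadm.conf',
        'sidecar': unit_dir / f'ceph-{fsid}-sidecar@{daemon_name}:{sidecar}.service',
    }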
    +def test_deploy_smb_container(cephadm_fs, funkypatch):
    +    mocks = _common_patches(funkypatch)
    +    fsid = 'b01dbeef-701d-9abe-0000-e1e5a47004a7'
    +    with with_cephadm_ctx([]) as ctx:
    +        ctx.container_engine = mock_podman()
    +        ctx.fsid = fsid
    +        ctx.name = 'smb.b01s'
    +        ctx.image = 'quay.io/essembee/samba-server:latest'
    +        ctx.reconfig = False
    +        ctx.config_blobs = {
    +            'cluster_id': 'smb1',
    +            'config_uri': 'http://localhost:9876/smb.json',
    +            'config': 'SAMPLE',
    +            'keyring': 'SOMETHING',
    +        }
    +        _cephadm._common_deploy(ctx)
    +
    +    basedir = pathlib.Path(f'/var/lib/ceph/{fsid}/smb.b01s')
    +    assert basedir.is_dir()
    +    with open(basedir / 'unit.run') as f:
    +        runfile_lines = f.read().splitlines()
    +    assert 'podman' in runfile_lines[-1]
    +    assert runfile_lines[-1].endswith('quay.io/essembee/samba-server:latest run smbd')
    +    assert f'-v {basedir}/etc-samba-container:/etc/samba/container:z' in runfile_lines[-1]
    +    assert f'-v {basedir}/lib-samba:/var/lib/samba:z' in runfile_lines[-1]
    +    assert '-e SAMBA_CONTAINER_ID=smb1' in runfile_lines[-1]
    +    assert '-e \'SAMBACC_CONFIG=["http://localhost:9876/smb.json"]\'' in runfile_lines[-1]
    +    assert '--publish' in runfile_lines[-1]
    diff --git a/src/cephadm/tests/test_enclosure.py b/src/cephadm/tests/test_enclosure.py
    deleted file mode 100644
    index 48d05cf83188..000000000000
    --- a/src/cephadm/tests/test_enclosure.py
    +++ /dev/null
    @@ -1,74 +0,0 @@
    -import pytest
    -
    -from unittest import mock
    -from tests.fixtures import host_sysfs, import_cephadm
    -
    -from cephadmlib.host_facts import Enclosure
    -
    -_cephadm = import_cephadm()
    -
    -
    -@pytest.fixture
    -def enclosure(host_sysfs):
    -    e = Enclosure(
    -        enc_id='1',
    -        enc_path='/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0',
    -        dev_path='/sys/class/scsi_generic/sg2')
    -    yield e
    -
    -
    -class TestEnclosure:
    -
    -    def test_enc_metadata(self, enclosure):
    -        """Check metadata for the enclosure e.g. vendor and model"""
    -       
    -        assert enclosure.vendor == "EnclosuresInc"
    -        assert enclosure.components == '12'
    -        assert enclosure.model == "D12"
    -        assert enclosure.enc_id == '1'
    -
    -        assert enclosure.ses_paths == ['sg2']
    -        assert enclosure.path_count == 1
    -
    -    def test_enc_slots(self, enclosure):
    -        """Check slot count"""
    -
    -        assert len(enclosure.slot_map) == 12
    -
    -    def test_enc_slot_format(self, enclosure):
    -        """Check the attributes of a slot are as expected"""
    -
    -        assert all(k in ['fault', 'locate', 'serial', 'status'] 
    -                   for k, _v in enclosure.slot_map['0'].items())
    -
    -    def test_enc_slot_status(self, enclosure):
    -        """Check the number of occupied slots is correct"""
    -
    -        occupied_slots = [slot_id for slot_id in enclosure.slot_map 
    -                          if enclosure.slot_map[slot_id].get('status').upper() == 'OK']
    -
    -        assert len(occupied_slots) == 6
    -
    -    def test_enc_disk_count(self, enclosure):
    -        """Check the disks found matches the slot info"""
    -
    -        assert len(enclosure.device_lookup) == 6
    -        assert enclosure.device_count == 6
    -
    -    def test_enc_device_serial(self, enclosure):
    -        """Check the device serial numbers are as expected"""
    -        
    -        assert all(fake_serial in enclosure.device_lookup.keys() 
    -                   for fake_serial in [
    -                       'fake000',
    -                       'fake001',
    -                       'fake002',
    -                       'fake003',
    -                       'fake004',
    -                       'fake005'])
    -
    -    def test_enc_slot_to_serial(self, enclosure):
    -        """Check serial number to slot matches across slot_map and device_lookup"""
    -
    -        for serial, slot in enclosure.device_lookup.items():
    -            assert enclosure.slot_map[slot].get('serial') == serial
    diff --git a/src/cephadm/tests/test_host_facts.py b/src/cephadm/tests/test_host_facts.py
    new file mode 100644
    index 000000000000..a48089f77f6c
    --- /dev/null
    +++ b/src/cephadm/tests/test_host_facts.py
    @@ -0,0 +1,117 @@
    +import pytest
    +
    +from unittest import mock
    +from tests.fixtures import host_sysfs, import_cephadm, cephadm_fs
    +
    +from cephadmlib.host_facts import Enclosure
    +
    +_cephadm = import_cephadm()
    +
    +
    +@pytest.fixture
    +def enclosure(host_sysfs):
    +    e = Enclosure(
    +        enc_id='1',
    +        enc_path='/sys/class/scsi_generic/sg2/device/enclosure/0:0:1:0',
    +        dev_path='/sys/class/scsi_generic/sg2',
    +    )
    +    yield e
    +
    +
    +class TestEnclosure:
    +
    +    def test_enc_metadata(self, enclosure):
    +        """Check metadata for the enclosure e.g. vendor and model"""
    +
    +        assert enclosure.vendor == "EnclosuresInc"
    +        assert enclosure.components == '12'
    +        assert enclosure.model == "D12"
    +        assert enclosure.enc_id == '1'
    +
    +        assert enclosure.ses_paths == ['sg2']
    +        assert enclosure.path_count == 1
    +
    +    def test_enc_slots(self, enclosure):
    +        """Check slot count"""
    +
    +        assert len(enclosure.slot_map) == 12
    +
    +    def test_enc_slot_format(self, enclosure):
    +        """Check the attributes of a slot are as expected"""
    +
    +        assert all(
    +            k in ['fault', 'locate', 'serial', 'status']
    +            for k, _v in enclosure.slot_map['0'].items()
    +        )
    +
    +    def test_enc_slot_status(self, enclosure):
    +        """Check the number of occupied slots is correct"""
    +
    +        occupied_slots = [
    +            slot_id
    +            for slot_id in enclosure.slot_map
    +            if enclosure.slot_map[slot_id].get('status').upper() == 'OK'
    +        ]
    +
    +        assert len(occupied_slots) == 6
    +
    +    def test_enc_disk_count(self, enclosure):
    +        """Check the disks found matches the slot info"""
    +
    +        assert len(enclosure.device_lookup) == 6
    +        assert enclosure.device_count == 6
    +
    +    def test_enc_device_serial(self, enclosure):
    +        """Check the device serial numbers are as expected"""
    +
    +        assert all(
    +            fake_serial in enclosure.device_lookup.keys()
    +            for fake_serial in [
    +                'fake000',
    +                'fake001',
    +                'fake002',
    +                'fake003',
    +                'fake004',
    +                'fake005',
    +            ]
    +        )
    +
    +    def test_enc_slot_to_serial(self, enclosure):
    +        """Check serial number to slot matches across slot_map and device_lookup"""
    +
    +        for serial, slot in enclosure.device_lookup.items():
    +            assert enclosure.slot_map[slot].get('serial') == serial
    +
    +
    +def test_host_facts_security(cephadm_fs):
    +    cephadm_fs.create_file('/sys/kernel/security/lsm', contents='apparmor\n')
    +    cephadm_fs.create_file('/etc/apparmor', contents='foo\n')
    +    # List from https://tracker.ceph.com/issues/66389
    +    profiles_lines = [
    +        'foo (complain)',
    +        '/usr/bin/man (enforce)',
    +        '1password (unconfined)',
    +        'Discord (unconfined)',
    +        'MongoDB Compass (unconfined)',
    +        'profile name with spaces (enforce)',
    +    ]
    +    cephadm_fs.create_file(
    +        '/sys/kernel/security/apparmor/profiles',
    +        contents='\n'.join(profiles_lines),
    +    )
    +
    +    from cephadmlib.host_facts import HostFacts
    +
    +    class TestHostFacts(HostFacts):
    +        def _populate_sysctl_options(self):
    +            return {}
    +
    +    ctx = mock.MagicMock()
    +    hfacts = TestHostFacts(ctx)
    +    ksec = hfacts.kernel_security
    +    assert ksec
+    assert ksec['type'] == 'AppArmor'
    +    assert ksec['complain'] == 0
    +    assert ksec['enforce'] == 1
    +    assert ksec['unconfined'] == 2
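The profile list in test_host_facts_security deliberately includes names containing spaces (the subject of the tracker issue cited in the test); the mode is the parenthesised word at the end of each line. A sketch of a parse that tolerates such names, assuming splitting on the final ' (' is sufficient (this is not the HostFacts implementation):

def parse_apparmor_profile(line):
    # 'MongoDB Compass (unconfined)' -> ('MongoDB Compass', 'unconfined')
    line = line.strip()
    if not line.endswith(')') or ' (' not in line:
        return line, None
    name, _, mode = line.rpartition(' (')
    return name, mode[:-1]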
    diff --git a/src/cephadm/tests/test_ingress.py b/src/cephadm/tests/test_ingress.py
    index 798c73708686..7f23f64f51fc 100644
    --- a/src/cephadm/tests/test_ingress.py
    +++ b/src/cephadm/tests/test_ingress.py
    @@ -90,7 +90,7 @@ def test_haproxy_container_mounts():
                 good_haproxy_json(),
                 SAMPLE_HAPROXY_IMAGE,
             )
    -        cmounts = hap.get_container_mounts("/var/tmp")
    +        cmounts = hap._get_container_mounts("/var/tmp")
             assert len(cmounts) == 1
             assert cmounts["/var/tmp/haproxy"] == "/var/lib/haproxy"
     
    @@ -166,9 +166,9 @@ def test_haproxy_extract_uid_gid_haproxy():
                 good_haproxy_json(),
                 SAMPLE_HAPROXY_IMAGE,
             )
    -        with mock.patch("cephadm.CephContainer") as cc:
    +        with mock.patch("cephadmlib.container_types.CephContainer") as cc:
                 cc.return_value.run.return_value = "500 500"
    -            uid, gid = hap.extract_uid_gid_haproxy()
    +            uid, gid = hap.uid_gid(ctx)
                 cc.return_value.run.assert_called()
             assert uid == 500
             assert gid == 500
    @@ -244,7 +244,7 @@ def test_keepalived_container_mounts():
                 good_keepalived_json(),
                 SAMPLE_KEEPALIVED_IMAGE,
             )
    -        cmounts = kad.get_container_mounts("/var/tmp")
    +        cmounts = kad._get_container_mounts("/var/tmp")
             assert len(cmounts) == 1
             assert (
                 cmounts["/var/tmp/keepalived.conf"]
    @@ -329,9 +329,9 @@ def test_keepalived_extract_uid_gid_keepalived():
                 good_keepalived_json(),
                 SAMPLE_KEEPALIVED_IMAGE,
             )
    -        with mock.patch("cephadm.CephContainer") as cc:
    +        with mock.patch("cephadmlib.container_types.CephContainer") as cc:
                 cc.return_value.run.return_value = "500 500"
    -            uid, gid = kad.extract_uid_gid_keepalived()
    +            uid, gid = kad.uid_gid(ctx)
                 cc.return_value.run.assert_called()
             assert uid == 500
             assert gid == 500
    diff --git a/src/cephadm/tests/test_logrotate_config.py b/src/cephadm/tests/test_logrotate_config.py
    new file mode 100644
    index 000000000000..c97f21019d86
    --- /dev/null
    +++ b/src/cephadm/tests/test_logrotate_config.py
    @@ -0,0 +1,57 @@
    +from unittest import mock
    +
    +import pytest
    +
    +from tests.fixtures import import_cephadm, cephadm_fs
    +
    +from cephadmlib import logging
    +
    +
    +_cephadm = import_cephadm()
    +
    +def test_cluster_logrotate_config(cephadm_fs):
    +    ctx = _cephadm.CephadmContext()
    +    ctx.logrotate_dir = '/my/log/dir'
    +    fsid = '5dcc9af0-7cd3-11ee-9e84-525400babd0a'
    +
    +    cephadm_fs.create_dir(ctx.logrotate_dir)
    +
    +    expected_cluster_logrotate_file = """# created by cephadm
    +/var/log/ceph/5dcc9af0-7cd3-11ee-9e84-525400babd0a/*.log {
    +    rotate 7
    +    daily
    +    compress
    +    sharedscripts
    +    postrotate
    +        killall -q -1 ceph-mon ceph-mgr ceph-mds ceph-osd ceph-fuse radosgw rbd-mirror cephfs-mirror tcmu-runner || pkill -1 -x 'ceph-mon|ceph-mgr|ceph-mds|ceph-osd|ceph-fuse|radosgw|rbd-mirror|cephfs-mirror|tcmu-runner' || true
    +    endscript
    +    missingok
    +    notifempty
    +    su root root
    +}"""
    +
    +    logging.write_cluster_logrotate_config(ctx, fsid)
    +
    +    with open(ctx.logrotate_dir + f'/ceph-{fsid}', 'r') as f:
    +        assert f.read() == expected_cluster_logrotate_file
    +
    +def test_cephadm_logrotate_config(cephadm_fs):
    +    ctx = _cephadm.CephadmContext()
    +    ctx.logrotate_dir = '/my/log/dir'
    +
    +    cephadm_fs.create_dir(ctx.logrotate_dir)
    +
    +    expected_cephadm_logrotate_file = """# created by cephadm
    +/var/log/ceph/cephadm.log {
    +    rotate 7
    +    daily
    +    compress
    +    missingok
    +    notifempty
    +    su root root
    +}"""
    +
    +    logging.write_cephadm_logrotate_config(ctx)
    +
+    with open(ctx.logrotate_dir + '/cephadm', 'r') as f:
    +        assert f.read() == expected_cephadm_logrotate_file
    diff --git a/src/cephadm/tests/test_nfs.py b/src/cephadm/tests/test_nfs.py
    index 0649ef934c16..1b468516e67b 100644
    --- a/src/cephadm/tests/test_nfs.py
    +++ b/src/cephadm/tests/test_nfs.py
    @@ -25,6 +25,7 @@ def nfs_json(**kwargs):
         if kwargs.get("files"):
             result["files"] = {
                 "ganesha.conf": "",
    +            "idmap.conf": "",
             }
         if kwargs.get("rgw_content"):
             result["rgw"] = dict(kwargs["rgw_content"])
    @@ -117,7 +118,7 @@ def test_nfsganesha_container_mounts():
                 "fred",
                 good_nfs_json(),
             )
    -        cmounts = nfsg.get_container_mounts("/var/tmp")
    +        cmounts = nfsg._get_container_mounts("/var/tmp")
             assert len(cmounts) == 3
             assert cmounts["/var/tmp/config"] == "/etc/ceph/ceph.conf:z"
             assert cmounts["/var/tmp/keyring"] == "/etc/ceph/keyring:z"
    @@ -130,7 +131,7 @@ def test_nfsganesha_container_mounts():
                 "fred",
                 nfs_json(pool=True, files=True, rgw=True),
             )
    -        cmounts = nfsg.get_container_mounts("/var/tmp")
    +        cmounts = nfsg._get_container_mounts("/var/tmp")
             assert len(cmounts) == 4
             assert cmounts["/var/tmp/config"] == "/etc/ceph/ceph.conf:z"
             assert cmounts["/var/tmp/keyring"] == "/etc/ceph/keyring:z"
    @@ -155,15 +156,17 @@ def test_nfsganesha_container_envs():
     
     
     def test_nfsganesha_get_version():
    +    from cephadmlib.daemons import nfs
    +
         with with_cephadm_ctx([]) as ctx:
    -        nfsg = _cephadm.NFSGanesha(
    +        nfsg = nfs.NFSGanesha(
                 ctx,
                 SAMPLE_UUID,
                 "fred",
                 good_nfs_json(),
             )
     
    -        with mock.patch("cephadm.call") as _call:
    +        with mock.patch("cephadmlib.daemons.nfs.call") as _call:
                 _call.return_value = ("NFS-Ganesha Release = V100", "", 0)
                 ver = nfsg.get_version(ctx, "fake_version")
                 _call.assert_called()
    diff --git a/src/cephadm/tests/test_unit_file.py b/src/cephadm/tests/test_unit_file.py
    new file mode 100644
    index 000000000000..74cd89c1a823
    --- /dev/null
    +++ b/src/cephadm/tests/test_unit_file.py
    @@ -0,0 +1,149 @@
+# Tests for the systemd unit file generated by cephadm
    +#
    +from unittest import mock
    +
    +import functools
    +import io
    +import os
    +import sys
    +
    +import pytest
    +
    +from tests.fixtures import (
    +    import_cephadm,
    +    mock_docker,
    +    mock_podman,
    +    with_cephadm_ctx,
    +)
    +
    +from cephadmlib import context
    +from cephadmlib import systemd_unit
    +from cephadmlib.constants import CGROUPS_SPLIT_PODMAN_VERSION
    +
    +_cephadm = import_cephadm()
    +
    +
    +def _get_unit_file(ctx, fsid):
    +    return str(systemd_unit._get_unit_file(ctx, fsid))
    +
    +
    +def test_docker_engine_wants_docker():
    +    ctx = context.CephadmContext()
    +    ctx.container_engine = mock_docker()
    +    r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
    +    assert 'Wants=docker.service' in r
    +
    +
    +def test_podman_engine_does_not_req_docker():
    +    ctx = context.CephadmContext()
    +    ctx.container_engine = mock_podman()
    +    r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
    +    assert 'Requires=docker.service' not in r
    +
    +
    +def test_podman_engine_forking_service():
+    # verify that the podman service uses the forking service type
    +    # and related parameters
    +    ctx = context.CephadmContext()
    +    ctx.container_engine = mock_podman()
    +    r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
    +    assert 'Type=forking' in r
    +    assert 'PIDFile=' in r
    +    assert 'ExecStartPre' in r
    +    assert 'ExecStopPost' in r
    +
    +
    +def test_podman_with_split_cgroups_sets_delegate():
    +    ctx = context.CephadmContext()
    +    ctx.container_engine = mock_podman()
    +    ctx.container_engine.version = CGROUPS_SPLIT_PODMAN_VERSION
    +    r = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
    +    assert 'Type=forking' in r
    +    assert 'Delegate=yes' in r
    +
    +
    +def _ignore_blank_lines(value):
    +    return [v for v in value.splitlines() if v]
    +
    +
    +def test_new_docker():
    +    ctx = context.CephadmContext()
    +    ctx.container_engine = mock_docker()
    +    ru = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
    +    assert _ignore_blank_lines(ru) == [
    +        '# generated by cephadm',
    +        '[Unit]',
    +        'Description=Ceph %i for 9b9d7609-f4d5-4aba-94c8-effa764d96c9',
    +        '# According to:',
    +        '#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget',
    +        '# these can be removed once ceph-mon will dynamically change network',
    +        '# configuration.',
    +        'After=network-online.target local-fs.target time-sync.target docker.service',
    +        'Wants=network-online.target local-fs.target time-sync.target',
    +        'Wants=docker.service',
    +        'PartOf=ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9.target',
    +        'Before=ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9.target',
    +        '[Service]',
    +        'LimitNOFILE=1048576',
    +        'LimitNPROC=1048576',
    +        'EnvironmentFile=-/etc/environment',
    +        'ExecStart=/bin/bash '
    +        '/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/%i/unit.run',
    +        "ExecStop=-/bin/bash -c 'bash "
    +        "/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/%i/unit.stop'",
    +        'ExecStopPost=-/bin/bash '
    +        '/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/%i/unit.poststop',
    +        'KillMode=none',
    +        'Restart=on-failure',
    +        'RestartSec=10s',
    +        'TimeoutStartSec=200',
    +        'TimeoutStopSec=120',
    +        'StartLimitInterval=30min',
    +        'StartLimitBurst=5',
    +        '[Install]',
    +        'WantedBy=ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9.target',
    +    ]
    +
    +
    +def test_new_podman():
    +    ctx = context.CephadmContext()
    +    ctx.container_engine = mock_podman()
    +    ctx.container_engine.version = CGROUPS_SPLIT_PODMAN_VERSION
    +    ru = _get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9')
    +    assert _ignore_blank_lines(ru) == [
    +        '# generated by cephadm',
    +        '[Unit]',
    +        'Description=Ceph %i for 9b9d7609-f4d5-4aba-94c8-effa764d96c9',
    +        '# According to:',
    +        '#   http://www.freedesktop.org/wiki/Software/systemd/NetworkTarget',
    +        '# these can be removed once ceph-mon will dynamically change network',
    +        '# configuration.',
    +        'After=network-online.target local-fs.target time-sync.target',
    +        'Wants=network-online.target local-fs.target time-sync.target',
    +        'PartOf=ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9.target',
    +        'Before=ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9.target',
    +        '[Service]',
    +        'LimitNOFILE=1048576',
    +        'LimitNPROC=1048576',
    +        'EnvironmentFile=-/etc/environment',
    +        'ExecStart=/bin/bash '
    +        '/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/%i/unit.run',
    +        "ExecStop=-/bin/bash -c 'bash "
    +        "/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/%i/unit.stop'",
    +        'ExecStopPost=-/bin/bash '
    +        '/var/lib/ceph/9b9d7609-f4d5-4aba-94c8-effa764d96c9/%i/unit.poststop',
    +        'KillMode=none',
    +        'Restart=on-failure',
    +        'RestartSec=10s',
    +        'TimeoutStartSec=200',
    +        'TimeoutStopSec=120',
    +        'StartLimitInterval=30min',
    +        'StartLimitBurst=5',
    +        'ExecStartPre=-/bin/rm -f %t/%n-pid %t/%n-cid',
    +        'ExecStopPost=-/bin/rm -f %t/%n-pid %t/%n-cid',
    +        'Type=forking',
    +        'PIDFile=%t/%n-pid',
    +        'Delegate=yes',
    +        '[Install]',
    +        'WantedBy=ceph-9b9d7609-f4d5-4aba-94c8-effa764d96c9.target',
    +    ]
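
For quick local inspection, the behaviour these new tests pin down can be reproduced directly from src/cephadm; a minimal sketch, assuming the same PYTHONPATH the tox envs set up and the helpers from tests/fixtures:

    # Sketch only: render the generated systemd unit file for a podman host
    # that supports cgroups=split, mirroring test_new_podman() above.
    from cephadmlib import context, systemd_unit
    from cephadmlib.constants import CGROUPS_SPLIT_PODMAN_VERSION
    from tests.fixtures import mock_podman

    ctx = context.CephadmContext()
    ctx.container_engine = mock_podman()
    ctx.container_engine.version = CGROUPS_SPLIT_PODMAN_VERSION
    unit_text = str(systemd_unit._get_unit_file(ctx, '9b9d7609-f4d5-4aba-94c8-effa764d96c9'))
    assert 'Type=forking' in unit_text and 'Delegate=yes' in unit_text
    print(unit_text)
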
    diff --git a/src/cephadm/tests/test_util_funcs.py b/src/cephadm/tests/test_util_funcs.py
    index ffcf3909c4ee..92872b196f31 100644
    --- a/src/cephadm/tests/test_util_funcs.py
    +++ b/src/cephadm/tests/test_util_funcs.py
    @@ -558,7 +558,7 @@ class FakeContext:
         with_cephadm_ctx is not appropriate (it enables too many mocks, etc).
         """
     
    -    timeout = 30
    +    timeout = 300
     
     
     def _has_non_zero_exit(clog):
    @@ -810,3 +810,161 @@ def test_apply_deploy_config_to_ctx(cc, monkeypatch):
         ctx = FakeContext()
         _cephadm.apply_deploy_config_to_ctx(cc.cfg_data, ctx)
         cc.check(ctx)
    +
    +
    +def test_daemon_sub_identity_from_sidecar_service():
    +    from cephadmlib.daemon_identity import DaemonSubIdentity
    +
    +    dsi = DaemonSubIdentity(
    +        '244c9842-866b-11ee-80ad-3497f6318048', 'iscsi', 'rab.oof', 'tcmu'
    +    )
    +    service_name = dsi.sidecar_service_name
    +    assert (
    +        service_name
    +        == 'ceph-244c9842-866b-11ee-80ad-3497f6318048-sidecar@iscsi.rab.oof:tcmu.service'
    +    )
    +    d2, category = DaemonSubIdentity.from_service_name(service_name)
    +    assert category == 'sidecar'
    +    assert d2.fsid == '244c9842-866b-11ee-80ad-3497f6318048'
    +    assert d2.daemon_type == 'iscsi'
    +    assert d2.daemon_id == 'rab.oof'
    +    assert d2.subcomponent == 'tcmu'
    +
    +
    +def test_daemon_sub_identity_from_init_service():
    +    from cephadmlib.daemon_identity import DaemonIdentity, DaemonSubIdentity
    +
    +    di = DaemonIdentity(
    +        '244c9842-866b-11ee-80ad-3497f6318048', 'putrats', 'wow',
    +    )
    +    service_name = di.init_service_name
    +    assert (
    +        service_name
    +        == 'ceph-244c9842-866b-11ee-80ad-3497f6318048-init@putrats.wow.service'
    +    )
    +    d2, category = DaemonSubIdentity.from_service_name(service_name)
    +    assert category == 'init'
    +    assert d2.fsid == '244c9842-866b-11ee-80ad-3497f6318048'
    +    assert d2.daemon_type == 'putrats'
    +    assert d2.daemon_id == 'wow'
    +    assert d2.subcomponent == 'init'
    +
    +
    +def test_daemon_sub_identity_from_service_invalid():
    +    from cephadmlib.daemon_identity import DaemonSubIdentity
    +
    +    service_name = 'ceph-244c9842-866b-11ee-80ad-3497f6318048-morbo@iscsi.rab.oof.tcmu.service'
    +    with pytest.raises(ValueError):
    +        DaemonSubIdentity.from_service_name(service_name)
    +
    +    service_name = 'ceph-244c9842-866b-11ee-80ad-3497f6318048@iscsi.rab.oof.service'
    +    with pytest.raises(ValueError):
    +        DaemonSubIdentity.from_service_name(service_name)
    +
    +    service_name = 'ceph-244c9842-866b-11ee-80ad-3497f6318048-sidecar@foo.bar.baz:acolon:toomany.service'
    +    with pytest.raises(ValueError):
    +        DaemonSubIdentity.from_service_name(service_name)
    +
    +    service_name = 'ceph-244c9842-866b-11ee-80ad-3497f6318048-init@foo.bar.baz:woops.service'
    +    with pytest.raises(ValueError):
    +        DaemonSubIdentity.from_service_name(service_name)
    +
    +    service_name = 'random-task@elsewise.service'
    +    with pytest.raises(ValueError):
    +        DaemonSubIdentity.from_service_name(service_name)
    +
    +
    +def test_daemon_id_systemd_names():
    +    from cephadmlib.daemon_identity import DaemonIdentity
    +
    +    di = DaemonIdentity(
    +        '244c9842-866b-11ee-80ad-3497f6318048', 'test', 'foo.bar'
    +    )
    +    assert (
    +        di.unit_name
    +        == 'ceph-244c9842-866b-11ee-80ad-3497f6318048@test.foo.bar'
    +    )
    +    assert (
    +        di.service_name
    +        == 'ceph-244c9842-866b-11ee-80ad-3497f6318048@test.foo.bar.service'
    +    )
    +    assert (
    +        di.init_service_name
    +        == 'ceph-244c9842-866b-11ee-80ad-3497f6318048-init@test.foo.bar.service'
    +    )
    +
    +
    +def test_daemon_sub_id_systemd_names():
    +    from cephadmlib.daemon_identity import DaemonSubIdentity
    +
    +    dsi = DaemonSubIdentity(
    +        '244c9842-866b-11ee-80ad-3497f6318048', 'test', 'foo.bar', 'quux',
    +    )
    +    assert (
    +        dsi.sidecar_service_name
    +        == 'ceph-244c9842-866b-11ee-80ad-3497f6318048-sidecar@test.foo.bar:quux.service'
    +    )
    +    with pytest.raises(ValueError):
    +        dsi.service_name
    +
    +
    +@pytest.mark.parametrize(
    +    "args,new_arg,expected",
    +    [
    +        (['--foo=77'], '--bar', ['--foo=77', '--bar']),
    +        (['--foo=77'], '--foo=12', ['--foo=12']),
    +        (
    +            ['--foo=77', '--quux=later', '--range=2-5'],
    +            '--quux=now',
    +            ['--foo=77', '--range=2-5', '--quux=now'],
    +        ),
    +        (
    +            ['--foo=77', '--quux', 'later', '--range=2-5'],
    +            '--quux=now',
    +            ['--foo=77', '--range=2-5', '--quux=now'],
    +        ),
    +        (
    +            ['--foo=77', '--quux', 'later', '--range=2-5'],
    +            '--jiffy',
    +            ['--foo=77', '--quux', 'later', '--range=2-5', '--jiffy'],
    +        ),
    +        (
    +            ['--foo=77', '--quux=buff', '--range=2-5'],
    +            '--quux',
    +            ['--foo=77', '--range=2-5', '--quux'],
    +        ),
    +    ],
    +)
    +def test_replace_container_args(args, new_arg, expected):
    +    from cephadmlib.container_types import _replace_container_arg
    +
    +    _args = list(args)  # preserve the input so test input is not mutated
    +    _replace_container_arg(_args, new_arg)
    +    assert _args == expected
    +
    +
    +
    +def test_enable_shared_namespaces():
    +    from cephadmlib.container_types import enable_shared_namespaces, Namespace
    +
    +    args = []
    +    enable_shared_namespaces(args, 'c001d00d', {Namespace.ipc})
    +    assert args == ['--ipc=container:c001d00d']
    +
    +    enable_shared_namespaces(
    +        args, 'c001d00d', [Namespace.uts, Namespace.network]
    +    )
    +    assert args == [
    +        '--ipc=container:c001d00d',
    +        '--uts=container:c001d00d',
    +        '--network=container:c001d00d',
    +    ]
    +
    +    enable_shared_namespaces(
    +        args, 'badd33d5', [Namespace.network]
    +    )
    +    assert args == [
    +        '--ipc=container:c001d00d',
    +        '--uts=container:c001d00d',
    +        '--network=container:badd33d5',
    +    ]
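
The namespace-sharing helper exercised above is easiest to read from its output; a minimal sketch (the container id is a made-up placeholder) of how a sidecar's container arguments end up pointing at its primary container:

    # Sketch only: build the --ipc/--uts/--network flags that attach a sidecar
    # to an existing container's namespaces.
    from cephadmlib.container_types import Namespace, enable_shared_namespaces

    args = []
    enable_shared_namespaces(args, 'c001d00d', [Namespace.ipc, Namespace.uts, Namespace.network])
    print(args)
    # ['--ipc=container:c001d00d', '--uts=container:c001d00d', '--network=container:c001d00d']
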
    diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini
    index 266520ff5727..d643b1ba74f9 100644
    --- a/src/cephadm/tox.ini
    +++ b/src/cephadm/tox.ini
    @@ -1,68 +1,64 @@
     [tox]
     envlist =
    -    py3
    -    mypy
    -    fix
         flake8
    +    mypy
    +    check-black
    +    py3
     skipsdist = true
    +# REMINDER: run `tox -e format-black` to apply black formatting
    +# with the exact same specs as `check-black` expects.
     
     [flake8]
     max-line-length = 100
     inline-quotes = '
     ignore =
    -    E501, \
    +    E501,
         W503,
     exclude =
    -    .tox, \
    -    .vagrant, \
    -    __pycache__, \
    -    *.pyc, \
    -    templates, \
    +    .tox,
    +    .vagrant,
    +    __pycache__,
    +    *.pyc,
    +    templates,
         .eggs
     statistics = True
     
    -[autopep8]
    -addopts =
    -    --max-line-length {[flake8]max-line-length} \
    -    --ignore "{[flake8]ignore}" \
    -    --exclude "{[flake8]exclude}" \
    -    --in-place \
    -    --recursive \
    -    --ignore-local-config
    -
     [testenv]
    +setenv =
    +    PYTHONPATH = $PYTHONPATH:..:{toxinidir}/../python-common
    +passenv =
    +    PYTHONPATH
     skip_install=true
     deps =
    +  -rzipapp-reqs.txt
       pyfakefs == 4.5.6 ; python_version < "3.7"
    -  pyfakefs >= 5, < 6 ; python_version >= "3.7"
    +  pyfakefs == 5.3.5 ; python_version >= "3.7"
       mock
       pytest
    +  pyyaml
     commands=pytest {posargs}
     
     [testenv:mypy]
    -basepython = python3
    +setenv =
    +    MYPYPATH = {toxinidir}/..:{toxinidir}/../python-common
    +passenv =
    +    MYPYPATH
     deps =
         mypy
    +    types-PyYAML
    +    -rzipapp-reqs.txt
         -c{toxinidir}/../mypy-constrains.txt
     commands = mypy --config-file ../mypy.ini {posargs:cephadm.py cephadmlib}
     
    -[testenv:fix]
    -basepython = python3
    -deps =
    -    autopep8
    -commands =
    -    python --version
    -    autopep8 {[autopep8]addopts} {posargs: cephadm.py}
    -
     [testenv:flake8]
    -basepython = python3
     allowlist_externals = bash
     deps =
    -    flake8 == 5.0.4
    +    flake8
         flake8-quotes
     commands =
         flake8 --config=tox.ini {posargs:cephadm.py cephadmlib}
    -    bash -c 'test $(git ls-files 'cephadm.py' 'cephadmlib/*.py' | sort -u | xargs grep "docker.io" | wc -l) == 11'
    +    bash -c 'test $(git ls-files 'cephadm.py' 'cephadmlib/*.py' | sort -u | xargs grep "docker.io" | wc -l) == 1'
    +    bash -c 'test $(git ls-files 'cephadm.py' 'cephadmlib/*.py' | sort -u | xargs grep "quay.io" | wc -l) == 7'
     # Downstream distributions may choose to alter this "docker.io" number,
     # to make sure no new references to docker.io are creeping in unnoticed.
     
    @@ -87,3 +83,12 @@ deps =
         black>=23,<24
     commands =
         black -q -l78 -t py36 --skip-string-normalization cephadmlib/
    +
    +# test_build env is intentionally left out of the envlist. It is here for developers
    +# to run locally as it has some unusual requirements: needs podman, etc
    +[testenv:test_build]
    +skip_install=true
    +deps =
    +  {[testenv]deps}
    +commands =
    +  pytest {posargs} tests/build
    diff --git a/src/cephadm/zipapp-reqs.txt b/src/cephadm/zipapp-reqs.txt
    new file mode 100644
    index 000000000000..cf36f87b2bc0
    --- /dev/null
    +++ b/src/cephadm/zipapp-reqs.txt
    @@ -0,0 +1,16 @@
    +# Requirements for the cephadm zipapp (aka the binary).
    +#
    +# IMPORTANT: The cephadm binary is expected to be portable across python
    +# versions and CPU architectures. Dependencies are copied into the zipapp
    +# by the build script and must not require compiled C (or C++, Rust, etc)
    +# modules. Modules that have an optional C accelerator but can fall back
    +# to pure python are OK. When you add a package to this list verify that
    +# build.py creates the zipapp with only python files.
    +#
     +# IMPORTANT: This file is only used for installing the requirements that
     +# cephadm needs for the tox/unit tests. The actual zipapp is built using
    +# the build.py script. The PY_REQUIREMENTS value in that script *must*
    +# be kept in sync with this list.
    +#
    +MarkupSafe >= 2.1.3, <2.2
    +Jinja2 >= 3.1.2, <3.2
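
The "only python files" requirement called out in the header is easy to spot-check by hand. A hypothetical helper (not part of this patch; the zipapp path argument and the allowance for dist-info metadata are assumptions) could look like:

    # Hypothetical check: list entries in a built cephadm zipapp that are not
    # plain python source, so a reviewer can confirm nothing compiled slipped in.
    import sys
    import zipfile

    def suspicious_entries(zipapp_path):
        # zipfile locates the central directory from the end of the file,
        # so the zipapp's shebang prefix does not get in the way.
        with zipfile.ZipFile(zipapp_path) as zf:
            return [
                name for name in zf.namelist()
                if not (name.endswith('.py') or name.endswith('/') or '.dist-info/' in name)
            ]

    if __name__ == '__main__':
        path = sys.argv[1] if len(sys.argv) > 1 else 'cephadm'
        print('\n'.join(suspicious_entries(path)) or 'pure python')
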
    diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt
    index 8897ada7b598..ddc77c66147a 100644
    --- a/src/client/CMakeLists.txt
    +++ b/src/client/CMakeLists.txt
    @@ -10,4 +10,6 @@ set(libclient_srcs
       posix_acl.cc
       Delegation.cc)
     add_library(client STATIC ${libclient_srcs})
    -target_link_libraries(client osdc)
    +target_link_libraries(client
    +  legacy-option-headers
    +  osdc)
    diff --git a/src/client/Client.cc b/src/client/Client.cc
    index 4e7e3961e8e1..c404057b929d 100644
    --- a/src/client/Client.cc
    +++ b/src/client/Client.cc
    @@ -14,6 +14,7 @@
     
     
     // unix-ey fs stuff
    +#include 
     #include 
     #include 
     #include 
    @@ -257,10 +258,10 @@ int Client::get_fd_inode(int fd, InodeRef *in) {
       return r;
     }
     
    -dir_result_t::dir_result_t(Inode *in, const UserPerm& perms)
    +dir_result_t::dir_result_t(Inode *in, const UserPerm& perms, int fd)
       : inode(in), offset(0), next_offset(2),
         release_count(0), ordered_count(0), cache_index(0), start_shared_gen(0),
    -    perms(perms)
    +    perms(perms), fd(fd)
       { }
     
     void Client::_reset_faked_inos()
    @@ -338,10 +339,13 @@ vinodeno_t Client::_map_faked_ino(ino_t ino)
       vinodeno_t vino;
       if (ino == 1)
         vino = root->vino();
    -  else if (faked_ino_map.count(ino))
    -    vino = faked_ino_map[ino];
    -  else
    -    vino = vinodeno_t(0, CEPH_NOSNAP);
    +  else {
    +    auto it = faked_ino_map.find(ino);
     +    if (it != faked_ino_map.end())
     +      vino = it->second;
     +    else
     +      vino = vinodeno_t(0, CEPH_NOSNAP);
    +  }
       ldout(cct, 10) << __func__ << " " << ino << " -> " << vino << dendl;
       return vino;
     }
    @@ -392,6 +396,12 @@ Client::Client(Messenger *m, MonClient *mc, Objecter *objecter_)
       if (cct->_conf->client_acl_type == "posix_acl")
         acl_type = POSIX_ACL;
     
    +  if (auto str = cct->_conf->client_debug_inject_features; !str.empty()) {
    +    myfeatures = feature_bitset_t(str);
    +  } else {
    +    myfeatures = feature_bitset_t(CEPHFS_FEATURES_CLIENT_SUPPORTED);
    +  }
    +
       lru.lru_set_midpoint(cct->_conf->client_cache_mid);
     
       // file handles
    @@ -1008,12 +1018,13 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from,
     {
       Inode *in;
       bool was_new = false;
    -  if (inode_map.count(st->vino)) {
    -    in = inode_map[st->vino];
    +  auto [it, b] = inode_map.try_emplace(st->vino);
    +  if (!b) {
    +    in = it->second;
         ldout(cct, 12) << __func__ << " had " << *in << " caps " << ccap_string(st->cap.caps) << dendl;
       } else {
         in = new Inode(this, st->vino, &st->layout);
    -    inode_map[st->vino] = in;
    +    it->second = in;
     
         if (use_faked_inos())
           _assign_faked_ino(in);
    @@ -1165,8 +1176,9 @@ Inode * Client::add_update_inode(InodeStat *st, utime_t from,
     
       if (need_snapdir_attr_refresh && in->is_dir() && in->snapid == CEPH_NOSNAP) {
         vinodeno_t vino(in->ino, CEPH_SNAPDIR);
    -    if (inode_map.count(vino)) {
    -      refresh_snapdir_attrs(inode_map[vino], in);
    +    auto it = inode_map.find(vino);
    +    if (it != inode_map.end()) {
    +      refresh_snapdir_attrs(it->second, in);
         }
       }
     
    @@ -1182,8 +1194,9 @@ Dentry *Client::insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dl
     				    Dentry *old_dentry)
     {
       Dentry *dn = NULL;
    -  if (dir->dentries.count(dname))
    -    dn = dir->dentries[dname];
    +  auto it = dir->dentries.find(dname);
    +  if (it != dir->dentries.end())
    +    dn = it->second;
     
       ldout(cct, 12) << __func__ << " '" << dname << "' vino " << in->vino()
     		 << " in dir " << dir->parent_inode->vino() << " dn " << dn
    @@ -1425,8 +1438,9 @@ void Client::insert_readdir_results(MetaRequest *request, MetaSession *session,
             effective_dir = dir_other;
           }
           Dentry *dn;
    -      if (effective_dir->dentries.count(dname)) {
    -	Dentry *olddn = effective_dir->dentries[dname];
    +      auto it = effective_dir->dentries.find(dname);
    +      if (it != effective_dir->dentries.end()) {
    +	Dentry *olddn = it->second;
     	if (olddn->inode != in) {
     	  // replace incorrect dentry
     	  unlink(olddn, true, true);  // keep dir, dentry
    @@ -1605,11 +1619,14 @@ Inode* Client::insert_trace(MetaRequest *request, MetaSession *session)
                               (op == CEPH_MDS_OP_RENAME) ? request->old_dentry() : NULL);
         } else {
           Dentry *dn = NULL;
    -      if (diri->dir && diri->dir->dentries.count(dname)) {
    -	dn = diri->dir->dentries[dname];
    -	if (dn->inode) {
    -	  clear_dir_complete_and_ordered(diri, false);
    -	  unlink(dn, true, true);  // keep dir, dentry
    +      if (diri->dir) {
    +        auto it = diri->dir->dentries.find(dname);
    +        if (it != diri->dir->dentries.end()) {
    +	  dn = it->second;
    +	  if (dn->inode) {
    +	    clear_dir_complete_and_ordered(diri, false);
    +	    unlink(dn, true, true);  // keep dir, dentry
    +	  }
     	}
           }
           if (dlease.duration_ms > 0) {
    @@ -1626,8 +1643,9 @@ Inode* Client::insert_trace(MetaRequest *request, MetaSession *session)
         // fake it for snap lookup
         vinodeno_t vino = ist.vino;
         vino.snapid = CEPH_SNAPDIR;
    -    ceph_assert(inode_map.count(vino));
    -    diri = inode_map[vino];
    +    auto it = inode_map.find(vino);
    +    ceph_assert(it != inode_map.end());
    +    diri = it->second;
         
         string dname = request->path.last_dentry();
         
    @@ -1638,10 +1656,13 @@ Inode* Client::insert_trace(MetaRequest *request, MetaSession *session)
           Dir *dir = diri->open_dir();
           insert_dentry_inode(dir, dname, &dlease, in, request->sent_stamp, session);
         } else {
    -      if (diri->dir && diri->dir->dentries.count(dname)) {
    -	Dentry *dn = diri->dir->dentries[dname];
    -	if (dn->inode)
    -	  unlink(dn, true, true);  // keep dir, dentry
    +      if (diri->dir) {
    +        auto it = diri->dir->dentries.find(dname);
    +        if (it != diri->dir->dentries.end()) {
    +	  Dentry *dn = it->second;
    +	  if (dn->inode)
    +	    unlink(dn, true, true);  // keep dir, dentry
    +	}
           }
         }
       }
    @@ -1692,7 +1713,6 @@ mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri)
     
       if (req->resend_mds >= 0) {
         mds = req->resend_mds;
    -    req->resend_mds = -1;
         ldout(cct, 10) << __func__ << " resend_mds specified as mds." << mds << dendl;
         goto out;
       }
    @@ -1758,13 +1778,16 @@ mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri)
               auto r = ceph::util::generate_random_number(0, repmap.size()-1);
               mds = repmap.at(r);
             }
    -      } else if (in->fragmap.count(fg)) {
    -	mds = in->fragmap[fg];
    -	if (phash_diri)
    -	  *phash_diri = in;
    -      } else if (in->auth_cap) {
    -	req->send_to_auth = true;
    -	mds = in->auth_cap->session->mds_num;
    +      } else {
    +        auto it = in->fragmap.find(fg);
    +        if (it != in->fragmap.end()) {
    +	  mds = it->second;
    +	  if (phash_diri)
    +	    *phash_diri = in;
    +        } else if (in->auth_cap) {
    +	  req->send_to_auth = true;
    +	  mds = in->auth_cap->session->mds_num;
    +	}
           }
           if (mds >= 0) {
     	ldout(cct, 10) << __func__ << " from dirfragtree hash" << dendl;
    @@ -2048,6 +2071,7 @@ int Client::make_request(MetaRequest *request,
     
         // wait for signal
         ldout(cct, 20) << "awaiting reply|forward|kick on " << &caller_cond << dendl;
    +    request->resend_mds = -1; /* reset for retries */
         request->kick = false;
         std::unique_lock l{client_lock, std::adopt_lock};
         caller_cond.wait(l, [request] {
    @@ -2354,7 +2378,7 @@ MetaSessionRef Client::_open_mds_session(mds_rank_t mds)
     
       auto m = make_message(CEPH_SESSION_REQUEST_OPEN);
       m->metadata = metadata;
    -  m->supported_features = feature_bitset_t(CEPHFS_FEATURES_CLIENT_SUPPORTED);
    +  m->supported_features = myfeatures;
       m->metric_spec = feature_bitset_t(CEPHFS_METRIC_FEATURES_ALL);
       session->con->send_message2(std::move(m));
       return session;
    @@ -2384,6 +2408,12 @@ void Client::_closed_mds_session(MetaSession *s, int err, bool rejected)
         mds_sessions.erase(s->mds_num);
     }
     
    +static void reinit_mds_features(MetaSession *session,
    +				const MConstRef& m) {
    +  session->mds_features = std::move(m->supported_features);
    +  session->mds_metric_flags = std::move(m->metric_spec.metric_flags);
    +}
    +
     void Client::handle_client_session(const MConstRef& m)
     {
       mds_rank_t from = mds_rank_t(m->get_source().num());
    @@ -2402,6 +2432,13 @@ void Client::handle_client_session(const MConstRef& m)
           if (session->state == MetaSession::STATE_OPEN) {
             ldout(cct, 10) << "mds." << from << " already opened, ignore it"
                            << dendl;
     +	// The MDS could send a client_session(open) message even when
     +	// the session state is STATE_OPEN. Normally, it's fine to
     +	// ignore this message, but if the MDS sent it just after
     +	// being upgraded, the MDS feature bits could differ from
     +	// the ones before the upgrade - so refresh the feature bits
     +	// the client holds.
    +	reinit_mds_features(session.get(), m);
             return;
           }
           /*
    @@ -2411,8 +2448,7 @@ void Client::handle_client_session(const MConstRef& m)
           if (!session->seq && m->get_seq())
             session->seq = m->get_seq();
     
    -      session->mds_features = std::move(m->supported_features);
    -      session->mds_metric_flags = std::move(m->metric_spec.metric_flags);
    +      reinit_mds_features(session.get(), m);
           cap_auths = std::move(m->cap_auths);
     
           renew_caps(session.get());
     @@ -2650,12 +2686,13 @@ void Client::handle_client_request_forward(const MConstRef& fwd)
        ceph_tid_t tid = fwd->get_tid();
     
    -  if (mds_requests.count(tid) == 0) {
    +  auto it = mds_requests.find(tid);
     +  if (it == mds_requests.end()) {
         ldout(cct, 10) << __func__ << " no pending request on tid " << tid << dendl;
         return;
       }
     
    -  MetaRequest *request = mds_requests[tid];
    +  MetaRequest *request = it->second;
       ceph_assert(request);
     
       /*
    @@ -2718,12 +2755,13 @@ void Client::handle_client_reply(const MConstRef& reply)
       ceph_tid_t tid = reply->get_tid();
       bool is_safe = reply->is_safe();
     
    -  if (mds_requests.count(tid) == 0) {
    +  auto it = mds_requests.find(tid);
    +  if (it == mds_requests.end()) {
         lderr(cct) << __func__ << " no pending request on tid " << tid
     	       << " safe is:" << is_safe << dendl;
         return;
       }
    -  MetaRequest *request = mds_requests.at(tid);
    +  MetaRequest *request = it->second;
     
       ldout(cct, 20) << __func__ << " got a reply. Safe:" << is_safe
     		 << " tid " << tid << dendl;
    @@ -3016,15 +3054,10 @@ void Client::handle_fs_map_user(const MConstRef& m)
     // Cancel all the commands for missing or laggy GIDs
     void Client::cancel_commands(const MDSMap& newmap)
     {
    -  std::vector cancel_ops;
    -
    -  std::scoped_lock cmd_lock(command_lock);
    -  auto &commands = command_table.get_commands();
    -  for (const auto &[tid, op] : commands) {
    +  cancel_commands_if([=, this](MDSCommandOp const& op) {
         const mds_gid_t op_mds_gid = op.mds_gid;
         if (newmap.is_dne_gid(op_mds_gid) || newmap.is_laggy_gid(op_mds_gid)) {
    -      ldout(cct, 1) << __func__ << ": cancelling command op " << tid << dendl;
    -      cancel_ops.push_back(tid);
    +      ldout(cct, 1) << "cancel_commands: cancelling command op " << op.tid << dendl;
           if (op.outs) {
             std::ostringstream ss;
             ss << "MDS " << op_mds_gid << " went away";
    @@ -3036,13 +3069,10 @@ void Client::cancel_commands(const MDSMap& newmap)
            * has its own lock.
            */
           op.con->mark_down();
    -      if (op.on_finish)
    -        op.on_finish->complete(-CEPHFS_ETIMEDOUT);
    +      return -CEPHFS_ETIMEDOUT;
         }
    -  }
    -
    -  for (const auto &tid : cancel_ops)
    -    command_table.erase(tid);
    +    return 0;
    +  });
     }
     
     void Client::handle_mds_map(const MConstRef& m)
    @@ -3199,10 +3229,10 @@ void Client::send_reconnect(MetaSession *session)
     		 snap_follows,
     		 flockbl);
     
    -      if (did_snaprealm.count(in->snaprealm->ino) == 0) {
    +      auto [it, inserted] = did_snaprealm.emplace(in->snaprealm->ino);
    +      if (inserted) {
     	ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl;
     	m->add_snaprealm(in->snaprealm->ino, in->snaprealm->seq, in->snaprealm->parent);
    -	did_snaprealm.insert(in->snaprealm->ino);
           }
         }
       }
    @@ -3363,18 +3393,24 @@ void Client::handle_lease(const MConstRef& m)
     
       Inode *in;
       vinodeno_t vino(m->get_ino(), CEPH_NOSNAP);
    -  if (inode_map.count(vino) == 0) {
    +  auto it = inode_map.find(vino);
    +  if (it == inode_map.end()) {
         ldout(cct, 10) << " don't have vino " << vino << dendl;
         goto revoke;
       }
    -  in = inode_map[vino];
    +  in = it->second;
     
       if (m->get_mask() & CEPH_LEASE_VALID) {
    -    if (!in->dir || in->dir->dentries.count(m->dname) == 0) {
    -      ldout(cct, 10) << " don't have dir|dentry " << m->get_ino() << "/" << m->dname <dir) {
    +      ldout(cct, 10) << " don't have dir " << m->get_ino() << "/" << m->dname <dir->dentries.find(m->dname);
    +    if (it == in->dir->dentries.end()) {
    +      ldout(cct, 10) << " don't have dentry " << m->get_ino() << "/" << m->dname <dir->dentries[m->dname];
    +    Dentry *dn = it->second;
         ldout(cct, 10) << " revoked DN lease on " << dn << dendl;
         dn->lease_mds = -1;
       }
    @@ -3610,6 +3646,9 @@ void Client::put_cap_ref(Inode *in, int cap)
         if (last & CEPH_CAP_FILE_CACHE) {
           ldout(cct, 5) << __func__ << " dropped last FILE_CACHE ref on " << *in << dendl;
           ++put_nref;
    +
    +      ldout(cct, 10) << __func__ << " calling signal_caps_inode" << dendl;
    +      signal_caps_inode(in);
         }
         if (drop)
           check_caps(in, 0);
    @@ -3804,6 +3843,7 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap,
     				   want,
     				   flush,
     				   cap->mseq,
    +                                   cap->issue_seq,
                                        cap_epoch_barrier);
       /*
        * Since the setattr will check the cephx mds auth access before
    @@ -3817,7 +3857,6 @@ void Client::send_cap(Inode *in, MetaSession *session, Cap *cap,
       m->caller_uid = -1;
       m->caller_gid = -1;
     
    -  m->head.issue_seq = cap->issue_seq;
       m->set_tid(flush_tid);
     
       m->head.uid = in->uid;
    @@ -4064,7 +4103,7 @@ void Client::check_caps(Inode *in, unsigned flags)
     }
     
     
    -void Client::queue_cap_snap(Inode *in, SnapContext& old_snapc)
    +void Client::queue_cap_snap(Inode *in, const SnapContext& old_snapc)
     {
       int used = get_caps_used(in);
       int dirty = in->caps_dirty();
    @@ -4230,7 +4269,7 @@ void Client::signal_cond_list(list& ls)
       }
     }
     
    -void Client::wait_on_context_list(list& ls)
    +void Client::wait_on_context_list(std::vector& ls)
     {
       ceph::condition_variable cond;
       bool done = false;
    @@ -4241,30 +4280,14 @@ void Client::wait_on_context_list(list& ls)
       l.release();
     }
     
    -void Client::signal_context_list(list& ls)
    -{
    -  while (!ls.empty()) {
    -    ls.front()->complete(0);
    -    ls.pop_front();
    -  }
    -}
    -
     void Client::signal_caps_inode(Inode *in)
     {
       // Process the waitfor_caps list
    -  while (!in->waitfor_caps.empty()) {
    -    in->waitfor_caps.front()->complete(0);
    -    in->waitfor_caps.pop_front();
    -  }
    +  signal_context_list(in->waitfor_caps);
     
       // New items may have been added to the pending list, move them onto the
       // waitfor_caps list
    -  while (!in->waitfor_caps_pending.empty()) {
    -    Context *ctx = in->waitfor_caps_pending.front();
    -
    -    in->waitfor_caps_pending.pop_front();
    -    in->waitfor_caps.push_back(ctx);
    -  }
    +  std::swap(in->waitfor_caps, in->waitfor_caps_pending);
     }
     
     void Client::wake_up_session_caps(MetaSession *s, bool reconnect)
    @@ -4796,6 +4819,9 @@ void Client::trim_caps(MetaSession *s, uint64_t max)
         // is deleted inside remove_cap
         ++p;
     
    +    if (in->dirty_caps || in->cap_snaps.size())
    +      cap_delay_requeue(in.get());
    +
         if (in->caps.size() > 1 && cap != in->auth_cap) {
           int mine = cap->issued | cap->implemented;
           int oissued = in->auth_cap ? in->auth_cap->issued : 0;
    @@ -4833,7 +4859,8 @@ void Client::trim_caps(MetaSession *s, uint64_t max)
           }
           if (all && in->ino != CEPH_INO_ROOT) {
             ldout(cct, 20) << __func__ << " counting as trimmed: " << *in << dendl;
    -	trimmed++;
    +	if (!in->dirty_caps && !in->cap_snaps.size())
    +	  trimmed++;
           }
         }
       }
    @@ -5081,11 +5108,12 @@ SnapRealm *Client::get_snap_realm(inodeno_t r)
     
     SnapRealm *Client::get_snap_realm_maybe(inodeno_t r)
     {
    -  if (snap_realms.count(r) == 0) {
    +  auto it = snap_realms.find(r);
     +  if (it == snap_realms.end()) {
         ldout(cct, 20) << __func__ << " " << r << " fail" << dendl;
         return NULL;
       }
    -  SnapRealm *realm = snap_realms[r];
    +  SnapRealm *realm = it->second;
       ldout(cct, 20) << __func__ << " " << r << " " << realm << " " << realm->nref << " -> " << (realm->nref + 1) << dendl;
       realm->nref++;
       return realm;
    @@ -5184,10 +5212,11 @@ void Client::update_snap_trace(MetaSession *session, const bufferlist& bl, SnapR
     	       p != realm->pchildren.end();
     	       ++p)
     	    q.push_back(*p);
    -
    -	  if (dirty_realms.count(realm) == 0) {
    +          auto it =
    +            dirty_realms.lower_bound(realm);
     +	  if (it == dirty_realms.end() || it->first != realm) {
     	    realm->nref++;
    -	    dirty_realms[realm] = realm->get_snap_context();
    +	    dirty_realms.emplace_hint(it, realm, realm->get_snap_context());
     	  }
     	}
           }
    @@ -5270,8 +5299,9 @@ void Client::handle_snap(const MConstRef& m)
         ldout(cct, 10) << " splitting off " << *realm << dendl;
         for (auto& ino : m->split_inos) {
           vinodeno_t vino(ino, CEPH_NOSNAP);
    -      if (inode_map.count(vino)) {
    -	Inode *in = inode_map[vino];
    +      auto it = inode_map.find(vino);
    +      if (it != inode_map.end()) {
    +	Inode *in = it->second;
     	if (!in->snaprealm || in->snaprealm == realm)
     	  continue;
     	if (in->snaprealm->created > info.created()) {
    @@ -5330,10 +5360,9 @@ void Client::handle_quota(const MConstRef& m)
       ldout(cct, 10) << __func__ << " " << *m << " from mds." << mds << dendl;
     
       vinodeno_t vino(m->ino, CEPH_NOSNAP);
    -  if (inode_map.count(vino)) {
    -    Inode *in = NULL;
    -    in = inode_map[vino];
    -
    +  auto it = inode_map.find(vino);
    +  if (it != inode_map.end()) {
    +    Inode *in = it->second;
         if (in) {
           in->quota = m->quota;
           in->rstat = m->rstat;
    @@ -5492,10 +5521,10 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef<
             if (it != in->caps.end()) {
     	  Cap &tcap = it->second;
     	  if (tcap.cap_id == m->peer.cap_id &&
    -	      ceph_seq_cmp(tcap.seq, m->peer.seq) < 0) {
    +	      ceph_seq_cmp(tcap.seq, m->peer.issue_seq) < 0) {
     	    tcap.cap_id = m->peer.cap_id;
    -	    tcap.seq = m->peer.seq - 1;
    -	    tcap.issue_seq = tcap.seq;
    +	    tcap.seq = m->peer.issue_seq - 1;
     +	    tcap.issue_seq = tcap.seq;
     	    tcap.issued |= cap.issued;
     	    tcap.implemented |= cap.issued;
     	    if (&cap == in->auth_cap)
    @@ -5505,7 +5534,7 @@ void Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef<
     	  }
             } else {
     	  add_update_cap(in, tsession.get(), m->peer.cap_id, cap.issued, 0,
    -		         m->peer.seq - 1, m->peer.mseq, (uint64_t)-1,
    +		         m->peer.issue_seq - 1, m->peer.mseq, (uint64_t)-1,
     		         &cap == in->auth_cap ? CEPH_CAP_FLAG_AUTH : 0,
     		         cap.latest_perms);
             }
    @@ -5919,6 +5948,11 @@ int Client::mds_check_access(std::string& path, const UserPerm& perms, int mask)
         }
       }
     
    +  // drop any leading /
    +  while (path.length() && path[0] == '/') {
    +    path = path.substr(1);
    +  }
    +
       for (auto& s: cap_auths) {
         ldout(cct, 20) << __func__ << " auth match path " << s.match.path << " r: " << s.readable
                        << " w: " << s.writeable << dendl;
    @@ -6094,6 +6128,10 @@ int Client::may_open(Inode *in, int flags, const UserPerm& perms)
       int r = 0;
       switch (in->mode & S_IFMT) {
         case S_IFLNK:
    +#if defined(__linux__) && defined(O_PATH)
    +      if (flags & O_PATH)
    +        break;
    +#endif
           r = -CEPHFS_ELOOP;
           goto out;
         case S_IFDIR:
    @@ -6257,6 +6295,11 @@ int Client::resolve_mds(
       if (role_r == 0) {
         // We got a role, resolve it to a GID
         const auto& mdsmap = fsmap->get_filesystem(role.fscid).get_mds_map();
    +    if (mdsmap.is_down(role.rank)) {
    +      lderr(cct) << __func__ << ": targets rank: " << role.rank
    +                 << " is down" << dendl;
    +      return -CEPHFS_EAGAIN;
    +    }
         auto& info = mdsmap.get_info(role.rank);
         ldout(cct, 10) << __func__ << ": resolved " << mds_spec << " to role '"
           << role << "' aka " << info.human_name() << dendl;
    @@ -6395,7 +6438,8 @@ int Client::mds_command(
         const bufferlist& inbl,
         bufferlist *outbl,
         string *outs,
    -    Context *onfinish)
    +    Context *onfinish,
    +    bool one_shot)
     {
       RWRef_t iref_reader(initialize_state, CLIENT_INITIALIZED);
       if (!iref_reader.is_state_satisfied())
    @@ -6454,6 +6498,9 @@ int Client::mds_command(
     
         // Open a connection to the target MDS
         ConnectionRef conn = messenger->connect_to_mds(info.get_addrs());
    +    if (one_shot) {
    +      conn->send_keepalive();
    +    }
     
         cl.unlock();
         {
    @@ -6468,6 +6515,7 @@ int Client::mds_command(
           op.inbl = inbl;
           op.mds_gid = target_gid;
           op.con = conn;
    +      op.one_shot = one_shot;
     
           ldout(cct, 4) << __func__ << ": new command op to " << target_gid
             << " tid=" << op.tid << " multi_id=" << op.multi_target_id << " "<< cmd << dendl;
    @@ -6965,11 +7013,13 @@ void Client::_unmount(bool abort)
     
     void Client::unmount()
     {
    +  ldout(cct, 2) << __func__ << dendl;
       _unmount(false);
     }
     
     void Client::abort_conn()
     {
    +  ldout(cct, 2) << __func__ << dendl;
       _unmount(true);
     }
     
    @@ -7238,7 +7288,9 @@ void Client::renew_caps(MetaSession *session)
       ldout(cct, 10) << "renew_caps mds." << session->mds_num << dendl;
       session->last_cap_renew_request = ceph_clock_now();
       uint64_t seq = ++session->cap_renew_seq;
    -  session->con->send_message2(make_message(CEPH_SESSION_REQUEST_RENEWCAPS, seq));
    +  auto m = make_message(CEPH_SESSION_REQUEST_RENEWCAPS, seq);
    +  m->oldest_client_tid = oldest_tid;
    +  session->con->send_message2(std::move(m));
     }
     
     
    @@ -7272,16 +7324,17 @@ bool Client::_dentry_valid(const Dentry *dn)
     
       // is dn lease valid?
       utime_t now = ceph_clock_now();
    -  if (dn->lease_mds >= 0 && dn->lease_ttl > now &&
    -      mds_sessions.count(dn->lease_mds)) {
    -    auto s = mds_sessions.at(dn->lease_mds);
    -    if (s->cap_ttl > now && s->cap_gen == dn->lease_gen) {
    -      dlease_hit();
    -      return true;
    -    }
    +  if (dn->lease_mds >= 0 && dn->lease_ttl > now) {
    +    if (auto it = mds_sessions.find(dn->lease_mds); it != mds_sessions.end()) {
    +      auto s = it->second;
    +      if (s->cap_ttl > now && s->cap_gen == dn->lease_gen) {
    +        dlease_hit();
    +        return true;
    +      }
     
    -    ldout(cct, 20) << " bad lease, cap_ttl " << s->cap_ttl << ", cap_gen " << s->cap_gen
    -                   << " vs lease_gen " << dn->lease_gen << dendl;
    +      ldout(cct, 20) << " bad lease, cap_ttl " << s->cap_ttl << ", cap_gen " << s->cap_gen
    +                     << " vs lease_gen " << dn->lease_gen << dendl;
    +    }
       }
     
       dlease_miss();
    @@ -7341,9 +7394,13 @@ int Client::_lookup(Inode *dir, const string& dname, int mask, InodeRef *target,
       }
     
     relookup:
    -  if (dir->dir &&
    -      dir->dir->dentries.count(dname)) {
    -    dn = dir->dir->dentries[dname];
    +
    +  if (dir->dir) {
    +    auto it = dir->dir->dentries.find(dname);
    +    dn = it != dir->dir->dentries.end() ? it->second : nullptr;
    +  }
    +
    +  if (dn) {
     
         ldout(cct, 20) << __func__ << " have " << *dn << " from mds." << dn->lease_mds
             << " ttl " << dn->lease_ttl << " seq " << dn->lease_seq << dendl;
    @@ -7428,8 +7485,9 @@ Dentry *Client::get_or_create(Inode *dir, const char* name)
       // lookup
       ldout(cct, 20) << __func__ << " " << *dir << " name " << name << dendl;
       dir->open_dir();
    -  if (dir->dir->dentries.count(name))
    -    return dir->dir->dentries[name];
    +  auto it = dir->dir->dentries.find(name);
    +  if (it != dir->dir->dentries.end())
    +    return it->second;
       else // otherwise link up a new one
         return link(dir->dir, name, NULL, NULL);
     }
    @@ -7902,6 +7960,12 @@ int Client::readlinkat(int dirfd, const char *relpath, char *buf, loff_t size, c
         return r;
       }
     
    +  if (!strcmp(relpath, "")) {
    +    if (!dirinode.get()->is_symlink())
    +      return -CEPHFS_ENOENT;
    +    return _readlink(dirinode.get(), buf, size);
    +  }
    +
       InodeRef in;
       filepath path(relpath);
       r = path_walk(path, &in, perms, false, 0, dirinode);
    @@ -8000,6 +8064,25 @@ int Client::_getvxattr(
       return res;
     }
     
    +bool Client::make_absolute_path_string(Inode *in, std::string& path)
    +{
    +  auto it = metadata.find("root");
    +  if (it == metadata.end() || !in)
    +    return false;
    +
    +  path = it->second.data();
    +  if (!in->make_path_string(path)) {
    +    path.clear();
    +    return false;
    +  }
    +
    +  // Make sure this function returns path with single leading '/'
    +  if (path.length() && path[0] == '/' && path[1] == '/')
    +    path = path.substr(1);
    +
    +  return true;
    +}
    +
     int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask,
     			const UserPerm& perms, InodeRef *inp,
     			std::vector* aux)
    @@ -8042,11 +8125,8 @@ int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask,
       int res;
       {
         std::string path;
    -    res = in->make_path_string(path);
    -    if (res) {
    +    if (make_absolute_path_string(in, path)) {
           ldout(cct, 20) << " absolute path: " << path << dendl;
    -      if (path.length())
    -        path = path.substr(1);    // drop leading /
           res = mds_check_access(path, perms, MAY_WRITE);
           if (res) {
             goto out;
    @@ -8266,8 +8346,9 @@ int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask,
         in->change_attr++;
         if (in->is_dir() && in->snapid == CEPH_NOSNAP) {
           vinodeno_t vino(in->ino, CEPH_SNAPDIR);
    -      if (inode_map.count(vino)) {
    -        refresh_snapdir_attrs(inode_map[vino], in);
    +      auto it = inode_map.find(vino);
    +      if (it != inode_map.end()) {
    +        refresh_snapdir_attrs(it->second, in);
           }
         }
         return 0;
    @@ -9091,7 +9172,9 @@ int Client::fdopendir(int dirfd, dir_result_t **dirpp, const UserPerm &perms) {
           return r;
         }
       }
    -  r = _opendir(dirinode.get(), dirpp, perms);
    +  // Posix says that closedir will also close the file descriptor passed to fdopendir, so we associate
    +  // dirfd to the new dir_result_t so that it can be closed later.
    +  r = _opendir(dirinode.get(), dirpp, perms, dirfd);
       /* if ENOTDIR, dirpp will be an uninitialized point and it's very dangerous to access its value */
       if (r != -CEPHFS_ENOTDIR) {
           tout(cct) << (uintptr_t)*dirpp << std::endl;
    @@ -9099,11 +9182,11 @@ int Client::fdopendir(int dirfd, dir_result_t **dirpp, const UserPerm &perms) {
       return r;
     }
     
    -int Client::_opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms)
    +int Client::_opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms, int fd)
     {
       if (!in->is_dir())
         return -CEPHFS_ENOTDIR;
    -  *dirpp = new dir_result_t(in, perms);
    +  *dirpp = new dir_result_t(in, perms, fd);
       opened_dirs.insert(*dirpp);
       ldout(cct, 8) << __func__ << "(" << in->ino << ") = " << 0 << " (" << *dirpp << ")" << dendl;
       return 0;
    @@ -9131,6 +9214,12 @@ void Client::_closedir(dir_result_t *dirp)
       }
       _readdir_drop_dirp_buffer(dirp);
       opened_dirs.erase(dirp);
    +
    +  /* Close the associated fd if this dir_result_t comes from an fdopendir request. */
    +  if (dirp->fd >= 0) {
    +    _close(dirp->fd);
    +  }
    +
       delete dirp;
     }
     
    @@ -9200,14 +9289,19 @@ void Client::seekdir(dir_result_t *dirp, loff_t offset)
     //};
     void Client::fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off)
     {
    -  strncpy(de->d_name, name, 255);
    -  de->d_name[255] = '\0';
    +  size_t len = strlen(name);
    +  len = std::min(len, (size_t)255);
    +  memcpy(de->d_name, name, len);
    +  de->d_name[len] = '\0';
     #if !defined(__CYGWIN__) && !(defined(_WIN32))
       de->d_ino = ino;
     #if !defined(__APPLE__) && !defined(__FreeBSD__)
       de->d_off = next_off;
     #endif
    -  de->d_reclen = 1;
    +  // Calculate the real used size of the record
    +  len = (uintptr_t)&de->d_name[len] - (uintptr_t)de + 1;
    +  // The record size must be a multiple of the alignment of 'struct dirent'
    +  de->d_reclen = (len + alignof(struct dirent) - 1) & ~(alignof(struct dirent) - 1);
       de->d_type = IFTODT(type);
       ldout(cct, 10) << __func__ << " '" << de->d_name << "' -> " << inodeno_t(de->d_ino)
     	   << " type " << (int)de->d_type << " w/ next_off " << hex << next_off << dec << dendl;
    @@ -9356,6 +9450,12 @@ int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
         int r = _getattr(dn->inode, mask, dirp->perms);
         if (r < 0)
           return r;
    +
    +    /* fix https://tracker.ceph.com/issues/56288 */
    +    if (dirp->inode->dir == NULL) {
    +      ldout(cct, 0) << " dir is closed, so we should return" << dendl;
    +      return -CEPHFS_EAGAIN;
    +    }
         
         // the content of readdir_cache may change after _getattr(), so pd may be invalid iterator    
         pd = dir->readdir_cache.begin() + idx;
    @@ -9996,8 +10096,8 @@ int Client::create_and_open(int dirfd, const char *relpath, int flags,
         // allocate a integer file descriptor
         ceph_assert(fh);
         r = get_fd();
    -    ceph_assert(fd_map.count(r) == 0);
    -    fd_map[r] = fh;
    +    auto [it, b] = fd_map.try_emplace(r, fh);
    +    ceph_assert(b);
       }
       
      out:
    @@ -10272,11 +10372,8 @@ int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp,
         }
     
         std::string path;
    -    int result = in->make_path_string(path);
    -    if (result) {
    +    if (make_absolute_path_string(in, path)) {
           ldout(cct, 20) << __func__ << " absolute path: " << path << dendl;
    -      if (path.length())
    -        path = path.substr(1);    // drop leading /
           result = mds_check_access(path, perms, mask);
           if (result) {
             return result;
    @@ -10625,8 +10722,6 @@ int Client::read(int fd, char *buf, loff_t size, loff_t offset)
     
     int Client::preadv(int fd, const struct iovec *iov, int iovcnt, loff_t offset)
     {
    -  if (iovcnt < 0)
    -    return -CEPHFS_EINVAL;
       return _preadv_pwritev(fd, iov, iovcnt, offset, false);
     }
     
    @@ -10716,7 +10811,6 @@ void Client::C_Read_Sync_NonBlocking::finish(int r)
             goto success;
         }
     
    -    clnt->put_cap_ref(in, CEPH_CAP_FILE_RD);
         // reverify size
         {
           r = clnt->_getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
    @@ -10728,14 +10822,6 @@ void Client::C_Read_Sync_NonBlocking::finish(int r)
         if ((uint64_t)pos >= in->size)
           goto success;
     
    -    {
    -      int have_caps2 = 0;
    -      r = clnt->get_caps(f, CEPH_CAP_FILE_RD, have_caps, &have_caps2, -1);
    -      if (r < 0) {
    -        goto error;
    -      }
    -    }
    -
         wanted = left;
         retry();
         clnt->client_lock.unlock();
    @@ -10889,6 +10975,20 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl,
         // branch below but in a non-blocking fashion. The code in _read_sync
         // is duplicated and modified and exists in
         // C_Read_Sync_NonBlocking::finish().
    +
    +    // trim read based on file size?
    +    if ((offset >= in->size) || (size == 0)) {
    +      // read is requested at the EOF or the read len is zero, therefore just
    +      // release managed pointers and complete the C_Read_Finisher immediately with 0 bytes
    +
    +      Context *iof = iofinish.release();
    +      crf.release();
    +      iof->complete(0);
    +
    +      // Signal async completion
    +      return 0;
    +    }
    +
         C_Read_Sync_NonBlocking *crsa =
           new C_Read_Sync_NonBlocking(this, iofinish.release(), f, in, f->pos,
                                       offset, size, bl, filer.get(), have);
    @@ -10999,15 +11099,11 @@ void Client::do_readahead(Fh *f, Inode *in, uint64_t off, uint64_t len)
     
     void Client::C_Read_Async_Finisher::finish(int r)
     {
    -  clnt->client_lock.lock();
    -
       // Do read ahead as long as we aren't completing with 0 bytes
       if (r != 0)
         clnt->do_readahead(f, in, off, len);
     
       onfinish->complete(r);
    -
    -  clnt->client_lock.unlock();
     }
     
     int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl,
    @@ -11022,6 +11118,9 @@ int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl,
     
       ldout(cct, 10) << __func__ << " " << *in << " " << off << "~" << len << dendl;
     
    +  // get Fc cap ref before commencing read
    +  get_cap_ref(in, CEPH_CAP_FILE_CACHE);
    +
       if (onfinish != nullptr) {
         io_finish.reset(new C_Read_Async_Finisher(this, onfinish, f, in,
                                                   f->pos, off, len));
    @@ -11029,9 +11128,14 @@ int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl,
     
       // trim read based on file size?
       if ((off >= in->size) || (len == 0)) {
    +    // read is requested at the EOF or the read len is zero, therefore release
    +    // Fc cap first before proceeding further
    +    put_cap_ref(in, CEPH_CAP_FILE_CACHE);
    +
         // If not async, immediate return of 0 bytes
    -    if (onfinish == nullptr) 
    +    if (onfinish == nullptr) {
           return 0;
    +    }
     
         // Release C_Read_Async_Finisher from managed pointer, we need to complete
         // immediately. The C_Read_Async_Finisher is safely handled and won't be
    @@ -11039,9 +11143,7 @@ int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl,
         Context *crf = io_finish.release();
     
         // Complete the crf immediately with 0 bytes
    -    client_lock.unlock();
         crf->complete(0);
    -    client_lock.lock();
     
         // Signal async completion
         return 0;
    @@ -11066,6 +11168,8 @@ int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl,
     			      off, len, bl, 0, io_finish.get());
     
       if (onfinish != nullptr) {
    +    // put the cap ref since we're releasing C_Read_Async_Finisher
    +    put_cap_ref(in, CEPH_CAP_FILE_CACHE);
         // Release C_Read_Async_Finisher from managed pointer, either
         // file_read will result in non-blocking complete, or we need to complete
         // immediately. In either case, the C_Read_Async_Finisher is safely
    @@ -11073,22 +11177,20 @@ int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl,
         Context *crf = io_finish.release();
         if (r != 0) {
           // need to do readahead, so complete the crf
    -      client_lock.unlock();
           crf->complete(r);
    -      client_lock.lock();
    -    } else {
    -      get_cap_ref(in, CEPH_CAP_FILE_CACHE);
         }
         return 0;
       }
     
    +  // Wait for the blocking read to complete and then do readahead
       if (r == 0) {
    -    get_cap_ref(in, CEPH_CAP_FILE_CACHE);
         client_lock.unlock();
         r = io_finish_cond->wait();
         client_lock.lock();
         put_cap_ref(in, CEPH_CAP_FILE_CACHE);
         update_read_io_size(bl->length());
    +  } else {
    +    put_cap_ref(in, CEPH_CAP_FILE_CACHE);
       }
     
       do_readahead(f, in, off, len);
    @@ -11197,13 +11299,11 @@ int Client::write(int fd, const char *buf, loff_t size, loff_t offset)
     
     int Client::pwritev(int fd, const struct iovec *iov, int iovcnt, int64_t offset)
     {
    -  if (iovcnt < 0)
    -    return -CEPHFS_EINVAL;
       return _preadv_pwritev(fd, iov, iovcnt, offset, true);
     }
     
     int64_t Client::_preadv_pwritev_locked(Fh *fh, const struct iovec *iov,
    -                                       unsigned iovcnt, int64_t offset,
    +                                       int iovcnt, int64_t offset,
                                            bool write, bool clamp_to_int,
                                            Context *onfinish, bufferlist *blp,
                                            bool do_fsync, bool syncdataonly)
    @@ -11214,8 +11314,11 @@ int64_t Client::_preadv_pwritev_locked(Fh *fh, const struct iovec *iov,
         if (fh->flags & O_PATH)
             return -CEPHFS_EBADF;
     #endif
     +    if (iovcnt < 0) {
    +      return -CEPHFS_EINVAL;
    +    }
         loff_t totallen = 0;
    -    for (unsigned i = 0; i < iovcnt; i++) {
    +    for (int i = 0; i < iovcnt; i++) {
             totallen += iov[i].iov_len;
         }
     
    @@ -11238,22 +11341,17 @@ int64_t Client::_preadv_pwritev_locked(Fh *fh, const struct iovec *iov,
                               onfinish);
             ldout(cct, 3) << "preadv(" << fh << ", " <<  offset << ") = " << r << dendl;
             if (r <= 0) {
    -          if (r < 0 && onfinish != nullptr) {
    -            client_lock.unlock();
    -            onfinish->complete(r);
    -            client_lock.lock();
    -          }
               return r;
             }
     
             client_lock.unlock();
    -        copy_bufferlist_to_iovec(iov, iovcnt, &bl, r);
    +        copy_bufferlist_to_iovec(iov, iovcnt, blp ? blp : &bl, r);
             client_lock.lock();
             return r;
         }
     }
     
    -int Client::_preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt,
    +int Client::_preadv_pwritev(int fd, const struct iovec *iov, int iovcnt,
                                 int64_t offset, bool write, Context *onfinish,
                                 bufferlist *blp)
     {
    @@ -11319,10 +11417,18 @@ int64_t Client::_write_success(Fh *f, utime_t start, uint64_t fpos,
       return r;
     }
     
    +void Client::C_Lock_Client_Finisher::finish(int r)
    +{
    +  std::scoped_lock lock(clnt->client_lock);
    +  onfinish->complete(r);
    +}
    +
     void Client::C_Write_Finisher::finish_io(int r)
     {
       bool fini;
     
    +  ceph_assert(ceph_mutex_is_locked_by_me(clnt->client_lock));
    +
       clnt->put_cap_ref(in, CEPH_CAP_FILE_BUFFER);
     
       if (r >= 0) {
    @@ -11358,6 +11464,8 @@ void Client::C_Write_Finisher::finish_fsync(int r)
       bool fini;
       client_t const whoami = clnt->whoami;  // For the benefit of ldout prefix
     
    +  ceph_assert(ceph_mutex_is_locked_by_me(clnt->client_lock));
    +
       ldout(clnt->cct, 3) << "finish_fsync r = " << r << dendl;
     
       fsync_finished = true;
    @@ -11420,6 +11528,10 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf,
       CWF_iofinish *cwf_iofinish = NULL;
       C_SaferCond *cond_iofinish = NULL;
     
     +  if (size < 1) { // a zero-byte write is not supported by the OSD
    +    return -CEPHFS_EINVAL;
    +  }
    +
       if ( (uint64_t)(offset+size) > mdsmap->get_max_filesize() && //exceeds config
            (uint64_t)(offset+size) > in->size ) { //exceeds filesize 
           return -CEPHFS_EFBIG;              
    @@ -11514,6 +11626,7 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf,
     
        std::unique_ptr<Context> iofinish = nullptr;
        std::unique_ptr<C_Write_Finisher> cwf = nullptr;
     +  std::unique_ptr<Context> filer_iofinish = nullptr;
       
       if (in->inline_version < CEPH_INLINE_NONE) {
         if (endoff > cct->_conf->client_max_inline_size ||
    @@ -11625,7 +11738,10 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf,
         if (onfinish == nullptr) {
           // We need a safer condition to wait on.
           cond_iofinish = new C_SaferCond();
    -      iofinish.reset(cond_iofinish);
    +      filer_iofinish.reset(cond_iofinish);
    +    } else {
    +      //Register a wrapper callback for the C_Write_Finisher which takes 'client_lock'
    +      filer_iofinish.reset(new C_Lock_Client_Finisher(this, iofinish.get()));
         }
     
         get_cap_ref(in, CEPH_CAP_FILE_BUFFER);
    @@ -11633,11 +11749,12 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf,
         filer->write_trunc(in->ino, &in->layout, in->snaprealm->get_snap_context(),
     		       offset, size, bl, ceph::real_clock::now(), 0,
     		       in->truncate_size, in->truncate_seq,
    -		       iofinish.get());
    +		       filer_iofinish.get());
     
         if (onfinish) {
           // handle non-blocking caller (onfinish != nullptr), we can now safely
           // release all the managed pointers
    +      filer_iofinish.release();
           iofinish.release();
           onuninline.release();
           cwf.release();
    @@ -11820,7 +11937,7 @@ void Client::C_nonblocking_fsync_state::advance()
           ldout(clnt->cct, 15) << "waiting on unsafe requests, last tid " << req->get_tid() <<  dendl;
     
           req->get();
    -      clnt->add_nonblocking_onfinish_to_context_list(req->waitfor_safe, advancer);
    +      req->waitfor_safe.push_back(advancer);
           // ------------  here is a state machine break point
           return;
         }
    @@ -11846,7 +11963,7 @@ void Client::C_nonblocking_fsync_state::advance()
             ldout(clnt->cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER]
                                  << " uncommitted, waiting" << dendl;
             advancer = new C_nonblocking_fsync_state_advancer(clnt, this);
    -        clnt->add_nonblocking_onfinish_to_context_list(in->waitfor_commit, advancer);
    +        in->waitfor_commit.push_back(advancer);
             // ------------  here is a state machine break point but we have to
             //               return to this case because this might loop.
             progress = 1;
    @@ -11904,9 +12021,9 @@ void Client::C_nonblocking_fsync_state::advance()
                                  << " for C_nonblocking_fsync_state " << this
                                  << dendl;
             if (progress == 3)
    -          clnt->add_nonblocking_onfinish_to_context_list(in->waitfor_caps, advancer);
    +          in->waitfor_caps.push_back(advancer);
             else
    -          clnt->add_nonblocking_onfinish_to_context_list(in->waitfor_caps_pending, advancer);
    +          in->waitfor_caps_pending.push_back(advancer);
             // ------------  here is a state machine break point
             //               the advancer completion will resume with case 3
             progress = 4;
    @@ -12776,6 +12893,8 @@ int Client::_sync_fs()
       // flush the mdlog before waiting for unsafe requests.
       flush_mdlog_sync();
     
    +  flush_cap_releases();
    +
       // wait for unsafe mds requests
       wait_unsafe_requests();
     
    @@ -13005,16 +13124,17 @@ Inode *Client::open_snapdir(Inode *diri)
     {
       Inode *in;
       vinodeno_t vino(diri->ino, CEPH_SNAPDIR);
    -  if (!inode_map.count(vino)) {
    +  auto [it, b] = inode_map.try_emplace(vino, nullptr);
    +  if (b) {
         in = new Inode(this, vino, &diri->layout);
         refresh_snapdir_attrs(in, diri);
         diri->flags |= I_SNAPDIR_OPEN;
    -    inode_map[vino] = in;
    +    it->second = in;
         if (use_faked_inos())
           _assign_faked_ino(in);
         ldout(cct, 10) << "open_snapdir created snapshot inode " << *in << dendl;
       } else {
    -    in = inode_map[vino];
    +    in = it->second;
         ldout(cct, 10) << "open_snapdir had snapshot inode " << *in << dendl;
       }
       return in;
    @@ -13708,7 +13828,9 @@ int Client::_getxattr(Inode *in, const char *name, void *value, size_t size,
     
       if (!strncmp(name, "ceph.", 5)) {
         r = _getvxattr(in, perms, name, size, value, MDS_RANK_NONE);
    -    goto out;
    +    if (r != -ENODATA) {
    +      goto out;
    +    }
       }
     
       if (acl_type == NO_ACL && !strncmp(name, "system.", 7)) {
    @@ -13720,11 +13842,12 @@ int Client::_getxattr(Inode *in, const char *name, void *value, size_t size,
       if (r == 0) {
         string n(name);
         r = -CEPHFS_ENODATA;
    -    if (in->xattrs.count(n)) {
    -      r = in->xattrs[n].length();
    +    auto it = in->xattrs.find(n);
    +    if (it != in->xattrs.end()) {
    +      r = it->second.length();
           if (r > 0 && size != 0) {
     	if (size >= (unsigned)r)
    -	  memcpy(value, in->xattrs[n].c_str(), r);
    +	  memcpy(value, it->second.c_str(), r);
     	else
     	  r = -CEPHFS_ERANGE;
           }
    @@ -13828,7 +13951,7 @@ int Client::_do_setxattr(Inode *in, const char *name, const void *value,
     
       int xattr_flags = 0;
       if (!value)
    -    xattr_flags |= CEPH_XATTR_REMOVE;
    +    xattr_flags |= CEPH_XATTR_REMOVE | CEPH_XATTR_REMOVE2;
       if (flags & XATTR_CREATE)
         xattr_flags |= CEPH_XATTR_CREATE;
       if (flags & XATTR_REPLACE)
    @@ -13886,6 +14009,7 @@ int Client::_setxattr(Inode *in, const char *name, const void *value,
           mode_t new_mode = in->mode;
           if (value) {
     	int ret = posix_acl_equiv_mode(value, size, &new_mode);
    +	ldout(cct, 3) << __func__ << "(" << in->ino << ", \"" << name << "\") = " << ret << dendl;
     	if (ret < 0)
     	  return ret;
     	if (ret == 0) {
    @@ -13935,6 +14059,11 @@ int Client::_setxattr(Inode *in, const char *name, const void *value,
           ret = -CEPHFS_EOPNOTSUPP;
       }
     
    +  if ((!strcmp(name, ACL_EA_ACCESS) ||
    +      !strcmp(name, ACL_EA_DEFAULT)) &&
    +      ret == -CEPHFS_ENODATA)
    +    ret = 0;
    +
       return ret;
     }
     
    @@ -14023,7 +14152,7 @@ int Client::ll_setxattr(Inode *in, const char *name, const void *value,
     
       vinodeno_t vino = _get_vino(in);
     
    -  ldout(cct, 3) << __func__ << " " << vino << " " << name << " size " << size << dendl;
    +  ldout(cct, 3) << __func__ << " " << vino << " " << name << " size " << size << " value " << !!value << dendl;
       tout(cct) << __func__ << std::endl;
       tout(cct) << vino.ino.val << std::endl;
       tout(cct) << name << std::endl;
    @@ -14045,10 +14174,11 @@ int Client::_removexattr(Inode *in, const char *name, const UserPerm& perms)
     
       // same xattrs supported by kernel client
       if (strncmp(name, "user.", 5) &&
    -      strncmp(name, "system.", 7) &&
           strncmp(name, "security.", 9) &&
           strncmp(name, "trusted.", 8) &&
    -      strncmp(name, "ceph.", 5))
    +      strncmp(name, "ceph.", 5) &&
    +      strcmp(name, ACL_EA_ACCESS) &&
    +      strcmp(name, ACL_EA_DEFAULT))
         return -CEPHFS_EOPNOTSUPP;
     
       const VXattr *vxattr = _match_vxattr(in, name);
    @@ -14064,6 +14194,11 @@ int Client::_removexattr(Inode *in, const char *name, const UserPerm& perms)
      
       int res = make_request(req, perms);
     
    +  if ((!strcmp(name, ACL_EA_ACCESS) ||
    +      !strcmp(name, ACL_EA_DEFAULT)) &&
    +      res == -CEPHFS_ENODATA)
    +    res = 0;
    +
       trim_cache();
       ldout(cct, 8) << "_removexattr(" << in->ino << ", \"" << name << "\") = " << res << dendl;
       return res;
    @@ -15736,8 +15871,18 @@ loff_t Client::ll_lseek(Fh *fh, loff_t offset, int whence)
     int Client::ll_read(Fh *fh, loff_t off, loff_t len, bufferlist *bl)
     {
       RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
    -  if (!mref_reader.is_state_satisfied())
    +  if (!mref_reader.is_state_satisfied()) {
         return -CEPHFS_ENOTCONN;
    +  }
    +
     +  /* We can't return bytes read larger than INT_MAX, clamp len to that */
    +  len = std::min(len, (loff_t)INT_MAX);
    +
    +  std::scoped_lock lock(client_lock);
    +  if (fh == NULL || !_ll_fh_exists(fh)) {
    +    ldout(cct, 3) << "(fh)" << fh << " is invalid" << dendl;
    +    return -CEPHFS_EBADF;
    +  }
     
       ldout(cct, 3) << "ll_read " << fh << " " << fh->inode->ino << " " << " " << off << "~" << len << dendl;
       tout(cct) << "ll_read" << std::endl;
    @@ -15745,10 +15890,6 @@ int Client::ll_read(Fh *fh, loff_t off, loff_t len, bufferlist *bl)
       tout(cct) << off << std::endl;
       tout(cct) << len << std::endl;
     
    -  /* We can't return bytes written larger than INT_MAX, clamp len to that */
    -  len = std::min(len, (loff_t)INT_MAX);
    -  std::scoped_lock lock(client_lock);
    -
       int r = _read(fh, off, len, bl);
       ldout(cct, 3) << "ll_read " << fh << " " << off << "~" << len << " = " << r
     		<< dendl;
    @@ -15874,20 +16015,26 @@ int Client::ll_commit_blocks(Inode *in,
     
     int Client::ll_write(Fh *fh, loff_t off, loff_t len, const char *data)
     {
    -  ldout(cct, 3) << "ll_write " << fh << " " << fh->inode->ino << " " << off <<
    -    "~" << len << dendl;
    -  tout(cct) << "ll_write" << std::endl;
    -  tout(cct) << (uintptr_t)fh << std::endl;
    -  tout(cct) << off << std::endl;
    -  tout(cct) << len << std::endl;
    -
       RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
    -  if (!mref_reader.is_state_satisfied())
    +  if (!mref_reader.is_state_satisfied()) {
         return -CEPHFS_ENOTCONN;
    +  }
     
       /* We can't return bytes written larger than INT_MAX, clamp len to that */
       len = std::min(len, (loff_t)INT_MAX);
    +
       std::scoped_lock lock(client_lock);
    +  if (fh == NULL || !_ll_fh_exists(fh)) {
    +    ldout(cct, 3) << "(fh)" << fh << " is invalid" << dendl;
    +    return -CEPHFS_EBADF;
    +  }
    +
    +  ldout(cct, 3) << "ll_write " << fh << " " << fh->inode->ino << " " << off <<
    +    "~" << len << dendl;
    +  tout(cct) << "ll_write" << std::endl;
    +  tout(cct) << (uintptr_t)fh << std::endl;
    +  tout(cct) << off << std::endl;
    +  tout(cct) << len << std::endl;
     
       int r = _write(fh, off, len, data, NULL, 0);
       ldout(cct, 3) << "ll_write " << fh << " " << off << "~" << len << " = " << r
    @@ -15898,20 +16045,30 @@ int Client::ll_write(Fh *fh, loff_t off, loff_t len, const char *data)
     int64_t Client::ll_writev(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off)
     {
       RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
    -  if (!mref_reader.is_state_satisfied())
    +  if (!mref_reader.is_state_satisfied()) {
         return -CEPHFS_ENOTCONN;
    +  }
     
       std::scoped_lock cl(client_lock);
    +  if (fh == NULL || !_ll_fh_exists(fh)) {
    +    ldout(cct, 3) << "(fh)" << fh << " is invalid" << dendl;
    +    return -CEPHFS_EBADF;
    +  }
       return _preadv_pwritev_locked(fh, iov, iovcnt, off, true, false);
     }
     
     int64_t Client::ll_readv(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off)
     {
       RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
    -  if (!mref_reader.is_state_satisfied())
    +  if (!mref_reader.is_state_satisfied()) {
         return -CEPHFS_ENOTCONN;
    +  }
     
       std::scoped_lock cl(client_lock);
    +  if (fh == NULL || !_ll_fh_exists(fh)) {
    +    ldout(cct, 3) << "(fh)" << fh << " is invalid" << dendl;
    +    return -CEPHFS_EBADF;
    +  }
       return _preadv_pwritev_locked(fh, iov, iovcnt, off, false, false);
     }
     
    @@ -15920,13 +16077,67 @@ int64_t Client::ll_preadv_pwritev(struct Fh *fh, const struct iovec *iov,
                                       Context *onfinish, bufferlist *bl,
                                       bool do_fsync, bool syncdataonly)
     {
    +    int64_t retval = -1;
    +
         RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
    -    if (!mref_reader.is_state_satisfied())
    -      return -CEPHFS_ENOTCONN;
    +    if (!mref_reader.is_state_satisfied()) {
    +      retval = -CEPHFS_ENOTCONN;
    +      if (onfinish != nullptr) {
    +        onfinish->complete(retval);
    +        /* async call should always return zero to caller and allow the
    +        caller to wait on callback for the actual errno. */
    +        retval = 0;
    +      }
    +      return retval;
    +    }
     
    -    std::scoped_lock cl(client_lock);
    -    return _preadv_pwritev_locked(fh, iov, iovcnt, offset, write, true,
    -    				  onfinish, bl, do_fsync, syncdataonly);
    +    retval = 0;
    +    std::unique_lock cl(client_lock);
    +
    +    if(fh == NULL || !_ll_fh_exists(fh)) {
    +      ldout(cct, 3) << "(fh)" << fh << " is invalid" << dendl;
    +      retval = -CEPHFS_EBADF;
    +    }
    +
    +    if (retval != 0) {
    +      if (onfinish != nullptr) {
    +        cl.unlock();
    +        onfinish->complete(retval);
    +        cl.lock();
    +        retval = 0;
    +      }
    +      return retval;
    +    }
    +
    +    retval = _preadv_pwritev_locked(fh, iov, iovcnt, offset, write, true,
    +                                    onfinish, bl, do_fsync, syncdataonly);
     +    /* There are two scenarios, each with two cases to handle here
     +    1) async io
     +      1.a) r == 0:
     +        async call in progress; the context will be invoked automatically,
     +        so just return retval (i.e. zero).
     +      1.b) r < 0:
     +        There was an error; no context completion should have taken place,
     +        so complete the context with retval and then return zero to the
     +        caller.
     +    2) sync io
     +      2.a) r >= 0:
     +        sync call succeeded; return the number of bytes read/written.
     +      2.b) r < 0:
     +        sync call failed; return the errno. */
    +
    +    if (retval < 0) {
    +      if (onfinish != nullptr) {
    +        //async io failed
    +        cl.unlock();
    +        onfinish->complete(retval);
    +        cl.lock();
    +        /* async call should always return zero to caller and allow the
    +        caller to wait on callback for the actual errno/retval. */
    +        retval = 0;
    +      }
    +    }
    +    return retval;
     }
     
     int Client::ll_flush(Fh *fh)
    @@ -15968,7 +16179,7 @@ int Client::ll_sync_inode(Inode *in, bool syncdataonly)
       if (!mref_reader.is_state_satisfied())
         return -CEPHFS_ENOTCONN;
     
    -  ldout(cct, 3) << "ll_sync_inode " << *in << " " << dendl;
    +  ldout(cct, 3) << "ll_sync_inode " << _get_vino(in) << " " << dendl;
       tout(cct) << "ll_sync_inode" << std::endl;
       tout(cct) << (uintptr_t)in << std::endl;
     
    @@ -16023,7 +16234,7 @@ int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length)
       if (offset < 0 || length <= 0)
         return -CEPHFS_EINVAL;
     
    -  if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
    +  if (mode == 0 || (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
         return -CEPHFS_EOPNOTSUPP;
     
       if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE))
    @@ -16190,8 +16401,7 @@ int Client::ll_release(Fh *fh)
     
       std::scoped_lock lock(client_lock);
     
    -  if (ll_unclosed_fh_set.count(fh))
    -    ll_unclosed_fh_set.erase(fh);
    +  ll_unclosed_fh_set.erase(fh);
       return _release_fh(fh);
     }
     
    @@ -16567,13 +16777,41 @@ void Client::ms_handle_connect(Connection *con)
     bool Client::ms_handle_reset(Connection *con)
     {
       ldout(cct, 0) << __func__ << " on " << con->get_peer_addr() << dendl;
    +
    +  cancel_commands_if([=, this](MDSCommandOp const& op) {
    +    if (op.one_shot && op.con.get() == con) {
    +      ldout(cct, 1) << "ms_handle_reset: aborting one-shot command op " << op.tid << dendl;
    +      if (op.outs) {
    +        std::ostringstream ss;
    +        ss << "MDS connection reset";
    +        *(op.outs) = ss.str();
    +      }
    +      return -EPIPE;
    +    }
    +    return 0;
    +  });
    +
       return false;
     }
     
     void Client::ms_handle_remote_reset(Connection *con)
     {
    -  std::scoped_lock lock(client_lock);
       ldout(cct, 0) << __func__ << " on " << con->get_peer_addr() << dendl;
    +
    +  cancel_commands_if([=, this](MDSCommandOp const& op) {
    +    if (op.one_shot && op.con.get() == con) {
    +      ldout(cct, 1) << "ms_handle_remote_reset: aborting one-shot command op " << op.tid << dendl;
    +      if (op.outs) {
    +        std::ostringstream ss;
    +        ss << "MDS remote session reset";
    +        *(op.outs) = ss.str();
    +      }
    +      return -EPIPE;
    +    }
    +    return 0;
    +  });
    +
    +  std::scoped_lock lock(client_lock);
       switch (con->get_peer_type()) {
       case CEPH_ENTITY_TYPE_MDS:
         {
    @@ -16597,7 +16835,7 @@ void Client::ms_handle_remote_reset(Connection *con)
     	case MetaSession::STATE_OPENING:
     	  {
     	    ldout(cct, 1) << "reset from mds we were opening; retrying" << dendl;
     -	    list<Context*> waiters;
     +	    std::vector<Context*> waiters;
     	    waiters.swap(s->waiting_for_open);
     	    _closed_mds_session(s.get());
     	    auto news = _get_or_open_mds_session(mds);
    @@ -16632,6 +16870,20 @@ void Client::ms_handle_remote_reset(Connection *con)
     bool Client::ms_handle_refused(Connection *con)
     {
       ldout(cct, 1) << __func__ << " on " << con->get_peer_addr() << dendl;
    +
    +  cancel_commands_if([=, this](MDSCommandOp const& op) {
    +    if (op.one_shot && op.con.get() == con) {
    +      ldout(cct, 1) << "ms_handle_refused: aborting one-shot command op " << op.tid << dendl;
    +      if (op.outs) {
    +        std::ostringstream ss;
    +        ss << "MDS connection refused";
    +        *(op.outs) = ss.str();
    +      }
    +      return -EPIPE;
    +    }
    +    return 0;
    +  });
    +
       return false;
     }
     
    @@ -16845,8 +17097,9 @@ int Client::check_pool_perm(Inode *in, int need)
     int Client::_posix_acl_permission(Inode *in, const UserPerm& perms, unsigned want)
     {
       if (acl_type == POSIX_ACL) {
    -    if (in->xattrs.count(ACL_EA_ACCESS)) {
    -      const bufferptr& access_acl = in->xattrs[ACL_EA_ACCESS];
    +    auto it = in->xattrs.find(ACL_EA_ACCESS);
    +    if (it != in->xattrs.end()) {
    +      const bufferptr& access_acl = it->second;
     
           return posix_acl_permits(access_acl, in->uid, in->gid, perms, want);
         }
    @@ -16864,8 +17117,9 @@ int Client::_posix_acl_chmod(Inode *in, mode_t mode, const UserPerm& perms)
         goto out;
     
       if (acl_type == POSIX_ACL) {
    -    if (in->xattrs.count(ACL_EA_ACCESS)) {
    -      const bufferptr& access_acl = in->xattrs[ACL_EA_ACCESS];
    +    auto it = in->xattrs.find(ACL_EA_ACCESS);
    +    if (it != in->xattrs.end()) {
    +      const bufferptr& access_acl = it->second;
           bufferptr acl(access_acl.c_str(), access_acl.length());
           r = posix_acl_access_chmod(acl, mode);
           if (r < 0)
    @@ -16894,10 +17148,11 @@ int Client::_posix_acl_create(Inode *dir, mode_t *mode, bufferlist& xattrs_bl,
         goto out;
     
       if (acl_type == POSIX_ACL) {
    -    if (dir->xattrs.count(ACL_EA_DEFAULT)) {
    +    auto it = dir->xattrs.find(ACL_EA_DEFAULT);
    +    if (it != dir->xattrs.end()) {
            map<string, bufferptr> xattrs;
     
    -      const bufferptr& default_acl = dir->xattrs[ACL_EA_DEFAULT];
    +      const bufferptr& default_acl = it->second;
           bufferptr acl(default_acl.c_str(), default_acl.length());
           r = posix_acl_inherit_mode(acl, mode);
           if (r < 0)
    @@ -16912,7 +17167,7 @@ int Client::_posix_acl_create(Inode *dir, mode_t *mode, bufferlist& xattrs_bl,
           }
     
           if (S_ISDIR(*mode))
    -	xattrs[ACL_EA_DEFAULT] = dir->xattrs[ACL_EA_DEFAULT];
    +	xattrs[ACL_EA_DEFAULT] = it->second;
     
           r = xattrs.size();
           if (r > 0)
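
For context, a minimal caller-side sketch of the nonblocking read path whose sync/async return contract is documented in ll_preadv_pwritev above. This is an illustration only, not part of the patch; it assumes a mounted Client* `client`, an open `Fh *fh`, and that the trailing do_fsync/syncdataonly parameters keep their default values.

    #include "common/Cond.h"   // C_SaferCond, a waitable Context

    // Issue a nonblocking readv and wait on the completion to learn the result.
    int64_t nonblocking_read(Client *client, Fh *fh,
                             const struct iovec *iov, int iovcnt, int64_t off)
    {
      C_SaferCond onfinish;      // completed with the byte count or a negative errno
      ceph::bufferlist bl;       // read data is delivered here for the async path
      int64_t r = client->ll_preadv_pwritev(fh, iov, iovcnt, off, false /* write */,
                                            &onfinish, &bl);
      if (r < 0)
        return r;                // the request was never issued
      return onfinish.wait();    // per the comment above: async callers get 0 and wait here
    }
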
    diff --git a/src/client/Client.h b/src/client/Client.h
    index 63df6b815bd2..f8c39e2fdd6a 100644
    --- a/src/client/Client.h
    +++ b/src/client/Client.h
    @@ -96,6 +96,7 @@ class MDSCommandOp : public CommandOp
     {
       public:
       mds_gid_t     mds_gid;
    +  bool          one_shot = false;
     
       explicit MDSCommandOp(ceph_tid_t t) : CommandOp(t) {}
       explicit MDSCommandOp(ceph_tid_t t, ceph_tid_t multi_id) : CommandOp(t, multi_id) {}
    @@ -163,7 +164,7 @@ struct dir_result_t {
       };
     
     
    -  explicit dir_result_t(Inode *in, const UserPerm& perms);
    +  explicit dir_result_t(Inode *in, const UserPerm& perms, int fd);
     
     
       static uint64_t make_fpos(unsigned h, unsigned l, bool hash) {
    @@ -240,6 +241,8 @@ struct dir_result_t {
     
       std::vector buffer;
       struct dirent de;
    +
    +  int fd;                // fd attached using fdopendir (-1 if none)
     };
     
     class Client : public Dispatcher, public md_config_obs_t {
    @@ -333,7 +336,7 @@ class Client : public Dispatcher, public md_config_obs_t {
         const std::string &mds_spec,
          const std::vector<std::string>& cmd,
         const bufferlist& inbl,
    -    bufferlist *poutbl, std::string *prs, Context *onfinish);
    +    bufferlist *poutbl, std::string *prs, Context *onfinish, bool one_shot = false);
     
       // these should (more or less) mirror the actual system calls.
       int statfs(const char *path, struct statvfs *stbuf, const UserPerm& perms);
    @@ -485,7 +488,6 @@ class Client : public Dispatcher, public md_config_obs_t {
       int preadv(int fd, const struct iovec *iov, int iovcnt, loff_t offset=-1);
       int write(int fd, const char *buf, loff_t size, loff_t offset=-1);
       int pwritev(int fd, const struct iovec *iov, int iovcnt, loff_t offset=-1);
    -  int fake_write_size(int fd, loff_t size);
       int ftruncate(int fd, loff_t size, const UserPerm& perms);
       int fsync(int fd, bool syncdataonly);
       int fstat(int fd, struct stat *stbuf, const UserPerm& perms,
    @@ -713,6 +715,27 @@ class Client : public Dispatcher, public md_config_obs_t {
       virtual void shutdown();
     
       // messaging
     +  int cancel_commands_if(std::regular_invocable<MDSCommandOp const&> auto && error_for_op)
    +  {
     +    std::vector<ceph_tid_t> cancel_ops;
    +
    +    std::scoped_lock cmd_lock(command_lock);
    +    auto& commands = command_table.get_commands();
    +    for (const auto &[tid, op]: commands) {
     +      int rc = static_cast<int>(error_for_op(op));
    +      if (rc) {
    +        cancel_ops.push_back(tid);
    +        if (op.on_finish)
    +          op.on_finish->complete(rc);
    +      }
    +    }
    +
    +    for (const auto& tid : cancel_ops)
    +      command_table.erase(tid);
    +
    +    return cancel_ops.size();
    +  }
    +
       void cancel_commands(const MDSMap& newmap);
       void handle_mds_map(const MConstRef& m);
       void handle_fs_map(const MConstRef& m);
    @@ -767,7 +790,7 @@ class Client : public Dispatcher, public md_config_obs_t {
       void submit_sync_caps(Inode *in, ceph_tid_t want, Context *onfinish);
       void wait_sync_caps(Inode *in, ceph_tid_t want);
       void wait_sync_caps(ceph_tid_t want);
    -  void queue_cap_snap(Inode *in, SnapContext &old_snapc);
    +  void queue_cap_snap(Inode *in, const SnapContext &old_snapc);
       void finish_cap_snap(Inode *in, CapSnap &capsnap, int used);
     
       void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
    @@ -1027,15 +1050,17 @@ class Client : public Dispatcher, public md_config_obs_t {
         return it->second;
       }
       int get_fd_inode(int fd, InodeRef *in);
    +  bool _ll_fh_exists(Fh *f) {
    +    return ll_unclosed_fh_set.count(f);
    +  }
     
       // helpers
       void wake_up_session_caps(MetaSession *s, bool reconnect);
     
     -  void add_nonblocking_onfinish_to_context_list(std::list<Context*>& ls, Context *onfinish) {
     -    ls.push_back(onfinish);
     +  void wait_on_context_list(std::vector<Context*>& ls);
     +  void signal_context_list(std::vector<Context*>& ls) {
     +    finish_contexts(cct, ls, 0);
        }
     -  void wait_on_context_list(std::list<Context*>& ls);
     -  void signal_context_list(std::list<Context*>& ls);
       void signal_caps_inode(Inode *in);
     
       // -- metadata cache stuff
    @@ -1384,6 +1409,21 @@ class Client : public Dispatcher, public md_config_obs_t {
         void finish(int r) override;
       };
     
    +  // A wrapper callback which takes the 'client_lock' and finishes the context.
     +  // One use case is filer->write_trunc, which doesn't hold client_lock when
     +  // invoking the callback passed to it. Use this wrapper in such cases.
    +  class C_Lock_Client_Finisher : public Context {
    +  public:
    +    C_Lock_Client_Finisher(Client *clnt, Context *onfinish)
    +      : clnt(clnt), onfinish(onfinish) {}
    +
    +  private:
    +    Client *clnt;
    +    Context *onfinish;
    +
    +    void finish(int r) override;
    +  };
    +
       class C_Write_Finisher : public Context {
       public:
         void finish_io(int r);
    @@ -1564,7 +1604,7 @@ class Client : public Dispatcher, public md_config_obs_t {
     
       void fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off);
     
    -  int _opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms);
    +  int _opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms, int fd = -1);
       void _readdir_drop_dirp_buffer(dir_result_t *dirp);
       bool _readdir_have_frag(dir_result_t *dirp);
       void _readdir_next_frag(dir_result_t *dirp);
    @@ -1625,6 +1665,7 @@ class Client : public Dispatcher, public md_config_obs_t {
     	       const UserPerm& perms, std::string alternate_name, InodeRef *inp = 0);
       int _mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
     	     const UserPerm& perms, InodeRef *inp = 0);
    +  bool make_absolute_path_string(Inode *in, std::string& path);
       int _do_setattr(Inode *in, struct ceph_statx *stx, int mask,
     		  const UserPerm& perms, InodeRef *inp,
     		  std::vector* aux=nullptr);
    @@ -1677,12 +1718,12 @@ class Client : public Dispatcher, public md_config_obs_t {
               const struct iovec *iov, int iovcnt, Context *onfinish = nullptr,
               bool do_fsync = false, bool syncdataonly = false);
       int64_t _preadv_pwritev_locked(Fh *fh, const struct iovec *iov,
    -                                 unsigned iovcnt, int64_t offset,
    +                                 int iovcnt, int64_t offset,
                                      bool write, bool clamp_to_int,
                                      Context *onfinish = nullptr,
                                      bufferlist *blp = nullptr,
                                      bool do_fsync = false, bool syncdataonly = false);
    -  int _preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt,
    +  int _preadv_pwritev(int fd, const struct iovec *iov, int iovcnt,
                           int64_t offset, bool write, Context *onfinish = nullptr,
                           bufferlist *blp = nullptr);
       int _flush(Fh *fh);
    @@ -1909,6 +1950,8 @@ class Client : public Dispatcher, public md_config_obs_t {
       uint64_t nr_write_request = 0;
     
       std::vector cap_auths;
    +
    +  feature_bitset_t myfeatures;
     };
     
     /**
    diff --git a/src/client/Dentry.h b/src/client/Dentry.h
    index c66aca6f1e04..47d320ecbbcf 100644
    --- a/src/client/Dentry.h
    +++ b/src/client/Dentry.h
    @@ -84,7 +84,8 @@ class Dentry : public LRUObject {
         if (dir) {
           ret = dir->parent_inode->make_path_string(s);
         } else {
    -      s = "???";
    +      // Couldn't link all the way to our mount point
    +      return false;
         }
         s += "/";
         s.append(name.data(), name.length());
    diff --git a/src/client/Inode.h b/src/client/Inode.h
    index 6392619335ce..61188bd2f447 100644
    --- a/src/client/Inode.h
    +++ b/src/client/Inode.h
    @@ -238,9 +238,9 @@ struct Inode : RefCountedObject {
       std::map fragmap;  // known frag -> mds mappings
       std::map> frag_repmap; // non-auth mds mappings
     
     -  std::list<Context*> waitfor_caps;
     -  std::list<Context*> waitfor_caps_pending;
     -  std::list<Context*> waitfor_commit;
     +  std::vector<Context*> waitfor_caps;
     +  std::vector<Context*> waitfor_caps_pending;
     +  std::vector<Context*> waitfor_commit;
       std::list waitfor_deleg;
     
       Dentry *get_first_parent() {
    diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h
    index 240c0cd02a39..1b447050800c 100644
    --- a/src/client/MetaRequest.h
    +++ b/src/client/MetaRequest.h
    @@ -70,7 +70,7 @@ struct MetaRequest {
     
       ceph::condition_variable *caller_cond = NULL;   // who to take up
       ceph::condition_variable *dispatch_cond = NULL; // who to kick back
     -  std::list<Context*> waitfor_safe;
     +  std::vector<Context*> waitfor_safe;
     
       InodeRef target;
       UserPerm perms;
    diff --git a/src/client/MetaSession.cc b/src/client/MetaSession.cc
    index b5160a84331b..3baa833851fd 100644
    --- a/src/client/MetaSession.cc
    +++ b/src/client/MetaSession.cc
    @@ -56,7 +56,7 @@ void MetaSession::enqueue_cap_release(inodeno_t ino, uint64_t cap_id, ceph_seq_t
       ceph_mds_cap_item i;
       i.ino = ino;
       i.cap_id = cap_id;
    -  i.seq = iseq;
    +  i.issue_seq = iseq;
       i.migrate_seq = mseq;
       release->caps.push_back(i);
     }
    diff --git a/src/client/MetaSession.h b/src/client/MetaSession.h
    index 301306263e66..058272de053e 100644
    --- a/src/client/MetaSession.h
    +++ b/src/client/MetaSession.h
    @@ -47,7 +47,7 @@ struct MetaSession {
       int mds_state = MDSMap::STATE_NULL;
       bool readonly = false;
     
     -  std::list<Context*> waiting_for_open;
     +  std::vector<Context*> waiting_for_open;
     
        xlist<Cap*> caps;
       // dirty_list keeps all the dirty inodes before flushing in current session.
    diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc
    index 3b408dd3f2df..6b315d2dee34 100644
    --- a/src/client/SyntheticClient.cc
    +++ b/src/client/SyntheticClient.cc
    @@ -290,6 +290,7 @@ SyntheticClient::SyntheticClient(StandaloneClient *client, int w)
     
     void *synthetic_client_thread_entry(void *ptr)
     {
    +  ceph_pthread_setname("client");
       SyntheticClient *sc = static_cast(ptr);
       //int r = 
       sc->run();
    @@ -945,7 +946,6 @@ int SyntheticClient::start_thread()
     
       pthread_create(&thread_id, NULL, synthetic_client_thread_entry, this);
       ceph_assert(thread_id);
    -  ceph_pthread_setname(thread_id, "client");
       return 0;
     }
     
    diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc
    index 7f92dd668ba3..67f8d8ea7686 100644
    --- a/src/client/fuse_ll.cc
    +++ b/src/client/fuse_ll.cc
    @@ -753,6 +753,15 @@ static void fuse_ll_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
     #endif
                                )
     {
    +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0)
    +  // cephfs does not support renameat2 flavors; follow same logic as done in
    +  // kclient's ceph_rename()
    +  if (flags) {
    +    fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL));
    +    return;
    +  }
    +#endif
    +
       CephFuse::Handle *cfuse = fuse_ll_req_prepare(req);
       const struct fuse_ctx *ctx = fuse_req_ctx(req);
       UserPerm perm(ctx->uid, ctx->gid);
    @@ -1257,7 +1266,7 @@ static int remount_cb(void *handle)
       // trims all unused dentries in the file system
       char cmd[128+PATH_MAX];
       CephFuse::Handle *cfuse = (CephFuse::Handle *)handle;
    -  snprintf(cmd, sizeof(cmd), "LIBMOUNT_FSTAB=/dev/null mount -i -o remount %s",
    +  snprintf(cmd, sizeof(cmd), "LIBMOUNT_FSTAB=/dev/null LIBMOUNT_FORCE_MOUNT2=always mount -i -o remount %s",
     #if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0)
                       cfuse->opts.mountpoint);
     #else
    diff --git a/src/client/hypertable/CephBroker.cc b/src/client/hypertable/CephBroker.cc
    deleted file mode 100644
    index 596e722871f1..000000000000
    --- a/src/client/hypertable/CephBroker.cc
    +++ /dev/null
    @@ -1,526 +0,0 @@
    -/** -*- C++ -*-
    - * Copyright (C) 2009-2011 New Dream Network
    - *
    - * This file is part of Hypertable.
    - *
    - * Hypertable is free software; you can redistribute it and/or
    - * modify it under the terms of the GNU General Public License
    - * as published by the Free Software Foundation; either version 2
    - * of the License, or any later version.
    - *
    - * Hypertable is distributed in the hope that it will be useful,
    - * but WITHOUT ANY WARRANTY; without even the implied warranty of
    - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    - * GNU General Public License for more details.
    - *
    - * You should have received a copy of the GNU General Public License
    - * along with Hypertable. If not, see 
    - *
    - * Authors:
    - * Gregory Farnum 
    - * Colin McCabe 
    - */
    -
    -#include "Common/Compat.h"
    -
    -#include "CephBroker.h"
    -#include "Common/Error.h"
    -#include "Common/FileUtils.h"
    -#include "Common/Filesystem.h"
    -#include "Common/System.h"
    -
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -#include 
    -
    -using namespace Hypertable;
    -
     -std::atomic<int> CephBroker::ms_next_fd{0};
    -
    -/* A thread-safe version of strerror */
    -static std::string cpp_strerror(int err)
    -{
    -  char buf[128];
    -  if (err < 0)
    -    err = -err;
    -  std::ostringstream oss;
    -  oss << strerror_r(err, buf, sizeof(buf));
    -  return oss.str();
    -}
    -
    -OpenFileDataCeph::OpenFileDataCeph(struct ceph_mount_info *cmount_, const String& fname,
    -				   int _fd, int _flags) 
    -  : cmount(cmount_), fd(_fd), flags(_flags), filename(fname)
    -{
    -}
    -
    -OpenFileDataCeph::~OpenFileDataCeph() {
    -  ceph_close(cmount, fd);
    -}
    -
    -CephBroker::CephBroker(PropertiesPtr& cfg)
    -  : cmount(NULL)
    -{
    -  int ret;
    -  String id(cfg->get_str("CephBroker.Id"));
    -  m_verbose = cfg->get_bool("Hypertable.Verbose");
    -  m_root_dir = cfg->get_str("CephBroker.RootDir");
    -  String mon_addr(cfg->get_str("CephBroker.MonAddr"));
    -
    -  HT_INFO("Calling ceph_create");
    -  ret = ceph_create(&cmount, id.empty() ? NULL : id.c_str());
    -  if (ret) {
    -    throw Hypertable::Exception(ret, "ceph_create failed");
    -  }
    -  ret = ceph_conf_set(cmount, "mon_host", mon_addr.c_str());
    -  if (ret) {
    -    ceph_shutdown(cmount);
    -    throw Hypertable::Exception(ret, "ceph_conf_set(mon_addr) failed");
    -  }
    -
    -  // For Ceph debugging, uncomment these lines
    -  //ceph_conf_set(cmount, "debug_client", "1");
    -  //ceph_conf_set(cmount, "debug_ms", "1");
    -
    -  HT_INFO("Calling ceph_mount");
    -  ret = ceph_mount(cmount, m_root_dir.empty() ? NULL : m_root_dir.c_str());
    -  if (ret) {
    -    ceph_shutdown(cmount);
    -    throw Hypertable::Exception(ret, "ceph_mount failed");
    -  }
    -  HT_INFO("Mounted Ceph filesystem.");
    -}
    -
    -CephBroker::~CephBroker()
    -{
    -  ceph_shutdown(cmount);
    -  cmount = NULL;
    -}
    -
    -void CephBroker::open(ResponseCallbackOpen *cb, const char *fname,
    -		      uint32_t flags, uint32_t bufsz) {
    -  int fd, ceph_fd;
    -  String abspath;
    -  HT_DEBUGF("open file='%s' bufsz=%d", fname, bufsz);
    -
    -  make_abs_path(fname, abspath);
    -
    -  fd = atomic_inc_return(&ms_next_fd);
    -
    -  if ((ceph_fd = ceph_open(cmount, abspath.c_str(), O_RDONLY, 0)) < 0) {
    -    report_error(cb, -ceph_fd);
    -    return;
    -  }
    -  HT_INFOF("open (%s) fd=%" PRIu32 " ceph_fd=%d", fname, fd, ceph_fd);
    -
    -  {
    -    struct sockaddr_in addr;
    -    OpenFileDataCephPtr fdata(new OpenFileDataCeph(cmount, abspath, ceph_fd, O_RDONLY));
    -
    -    cb->get_address(addr);
    -
    -    m_open_file_map.create(fd, addr, fdata);
    -
    -    cb->response(fd);
    -  }
    -}
    -
    -void CephBroker::create(ResponseCallbackOpen *cb, const char *fname, uint32_t flags,
    -			int32_t bufsz, int16_t replication, int64_t blksz){
    -  int fd, ceph_fd;
    -  int oflags;
    -  String abspath;
    -
    -  make_abs_path(fname, abspath);
    -  HT_DEBUGF("create file='%s' flags=%u bufsz=%d replication=%d blksz=%lld",
    -            fname, flags, bufsz, (int)replication, (Lld)blksz);
    -
    -  fd = atomic_inc_return(&ms_next_fd);
    -
    -  if (flags & Filesystem::OPEN_FLAG_OVERWRITE)
    -    oflags = O_WRONLY | O_CREAT | O_TRUNC;
    -  else
    -    oflags = O_WRONLY | O_CREAT | O_APPEND;
    -
    -  //make sure the directories in the path exist
    -  String directory = abspath.substr(0, abspath.rfind('/'));
    -  int r;
    -  HT_INFOF("Calling mkdirs on %s", directory.c_str());
    -  if((r=ceph_mkdirs(cmount, directory.c_str(), 0644)) < 0 && r!=-CEPHFS_EEXIST) {
    -    HT_ERRORF("create failed on mkdirs: dname='%s' - %d", directory.c_str(), -r);
    -    report_error(cb, -r);
    -    return;
    -  }
    -
    -  //create file
    -  if ((ceph_fd = ceph_open(cmount, abspath.c_str(), oflags, 0644)) < 0) {
    -    std::string errs(cpp_strerror(-ceph_fd));
    -    HT_ERRORF("open failed: file=%s - %s",  abspath.c_str(), errs.c_str());
    -    report_error(cb, ceph_fd);
    -    return;
    -  }
    -
    -  HT_INFOF("create %s  = %d", fname, ceph_fd);
    -
    -  {
    -    struct sockaddr_in addr;
    -    OpenFileDataCephPtr fdata (new OpenFileDataCeph(cmount, fname, ceph_fd, O_WRONLY));
    -
    -    cb->get_address(addr);
    -
    -    m_open_file_map.create(fd, addr, fdata);
    -
    -    cb->response(fd);
    -  }
    -}
    -
    -void CephBroker::close(ResponseCallback *cb, uint32_t fd) {
    -  if (m_verbose) {
    -    HT_INFOF("close fd=%" PRIu32, fd);
    -  }
    -  OpenFileDataCephPtr fdata;
    -  m_open_file_map.get(fd, fdata);
    -  m_open_file_map.remove(fd);
    -  cb->response_ok();
    -}
    -
    -void CephBroker::read(ResponseCallbackRead *cb, uint32_t fd, uint32_t amount) {
    -  OpenFileDataCephPtr fdata;
    -  ssize_t nread;
    -  int64_t offset;
    -  StaticBuffer buf(new uint8_t [amount], amount);
    -
    -  HT_DEBUGF("read fd=%" PRIu32 " amount = %d", fd, amount);
    -
    -  if (!m_open_file_map.get(fd, fdata)) {
    -    char errbuf[32];
    -    sprintf(errbuf, "%" PRIu32, fd);
    -    cb->error(Error::DFSBROKER_BAD_FILE_HANDLE, errbuf);
    -    HT_ERRORF("bad file handle: %" PRIu32, fd);
    -    return;
    -  }
    -
    -  if ((offset = ceph_lseek(cmount, fdata->fd, 0, SEEK_CUR)) < 0) {
    -    std::string errs(cpp_strerror(offset));
    -    HT_ERRORF("lseek failed: fd=%" PRIu32 " ceph_fd=%d offset=0 SEEK_CUR - %s",
    -	      fd, fdata->fd, errs.c_str());
    -    report_error(cb, offset);
    -    return;
    -  }
    -
    -  if ((nread = ceph_read(cmount, fdata->fd, (char *)buf.base, amount, 0)) < 0 ) {
    -    HT_ERRORF("read failed: fd=%" PRIu32 " ceph_fd=%d amount=%d", fd, fdata->fd, amount);
    -    report_error(cb, -nread);
    -    return;
    -  }
    -
    -  buf.size = nread;
    -  cb->response((uint64_t)offset, buf);
    -}
    -
    -void CephBroker::append(ResponseCallbackAppend *cb, uint32_t fd,
    -			uint32_t amount, const void *data, bool sync)
    -{
    -  OpenFileDataCephPtr fdata;
    -  ssize_t nwritten;
    -  int64_t offset;
    -
    -  HT_DEBUG_OUT << "append fd="<< fd <<" amount="<< amount <<" data='"
    -	       << format_bytes(20, data, amount) <<" sync="<< sync << HT_END;
    -
    -  if (!m_open_file_map.get(fd, fdata)) {
    -    char errbuf[32];
    -    sprintf(errbuf, "%" PRIu32, fd);
    -    cb->error(Error::DFSBROKER_BAD_FILE_HANDLE, errbuf);
    -    return;
    -  }
    -
    -  if ((offset = ceph_lseek(cmount, fdata->fd, 0, SEEK_CUR)) < 0) {
    -    std::string errs(cpp_strerror(offset));
    -    HT_ERRORF("lseek failed: fd=%" PRIu32 " ceph_fd=%d offset=0 SEEK_CUR - %s", fd, fdata->fd,
    -              errs.c_str());
    -    report_error(cb, offset);
    -    return;
    -  }
    -
    -  if ((nwritten = ceph_write(cmount, fdata->fd, (const char *)data, amount, 0)) < 0) {
    -    std::string errs(cpp_strerror(nwritten));
    -    HT_ERRORF("write failed: fd=%" PRIu32 " ceph_fd=%d amount=%d - %s",
    -	      fd, fdata->fd, amount, errs.c_str());
    -    report_error(cb, -nwritten);
    -    return;
    -  }
    -
    -  int r;
    -  if (sync && ((r = ceph_fsync(cmount, fdata->fd, true)) != 0)) {
    -    std::string errs(cpp_strerror(errno));
    -    HT_ERRORF("flush failed: fd=%" PRIu32 " ceph_fd=%d - %s", fd, fdata->fd, errs.c_str());
    -    report_error(cb, r);
    -    return;
    -  }
    -
    -  cb->response((uint64_t)offset, nwritten);
    -}
    -
    -void CephBroker::seek(ResponseCallback *cb, uint32_t fd, uint64_t offset) {
    -  OpenFileDataCephPtr fdata;
    -
    -  HT_DEBUGF("seek fd=%" PRIu32 " offset=%llu", fd, (Llu)offset);
    -
    -  if (!m_open_file_map.get(fd, fdata)) {
    -    char errbuf[32];
    -    sprintf(errbuf, "%" PRIu32, fd);
    -    cb->error(Error::DFSBROKER_BAD_FILE_HANDLE, errbuf);
    -    return;
    -  }
    -  loff_t res = ceph_lseek(cmount, fdata->fd, offset, SEEK_SET);
    -  if (res < 0) {
    -    std::string errs(cpp_strerror((int)res));
    -    HT_ERRORF("lseek failed: fd=%" PRIu32 " ceph_fd=%d offset=%llu - %s",
    -	      fd, fdata->fd, (Llu)offset, errs.c_str());
    -    report_error(cb, offset);
    -    return;
    -  }
    -
    -  cb->response_ok();
    -}
    -
    -void CephBroker::remove(ResponseCallback *cb, const char *fname) {
    -  String abspath;
    -  
    -  HT_DEBUGF("remove file='%s'", fname);
    -  
    -  make_abs_path(fname, abspath);
    -  
    -  int r;
    -  if ((r = ceph_unlink(cmount, abspath.c_str())) < 0) {
    -    std::string errs(cpp_strerror(r));
    -    HT_ERRORF("unlink failed: file='%s' - %s", abspath.c_str(), errs.c_str());
    -    report_error(cb, r);
    -    return;
    -  }
    -  cb->response_ok();
    -}
    -
    -void CephBroker::length(ResponseCallbackLength *cb, const char *fname, bool) {
    -  int r;
    -  struct ceph_statx stx;
    -
    -  HT_DEBUGF("length file='%s'", fname);
    -
    -  if ((r = ceph_statx(cmount, fname, &stx, CEPH_STATX_SIZE, AT_SYMLINK_NOFOLLOW)) < 0) {
    -    String abspath;
    -    make_abs_path(fname, abspath);
    -    std::string errs(cpp_strerror(r));
    -    HT_ERRORF("length (stat) failed: file='%s' - %s", abspath.c_str(), errs.c_str());
    -    report_error(cb,- r);
    -    return;
    -  }
    -  cb->response(stx.stx_size);
    -}
    -
    -void CephBroker::pread(ResponseCallbackRead *cb, uint32_t fd, uint64_t offset,
    -		       uint32_t amount, bool) {
    -  OpenFileDataCephPtr fdata;
    -  ssize_t nread;
    -  StaticBuffer buf(new uint8_t [amount], amount);
    -
    -  HT_DEBUGF("pread fd=%" PRIu32 " offset=%llu amount=%d", fd, (Llu)offset, amount);
    -
    -  if (!m_open_file_map.get(fd, fdata)) {
    -    char errbuf[32];
    -    sprintf(errbuf, "%" PRIu32, fd);
    -    cb->error(Error::DFSBROKER_BAD_FILE_HANDLE, errbuf);
    -    return;
    -  }
    -
    -  if ((nread = ceph_read(cmount, fdata->fd, (char *)buf.base, amount, offset)) < 0) {
    -    std::string errs(cpp_strerror(nread));
    -    HT_ERRORF("pread failed: fd=%" PRIu32 " ceph_fd=%d amount=%d offset=%llu - %s",
    -	      fd, fdata->fd, amount, (Llu)offset, errs.c_str());
    -    report_error(cb, nread);
    -    return;
    -  }
    -
    -  buf.size = nread;
    -
    -  cb->response(offset, buf);
    -}
    -
    -void CephBroker::mkdirs(ResponseCallback *cb, const char *dname) {
    -  String absdir;
    -
    -  HT_DEBUGF("mkdirs dir='%s'", dname);
    -
    -  make_abs_path(dname, absdir);
    -  int r;
    -  if((r=ceph_mkdirs(cmount, absdir.c_str(), 0644)) < 0 && r!=-CEPHFS_EEXIST) {
    -    HT_ERRORF("mkdirs failed: dname='%s' - %d", absdir.c_str(), -r);
    -    report_error(cb, -r);
    -    return;
    -  }
    -  cb->response_ok();
    -}
    -
    -void CephBroker::rmdir(ResponseCallback *cb, const char *dname) {
    -  String absdir;
    -  int r;
    -
    -  make_abs_path(dname, absdir);
    -  if((r = rmdir_recursive(absdir.c_str())) < 0) {
    -      HT_ERRORF("failed to remove dir %s, got error %d", absdir.c_str(), r);
    -      report_error(cb, -r);
    -      return;
    -  }
    -  cb->response_ok();
    -}
    -
    -int CephBroker::rmdir_recursive(const char *directory) {
    -  struct ceph_dir_result *dirp;
    -  struct dirent de;
    -  struct ceph_statx stx;
    -  int r;
    -  if ((r = ceph_opendir(cmount, directory, &dirp)) < 0)
    -    return r; //failed to open
    -  while ((r = ceph_readdirplus_r(cmount, dirp, &de, &stx, CEPH_STATX_INO, AT_STATX_DONT_SYNC, NULL)) > 0) {
    -    String new_dir = de.d_name;
    -    if(!(new_dir.compare(".")==0 || new_dir.compare("..")==0)) {
    -      new_dir = directory;
    -      new_dir += '/';
    -      new_dir += de.d_name;
    -      if (S_ISDIR(stx.stx_mode)) { //it's a dir, clear it out...
    -	if((r=rmdir_recursive(new_dir.c_str())) < 0) return r;
    -      } else { //delete this file
    -	if((r=ceph_unlink(cmount, new_dir.c_str())) < 0) return r;
    -      }
    -    }
    -  }
    -  if (r < 0) return r; //we got an error
    -  if ((r = ceph_closedir(cmount, dirp)) < 0) return r;
    -  return ceph_rmdir(cmount, directory);
    -}
    -
    -void CephBroker::flush(ResponseCallback *cb, uint32_t fd) {
    -  OpenFileDataCephPtr fdata;
    -
    -  HT_DEBUGF("flush fd=%" PRIu32, fd);
    -
    -  if (!m_open_file_map.get(fd, fdata)) {
    -    char errbuf[32];
    -    sprintf(errbuf, "%" PRIu32, fd);
    -    cb->error(Error::DFSBROKER_BAD_FILE_HANDLE, errbuf);
    -    return;
    -  }
    -
    -  int r;
    -  if ((r = ceph_fsync(cmount, fdata->fd, true)) != 0) {
    -    std::string errs(cpp_strerror(r));
    -    HT_ERRORF("flush failed: fd=%" PRIu32 " ceph_fd=%d - %s", fd, fdata->fd, errs.c_str());
    -    report_error(cb, -r);
    -    return;
    -  }
    -
    -  cb->response_ok();
    -}
    -
    -void CephBroker::status(ResponseCallback *cb) {
    -  cb->response_ok();
    -  /*perhaps a total cheat, but both the local and Kosmos brokers
    -    included in Hypertable also do this. */
    -}
    -
    -void CephBroker::shutdown(ResponseCallback *cb) {
    -  m_open_file_map.remove_all();
    -  cb->response_ok();
    -  poll(0, 0, 2000);
    -}
    -
    -void CephBroker::readdir(ResponseCallbackReaddir *cb, const char *dname) {
    -  std::vector listing;
    -  String absdir;
    -
    -  HT_DEBUGF("Readdir dir='%s'", dname);
    -
    -  //get from ceph in a buffer
    -  make_abs_path(dname, absdir);
    -
    -  struct ceph_dir_result *dirp;
    -  ceph_opendir(cmount, absdir.c_str(), &dirp);
    -  int r;
    -  int buflen = 100; //good default?
    -  char *buf = new char[buflen];
    -  String *ent;
    -  int bufpos;
    -  while (1) {
    -    r = ceph_getdnames(cmount, dirp, buf, buflen);
    -    if (r==-CEPHFS_ERANGE) { //expand the buffer
    -      delete [] buf;
    -      buflen *= 2;
    -      buf = new char[buflen];
    -      continue;
    -    }
    -    if (r<=0) break;
    -
    -    //if we make it here, we got at least one name, maybe more
    -    bufpos = 0;
     -    while (bufpos < r) {
     -      ent = new String(buf+bufpos);
     -      if (ent->compare(".") && ent->compare(".."))
    -	listing.push_back(*ent);
    -      bufpos+=ent->size()+1;
    -      delete ent;
    -    }
    -  }
    -  delete [] buf;
    -  ceph_closedir(cmount, dirp);
    -
    -  if (r < 0) report_error(cb, -r); //Ceph shouldn't return r<0 on getdnames
    -  //(except for ERANGE) so if it happens this is bad
    -  cb->response(listing);
    -}
    -
    -void CephBroker::exists(ResponseCallbackExists *cb, const char *fname) {
    -  String abspath;
    -  struct ceph_statx stx;
    -  
    -  HT_DEBUGF("exists file='%s'", fname);
    -  make_abs_path(fname, abspath);
    -  cb->response(ceph_statx(cmount, abspath.c_str(), &stx, 0, AT_SYMLINK_NOFOLLOW) == 0);
    -}
    -
    -void CephBroker::rename(ResponseCallback *cb, const char *src, const char *dst) {
    -  String src_abs;
    -  String dest_abs;
    -  int r;
    -
    -  make_abs_path(src, src_abs);
    -  make_abs_path(dst, dest_abs);
    -  if ((r = ceph_rename(cmount, src_abs.c_str(), dest_abs.c_str())) <0 ) {
    -    report_error(cb, r);
    -    return;
    -  }
    -  cb->response_ok();
    -}
    -
    -void CephBroker::debug(ResponseCallback *cb, int32_t command,
    -		       StaticBuffer &serialized_parameters) {
    -  HT_ERROR("debug commands not implemented!");
    -  cb->error(Error::NOT_IMPLEMENTED, format("Debug commands not supported"));
    -}
    -
    -void CephBroker::report_error(ResponseCallback *cb, int error) {
    -  char errbuf[128];
    -  errbuf[0] = 0;
    -
    -  strerror_r(error, errbuf, 128);
    -
    -  cb->error(Error::DFSBROKER_IO_ERROR, errbuf);
    -}
    -
    -
    diff --git a/src/client/hypertable/CephBroker.h b/src/client/hypertable/CephBroker.h
    deleted file mode 100644
    index d2df38909ae8..000000000000
    --- a/src/client/hypertable/CephBroker.h
    +++ /dev/null
    @@ -1,117 +0,0 @@
    -/** -*- C++ -*-
    - * Copyright (C) 2009-2011 New Dream Network
    - *
    - * This file is part of Hypertable.
    - *
    - * Hypertable is free software; you can redistribute it and/or
    - * modify it under the terms of the GNU General Public License
    - * as published by the Free Software Foundation; either version 2
    - * of the License, or any later version.
    - *
    - * Hypertable is distributed in the hope that it will be useful,
    - * but WITHOUT ANY WARRANTY; without even the implied warranty of
    - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    - * GNU General Public License for more details.
    - *
    - * You should have received a copy of the GNU General Public License
    - * along with Hypertable. If not, see 
    - *
    - * Authors:
    - * Gregory Farnum 
    - * Colin McCabe 
    - */
    -
    -#ifndef HYPERTABLE_CEPHBROKER_H
    -#define HYPERTABLE_CEPHBROKER_H
    -
    -extern "C" {
    -#include 
    -}
    -#include 
    -#include "Common/String.h"
    -#include "Common/Properties.h"
    -
    -#include "DfsBroker/Lib/Broker.h"
    -
    -#include 
    -
    -namespace Hypertable {
    -  using namespace DfsBroker;
    -  /**
    -   *
    -   */
    -  class OpenFileDataCeph : public OpenFileData {
    -  public:
    -    OpenFileDataCeph(struct ceph_mount_info *cmount_, const String& fname,
    -		     int _fd, int _flags);
    -    virtual ~OpenFileDataCeph();
    -    struct ceph_mount_info *cmount;
    -    int fd;
    -    int flags;
    -    String filename;
    -  };
    -
    -  /**
    -   *
    -   */
    -  class OpenFileDataCephPtr : public OpenFileDataPtr {
    -  public:
    -    OpenFileDataCephPtr() : OpenFileDataPtr() { }
    -    explicit OpenFileDataCephPtr(OpenFileDataCeph *ofdl) : OpenFileDataPtr(ofdl, true) { }
     -    OpenFileDataCeph *operator->() const { return static_cast<OpenFileDataCeph *>(get()); }
    -  };
    -
    -  /**
    -   *
    -   */
    -  class CephBroker : public DfsBroker::Broker {
    -  public:
    -    explicit CephBroker(PropertiesPtr& cfg);
    -    virtual ~CephBroker();
    -
    -    virtual void open(ResponseCallbackOpen *cb, const char *fname,
    -                      uint32_t flags, uint32_t bufsz);
    -    virtual void
    -    create(ResponseCallbackOpen *cb, const char *fname, uint32_t flags,
    -           int32_t bufsz, int16_t replication, int64_t blksz);
    -    virtual void close(ResponseCallback *cb, uint32_t fd);
    -    virtual void read(ResponseCallbackRead *cb, uint32_t fd, uint32_t amount);
    -    virtual void append(ResponseCallbackAppend *cb, uint32_t fd,
    -                        uint32_t amount, const void *data, bool sync);
    -    virtual void seek(ResponseCallback *cb, uint32_t fd, uint64_t offset);
    -    virtual void remove(ResponseCallback *cb, const char *fname);
    -    virtual void length(ResponseCallbackLength *cb, const char *fname, bool);
    -    virtual void pread(ResponseCallbackRead *cb, uint32_t fd, uint64_t offset,
    -                       uint32_t amount, bool);
    -    virtual void mkdirs(ResponseCallback *cb, const char *dname);
    -    virtual void rmdir(ResponseCallback *cb, const char *dname);
    -    virtual void flush(ResponseCallback *cb, uint32_t fd);
    -    virtual void status(ResponseCallback *cb);
    -    virtual void shutdown(ResponseCallback *cb);
    -    virtual void readdir(ResponseCallbackReaddir *cb, const char *dname);
    -    virtual void exists(ResponseCallbackExists *cb, const char *fname);
    -    virtual void rename(ResponseCallback *cb, const char *src, const char *dst);
    -    virtual void debug(ResponseCallback *, int32_t command,
    -                       StaticBuffer &serialized_parameters);
    -
    -  private:
    -    struct ceph_mount_info *cmount;
     -    static std::atomic<int> ms_next_fd;
    -
    -    virtual void report_error(ResponseCallback *cb, int error);
    -
    -    void make_abs_path(const char *fname, String& abs) {
    -      if (fname[0] == '/')
    -	abs = fname;
    -      else
    -	abs = m_root_dir + "/" + fname;
    -    }
    -
    -    int rmdir_recursive(const char *directory);
    -
    -    bool m_verbose;
    -    String m_root_dir;
    -  };
    -}
    -
    -#endif //HYPERTABLE_CEPH_BROKER_H
    diff --git a/src/cls/2pc_queue/cls_2pc_queue.cc b/src/cls/2pc_queue/cls_2pc_queue.cc
    index 019f2c96deaf..6e6b6e02db5e 100644
    --- a/src/cls/2pc_queue/cls_2pc_queue.cc
    +++ b/src/cls/2pc_queue/cls_2pc_queue.cc
    @@ -135,7 +135,7 @@ static int cls_2pc_queue_reserve(cls_method_context_t hctx, bufferlist *in, buff
       }
     
       urgent_data.reserved_size += res_op.size + overhead;
    -  // note that last id is incremented regadless of failures
    +  // note that last id is incremented regardless of failures
       // to avoid "old reservation" issues below
       ++urgent_data.last_id;
       bool result;
    @@ -578,6 +578,19 @@ static int cls_2pc_queue_list_entries(cls_method_context_t hctx, bufferlist *in,
       return 0;
     }
     
    +static int cls_2pc_queue_count_entries(cls_method_context_t hctx, cls_queue_list_op& op, cls_queue_head& head,
    +                                       uint32_t& entries_to_remove)
    +{
    +  cls_queue_list_ret op_ret;
    +  auto ret = queue_list_entries(hctx, op, op_ret, head);
    +  if (ret < 0) {
    +    return ret;
    +  }
    +
    +  entries_to_remove = op_ret.entries.size();
    +  return 0;
    +}
    +
     static int cls_2pc_queue_remove_entries(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
     {
       auto in_iter = in->cbegin();
    @@ -594,6 +607,21 @@ static int cls_2pc_queue_remove_entries(cls_method_context_t hctx, bufferlist *i
       if (ret < 0) {
         return ret;
       }
    +
    +  // Old RGW is running, and it sent cls_queue_remove_op instead of cls_2pc_queue_remove_op
    +  if (rem_2pc_op.entries_to_remove == 0) {
    +    CLS_LOG(10, "INFO: cls_2pc_queue_remove_entries: incompatible RGW with rados, counting entries to remove...");
    +    cls_queue_list_op list_op;
     +    list_op.max = std::numeric_limits<uint64_t>::max(); // max length because endmarker is the stopping condition.
    +    list_op.end_marker = rem_2pc_op.end_marker;
    +    ret = cls_2pc_queue_count_entries(hctx, list_op, head, rem_2pc_op.entries_to_remove);
    +    if (ret < 0) {
    +      CLS_LOG(1, "ERROR: cls_2pc_queue_remove_entries: returned: %d", ret);
    +      return ret;
    +    }
    +    CLS_LOG(10, "INFO: cls_2pc_queue_remove_entries: counted: %u", rem_2pc_op.entries_to_remove);
    +  }
    +
       cls_queue_remove_op rem_op;
       rem_op.end_marker = std::move(rem_2pc_op.end_marker);
       ret = queue_remove_entries(hctx, rem_op, head);
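
For illustration, a minimal caller-side sketch of the counting fallback added above: the objclass now counts the entries itself whenever entries_to_remove is zero, and the client wrapper in the following cls_2pc_queue_client.h hunk makes that argument optional. This is not part of the patch; ioctx, queue_oid and remove_up_to are placeholder names, and a Ceph build tree is assumed for the include.

    #include "cls/2pc_queue/cls_2pc_queue_client.h"

    // Hypothetical helper, illustration only: remove queue entries up to end_marker.
    int remove_up_to(librados::IoCtx& ioctx, const std::string& queue_oid,
                     const std::string& end_marker, uint64_t known_count) {
      librados::ObjectWriteOperation op;
      // Sole-consumer case: pass the exact count and skip the objclass-side scan.
      cls_2pc_queue_remove_entries(op, end_marker, known_count);
      // Racy multi-consumer case: omit the count (defaults to 0) so the objclass
      // counts the entries up to end_marker itself:
      //   cls_2pc_queue_remove_entries(op, end_marker);
      return ioctx.operate(queue_oid, &op);
    }
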
    diff --git a/src/cls/2pc_queue/cls_2pc_queue_client.h b/src/cls/2pc_queue/cls_2pc_queue_client.h
    index c806d30f59e0..0d55d68e7a05 100644
    --- a/src/cls/2pc_queue/cls_2pc_queue_client.h
    +++ b/src/cls/2pc_queue/cls_2pc_queue_client.h
    @@ -87,5 +87,8 @@ void cls_2pc_queue_expire_reservations(librados::ObjectWriteOperation& op,
             ceph::coarse_real_time stale_time);
     
     // remove all entries up to the given marker
    -void cls_2pc_queue_remove_entries(librados::ObjectWriteOperation& op, const std::string& end_marker, uint64_t entries_to_remove);
    +// if there is no race condition, providing the number of entries_to_remove is recommended, as it is more efficient.
    +// if there is no guarantee against two clients deleting entries at the same time, leave entries_to_remove out (or pass zero);
    +// the function will then count how many entries it needs to remove.
    +void cls_2pc_queue_remove_entries(librados::ObjectWriteOperation& op, const std::string& end_marker, uint64_t entries_to_remove=0);
     
    diff --git a/src/cls/2pc_queue/cls_2pc_queue_ops.h b/src/cls/2pc_queue/cls_2pc_queue_ops.h
    index bb61ef341ac1..fa4f8765ffe2 100644
    --- a/src/cls/2pc_queue/cls_2pc_queue_ops.h
    +++ b/src/cls/2pc_queue/cls_2pc_queue_ops.h
    @@ -3,12 +3,13 @@
     
     #pragma once
     
    +#include "common/ceph_json.h"
     #include "include/types.h"
     #include "cls_2pc_queue_types.h"
     
     struct cls_2pc_queue_reserve_op {
       uint64_t size;
    -  uint32_t entries;
    +  uint32_t entries{0};
     
       void encode(ceph::buffer::list& bl) const {
         ENCODE_START(1, 1, bl);
    @@ -23,6 +24,19 @@ struct cls_2pc_queue_reserve_op {
         decode(entries, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("size", size);
    +    f->dump_unsigned("entries", entries);
    +  }
    +
    +  static void generate_test_instances(std::list<cls_2pc_queue_reserve_op*>& ls) {
    +    ls.push_back(new cls_2pc_queue_reserve_op);
    +    ls.back()->size = 0;
    +    ls.push_back(new cls_2pc_queue_reserve_op);
    +    ls.back()->size = 123;
    +    ls.back()->entries = 456;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_2pc_queue_reserve_op)
     
    @@ -40,6 +54,15 @@ struct cls_2pc_queue_reserve_ret {
         decode(id, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("id", id);
    +  }
    +
    +  static void generate_test_instances(std::list<cls_2pc_queue_reserve_ret*>& ls) {
    +    ls.push_back(new cls_2pc_queue_reserve_ret);
    +    ls.back()->id = 123;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_2pc_queue_reserve_ret)
     
    @@ -61,6 +84,19 @@ struct cls_2pc_queue_commit_op {
         DECODE_FINISH(bl);
       }
     
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("id", id);
    +    encode_json("bl_data_vec", bl_data_vec, f);
    +  }
    +
    +  static void generate_test_instances(std::list<cls_2pc_queue_commit_op*>& ls) {
    +    ls.push_back(new cls_2pc_queue_commit_op);
    +    ls.back()->id = 123;
    +    ls.back()->bl_data_vec.push_back(ceph::buffer::list());
    +    ls.back()->bl_data_vec.back().append("foo");
    +    ls.back()->bl_data_vec.push_back(ceph::buffer::list());
    +    ls.back()->bl_data_vec.back().append("bar");
    +  }
     };
     WRITE_CLASS_ENCODER(cls_2pc_queue_commit_op)
     
    @@ -78,6 +114,13 @@ struct cls_2pc_queue_abort_op {
         decode(id, bl);
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("id", id);
    +  }
    +  static void generate_test_instances(std::list<cls_2pc_queue_abort_op*>& ls) {
    +    ls.push_back(new cls_2pc_queue_abort_op);
    +    ls.back()->id = 1;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_2pc_queue_abort_op)
     
    @@ -96,6 +139,14 @@ struct cls_2pc_queue_expire_op {
         decode(stale_time, bl);
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_stream("stale_time") << stale_time;
    +  }
    +  static void generate_test_instances(std::list<cls_2pc_queue_expire_op*>& ls) {
    +    ls.push_back(new cls_2pc_queue_expire_op);
    +    ls.push_back(new cls_2pc_queue_expire_op);
    +    ls.back()->stale_time = ceph::coarse_real_time::min();
    +  }
     };
     WRITE_CLASS_ENCODER(cls_2pc_queue_expire_op)
     
    @@ -113,26 +164,45 @@ struct cls_2pc_queue_reservations_ret {
         decode(reservations, bl);
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter *f) const {
    +    f->open_array_section("reservations");
    +    for (const auto& i : reservations) {
    +      f->open_object_section("reservation");
    +      f->dump_unsigned("id", i.first);
    +      i.second.dump(f);
    +      f->close_section();
    +    }
    +    f->close_section();
    +  }
    +
    +  static void generate_test_instances(std::list<cls_2pc_queue_reservations_ret*>& ls) {
    +    ls.push_back(new cls_2pc_queue_reservations_ret);
    +    ls.push_back(new cls_2pc_queue_reservations_ret);
    +    ls.back()->reservations[1] = cls_2pc_reservation();
    +    ls.back()->reservations[2] = cls_2pc_reservation();
    +  }
     };
     WRITE_CLASS_ENCODER(cls_2pc_queue_reservations_ret)
     
     struct cls_2pc_queue_remove_op {
       std::string end_marker;
    -  uint32_t entries_to_remove;
    +  uint32_t entries_to_remove = 0;
     
       cls_2pc_queue_remove_op() {}
     
       void encode(ceph::buffer::list& bl) const {
    -    ENCODE_START(1, 1, bl);
    +    ENCODE_START(2, 1, bl);
         encode(end_marker, bl);
         encode(entries_to_remove, bl);
         ENCODE_FINISH(bl);
       }
     
       void decode(ceph::buffer::list::const_iterator& bl) {
    -    DECODE_START(1, bl);
    +    DECODE_START(2, bl);
         decode(end_marker, bl);
    -    decode(entries_to_remove, bl);
    +    if (struct_v > 1) {
    +      decode(entries_to_remove, bl);
    +    }
         DECODE_FINISH(bl);
       }
     };
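
The version bump above keeps cls_2pc_queue_remove_op decodable in both directions: a struct_v 1 payload simply leaves entries_to_remove at its default of 0, which selects the counting fallback in cls_2pc_queue_remove_entries(). Below is a minimal round-trip sketch, assuming the Ceph encode/decode infrastructure is available; the marker and count values are arbitrary and roundtrip_remove_op is a hypothetical name.

    #include "cls/2pc_queue/cls_2pc_queue_ops.h"

    // Sketch: encode a v2 op and decode it back.
    bool roundtrip_remove_op() {
      cls_2pc_queue_remove_op op;
      op.end_marker = "2/1024";   // arbitrary marker, illustration only
      op.entries_to_remove = 7;   // encoded since struct_v 2
      ceph::buffer::list bl;
      encode(op, bl);
      cls_2pc_queue_remove_op decoded;
      auto it = bl.cbegin();
      decode(decoded, it);
      return decoded.end_marker == op.end_marker &&
             decoded.entries_to_remove == op.entries_to_remove;
    }
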
    diff --git a/src/cls/2pc_queue/cls_2pc_queue_types.h b/src/cls/2pc_queue/cls_2pc_queue_types.h
    index 2413fd7043da..093b69cb193d 100644
    --- a/src/cls/2pc_queue/cls_2pc_queue_types.h
    +++ b/src/cls/2pc_queue/cls_2pc_queue_types.h
    @@ -8,9 +8,9 @@ struct cls_2pc_reservation
     {
       using id_t = uint32_t;
       inline static const id_t NO_ID{0};
    -  uint64_t size;                     // how much size to reserve (bytes)
    +  uint64_t size = 0;                 // how much size to reserve (bytes)
       ceph::coarse_real_time timestamp;  // when the reservation was done (used for cleaning stale reservations)
    -  uint32_t entries;                  // how many entries are reserved
    +  uint32_t entries = 0;              // how many entries are reserved
     
       cls_2pc_reservation(uint64_t _size, ceph::coarse_real_time _timestamp, uint32_t _entries) :
           size(_size), timestamp(_timestamp), entries(_entries) {}
    @@ -34,6 +34,19 @@ struct cls_2pc_reservation
         }
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("size", size);
    +    f->dump_stream("timestamp") << timestamp;
    +  }
    +
    +  static void generate_test_instances(std::list<cls_2pc_reservation*>& ls) {
    +    ls.push_back(new cls_2pc_reservation);
    +    ls.back()->size = 0;
    +    ls.push_back(new cls_2pc_reservation);
    +    ls.back()->size = 123;
    +    ls.back()->timestamp = ceph::coarse_real_clock::zero();
    +  }
     };
     WRITE_CLASS_ENCODER(cls_2pc_reservation)
     
    @@ -68,5 +81,28 @@ struct cls_2pc_urgent_data
         }
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("reserved_size", reserved_size);
    +    f->dump_unsigned("last_id", last_id);
    +    f->open_array_section("reservations");
    +    for (const auto& [id, res] : reservations) {
    +      f->open_object_section("reservation");
    +      f->dump_unsigned("id", id);
    +      res.dump(f);
    +      f->close_section();
    +    }
    +    f->close_section();
    +    f->dump_bool("has_xattrs", has_xattrs);
    +  }
    +
    +  static void generate_test_instances(std::list<cls_2pc_urgent_data*>& ls) {
    +    ls.push_back(new cls_2pc_urgent_data);
    +    ls.push_back(new cls_2pc_urgent_data);
    +    ls.back()->reserved_size = 123;
    +    ls.back()->last_id = 456;
    +    ls.back()->reservations.emplace(789, cls_2pc_reservation(1, ceph::coarse_real_clock::zero(), 2));
    +    ls.back()->has_xattrs = true;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_2pc_urgent_data)
    diff --git a/src/cls/CMakeLists.txt b/src/cls/CMakeLists.txt
    index 57d0dace67c5..953ac83195f2 100644
    --- a/src/cls/CMakeLists.txt
    +++ b/src/cls/CMakeLists.txt
    @@ -76,8 +76,7 @@ if (WITH_RADOSGW)
       target_link_libraries(cls_otp OATH::OATH)
       target_include_directories(cls_otp
     	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/spawn/include")
    +	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw")
       set_target_properties(cls_otp PROPERTIES
         VERSION "1.0.0"
         SOVERSION "1"
    @@ -201,11 +200,10 @@ if (WITH_RADOSGW)
         rgw/cls_rgw_types.cc
         ${CMAKE_SOURCE_DIR}/src/common/ceph_json.cc)
       add_library(cls_rgw SHARED ${cls_rgw_srcs})
    -  target_link_libraries(cls_rgw fmt json_spirit)
    +  target_link_libraries(cls_rgw ${FMT_LIB} json_spirit)
       target_include_directories(cls_rgw
     	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/spawn/include")
    +	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw")
       set_target_properties(cls_rgw PROPERTIES
         VERSION "1.0.0"
         SOVERSION "1"
    @@ -220,8 +218,7 @@ if (WITH_RADOSGW)
       add_library(cls_rgw_client STATIC ${cls_rgw_client_srcs})
       target_include_directories(cls_rgw_client
     	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/spawn/include")
    +	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw")
     
     endif (WITH_RADOSGW)
     
    @@ -313,8 +310,7 @@ if (WITH_RADOSGW)
       add_library(cls_rgw_gc SHARED ${cls_rgw_gc_srcs})
       target_include_directories(cls_rgw_gc
     	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/spawn/include")
    +	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw")
       set_target_properties(cls_rgw_gc PROPERTIES
         VERSION "1.0.0"
         SOVERSION "1"
    @@ -328,8 +324,7 @@ if (WITH_RADOSGW)
       add_library(cls_rgw_gc_client STATIC ${cls_rgw_gc_client_srcs})
       target_include_directories(cls_rgw_gc_client
     	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw/driver/rados"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw"
    -	  PUBLIC "${CMAKE_SOURCE_DIR}/src/spawn/include")
    +	  PUBLIC "${CMAKE_SOURCE_DIR}/src/rgw")
     endif (WITH_RADOSGW)
     
     
    @@ -360,15 +355,7 @@ set_target_properties(cls_fifo PROPERTIES
       SOVERSION "1"
       INSTALL_RPATH ""
       CXX_VISIBILITY_PRESET hidden)
    -target_link_libraries(cls_fifo fmt)
    +target_link_libraries(cls_fifo ${FMT_LIB})
     install(TARGETS cls_fifo DESTINATION ${cls_dir})
     
    -# cls_test_remote_reads
    -set(cls_test_remote_reads_srcs test_remote_reads/cls_test_remote_reads.cc)
    -add_library(cls_test_remote_reads SHARED ${cls_test_remote_reads_srcs})
    -set_target_properties(cls_test_remote_reads PROPERTIES
    -  VERSION "1.0.0"
    -  SOVERSION "1"
    -  INSTALL_RPATH ""
    -  CXX_VISIBILITY_PRESET hidden)
    -install(TARGETS cls_test_remote_reads DESTINATION ${cls_dir})
    +
    diff --git a/src/cls/cas/cls_cas_internal.h b/src/cls/cas/cls_cas_internal.h
    index 09e7f9f1f69d..0b5c56977d29 100644
    --- a/src/cls/cas/cls_cas_internal.h
    +++ b/src/cls/cas/cls_cas_internal.h
    @@ -145,6 +145,12 @@ struct chunk_refs_by_object_t : public chunk_refs_t::refs_t {
         }
         f->close_section();
       }
    +  static void generate_test_instances(std::list<chunk_refs_by_object_t*>& ls) {
    +    ls.push_back(new chunk_refs_by_object_t());
    +    ls.push_back(new chunk_refs_by_object_t());
    +    ls.back()->by_object.insert(hobject_t(sobject_t("foo", CEPH_NOSNAP)));
    +    ls.back()->by_object.insert(hobject_t(sobject_t("bar", CEPH_NOSNAP)));
    +  }
     };
     WRITE_CLASS_ENCODER(chunk_refs_by_object_t)
     
    @@ -238,7 +244,7 @@ struct chunk_refs_by_hash_t : public chunk_refs_t::refs_t {
         int hash_bytes = (hash_bits + 7) / 8;
         while (n--) {
           int64_t poolid;
    -      ceph_le32 hash;
    +      ceph_le32 hash{0};
           uint64_t count;
           denc_signed_varint(poolid, p);
           memcpy(&hash, p.get_pos_add(hash_bytes), hash_bytes);
    @@ -386,6 +392,11 @@ struct chunk_refs_count_t : public chunk_refs_t::refs_t {
         f->dump_string("type", "count");
         f->dump_unsigned("count", total);
       }
    +  static void generate_test_instances(std::list<chunk_refs_count_t*>& o) {
    +    o.push_back(new chunk_refs_count_t);
    +    o.push_back(new chunk_refs_count_t);
    +    o.back()->total = 123;
    +  }
     };
     WRITE_CLASS_ENCODER(chunk_refs_count_t)
     
    diff --git a/src/cls/fifo/cls_fifo.cc b/src/cls/fifo/cls_fifo.cc
    index 85022eeb061a..4b02903916dc 100644
    --- a/src/cls/fifo/cls_fifo.cc
    +++ b/src/cls/fifo/cls_fifo.cc
    @@ -14,6 +14,7 @@
     
     #include 
     
    +#include "common/Formatter.h"
     #include "include/buffer.h"
     #include "include/types.h"
     
    @@ -53,6 +54,14 @@ struct entry_header {
         decode(mtime, bl);
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_stream("mtime") << mtime;
    +  }
    +  static void generate_test_instances(std::list<entry_header*>& ls) {
    +    ls.push_back(new entry_header);
    +    ls.push_back(new entry_header);
    +    ls.back()->mtime = ceph::real_clock::now();
    +  }
     };
     WRITE_CLASS_ENCODER(entry_header)
     
    diff --git a/src/cls/fifo/cls_fifo_ops.h b/src/cls/fifo/cls_fifo_ops.h
    index e850c635c0b8..d466122a9527 100644
    --- a/src/cls/fifo/cls_fifo_ops.h
    +++ b/src/cls/fifo/cls_fifo_ops.h
    @@ -67,6 +67,31 @@ struct create_meta
         decode(exclusive, bl);
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_string("id", id);
    +    f->dump_object("version", version.value_or(objv()));
    +    f->dump_string("pool_name", pool.name);
    +    f->dump_string("pool_ns", pool.ns);
    +    f->dump_string("oid_prefix", oid_prefix.value_or(""));
    +    f->dump_unsigned("max_part_size", max_part_size);
    +    f->dump_unsigned("max_entry_size", max_entry_size);
    +    f->dump_bool("exclusive", exclusive);
    +  }
    +  static void generate_test_instances(std::list<create_meta*>& o) {
    +    o.push_back(new create_meta);
    +    o.push_back(new create_meta);
    +    o.back()->id = "id";
    +    objv v1;
    +    v1.instance = "inst1";
    +    v1.ver = 1;
    +    o.back()->version = v1;
    +    o.back()->pool.name = "pool";
    +    o.back()->pool.ns = "ns";
    +    o.back()->oid_prefix = "prefix";
    +    o.back()->max_part_size = 1024;
    +    o.back()->max_entry_size = 1024;
    +    o.back()->exclusive = true;
    +  }
     };
     WRITE_CLASS_ENCODER(create_meta)
     
    @@ -84,6 +109,17 @@ struct get_meta
         decode(version, bl);
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_object("version", version.value_or(objv()));
    +  }
    +  static void generate_test_instances(std::list<get_meta*>& o) {
    +    o.push_back(new get_meta);
    +    o.push_back(new get_meta);
    +    objv v1;
    +    v1.instance = "inst1";
    +    v1.ver = 1;
    +    o.back()->version = v1;
    +  }
     };
     WRITE_CLASS_ENCODER(get_meta)
     
    @@ -108,6 +144,18 @@ struct get_meta_reply
         decode(part_entry_overhead, bl);
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_object("info", info);
    +    f->dump_unsigned("part_header_size", part_header_size);
    +    f->dump_unsigned("part_entry_overhead", part_entry_overhead);
    +  }
    +  static void generate_test_instances(std::list<get_meta_reply*>& o) {
    +    o.push_back(new get_meta_reply);
    +    o.push_back(new get_meta_reply);
    +    o.back()->info = fifo::info();
    +    o.back()->part_header_size = 1024;
    +    o.back()->part_entry_overhead = 1024;
    +  }
     };
     WRITE_CLASS_ENCODER(get_meta_reply)
     
    diff --git a/src/cls/fifo/cls_fifo_types.h b/src/cls/fifo/cls_fifo_types.h
    index 1c69c1f08718..2ae601e4aafa 100644
    --- a/src/cls/fifo/cls_fifo_types.h
    +++ b/src/cls/fifo/cls_fifo_types.h
    @@ -54,7 +54,16 @@ struct objv {
         decode(ver, bl);
         DECODE_FINISH(bl);
       }
    -  void dump(ceph::Formatter* f) const;
    +  void dump(ceph::Formatter* f) const {
    +    f->dump_string("instance", instance);
    +    f->dump_unsigned("ver", ver);
    +  }
    +  static void generate_test_instances(std::list<objv*>& o) {
    +    o.push_back(new objv);
    +    o.push_back(new objv);
    +    o.back()->instance = "instance";
    +    o.back()->ver = 1;
    +  }
       void decode_json(JSONObj* obj);
     
       bool operator ==(const objv& rhs) const {
    @@ -103,7 +112,18 @@ struct data_params {
         decode(full_size_threshold, bl);
         DECODE_FINISH(bl);
       }
    -  void dump(ceph::Formatter* f) const;
    +  void dump(ceph::Formatter* f) const {
    +    f->dump_unsigned("max_part_size", max_part_size);
    +    f->dump_unsigned("max_entry_size", max_entry_size);
    +    f->dump_unsigned("full_size_threshold", full_size_threshold);
    +  }
    +  static void generate_test_instances(std::list<data_params*>& o) {
    +    o.push_back(new data_params);
    +    o.push_back(new data_params);
    +    o.back()->max_part_size = 1;
    +    o.back()->max_entry_size = 2;
    +    o.back()->full_size_threshold = 3;
    +  }
       void decode_json(JSONObj* obj);
     
       auto operator <=>(const data_params&) const = default;
    @@ -161,7 +181,10 @@ struct journal_entry {
         decode(part_tag, bl);
         DECODE_FINISH(bl);
       }
    -  void dump(ceph::Formatter* f) const;
    +  void dump(ceph::Formatter* f) const {
    +    f->dump_int("op", (int)op);
    +    f->dump_int("part_num", part_num);
    +  }
     
       auto operator <=>(const journal_entry&) const = default;
     };
    @@ -397,7 +420,38 @@ struct info {
         decode_journal(bl);
         DECODE_FINISH(bl);
       }
    -  void dump(ceph::Formatter* f) const;
    +  void dump(ceph::Formatter* f) const {
    +    f->dump_string("id", id);
    +    f->dump_object("version", version);
    +    f->dump_string("oid_prefix", oid_prefix);
    +    f->dump_object("params", params);
    +    f->dump_int("tail_part_num", tail_part_num);
    +    f->dump_int("head_part_num", head_part_num);
    +    f->dump_int("min_push_part_num", min_push_part_num);
    +    f->dump_int("max_push_part_num", max_push_part_num);
    +    f->open_array_section("journal");
    +    for (const auto& entry : journal) {
    +      f->open_object_section("entry");
    +      f->dump_object("entry", entry);
    +      f->close_section();
    +    }
    +    f->close_section();
    +  }
    +  static void generate_test_instances(std::list<info*>& o) {
    +    o.push_back(new info);
    +    o.push_back(new info);
    +    o.back()->id = "myid";
    +    o.back()->version = objv();
    +    o.back()->oid_prefix = "myprefix";
    +    o.back()->params = data_params();
    +    o.back()->tail_part_num = 123;
    +    o.back()->head_part_num = 456;
    +    o.back()->min_push_part_num = 789;
    +    o.back()->max_push_part_num = 101112;
    +    o.back()->journal.insert(journal_entry(journal_entry::Op::create, 1));
    +    o.back()->journal.insert(journal_entry(journal_entry::Op::create, 2));
    +    o.back()->journal.insert(journal_entry(journal_entry::Op::create, 3));
    +  }
       void decode_json(JSONObj* obj);
     
       std::string part_oid(std::int64_t part_num) const {
    diff --git a/src/cls/log/cls_log_ops.h b/src/cls/log/cls_log_ops.h
    index 5a65892598b6..4d3b2f5d3091 100644
    --- a/src/cls/log/cls_log_ops.h
    +++ b/src/cls/log/cls_log_ops.h
    @@ -4,6 +4,7 @@
     #ifndef CEPH_CLS_LOG_OPS_H
     #define CEPH_CLS_LOG_OPS_H
     
    +#include "common/ceph_json.h"
     #include "cls_log_types.h"
     
     struct cls_log_add_op {
    @@ -73,6 +74,21 @@ struct cls_log_list_op {
         decode(max_entries, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter* f) const {
    +    f->dump_stream("from_time") << from_time;
    +    f->dump_string("marker", marker);
    +    f->dump_stream("to_time") << to_time;
    +    f->dump_int("max_entries", max_entries);
    +  }
    +  static void generate_test_instances(std::list<cls_log_list_op*>& ls) {
    +    ls.push_back(new cls_log_list_op);
    +    ls.push_back(new cls_log_list_op);
    +    ls.back()->from_time = utime_t(1, 2);
    +    ls.back()->marker = "marker";
    +    ls.back()->to_time = utime_t(3, 4);
    +    ls.back()->max_entries = 5;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_log_list_op)
     
    @@ -98,6 +114,25 @@ struct cls_log_list_ret {
         decode(truncated, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter* f) const {
    +    encode_json("entries", entries, f);
    +    f->dump_string("marker", marker);
    +    f->dump_bool("truncated", truncated);
    +  }
    +  static void generate_test_instances(std::list<cls_log_list_ret*>& ls) {
    +    ls.push_back(new cls_log_list_ret);
    +    ls.push_back(new cls_log_list_ret);
    +    ls.back()->entries.push_back(cls_log_entry());
    +    ls.back()->entries.push_back(cls_log_entry());
    +    ls.back()->entries.back().section = "section";
    +    ls.back()->entries.back().name = "name";
    +    ls.back()->entries.back().timestamp = utime_t(1, 2);
    +    ls.back()->entries.back().data.append("data");
    +    ls.back()->entries.back().id = "id";
    +    ls.back()->marker = "marker";
    +    ls.back()->truncated = true;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_log_list_ret)
     
    @@ -133,6 +168,20 @@ struct cls_log_trim_op {
         }
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter* f) const {
    +    f->dump_stream("from_time") << from_time;
    +    f->dump_stream("to_time") << to_time;
    +    f->dump_string("from_marker", from_marker);
    +    f->dump_string("to_marker", to_marker);
    +  }
    +  static void generate_test_instances(std::list<cls_log_trim_op*>& ls) {
    +    ls.push_back(new cls_log_trim_op);
    +    ls.push_back(new cls_log_trim_op);
    +    ls.back()->from_time = utime_t(1, 2);
    +    ls.back()->to_time = utime_t(3, 4);
    +    ls.back()->from_marker = "from_marker";
    +    ls.back()->to_marker = "to_marker";
    +  }
     };
     WRITE_CLASS_ENCODER(cls_log_trim_op)
     
    @@ -150,6 +199,13 @@ struct cls_log_info_op {
         // currently empty request
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter* f) const {
    +  }
    +
    +  static void generate_test_instances(std::list<cls_log_info_op*>& ls) {
    +    ls.push_back(new cls_log_info_op);
    +  }
     };
     WRITE_CLASS_ENCODER(cls_log_info_op)
     
    diff --git a/src/cls/log/cls_log_types.h b/src/cls/log/cls_log_types.h
    index 33b8cce51e5e..29aa2bae8fe9 100644
    --- a/src/cls/log/cls_log_types.h
    +++ b/src/cls/log/cls_log_types.h
    @@ -92,6 +92,16 @@ struct cls_log_header {
         decode(max_time, bl);
         DECODE_FINISH(bl);
       }
    +  void dump(ceph::Formatter* f) const {
    +    f->dump_string("max_marker", max_marker);
    +    f->dump_stream("max_time") << max_time;
    +  }
    +  static void generate_test_instances(std::list<cls_log_header*>& o) {
    +    o.push_back(new cls_log_header);
    +    o.push_back(new cls_log_header);
    +    o.back()->max_marker = "test_marker";
    +    o.back()->max_time = utime_t();
    +  }
     };
     inline bool operator ==(const cls_log_header& lhs, const cls_log_header& rhs) {
       return (lhs.max_marker == rhs.max_marker &&
    diff --git a/src/cls/queue/cls_queue_client.cc b/src/cls/queue/cls_queue_client.cc
    index 87d17bb9e315..d3d38a9214e5 100644
    --- a/src/cls/queue/cls_queue_client.cc
    +++ b/src/cls/queue/cls_queue_client.cc
    @@ -48,16 +48,9 @@ void cls_queue_enqueue(ObjectWriteOperation& op, uint32_t expiration_secs, vecto
       op.exec(QUEUE_CLASS, QUEUE_ENQUEUE, in);
     }
     
    -int cls_queue_list_entries(IoCtx& io_ctx, const string& oid, const string& marker, uint32_t max,
    -                            vector<cls_queue_entry>& entries,
    -                            bool *truncated, string& next_marker)
    +int cls_queue_list_entries_inner(IoCtx& io_ctx, const string& oid, vector<cls_queue_entry>& entries,
    +                                 bool *truncated, string& next_marker, bufferlist& in, bufferlist& out)
     {
    -  bufferlist in, out;
    -  cls_queue_list_op op;
    -  op.start_marker = marker;
    -  op.max = max;
    -  encode(op, in);
    -
       int r = io_ctx.exec(oid, QUEUE_CLASS, QUEUE_LIST_ENTRIES, in, out);
       if (r < 0)
         return r;
    @@ -78,6 +71,33 @@ int cls_queue_list_entries(IoCtx& io_ctx, const string& oid, const string& marke
       return 0;
     }
     
    +int cls_queue_list_entries(IoCtx& io_ctx, const string& oid, const string& marker, uint32_t max,
    +                            vector<cls_queue_entry>& entries,
    +                            bool *truncated, string& next_marker)
    +{
    +  bufferlist in, out;
    +  cls_queue_list_op op;
    +  op.start_marker = marker;
    +  op.max = max;
    +  encode(op, in);
    +
    +  return cls_queue_list_entries_inner(io_ctx, oid, entries, truncated, next_marker, in, out);
    +}
    +
    +int cls_queue_list_entries(IoCtx& io_ctx, const string& oid, const string& marker, const string& end_marker,
    +                           vector<cls_queue_entry>& entries,
    +                           bool *truncated, string& next_marker)
    +{
    +  bufferlist in, out;
    +  cls_queue_list_op op;
    +  op.start_marker = marker;
    +  op.max = std::numeric_limits<uint64_t>::max();
    +  op.end_marker = end_marker;
    +  encode(op, in);
    +
    +  return cls_queue_list_entries_inner(io_ctx, oid, entries, truncated, next_marker, in, out);
    +}
    +
     void cls_queue_remove_entries(ObjectWriteOperation& op, const string& end_marker)
     {
       bufferlist in, out;
    diff --git a/src/cls/queue/cls_queue_client.h b/src/cls/queue/cls_queue_client.h
    index 895a51c11737..903448fd480b 100644
    --- a/src/cls/queue/cls_queue_client.h
    +++ b/src/cls/queue/cls_queue_client.h
    @@ -11,6 +11,8 @@ int cls_queue_get_capacity(librados::IoCtx& io_ctx, const std::string& oid, uint
     void cls_queue_enqueue(librados::ObjectWriteOperation& op, uint32_t expiration_secs, std::vector<bufferlist> bl_data_vec);
     int cls_queue_list_entries(librados::IoCtx& io_ctx, const std::string& oid, const std::string& marker, uint32_t max,
                         std::vector<cls_queue_entry>& entries, bool *truncated, std::string& next_marker);
    +int cls_queue_list_entries(librados::IoCtx& io_ctx, const std::string& oid, const std::string& marker, const std::string& end_marker,
    +                           std::vector<cls_queue_entry>& entries, bool *truncated, std::string& next_marker);
     void cls_queue_remove_entries(librados::ObjectWriteOperation& op, const std::string& end_marker);
     
     #endif
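
A minimal sketch of the new end_marker overload declared above, assuming a Ceph build tree; ioctx, queue_oid and list_until are placeholder names. It collects every entry from start_marker up to, but not including, end_marker, since the objclass stops listing once it reaches the end marker.

    #include <string>
    #include <vector>
    #include "cls/queue/cls_queue_client.h"
    #include "cls/queue/cls_queue_types.h"

    // Hypothetical helper, illustration only.
    int list_until(librados::IoCtx& ioctx, const std::string& queue_oid,
                   const std::string& start_marker, const std::string& end_marker,
                   std::vector<cls_queue_entry>& entries) {
      bool truncated = false;
      std::string next_marker;
      return cls_queue_list_entries(ioctx, queue_oid, start_marker, end_marker,
                                    entries, &truncated, next_marker);
    }
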
    diff --git a/src/cls/queue/cls_queue_ops.h b/src/cls/queue/cls_queue_ops.h
    index 8209659bda90..25bf1200c547 100644
    --- a/src/cls/queue/cls_queue_ops.h
    +++ b/src/cls/queue/cls_queue_ops.h
    @@ -4,6 +4,7 @@
     #ifndef CEPH_CLS_QUEUE_OPS_H
     #define CEPH_CLS_QUEUE_OPS_H
     
    +#include "common/ceph_json.h"
     #include "cls/queue/cls_queue_types.h"
     
     struct cls_queue_init_op {
    @@ -29,6 +30,19 @@ struct cls_queue_init_op {
         DECODE_FINISH(bl);
       }
     
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("queue_size", queue_size);
    +    f->dump_unsigned("max_urgent_data_size", max_urgent_data_size);
    +    f->dump_unsigned("urgent_data_len", bl_urgent_data.length());
    +  }
    +
    +  static void generate_test_instances(std::list<cls_queue_init_op*>& o) {
    +    o.push_back(new cls_queue_init_op);
    +    o.push_back(new cls_queue_init_op);
    +    o.back()->queue_size = 1024;
    +    o.back()->max_urgent_data_size = 1024;
    +    o.back()->bl_urgent_data.append(std::string_view("data"));
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_init_op)
     
    @@ -47,29 +61,57 @@ struct cls_queue_enqueue_op {
         DECODE_START(1, bl);
         decode(bl_data_vec, bl);
         DECODE_FINISH(bl);
    -  } 
    +  }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("data_vec_len", bl_data_vec.size());
    +  }
    +
    +  static void generate_test_instances(std::list<cls_queue_enqueue_op*>& o) {
    +    o.push_back(new cls_queue_enqueue_op);
    +    o.push_back(new cls_queue_enqueue_op);
    +    o.back()->bl_data_vec.push_back(ceph::buffer::list());
    +    o.back()->bl_data_vec.back().append(std::string_view("data"));
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_enqueue_op)
     
     struct cls_queue_list_op {
    -  uint64_t max;
    +  uint64_t max{0};
       std::string start_marker;
    +  std::string end_marker;
     
       cls_queue_list_op() {}
     
       void encode(ceph::buffer::list& bl) const {
    -    ENCODE_START(1, 1, bl);
    +    ENCODE_START(2, 1, bl);
         encode(max, bl);
         encode(start_marker, bl);
    +    encode(end_marker, bl);
         ENCODE_FINISH(bl);
       }
     
       void decode(ceph::buffer::list::const_iterator& bl) {
    -    DECODE_START(1, bl);
    +    DECODE_START(2, bl);
         decode(max, bl);
         decode(start_marker, bl);
    +    if (struct_v > 1) {
    +      decode(end_marker, bl);
    +    }
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("max", max);
    +    f->dump_string("start_marker", start_marker);
    +  }
    +
    +  static void generate_test_instances(std::list<cls_queue_list_op*>& o) {
    +    o.push_back(new cls_queue_list_op);
    +    o.push_back(new cls_queue_list_op);
    +    o.back()->max = 123;
    +    o.back()->start_marker = "foo";
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_list_op)
     
    @@ -95,6 +137,22 @@ struct cls_queue_list_ret {
         decode(entries, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_bool("is_truncated", is_truncated);
    +    f->dump_string("next_marker", next_marker);
    +    encode_json("entries", entries, f);
    +  }
    +
    +  static void generate_test_instances(std::list<cls_queue_list_ret*>& o) {
    +    o.push_back(new cls_queue_list_ret);
    +    o.back()->is_truncated = true;
    +    o.back()->next_marker = "foo";
    +    o.back()->entries.push_back(cls_queue_entry());
    +    o.back()->entries.push_back(cls_queue_entry());
    +    o.back()->entries.back().marker = "id";
    +    o.back()->entries.back().data.append(std::string_view("data"));
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_list_ret)
     
    @@ -114,6 +172,15 @@ struct cls_queue_remove_op {
         decode(end_marker, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_string("end_marker", end_marker);
    +  }
    +  static void generate_test_instances(std::list<cls_queue_remove_op*>& o) {
    +    o.push_back(new cls_queue_remove_op);
    +    o.push_back(new cls_queue_remove_op);
    +    o.back()->end_marker = "foo";
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_remove_op)
     
    @@ -133,6 +200,14 @@ struct cls_queue_get_capacity_ret {
         decode(queue_capacity, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("queue_capacity", queue_capacity);
    +  }
    +  static void generate_test_instances(std::list<cls_queue_get_capacity_ret*>& o) {
    +    o.push_back(new cls_queue_get_capacity_ret);
    +    o.back()->queue_capacity = 123;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_get_capacity_ret)
     
    diff --git a/src/cls/queue/cls_queue_src.cc b/src/cls/queue/cls_queue_src.cc
    index b34d9929b93a..37de32ea018d 100644
    --- a/src/cls/queue/cls_queue_src.cc
    +++ b/src/cls/queue/cls_queue_src.cc
    @@ -327,11 +327,11 @@ int queue_list_entries(cls_method_context_t hctx, const cls_queue_list_op& op, c
         }
     
         //If there is leftover data from previous iteration, append new data to leftover data
    -    uint64_t entry_start_offset = start_offset - bl.length();
    +    uint64_t entry_start_offset = start_offset - bl.length(); //NOLINT(bugprone-use-after-move)
         CLS_LOG(20, "INFO: queue_list_entries(): Entry start offset accounting for leftover data is %lu", entry_start_offset);
         bl.claim_append(bl_chunk);
         bl_chunk = std::move(bl);
    -
    +    bl.clear(); //NOLINT(bugprone-use-after-move)
         CLS_LOG(20, "INFO: queue_list_entries(): size of chunk %u", bl_chunk.length());
     
         //Process the chunk of data read
    @@ -400,6 +400,10 @@ int queue_list_entries(cls_method_context_t hctx, const cls_queue_list_op& op, c
             CLS_LOG(10, "INFO: queue_list_entries(): not enough data to read data, breaking out!");
             break;
           }
    +      if (!op.end_marker.empty() && entry.marker == op.end_marker) {
    +        last_marker = entry.marker;
    +        break;
    +      }
           op_ret.entries.emplace_back(entry);
           // Resetting some values
           offset_populated = false;
    @@ -414,11 +418,17 @@ int queue_list_entries(cls_method_context_t hctx, const cls_queue_list_op& op, c
           }
         } while(index < bl_chunk.length());
     
    -    CLS_LOG(10, "INFO: num_ops: %lu and op.max is %lu\n", num_ops, op.max);
    +    CLS_LOG(10, "INFO: num_ops: %lu and op.max is %lu, last_marker: %s and op.end_marker is %s\n",
    +            num_ops, op.max, last_marker.c_str(), op.end_marker.c_str());
     
    -    if (num_ops == op.max) {
    -      next_marker = cls_queue_marker{(entry_start_offset + index), gen};
    -      CLS_LOG(10, "INFO: queue_list_entries(): num_ops is same as op.max, hence breaking out from outer loop with next offset: %lu", next_marker.offset);
    +    if (num_ops == op.max || (!op.end_marker.empty() && op.end_marker == last_marker)) {
    +      if (!op.end_marker.empty()) {
    +        next_marker.from_str(op.end_marker.c_str());
    +      } else {
    +        next_marker = cls_queue_marker{(entry_start_offset + index), gen};
    +      }
    +      CLS_LOG(10, "INFO: queue_list_entries(): either num_ops is same as op.max or last_marker is same as op.end_marker, "
    +                  "hence breaking out from outer loop with next offset: %lu", next_marker.offset);
           break;
         }
     
    diff --git a/src/cls/queue/cls_queue_types.h b/src/cls/queue/cls_queue_types.h
    index cc46df405052..3c3e828edf0a 100644
    --- a/src/cls/queue/cls_queue_types.h
    +++ b/src/cls/queue/cls_queue_types.h
    @@ -34,6 +34,17 @@ struct cls_queue_entry
         decode(marker, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_string("marker", marker);
    +    f->dump_unsigned("data_len", data.length());
    +  }
    +  static void generate_test_instances(std::list<cls_queue_entry*>& o) {
    +    o.push_back(new cls_queue_entry);
    +    o.push_back(new cls_queue_entry);
    +    o.back()->data.append(std::string_view("data"));
    +    o.back()->marker = "marker";
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_entry)
     
    @@ -80,7 +91,16 @@ struct cls_queue_marker
         }
         return 0;
       }
    -
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("offset", offset);
    +    f->dump_unsigned("gen", gen);
    +  }
    +  static void generate_test_instances(std::list<cls_queue_marker*>& o) {
    +    o.push_back(new cls_queue_marker);
    +    o.push_back(new cls_queue_marker);
    +    o.back()->offset = 1024;
    +    o.back()->gen = 0;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_marker)
     
    @@ -114,6 +134,27 @@ struct cls_queue_head
         decode(bl_urgent_data, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_unsigned("max_head_size", max_head_size);
    +    f->dump_unsigned("queue_size", queue_size);
    +    f->dump_unsigned("max_urgent_data_size", max_urgent_data_size);
    +    f->dump_unsigned("front_offset", front.offset);
    +    f->dump_unsigned("front_gen", front.gen);
    +    f->dump_unsigned("tail_offset", tail.offset);
    +    f->dump_unsigned("tail_gen", tail.gen);
    +  }
    +  static void generate_test_instances(std::list<cls_queue_head*>& o) {
    +    o.push_back(new cls_queue_head);
    +    o.push_back(new cls_queue_head);
    +    o.back()->max_head_size = 1024;
    +    o.back()->front.offset = 1024;
    +    o.back()->front.gen = 0;
    +    o.back()->tail.offset = 1024;
    +    o.back()->tail.gen = 0;
    +    o.back()->queue_size = 1024;
    +    o.back()->max_urgent_data_size = 0;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_queue_head)
     
    diff --git a/src/cls/rbd/cls_rbd.cc b/src/cls/rbd/cls_rbd.cc
    index 255220d6b72f..d0d6bd118404 100644
    --- a/src/cls/rbd/cls_rbd.cc
    +++ b/src/cls/rbd/cls_rbd.cc
    @@ -4624,6 +4624,7 @@ static const std::string STATUS_GLOBAL_KEY_PREFIX("status_global_");
     static const std::string REMOTE_STATUS_GLOBAL_KEY_PREFIX("remote_status_global_");
     static const std::string INSTANCE_KEY_PREFIX("instance_");
     static const std::string MIRROR_IMAGE_MAP_KEY_PREFIX("image_map_");
    +static const std::string REMOTE_NAMESPACE("remote_namespace");
     
     std::string peer_key(const std::string &uuid) {
       return PEER_KEY_PREFIX + uuid;
    @@ -5920,6 +5921,56 @@ int mirror_mode_set(cls_method_context_t hctx, bufferlist *in,
         if (r < 0) {
           return r;
         }
    +
    +    r = remove_key(hctx, mirror::REMOTE_NAMESPACE);
    +    if (r < 0) {
    +      return r;
    +    }
    +  }
    +  return 0;
    +}
    +
    +int mirror_remote_namespace_get(cls_method_context_t hctx, bufferlist *in,
    +                                bufferlist *out) {
    +  std::string mirror_ns_decode;
    +  int r = read_key(hctx, mirror::REMOTE_NAMESPACE, &mirror_ns_decode);
    +  if (r < 0) {
    +    CLS_ERR("error getting mirror remote namespace: %s",
    +            cpp_strerror(r).c_str());
    +    return r;
    +  }
    +
    +  encode(mirror_ns_decode, *out);
    +  return 0;
    +}
    +
    +int mirror_remote_namespace_set(cls_method_context_t hctx, bufferlist *in,
    +                                bufferlist *out) {
    +  std::string mirror_namespace;
    +  try {
    +    auto bl_it = in->cbegin();
    +    decode(mirror_namespace, bl_it);
    +  } catch (const ceph::buffer::error &err) {
    +    return -EINVAL;
    +  }
    +
    +  uint32_t mirror_mode;
    +  int r = read_key(hctx, mirror::MODE, &mirror_mode);
    +  if (r < 0 && r != -ENOENT) {
    +    return r;
    +  } else if (r == 0 && mirror_mode != cls::rbd::MIRROR_MODE_DISABLED) {
    +    CLS_ERR("cannot set mirror remote namespace while mirroring enabled");
    +    return -EINVAL;
    +  }
    +
    +  bufferlist bl;
    +  encode(mirror_namespace, bl);
    +
    +  r = cls_cxx_map_set_val(hctx, mirror::REMOTE_NAMESPACE, &bl);
    +  if (r < 0) {
    +    CLS_ERR("error setting mirror remote namespace: %s",
    +            cpp_strerror(r).c_str());
    +    return r;
       }
       return 0;
     }
    @@ -6898,6 +6949,8 @@ int dir_remove(cls_method_context_t hctx,
     }
     
     static const string RBD_GROUP_SNAP_KEY_PREFIX = "snapshot_";
    +static const string RBD_GROUP_SNAP_ORDER_KEY_PREFIX = "snap_order_";
    +static const string RBD_GROUP_SNAP_MAX_ORDER_KEY = "snap_max_order";
     
     std::string snap_key(const std::string &snap_id) {
       ostringstream oss;
    @@ -6905,10 +6958,19 @@ std::string snap_key(const std::string &snap_id) {
       return oss.str();
     }
     
    +std::string snap_order_key(const std::string &snap_id) {
    +  ostringstream oss;
    +  oss << RBD_GROUP_SNAP_ORDER_KEY_PREFIX << snap_id;
    +  return oss.str();
    +}
    +
    +std::string snap_id_from_order_key(const string &key) {
    +  return key.substr(RBD_GROUP_SNAP_ORDER_KEY_PREFIX.size());
    +}
    +
     int snap_list(cls_method_context_t hctx, cls::rbd::GroupSnapshot start_after,
                   uint64_t max_return,
    -              std::vector<cls::rbd::GroupSnapshot> *group_snaps)
    -{
    +              std::vector<cls::rbd::GroupSnapshot> *group_snaps) {
       int max_read = RBD_MAX_KEYS_READ;
       std::map<string, bufferlist> vals;
       string last_read = snap_key(start_after.id);
    @@ -6941,6 +7003,8 @@ int snap_list(cls_method_context_t hctx, cls::rbd::GroupSnapshot start_after,
     
         if (!vals.empty()) {
           last_read = vals.rbegin()->first;
    +    } else {
    +      ceph_assert(!more);
         }
       } while (more && (group_snaps->size() < max_return));
     
    @@ -7457,14 +7521,51 @@ int group_snap_set(cls_method_context_t hctx,
         if (r < 0 && r != -ENOENT) {
           return r;
         } else if (r >= 0) {
    +      CLS_ERR("snap key already exists : %s", key.c_str());
    +      return -EEXIST;
    +    }
    +
    +    std::string order_key = group::snap_order_key(group_snap.id);
    +    r = cls_cxx_map_get_val(hctx, order_key, &snap_bl);
    +    if (r < 0 && r != -ENOENT) {
    +      return r;
    +    } else if (r >= 0) {
    +      CLS_ERR("order key already exists : %s", order_key.c_str());
           return -EEXIST;
         }
    +
    +    uint64_t max_order = 0;
    +    r = read_key(hctx, group::RBD_GROUP_SNAP_MAX_ORDER_KEY, &max_order);
    +    if (r < 0 && r != -ENOENT) {
    +      return r;
    +    }
    +
    +    bufferlist bl;
    +    encode(++max_order, bl);
    +    r = cls_cxx_map_set_val(hctx, group::RBD_GROUP_SNAP_MAX_ORDER_KEY, &bl);
    +    if (r < 0) {
    +      CLS_ERR("error setting key: %s : %s",
    +              group::RBD_GROUP_SNAP_MAX_ORDER_KEY.c_str(),
    +              cpp_strerror(r).c_str());
    +      return r;
    +    }
    +
    +    r = cls_cxx_map_set_val(hctx, order_key, &bl);
    +    if (r < 0) {
    +      CLS_ERR("error setting key: %s : %s", order_key.c_str(),
    +              cpp_strerror(r).c_str());
    +      return r;
    +    }
       }
     
       bufferlist obl;
       encode(group_snap, obl);
       r = cls_cxx_map_set_val(hctx, key, &obl);
    -  return r;
    +  if (r < 0) {
    +    CLS_ERR("error setting key: %s : %s", key.c_str(), cpp_strerror(r).c_str());
    +    return r;
    +  }
    +  return 0;
     }
     
     /**
    @@ -7492,7 +7593,21 @@ int group_snap_remove(cls_method_context_t hctx,
     
       CLS_LOG(20, "removing snapshot with key %s", snap_key.c_str());
       int r = cls_cxx_map_remove_key(hctx, snap_key);
    -  return r;
    +  if (r < 0) {
    +    CLS_ERR("error removing snapshot with key %s : %s", snap_key.c_str(),
    +            cpp_strerror(r).c_str());
    +    return r;
    +  }
    +
    +  std::string snap_order_key = group::snap_order_key(snap_id);
    +  r = cls_cxx_map_remove_key(hctx, snap_order_key);
    +  if (r < 0 && r != -ENOENT) {
    +    CLS_ERR("error removing snapshot order key %s : %s", snap_order_key.c_str(),
    +            cpp_strerror(r).c_str());
    +    return r;
    +  }
    +
    +  return 0;
     }
     
     /**
    @@ -7566,13 +7681,70 @@ int group_snap_list(cls_method_context_t hctx,
         return -EINVAL;
       }
       std::vector<cls::rbd::GroupSnapshot> group_snaps;
    -  group::snap_list(hctx, start_after, max_return, &group_snaps);
    +  int r = group::snap_list(hctx, start_after, max_return, &group_snaps);
    +  if (r < 0) {
    +    return r;
    +  }
     
       encode(group_snaps, *out);
     
       return 0;
     }
     
    +int group_snap_list_order(cls_method_context_t hctx,
    +                          bufferlist *in, bufferlist *out)
    +{
    +  CLS_LOG(20, "group_snap_list_order");
    +
    +  std::string start_after;
    +  uint64_t max_return;
    +  try {
    +    auto iter = in->cbegin();
    +    decode(start_after, iter);
    +    decode(max_return, iter);
    +  } catch (const ceph::buffer::error &err) {
    +    return -EINVAL;
    +  }
    +
    +  std::map<std::string, uint64_t> group_snaps_order;
    +  int max_read = RBD_MAX_KEYS_READ;
    +  bool more;
    +  std::string last_read = group::snap_order_key(start_after);
    +  std::map<std::string, bufferlist> vals;
    +
    +  do {
    +    int r = cls_cxx_map_get_vals(hctx, last_read,
    +                                 group::RBD_GROUP_SNAP_ORDER_KEY_PREFIX,
    +                                 max_read, &vals, &more);
    +    if (r < 0) {
    +      CLS_ERR("error getting snapshot orders: %s", cpp_strerror(r).c_str());
    +      return r;
    +    }
    +
    +    for (auto it = vals.begin();
    +         it != vals.end() && group_snaps_order.size() < max_return; ++it) {
    +      std::string snap_id = group::snap_id_from_order_key(it->first);
    +      auto iter = it->second.cbegin();
    +      uint64_t order;
    +      try {
    +        decode(order, iter);
    +      } catch (const ceph::buffer::error &err) {
    +        CLS_ERR("error decoding snapshot order: %s", snap_id.c_str());
    +        return -EIO;
    +      }
    +      group_snaps_order[snap_id] = order;
    +    }
    +    if (!vals.empty()) {
    +      last_read = vals.rbegin()->first;
    +    } else {
    +      ceph_assert(!more);
    +    }
    +  } while (more && (group_snaps_order.size() < max_return));
    +
    +  encode(group_snaps_order, *out);
    +  return 0;
    +}
    +
     namespace trash {
     
     static const std::string IMAGE_KEY_PREFIX("id_");
    @@ -8157,6 +8329,8 @@ CLS_INIT(rbd)
       cls_method_handle_t h_mirror_uuid_set;
       cls_method_handle_t h_mirror_mode_get;
       cls_method_handle_t h_mirror_mode_set;
    +  cls_method_handle_t h_mirror_remote_namespace_get;
    +  cls_method_handle_t h_mirror_remote_namespace_set;
       cls_method_handle_t h_mirror_peer_ping;
       cls_method_handle_t h_mirror_peer_list;
       cls_method_handle_t h_mirror_peer_add;
    @@ -8199,6 +8373,7 @@ CLS_INIT(rbd)
       cls_method_handle_t h_group_snap_remove;
       cls_method_handle_t h_group_snap_get_by_id;
       cls_method_handle_t h_group_snap_list;
    +  cls_method_handle_t h_group_snap_list_order;
       cls_method_handle_t h_trash_add;
       cls_method_handle_t h_trash_remove;
       cls_method_handle_t h_trash_list;
    @@ -8453,6 +8628,13 @@ CLS_INIT(rbd)
       cls_register_cxx_method(h_class, "mirror_mode_set",
                               CLS_METHOD_RD | CLS_METHOD_WR,
                               mirror_mode_set, &h_mirror_mode_set);
    +  cls_register_cxx_method(h_class, "mirror_remote_namespace_get",
    +                          CLS_METHOD_RD, mirror_remote_namespace_get,
    +                          &h_mirror_remote_namespace_get);
    +  cls_register_cxx_method(h_class, "mirror_remote_namespace_set",
    +                          CLS_METHOD_RD | CLS_METHOD_WR,
    +                          mirror_remote_namespace_set,
    +                          &h_mirror_remote_namespace_set);
       cls_register_cxx_method(h_class, "mirror_peer_ping",
                               CLS_METHOD_RD | CLS_METHOD_WR,
                               mirror_peer_ping, &h_mirror_peer_ping);
    @@ -8582,6 +8764,9 @@ CLS_INIT(rbd)
       cls_register_cxx_method(h_class, "group_snap_list",
     			  CLS_METHOD_RD,
     			  group_snap_list, &h_group_snap_list);
    +  cls_register_cxx_method(h_class, "group_snap_list_order",
    +			  CLS_METHOD_RD,
    +			  group_snap_list_order, &h_group_snap_list_order);
     
       /* rbd_trash object methods */
       cls_register_cxx_method(h_class, "trash_add",
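
For illustration, a sketch of calling the newly registered group_snap_list_order method through the generic librados exec path; the typed wrappers added in cls_rbd_client.cc (next hunk) are the normal route. This is not part of the patch: raw_group_snap_list_order and the group header oid are placeholders, and the reconstructed std::map<std::string, uint64_t> return type is an assumption.

    #include <map>
    #include <string>
    #include "include/rados/librados.hpp"
    #include "include/encoding.h"

    // Hypothetical helper, illustration only.
    int raw_group_snap_list_order(librados::IoCtx& ioctx, const std::string& group_oid,
                                  std::map<std::string, uint64_t>* out) {
      using ceph::encode;
      using ceph::decode;
      ceph::bufferlist in, outbl;
      encode(std::string(), in);    // start_after: list from the beginning
      encode(uint64_t(1024), in);   // max_return
      int r = ioctx.exec(group_oid, "rbd", "group_snap_list_order", in, outbl);
      if (r < 0) {
        return r;
      }
      auto it = outbl.cbegin();
      decode(*out, it);             // may throw ceph::buffer::error on a bad reply
      return 0;
    }
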
    diff --git a/src/cls/rbd/cls_rbd_client.cc b/src/cls/rbd/cls_rbd_client.cc
    index 2f1f37eaa9f7..559ac221f89a 100644
    --- a/src/cls/rbd/cls_rbd_client.cc
    +++ b/src/cls/rbd/cls_rbd_client.cc
    @@ -1882,6 +1882,40 @@ int mirror_mode_set(librados::IoCtx *ioctx,
       return 0;
     }
     
    +int mirror_remote_namespace_get(librados::IoCtx *ioctx,
    +			        std::string *mirror_namespace) {
    +  bufferlist in_bl;
    +  bufferlist out_bl;
    +
    +  int r = ioctx->exec(RBD_MIRRORING, "rbd", "mirror_remote_namespace_get",
    +                      in_bl, out_bl);
    +  if (r < 0) {
    +    return r;
    +  }
    +
    +  auto it = out_bl.cbegin();
    +  try {
    +    decode(*mirror_namespace, it);
    +  } catch (const ceph::buffer::error &err) {
    +    return -EBADMSG;
    +  }
    +  return 0;
    +}
    +
    +int mirror_remote_namespace_set(librados::IoCtx *ioctx,
    +                                const std::string &mirror_namespace) {
    +  bufferlist in_bl;
    +  encode(mirror_namespace, in_bl);
    +
    +  bufferlist out_bl;
    +  int r = ioctx->exec(RBD_MIRRORING, "rbd", "mirror_remote_namespace_set",
    +                      in_bl, out_bl);
    +  if (r < 0) {
    +    return r;
    +  }
    +  return 0;
    +}
    +
     void mirror_peer_list_start(librados::ObjectReadOperation *op) {
       bufferlist bl;
       op->exec("rbd", "mirror_peer_list", bl);
    @@ -2757,31 +2791,85 @@ int group_snap_get_by_id(librados::IoCtx *ioctx, const std::string &oid,
     
       return 0;
     }
    +
    +void group_snap_list_start(librados::ObjectReadOperation *op,
    +                           const cls::rbd::GroupSnapshot &start,
    +                           uint64_t max_return)
    +{
    +  bufferlist bl;
    +  encode(start, bl);
    +  encode(max_return, bl);
    +
    +  op->exec("rbd", "group_snap_list", bl);
    +}
    +
    +int group_snap_list_finish(bufferlist::const_iterator *iter,
    +                           std::vector<cls::rbd::GroupSnapshot> *snapshots)
    +{
    +  try {
    +    decode(*snapshots, *iter);
    +  } catch (const ceph::buffer::error &err) {
    +    return -EBADMSG;
    +  }
    +  return 0;
    +}
    +
     int group_snap_list(librados::IoCtx *ioctx, const std::string &oid,
                         const cls::rbd::GroupSnapshot &start,
                         uint64_t max_return,
                         std::vector<cls::rbd::GroupSnapshot> *snapshots)
     {
    -  using ceph::encode;
    -  using ceph::decode;
    -  bufferlist inbl, outbl;
    -  encode(start, inbl);
    -  encode(max_return, inbl);
    +  librados::ObjectReadOperation op;
    +  group_snap_list_start(&op, start, max_return);
     
    -  int r = ioctx->exec(oid, "rbd", "group_snap_list", inbl, outbl);
    +  bufferlist out_bl;
    +  int r = ioctx->operate(oid, &op, &out_bl);
       if (r < 0) {
         return r;
       }
    -  auto iter = outbl.cbegin();
    +
    +  auto it = out_bl.cbegin();
    +  return group_snap_list_finish(&it, snapshots);
    +}
    +
    +void group_snap_list_order_start(librados::ObjectReadOperation *op,
    +                                 const std::string &start,
    +                                 uint64_t max_return)
    +{
    +  bufferlist bl;
    +  encode(start, bl);
    +  encode(max_return, bl);
    +  op->exec("rbd", "group_snap_list_order", bl);
    +}
    +
    +int group_snap_list_order_finish(bufferlist::const_iterator *iter,
    +                                 std::map<std::string, uint64_t> *snap_order)
    +{
       try {
    -    decode(*snapshots, iter);
    +    decode(*snap_order, *iter);
       } catch (const ceph::buffer::error &err) {
         return -EBADMSG;
       }
    -
       return 0;
     }
     
    +int group_snap_list_order(librados::IoCtx *ioctx, const std::string &oid,
    +                          const std::string &start, uint64_t max_return,
    +                          std::map<std::string, uint64_t> *snap_order)
    +{
    +  librados::ObjectReadOperation op;
    +  group_snap_list_order_start(&op, start, max_return);
    +
    +  bufferlist out_bl;
    +  int r = ioctx->operate(oid, &op, &out_bl);
    +  if (r < 0) {
    +    return r;
    +  }
    +
    +  auto it = out_bl.cbegin();
    +  return group_snap_list_order_finish(&it, snap_order);
    +}
    +
     // rbd_trash functions
     void trash_add(librados::ObjectWriteOperation *op,
                    const std::string &id,
    diff --git a/src/cls/rbd/cls_rbd_client.h b/src/cls/rbd/cls_rbd_client.h
    index 38098805e98c..37992203affb 100644
    --- a/src/cls/rbd/cls_rbd_client.h
    +++ b/src/cls/rbd/cls_rbd_client.h
    @@ -389,6 +389,11 @@ int mirror_mode_get(librados::IoCtx *ioctx,
     int mirror_mode_set(librados::IoCtx *ioctx,
                         cls::rbd::MirrorMode mirror_mode);
     
    +int mirror_remote_namespace_get(librados::IoCtx *ioctx,
    +				std::string *mirror_namespace);
    +int mirror_remote_namespace_set(librados::IoCtx *ioctx,
    +				const std::string &mirror_namespace);
    +
     int mirror_peer_ping(librados::IoCtx *ioctx,
                          const std::string& site_name,
                          const std::string& fsid);
    @@ -580,11 +585,24 @@ int group_snap_remove(librados::IoCtx *ioctx, const std::string &oid,
     int group_snap_get_by_id(librados::IoCtx *ioctx, const std::string &oid,
                              const std::string &snap_id,
                              cls::rbd::GroupSnapshot *snapshot);
    +void group_snap_list_start(librados::ObjectReadOperation *op,
    +                           const cls::rbd::GroupSnapshot &start,
    +                           uint64_t max_return);
    +int group_snap_list_finish(ceph::buffer::list::const_iterator *iter,
    +                           std::vector<cls::rbd::GroupSnapshot> *snapshots);
     int group_snap_list(librados::IoCtx *ioctx, const std::string &oid,
                         const cls::rbd::GroupSnapshot &start,
                         uint64_t max_return,
                         std::vector<cls::rbd::GroupSnapshot> *snapshots);
    -
    +void group_snap_list_order_start(librados::ObjectReadOperation *op,
    +                                 const std::string &start_snap_id,
    +                                 uint64_t max_return);
    +int group_snap_list_order_finish(ceph::buffer::list::const_iterator *iter,
    +                                 std::map<std::string, uint64_t> *snap_order);
    +int group_snap_list_order(librados::IoCtx *ioctx, const std::string &oid,
    +                          const std::string &snap_id, uint64_t max_return,
    +                          std::map<std::string, uint64_t> *snap_order);
    + 
     // operations on rbd_trash object
     void trash_add(librados::ObjectWriteOperation *op,
                    const std::string &id,
    diff --git a/src/cls/rbd/cls_rbd_types.h b/src/cls/rbd/cls_rbd_types.h
    index c8d2cb871e44..c1d64805ae42 100644
    --- a/src/cls/rbd/cls_rbd_types.h
    +++ b/src/cls/rbd/cls_rbd_types.h
    @@ -374,6 +374,7 @@ struct GroupImageSpec {
     
       std::string image_key();
     
    +  bool operator==(const GroupImageSpec&) const = default;
     };
     WRITE_CLASS_ENCODER(GroupImageSpec);
     
    diff --git a/src/cls/refcount/cls_refcount_client.h b/src/cls/refcount/cls_refcount_client.h
    index 73a23a7ee28d..17af7a78ef3e 100644
    --- a/src/cls/refcount/cls_refcount_client.h
    +++ b/src/cls/refcount/cls_refcount_client.h
    @@ -19,7 +19,7 @@
      * So, the regular usage would be to create an object, to increase the refcount. Then, when
      * wanting to have another reference to it, increase the refcount using a different tag. When
      * removing a reference it is required to drop the refcount (using the same tag that was used
    - * for that reference). When the refcount drops to zero, the object is removed automaticfally.
    + * for that reference). When the refcount drops to zero, the object is removed automatically.
      *
      * In order to maintain backwards compatibility with objects that were created without having
      * their refcount increased, the implicit_ref was added. Any object that was created without
    diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc
    index 75a37bad6342..2b73fb3b7aa7 100644
    --- a/src/cls/rgw/cls_rgw.cc
    +++ b/src/cls/rgw/cls_rgw.cc
    @@ -48,30 +48,32 @@ CLS_NAME(rgw)
     // of a special bucket-index entry for the first byte. Note: although
     // it has no impact, the 2nd, 3rd, or 4th byte of a UTF-8 character
     // may be 0x80.
    -#define BI_PREFIX_CHAR 0x80
    +constexpr unsigned char BI_PREFIX_CHAR = 0x80;
     
     #define BI_BUCKET_OBJS_INDEX          0
     #define BI_BUCKET_LOG_INDEX           1
     #define BI_BUCKET_OBJ_INSTANCE_INDEX  2
     #define BI_BUCKET_OLH_DATA_INDEX      3
    +#define BI_BUCKET_RESHARD_LOG_INDEX   4
     
    -#define BI_BUCKET_LAST_INDEX          4
    +#define BI_BUCKET_LAST_INDEX          5
     
     static std::string bucket_index_prefixes[] = { "", /* special handling for the objs list index */
     					       "0_",     /* bucket log index */
     					       "1000_",  /* obj instance index */
     					       "1001_",  /* olh data index */
    +					       "2001_",   /* reshard log index */
     
     					       /* this must be the last index */
     					       "9999_",};
     
     // this string is greater than all ascii plain entries and less than
     // all special entries
    -static const std::string BI_PREFIX_BEGIN = string(1, BI_PREFIX_CHAR);
     +static const std::string BI_PREFIX_BEGIN = string(1, static_cast<char>(BI_PREFIX_CHAR));
     
     // this string is greater than all special entries and less than all
     // non-ascii plain entries
    -static const std::string BI_PREFIX_END = string(1, BI_PREFIX_CHAR) +
     +static const std::string BI_PREFIX_END = string(1, static_cast<char>(BI_PREFIX_CHAR)) +
         bucket_index_prefixes[BI_BUCKET_LAST_INDEX];
     
     /* Returns whether parameter is not a key for a special entry. Empty
    @@ -80,7 +82,7 @@ static const std::string BI_PREFIX_END = string(1, BI_PREFIX_CHAR) +
      * using appropriately.
      */
     static bool bi_is_plain_entry(const std::string& s) {
    -  return (s.empty() || (unsigned char)s[0] != BI_PREFIX_CHAR);
     +  return (s.empty() || static_cast<unsigned char>(s[0]) != BI_PREFIX_CHAR);
     }
     
     static int bi_entry_type(const string& s)
    @@ -116,6 +118,20 @@ static bool bi_entry_gt(const string& first, const string& second)
       return first > second;
     }
     
    +/**
    + * return: Plain, Instance, OLH or Invalid
    + */
    +BIIndexType bi_type(const string& s, const string& prefix ="")
    +{
    +  int ret = bi_entry_type(s.substr(prefix.size()));
    +  if (ret < 0) {
    +    return BIIndexType::Invalid;
    +  } else if (ret == 0) {
    +    return BIIndexType::Plain;
    +  }
    +  return (BIIndexType)ret;
    +}
    +
     static void get_time_key(real_time& ut, string *key)
     {
       char buf[32];
    @@ -133,6 +149,40 @@ static void get_index_ver_key(cls_method_context_t hctx, uint64_t index_ver, str
       *key = buf;
     }
     
    +static void bi_reshard_log_prefix(string& key)
    +{
    +  key = BI_PREFIX_CHAR;
    +  key.append(bucket_index_prefixes[BI_BUCKET_RESHARD_LOG_INDEX]);
    +}
    +
    +// 0x802001_idx
    +static void bi_reshard_log_key(cls_method_context_t hctx, string& key, const string& idx)
    +{
    +  bi_reshard_log_prefix(key);
    +  key.append(idx);
    +}
    +
    +static int reshard_log_index_operation(cls_method_context_t hctx, const string& idx,
    +                                       const cls_rgw_obj_key& key, bufferlist* log_bl)
    +{
    +  string reshard_log_idx;
    +  bi_reshard_log_key(hctx, reshard_log_idx, idx);
    +
    +  rgw_cls_bi_entry reshard_log_entry;
    +  if (log_bl && log_bl->length() == 0) {
    +    reshard_log_entry.type = BIIndexType::ReshardDeleted;
    +    rgw_bucket_deleted_entry delete_entry;
    +    delete_entry.key = key;
    +    encode(delete_entry, reshard_log_entry.data);
    +  } else {
    +    reshard_log_entry.data = *log_bl;
    +  }
    +  reshard_log_entry.idx = idx;
    +  bufferlist bl;
    +  encode(reshard_log_entry, bl);
    +  return cls_cxx_map_set_val(hctx, reshard_log_idx, &bl);
    +}
    +
     static void bi_log_prefix(string& key)
     {
       key = BI_PREFIX_CHAR;
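
The "0x802001_idx" comment above spells out the reshard-log key layout: the special-entry prefix byte, the "2001_" namespace, then the original index key. A standalone, purely illustrative mirror of bi_reshard_log_key():

    #include <string>

    std::string make_reshard_log_key(const std::string& idx) {
      std::string key(1, static_cast<char>(0x80));  // BI_PREFIX_CHAR
      key += "2001_";                               // reshard log namespace
      key += idx;                                   // original bucket-index key
      return key;
    }
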
    @@ -209,7 +259,7 @@ static int get_obj_vals(cls_method_context_t hctx,
       }
     
       auto last_element = pkeys->crbegin();
    -  if ((unsigned char)last_element->first[0] < BI_PREFIX_CHAR) {
     +  if (static_cast<unsigned char>(last_element->first[0]) < BI_PREFIX_CHAR) {
         /* if the first character of the last entry is less than the
          * prefix then all entries must precede the "ugly namespace" and
          * we're done
    @@ -218,7 +268,7 @@ static int get_obj_vals(cls_method_context_t hctx,
       }
     
       auto first_element = pkeys->cbegin();
    -  if ((unsigned char)first_element->first[0] > BI_PREFIX_CHAR) {
     +  if (static_cast<unsigned char>(first_element->first[0]) > BI_PREFIX_CHAR) {
         /* if the first character of the first entry is after the "ugly
          * namespace" then all entries must follow the "ugly namespace" and
          * we're done
    @@ -518,7 +568,7 @@ int rgw_bucket_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
       std::string start_after_omap_key;
       encode_list_index_key(hctx, op.start_obj, &start_after_omap_key);
     
    -  // this is set whenenver start_after_omap_key is set to keep them in
    +  // this is set whenever start_after_omap_key is set to keep them in
       // sync since this will be the returned marker when a marker is
       // returned
       cls_rgw_obj_key start_after_entry_key;
    @@ -685,6 +735,39 @@ static int write_bucket_header(cls_method_context_t hctx, rgw_bucket_dir_header
       return cls_cxx_map_write_header(hctx, &header_bl);
     }
     
     +template <typename T>
    +static int write_entry(cls_method_context_t hctx, T& entry, const string& key,
    +                       rgw_bucket_dir_header& header, bool count_entry = true)
    +{
    +  bufferlist bl;
    +  encode(entry, bl);
    +  int ret = cls_cxx_map_set_val(hctx, key, &bl);
    +  if (ret < 0) {
    +    return ret;
    +  }
    +  if (header.resharding_in_logrecord()) {
    +    ret = reshard_log_index_operation(hctx, key, entry.key, &bl);
    +    header.reshardlog_entries++;
    +  }
    +  return ret;
    +}
    +
    +static int remove_entry(cls_method_context_t hctx, const string& idx,
    +                        const cls_rgw_obj_key& key,
    +                        rgw_bucket_dir_header& header)
    +{
    +  int ret = cls_cxx_map_remove_key(hctx, idx);
    +  if (ret < 0) {
    +    CLS_LOG(0, "ERROR: cls_cxx_map_remove_key() idx=%s ret=%d", idx.c_str(), ret);
    +    return ret;
    +  }
    +  if (header.resharding_in_logrecord()) {
    +    header.reshardlog_entries++;
    +    bufferlist empty;
    +    return reshard_log_index_operation(hctx, idx, key, &empty);
    +  }
    +  return 0;
    +}
     
     int rgw_bucket_update_stats(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
     {
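
The two helpers above make every index mutation also land in the reshard log while the bucket header is in the logrecord phase, bumping reshardlog_entries as it goes. A toy, non-Ceph model of that double write, with std::map standing in for the omap:

    #include <map>
    #include <string>

    struct ToyIndex {
      std::map<std::string, std::string> omap;         // main bucket index
      std::map<std::string, std::string> reshard_log;  // the 0x802001_* namespace
      bool logrecord = false;
      unsigned reshardlog_entries = 0;

      void write(const std::string& key, const std::string& val) {
        omap[key] = val;
        if (logrecord) {                               // mirror the change for the resharder
          reshard_log["\x80" "2001_" + key] = val;
          ++reshardlog_entries;
        }
      }
    };
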
    @@ -718,6 +801,19 @@ int rgw_bucket_update_stats(cls_method_context_t hctx, bufferlist *in, bufferlis
         }
       }
     
    +  for (auto& s : op.dec_stats) {
    +    auto& dest = header.stats[s.first];
    +    if (op.absolute) {
     +      CLS_LOG(0, "ERROR: %s: dec_stats cannot be applied when setting stats absolutely", __func__);
    +      return -EINVAL;
    +    } else {
    +      dest.total_size -= s.second.total_size;
    +      dest.total_size_rounded -= s.second.total_size_rounded;
    +      dest.num_entries -= s.second.num_entries;
    +      dest.actual_size -= s.second.actual_size;
    +    }
    +  }
    +
       return write_bucket_header(hctx, &header);
     }
     
    @@ -783,6 +879,30 @@ static std::string modify_op_str(uint8_t op) {
       return modify_op_str((RGWModifyOp) op);
     }
     
    +static int write_header_while_logrecord(cls_method_context_t hctx,
    +                                        rgw_bucket_dir_header& header) {
    +  if (header.resharding_in_logrecord())
    +    return write_bucket_header(hctx, &header);
    +  return 0;
    +}
    +
    +static int guard_bucket_resharding(cls_method_context_t hctx,
    +                                   const rgw_bucket_dir_header& header,
    +                                   int error_code = -CLS_RGW_ERR_BUSY_RESHARDING)
    +{
    +  const ConfigProxy& conf = cls_get_config(hctx);
    +  const uint32_t reshardlog_threshold = conf->rgw_reshardlog_threshold;
    +
    +  if (header.resharding_in_progress() ||
    +      (header.resharding_in_logrecord() && header.reshardlog_entries >= reshardlog_threshold)) {
    +    CLS_LOG(4, "ERROR: writes are blocked while bucket is "
    +            "resharding, returning %d", error_code);
    +    return error_code;
    +  }
    +
    +  return 0;
    +}
    +
     int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
     {
       const ConfigProxy& conf = cls_get_config(hctx);
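
guard_bucket_resharding() now rejects writes both during the final reshard phase and once the logrecord backlog crosses rgw_reshardlog_threshold. A hedged sketch of how a caller might back off and retry on the returned error; the retry budget and sleep interval are made-up values, and CLS_RGW_ERR_BUSY_RESHARDING comes from the cls_rgw headers:

    #include <chrono>
    #include <functional>
    #include <thread>

    int retry_on_busy_resharding(const std::function<int()>& do_index_op) {
      int r = 0;
      for (int attempt = 0; attempt < 10; ++attempt) {
        r = do_index_op();
        if (r != -CLS_RGW_ERR_BUSY_RESHARDING) {
          return r;                 // success or a different error
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
      }
      return r;                     // still busy after the retry budget
    }
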
    @@ -815,11 +935,23 @@ int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist
     	       "INFO: %s: request: op=%s name=%s tag=%s", __func__,
     	       modify_op_str(op.op).c_str(), op.key.to_string().c_str(), op.tag.c_str());
     
    +  struct rgw_bucket_dir_header header;
    +  int rc = read_bucket_header(hctx, &header);
    +  if (rc < 0) {
    +    CLS_LOG_BITX(bitx_inst, 1, "ERROR: %s: failed to read header", __func__);
    +    return rc;
    +  }
    +
    +  rc = guard_bucket_resharding(hctx, header);
    +  if (rc < 0) {
    +    return rc;
    +  }
    +
       // get on-disk state
       std::string idx;
     
       rgw_bucket_dir_entry entry;
    -  int rc = read_key_entry(hctx, op.key, &idx, &entry);
    +  rc = read_key_entry(hctx, op.key, &idx, &entry);
       if (rc < 0 && rc != -ENOENT) {
         CLS_LOG_BITX(bitx_inst, 1,
     		 "ERROR: %s could not read key entry, key=%s, rc=%d",
    @@ -850,12 +982,10 @@ int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist
       entry.pending_map.insert(pair<string, rgw_bucket_pending_info>(op.tag, info));
     
       // write out new key to disk
    -  bufferlist info_bl;
    -  encode(entry, info_bl);
       CLS_LOG_BITX(bitx_inst, 20,
     	       "INFO: %s: setting map entry at key=%s",
     	       __func__, escape_str(idx).c_str());
    -  rc = cls_cxx_map_set_val(hctx, idx, &info_bl);
    +  rc = write_entry(hctx, entry, idx, header, false);
       if (rc < 0) {
         CLS_LOG_BITX(bitx_inst, 1,
     		 "ERROR: %s could not set value for key, key=%s, rc=%d",
    @@ -963,7 +1093,7 @@ static int read_key_entry(cls_method_context_t hctx, const cls_rgw_obj_key& key,
     // called by rgw_bucket_complete_op() for each item in op.remove_objs
     static int complete_remove_obj(cls_method_context_t hctx,
                                    rgw_bucket_dir_header& header,
    -                               const cls_rgw_obj_key& key, bool log_op)
    +                               const cls_rgw_obj_key& key)
     {
       rgw_bucket_dir_entry entry;
       string idx;
    @@ -978,18 +1108,7 @@ static int complete_remove_obj(cls_method_context_t hctx,
               int(entry.meta.category));
       unaccount_entry(header, entry);
     
    -  if (log_op) {
    -    ++header.ver; // increment index version, or we'll overwrite keys previously written
    -    const std::string tag;
    -    ret = log_index_operation(hctx, key, CLS_RGW_OP_DEL, tag, entry.meta.mtime,
    -                              entry.ver, CLS_RGW_STATE_COMPLETE, header.ver,
    -                              header.max_marker, 0, nullptr, nullptr, nullptr);
    -    if (ret < 0) {
    -      return ret;
    -    }
    -  }
    -
    -  ret = cls_cxx_map_remove_key(hctx, idx);
    +  ret = remove_entry(hctx, idx, key, header);
       if (ret < 0) {
         CLS_LOG(1, "%s: cls_cxx_map_remove_key failed with %d", __func__, ret);
         return ret;
    @@ -1034,6 +1153,11 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
         return -EINVAL;
       }
     
    +  rc = guard_bucket_resharding(hctx, header);
    +  if (rc < 0) {
    +    return rc;
    +  }
    +
       rgw_bucket_dir_entry entry;
       bool ondisk = true;
     
    @@ -1094,21 +1218,20 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
             CLS_LOG_BITX(bitx_inst, 20,
                          "INFO: %s: removing map entry with key=%s",
                          __func__, escape_str(idx).c_str());
    -        rc = cls_cxx_map_remove_key(hctx, idx);
    +        rc = remove_entry(hctx, idx, entry.key, header);
             if (rc < 0) {
               CLS_LOG_BITX(bitx_inst, 1,
                            "ERROR: %s: unable to remove map key, key=%s, rc=%d",
                            __func__, escape_str(idx).c_str(), rc);
               return rc;
             }
    +
           } else {
             // we removed this tag from pending_map so need to write the changes
             CLS_LOG_BITX(bitx_inst, 20,
                          "INFO: %s: setting map entry at key=%s",
                          __func__, escape_str(idx).c_str());
    -        bufferlist new_key_bl;
    -        encode(entry, new_key_bl);
    -        rc = cls_cxx_map_set_val(hctx, idx, &new_key_bl);
    +        rc = write_entry(hctx, entry, idx, header);
             if (rc < 0) {
               CLS_LOG_BITX(bitx_inst, 1,
                            "ERROR: %s: unable to set map val, key=%s, rc=%d",
    @@ -1136,7 +1259,7 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
     	CLS_LOG_BITX(bitx_inst, 20,
     		     "INFO: %s: removing map entry with key=%s",
     		     __func__, escape_str(idx).c_str());
    -      rc = cls_cxx_map_remove_key(hctx, idx);
    +      rc = remove_entry(hctx, idx, entry.key, header);
           if (rc < 0) {
     	  CLS_LOG_BITX(bitx_inst, 1,
     		       "ERROR: %s: unable to remove map key, key=%s, rc=%d",
    @@ -1145,12 +1268,11 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
           }
         } else {
           entry.exists = false;
    -      bufferlist new_key_bl;
    -      encode(entry, new_key_bl);
           CLS_LOG_BITX(bitx_inst, 20,
     		   "INFO: %s: setting map entry at key=%s",
     		   __func__, escape_str(idx).c_str());
    -      rc = cls_cxx_map_set_val(hctx, idx, &new_key_bl);
    +
    +      rc = write_entry(hctx, entry, idx, header);
           if (rc < 0) {
     	CLS_LOG_BITX(bitx_inst, 1,
     		     "ERROR: %s: unable to set map val, key=%s, rc=%d",
    @@ -1177,12 +1299,10 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
         stats.total_size += meta.accounted_size;
         stats.total_size_rounded += cls_rgw_get_rounded_size(meta.accounted_size);
         stats.actual_size += meta.size;
    -    bufferlist new_key_bl;
    -    encode(entry, new_key_bl);
         CLS_LOG_BITX(bitx_inst, 20,
     		 "INFO: %s: setting map entry at key=%s",
     		 __func__, escape_str(idx).c_str());
    -    rc = cls_cxx_map_set_val(hctx, idx, &new_key_bl);
    +    rc = write_entry(hctx, entry, idx, header);
         if (rc < 0) {
           CLS_LOG_BITX(bitx_inst, 1,
     		   "ERROR: %s: unable to set map value at key=%s, rc=%d",
    @@ -1210,7 +1330,7 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
         CLS_LOG_BITX(bitx_inst, 20,
     		 "INFO: %s: completing object remove key=%s",
     		 __func__, escape_str(remove_key.to_string()).c_str());
    -    rc = complete_remove_obj(hctx, header, remove_key, default_log_op);
    +    rc = complete_remove_obj(hctx, header, remove_key);
         if (rc < 0) {
           CLS_LOG_BITX(bitx_inst, 1,
     		   "WARNING: %s: complete_remove_obj, failed to remove entry, "
    @@ -1234,14 +1354,6 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist
       return rc;
     } // rgw_bucket_complete_op
     
     -template <typename T>
    -static int write_entry(cls_method_context_t hctx, T& entry, const string& key)
    -{
    -  bufferlist bl;
    -  encode(entry, bl);
    -  return cls_cxx_map_set_val(hctx, key, &bl);
    -}
    -
     static int read_olh(cls_method_context_t hctx,cls_rgw_obj_key& obj_key, rgw_bucket_olh_entry *olh_data_entry, string *index_key, bool *found)
     {
       cls_rgw_obj_key olh_key;
    @@ -1272,11 +1384,13 @@ static void update_olh_log(rgw_bucket_olh_entry& olh_data_entry, OLHLogOp op, co
       log.push_back(log_entry);
     }
     
    -static int write_obj_instance_entry(cls_method_context_t hctx, rgw_bucket_dir_entry& instance_entry, const string& instance_idx)
    +static int write_obj_instance_entry(cls_method_context_t hctx, rgw_bucket_dir_entry& instance_entry,
    +                                    const string& instance_idx, rgw_bucket_dir_header& header)
     {
    -  CLS_LOG(20, "write_entry() instance=%s idx=%s flags=%d", escape_str(instance_entry.key.instance).c_str(), instance_idx.c_str(), instance_entry.flags);
    +  CLS_LOG(20, "write_entry() instance=%s idx=%s flags=%d", escape_str(instance_entry.key.instance).c_str(),
    +          instance_idx.c_str(), instance_entry.flags);
       /* write the instance entry */
    -  int ret = write_entry(hctx, instance_entry, instance_idx);
    +  int ret = write_entry(hctx, instance_entry, instance_idx, header);
       if (ret < 0) {
         CLS_LOG(0, "ERROR: write_entry() instance_key=%s ret=%d", escape_str(instance_idx).c_str(), ret);
         return ret;
    @@ -1287,9 +1401,10 @@ static int write_obj_instance_entry(cls_method_context_t hctx, rgw_bucket_dir_en
     /*
      * write object instance entry, and if needed also the list entry
      */
    -static int write_obj_entries(cls_method_context_t hctx, rgw_bucket_dir_entry& instance_entry, const string& instance_idx)
    +static int write_obj_entries(cls_method_context_t hctx, rgw_bucket_dir_entry& instance_entry,
    +                             const string& instance_idx, rgw_bucket_dir_header& header)
     {
    -  int ret = write_obj_instance_entry(hctx, instance_entry, instance_idx);
    +  int ret = write_obj_instance_entry(hctx, instance_entry, instance_idx, header);
       if (ret < 0) {
         return ret;
       }
    @@ -1299,7 +1414,7 @@ static int write_obj_entries(cls_method_context_t hctx, rgw_bucket_dir_entry& in
       if (instance_idx != instance_list_idx) {
         CLS_LOG(20, "write_entry() idx=%s flags=%d", escape_str(instance_list_idx).c_str(), instance_entry.flags);
         /* write a new list entry for the object instance */
    -    ret = write_entry(hctx, instance_entry, instance_list_idx);
    +    ret = write_entry(hctx, instance_entry, instance_list_idx, header);
         if (ret < 0) {
           CLS_LOG(0, "ERROR: write_entry() instance=%s instance_list_idx=%s ret=%d", instance_entry.key.instance.c_str(), instance_list_idx.c_str(), ret);
           return ret;
    @@ -1355,31 +1470,32 @@ class BIVerObjEntry {
         instance_entry.versioned_epoch = epoch;
       }
     
    -  int unlink_list_entry() {
    -    string list_idx;
    +  int unlink_list_entry(rgw_bucket_dir_header& header) {
    +    string list_idx, list_sub_ver;
         /* this instance has a previous list entry, remove that entry */
         get_list_index_key(instance_entry, &list_idx);
         CLS_LOG(20, "unlink_list_entry() list_idx=%s", escape_str(list_idx).c_str());
    -    int ret = cls_cxx_map_remove_key(hctx, list_idx);
    +    int ret = remove_entry(hctx, list_idx, instance_entry.key, header);
         if (ret < 0) {
    -      CLS_LOG(0, "ERROR: cls_cxx_map_remove_key() list_idx=%s ret=%d", list_idx.c_str(), ret);
    +      CLS_LOG(0, "ERROR: remove_entry() list_idx=%s ret=%d", list_idx.c_str(), ret);
           return ret;
         }
         return 0;
       }
     
    -  int unlink() {
    +  int unlink(rgw_bucket_dir_header& header, const cls_rgw_obj_key& key) {
         /* remove the instance entry */
         CLS_LOG(20, "unlink() idx=%s", escape_str(instance_idx).c_str());
    -    int ret = cls_cxx_map_remove_key(hctx, instance_idx);
    +    int ret = remove_entry(hctx, instance_idx, key, header);
         if (ret < 0) {
    -      CLS_LOG(0, "ERROR: cls_cxx_map_remove_key() instance_idx=%s ret=%d", instance_idx.c_str(), ret);
    +      CLS_LOG(0, "ERROR: remove_entry() instance_idx=%s ret=%d", instance_idx.c_str(), ret);
           return ret;
         }
         return 0;
       }
     
    -  int write_entries(uint64_t flags_set, uint64_t flags_reset) {
    +  int write_entries(uint64_t flags_set, uint64_t flags_reset,
    +                    rgw_bucket_dir_header& header) {
         if (!initialized) {
           int ret = init();
           if (ret < 0) {
    @@ -1392,7 +1508,7 @@ class BIVerObjEntry {
         /* write the instance and list entries */
         bool special_delete_marker_key = (instance_entry.is_delete_marker() && instance_entry.key.instance.empty());
         encode_obj_versioned_data_key(key, &instance_idx, special_delete_marker_key);
    -    int ret = write_obj_entries(hctx, instance_entry, instance_idx);
    +    int ret = write_obj_entries(hctx, instance_entry, instance_idx, header);
         if (ret < 0) {
           CLS_LOG(0, "ERROR: write_obj_entries() instance_idx=%s ret=%d", instance_idx.c_str(), ret);
           return ret;
    @@ -1401,11 +1517,11 @@ class BIVerObjEntry {
         return 0;
       }
     
    -  int write(uint64_t epoch, bool current) {
    +  int write(uint64_t epoch, bool current, rgw_bucket_dir_header& header) {
         if (instance_entry.versioned_epoch > 0) {
           CLS_LOG(20, "%s: instance_entry.versioned_epoch=%d epoch=%d", __func__, (int)instance_entry.versioned_epoch, (int)epoch);
           /* this instance has a previous list entry, remove that entry */
    -      int ret = unlink_list_entry();
    +      int ret = unlink_list_entry(header);
           if (ret < 0) {
             return ret;
           }
    @@ -1417,11 +1533,11 @@ class BIVerObjEntry {
         }
     
         instance_entry.versioned_epoch = epoch;
    -    return write_entries(flags, 0);
    +    return write_entries(flags, 0, header);
       }
     
    -  int demote_current() {
    -    return write_entries(0, rgw_bucket_dir_entry::FLAG_CURRENT);
    +  int demote_current(rgw_bucket_dir_header& header) {
    +    return write_entries(0, rgw_bucket_dir_entry::FLAG_CURRENT, header);
       }
     
       bool is_delete_marker() {
    @@ -1523,9 +1639,9 @@ class BIOLHEntry {
         olh_data_entry.key = key;
       }
     
    -  int write() {
    +  int write(rgw_bucket_dir_header& header) {
         /* write the olh data entry */
    -    int ret = write_entry(hctx, olh_data_entry, olh_data_idx);
    +    int ret = write_entry(hctx, olh_data_entry, olh_data_idx, header);
         if (ret < 0) {
           CLS_LOG(0, "ERROR: write_entry() olh_key=%s ret=%d", olh_data_idx.c_str(), ret);
           return ret;
    @@ -1559,12 +1675,13 @@ class BIOLHEntry {
       }
     };
     
    -static int write_version_marker(cls_method_context_t hctx, cls_rgw_obj_key& key)
    +static int write_version_marker(cls_method_context_t hctx, cls_rgw_obj_key& key,
    +                                rgw_bucket_dir_header& header)
     {
       rgw_bucket_dir_entry entry;
       entry.key = key;
       entry.flags = rgw_bucket_dir_entry::FLAG_VER_MARKER;
    -  int ret = write_entry(hctx, entry, key.name);
    +  int ret = write_entry(hctx, entry, key.name, header);
       if (ret < 0) {
         CLS_LOG(0, "ERROR: write_entry returned ret=%d", ret);
         return ret;
    @@ -1579,9 +1696,10 @@ static int write_version_marker(cls_method_context_t hctx, cls_rgw_obj_key& key)
      * key. Their version is going to be empty though
      */
     static int convert_plain_entry_to_versioned(cls_method_context_t hctx,
    -					    cls_rgw_obj_key& key,
    -					    bool demote_current,
    -					    bool instance_only)
    +                                            cls_rgw_obj_key& key,
    +                                            bool demote_current,
    +                                            bool instance_only,
    +                                            rgw_bucket_dir_header& header)
     {
       if (!key.instance.empty()) {
         return -EINVAL;
    @@ -1608,9 +1726,9 @@ static int convert_plain_entry_to_versioned(cls_method_context_t hctx,
         encode_obj_versioned_data_key(key, &new_idx);
     
         if (instance_only) {
    -      ret = write_obj_instance_entry(hctx, entry, new_idx);
    +      ret = write_obj_instance_entry(hctx, entry, new_idx, header);
         } else {
    -      ret = write_obj_entries(hctx, entry, new_idx);
    +      ret = write_obj_entries(hctx, entry, new_idx, header);
         }
         if (ret < 0) {
           CLS_LOG(0, "ERROR: write_obj_entries new_idx=%s returned %d",
    @@ -1619,7 +1737,7 @@ static int convert_plain_entry_to_versioned(cls_method_context_t hctx,
         }
       }
     
    -  ret = write_version_marker(hctx, key);
    +  ret = write_version_marker(hctx, key, header);
       if (ret < 0) {
         return ret;
       }
    @@ -1659,6 +1777,18 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer
         return -EINVAL;
       }
     
    +  struct rgw_bucket_dir_header header;
    +  int rc = read_bucket_header(hctx, &header);
    +  if (rc < 0) {
    +    CLS_LOG(1, "ERROR: %s(): failed to read header\n", __func__);
    +    return rc;
    +  }
    +
    +  rc = guard_bucket_resharding(hctx, header);
    +  if (rc < 0) {
    +    return rc;
    +  }
    +
       /* read instance entry */
       BIVerObjEntry obj(hctx, op.key);
       int ret = obj.init(op.delete_marker);
    @@ -1732,7 +1862,7 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer
     					      * entry */
         existed = (ret >= 0 && !other_obj.is_delete_marker());
         if (ret >= 0 && other_obj.is_delete_marker() != op.delete_marker) {
    -      ret = other_obj.unlink_list_entry();
    +      ret = other_obj.unlink_list_entry(header);
           if (ret < 0) {
             return ret;
           }
    @@ -1740,7 +1870,7 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer
     
         removing = existed && op.delete_marker;
         if (!removing) {
    -      ret = other_obj.unlink();
    +      ret = other_obj.unlink(header, op.key);
           if (ret < 0) {
             return ret;
           }
    @@ -1766,14 +1896,14 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer
       const uint64_t prev_epoch = olh.get_epoch();
     
       if (!olh.start_modify(op.olh_epoch)) {
    -    ret = obj.write(op.olh_epoch, false);
    +    ret = obj.write(op.olh_epoch, false, header);
         if (ret < 0) {
           return ret;
         }
         if (removing) {
           olh.update_log(CLS_RGW_OLH_OP_REMOVE_INSTANCE, op.op_tag, op.key, false, op.olh_epoch);
         }
    -    return 0;
    +    return write_header_while_logrecord(hctx, header);
       }
     
       // promote this version to current if it's a newer epoch, or if it matches the
    @@ -1798,7 +1928,7 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer
           if (!(olh_entry.key == op.key)) {
             BIVerObjEntry old_obj(hctx, olh_entry.key);
     
    -        ret = old_obj.demote_current();
    +        ret = old_obj.demote_current(header);
             if (ret < 0) {
               CLS_LOG(0, "ERROR: could not demote current on previous key ret=%d", ret);
               return ret;
    @@ -1809,7 +1939,7 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer
       } else {
         bool instance_only = (op.key.instance.empty() && op.delete_marker);
         cls_rgw_obj_key key(op.key.name);
    -    ret = convert_plain_entry_to_versioned(hctx, key, promote, instance_only);
    +    ret = convert_plain_entry_to_versioned(hctx, key, promote, instance_only, header);
         if (ret < 0) {
           CLS_LOG(0, "ERROR: convert_plain_entry_to_versioned ret=%d", ret);
           return ret;
    @@ -1831,30 +1961,24 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer
       }
       olh.set_exists(true);
     
    -  ret = olh.write();
    +  ret = olh.write(header);
       if (ret < 0) {
         CLS_LOG(0, "ERROR: failed to update olh ret=%d", ret);
         return ret;
       }
     
       /* write the instance and list entries */
    -  ret = obj.write(olh.get_epoch(), promote);
    +  ret = obj.write(olh.get_epoch(), promote, header);
       if (ret < 0) {
         return ret;
       }
     
       if (!op.log_op) {
    -   return 0;
    +    return write_header_while_logrecord(hctx, header);
       }
     
    -  rgw_bucket_dir_header header;
    -  ret = read_bucket_header(hctx, &header);
    -  if (ret < 0) {
    -    CLS_LOG(1, "ERROR: rgw_bucket_link_olh(): failed to read header\n");
    -    return ret;
    -  }
       if (header.syncstopped) {
    -    return 0;
    +    return write_header_while_logrecord(hctx, header);
       }
     
       rgw_bucket_dir_entry& entry = obj.get_dir_entry();
    @@ -1898,19 +2022,27 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in,
       }
     
       cls_rgw_obj_key dest_key = op.key;
    -  if (dest_key.instance == "null") {
    -    dest_key.instance.clear();
    +
    +  struct rgw_bucket_dir_header header;
    +  int ret = read_bucket_header(hctx, &header);
    +  if (ret < 0) {
    +    CLS_LOG(1, "ERROR: rgw_bucket_unlink_instance(): failed to read header\n");
    +    return ret;
    +  }
    +
    +  ret = guard_bucket_resharding(hctx, header);
    +  if (ret < 0) {
    +    return ret;
       }
     
       BIVerObjEntry obj(hctx, dest_key);
       BIOLHEntry olh(hctx, dest_key);
     
    -  int ret = obj.init();
    -  if (ret == -ENOENT) {
    -    return 0; /* already removed */
    -  }
    +  ret = obj.init();
       if (ret < 0) {
    -    CLS_LOG(0, "ERROR: obj.init() returned ret=%d", ret);
    +    if (ret != -ENOENT) {
    +      CLS_LOG(0, "ERROR: obj.init() returned ret=%d", ret);
    +    }
         return ret;
       }
     
    @@ -1924,7 +2056,7 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in,
       if (!olh_found) {
         bool instance_only = false;
         cls_rgw_obj_key key(dest_key.name);
    -    ret = convert_plain_entry_to_versioned(hctx, key, true, instance_only);
    +    ret = convert_plain_entry_to_versioned(hctx, key, true, instance_only, header);
         if (ret < 0) {
           CLS_LOG(0, "ERROR: convert_plain_entry_to_versioned ret=%d", ret);
           return ret;
    @@ -1936,7 +2068,7 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in,
       }
     
       if (!olh.start_modify(op.olh_epoch)) {
    -    ret = obj.unlink_list_entry();
    +    ret = obj.unlink_list_entry(header);
         if (ret < 0) {
           return ret;
         }
    @@ -1946,7 +2078,7 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in,
         }
     
         olh.update_log(CLS_RGW_OLH_OP_REMOVE_INSTANCE, op.op_tag, op.key, false, op.olh_epoch);
    -    return olh.write();
    +    return olh.write(header);
       }
     
       rgw_bucket_olh_entry& olh_entry = olh.get_entry();
    @@ -1966,7 +2098,7 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in,
     
         if (found) {
           BIVerObjEntry next(hctx, next_key);
    -      ret = next.write(olh.get_epoch(), true);
    +      ret = next.write(olh.get_epoch(), true, header);
           if (ret < 0) {
             CLS_LOG(0, "ERROR: next.write() returned ret=%d", ret);
             return ret;
    @@ -1993,34 +2125,28 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in,
       } else {
         /* this is a delete marker, it's our responsibility to remove its
          * instance entry */
    -    ret = obj.unlink();
    +    ret = obj.unlink(header, op.key);
         if (ret < 0) {
           return ret;
         }
       }
     
    -  ret = obj.unlink_list_entry();
    +  ret = obj.unlink_list_entry(header);
       if (ret < 0) {
         return ret;
       }
     
    -  ret = olh.write();
    +  ret = olh.write(header);
       if (ret < 0) {
         return ret;
       }
     
       if (!op.log_op) {
    -    return 0;
    +    return write_header_while_logrecord(hctx, header);
       }
     
    -  rgw_bucket_dir_header header;
    -  ret = read_bucket_header(hctx, &header);
    -  if (ret < 0) {
    -    CLS_LOG(1, "ERROR: rgw_bucket_unlink_instance(): failed to read header\n");
    -    return ret;
    -  }
       if (header.syncstopped) {
    -    return 0;
    +    return write_header_while_logrecord(hctx, header);
       }
     
       rgw_bucket_entry_ver ver;
    @@ -2134,8 +2260,20 @@ static int rgw_bucket_trim_olh_log(cls_method_context_t hctx, bufferlist *in, bu
         log.erase(rm_iter);
       }
     
    +  struct rgw_bucket_dir_header header;
    +  int rc = read_bucket_header(hctx, &header);
    +  if (rc < 0) {
    +    CLS_LOG(1, "ERROR: %s(): failed to read header\n", __func__);
    +    return rc;
    +  }
    +
    +  rc = guard_bucket_resharding(hctx, header);
    +  if (rc < 0) {
    +    return rc;
    +  }
    +
       /* write the olh data entry */
    -  ret = write_entry(hctx, olh_data_entry, olh_data_key);
    +  ret = write_entry(hctx, olh_data_entry, olh_data_key, header);
       if (ret < 0) {
         CLS_LOG(0, "ERROR: write_entry() olh_key=%s ret=%d", olh_data_key.c_str(), ret);
         return ret;
    @@ -2162,9 +2300,21 @@ static int rgw_bucket_clear_olh(cls_method_context_t hctx, bufferlist *in, buffe
         return -EINVAL;
       }
     
    +  struct rgw_bucket_dir_header header;
    +  int rc = read_bucket_header(hctx, &header);
    +  if (rc < 0) {
    +    CLS_LOG(1, "ERROR: %s(): failed to read header\n", __func__);
    +    return rc;
    +  }
    +
    +  rc = guard_bucket_resharding(hctx, header);
    +  if (rc < 0) {
    +    return rc;
    +  }
    +
       /* read olh entry */
       rgw_bucket_olh_entry olh_data_entry;
    -  string olh_data_key;
    +  string olh_data_key, olh_sub_ver;
       encode_olh_data_key(op.key, &olh_data_key);
       int ret = read_index_entry(hctx, olh_data_key, &olh_data_entry);
       if (ret < 0 && ret != -ENOENT) {
    @@ -2177,7 +2327,7 @@ static int rgw_bucket_clear_olh(cls_method_context_t hctx, bufferlist *in, buffe
         return -ECANCELED;
       }
     
    -  ret = cls_cxx_map_remove_key(hctx, olh_data_key);
    +  ret = remove_entry(hctx, olh_data_key, olh_data_entry.key, header);
       if (ret < 0) {
         CLS_LOG(1, "NOTICE: %s: can't remove key %s ret=%d", __func__, olh_data_key.c_str(), ret);
         return ret;
    @@ -2201,7 +2351,7 @@ static int rgw_bucket_clear_olh(cls_method_context_t hctx, bufferlist *in, buffe
         return 0;
       }
     
    -  ret = cls_cxx_map_remove_key(hctx, op.key.name);
    +  ret = remove_entry(hctx, op.key.name, plain_entry.key, header);
       if (ret < 0) {
         CLS_LOG(1, "NOTICE: %s: can't remove key %s ret=%d", __func__, op.key.name.c_str(), ret);
         return ret;
    @@ -2233,6 +2383,11 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx,
         return rc;
       }
     
    +  rc = guard_bucket_resharding(hctx, header);
    +  if (rc < 0) {
    +    return rc;
    +  }
    +
       const uint64_t config_op_expiration =
         conf->rgw_pending_bucket_index_op_expiration;
     
    @@ -2358,7 +2513,7 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx,
     	CLS_LOG_BITX(bitx_inst, 20,
     		     "INFO: %s: removing map entry with key=%s",
     		     __func__, escape_str(cur_change_key).c_str());
    -	ret = cls_cxx_map_remove_key(hctx, cur_change_key);
    +	ret = remove_entry(hctx, cur_change_key, cur_change.key, header);
     	if (ret < 0) {
     	  CLS_LOG_BITX(bitx_inst, 0, "ERROR: %s: unable to remove key, key=%s, error=%d",
     		       __func__, escape_str(cur_change_key).c_str(), ret);
    @@ -2386,13 +2541,11 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx,
             stats.actual_size += cur_change.meta.size;
             header_changed = true;
             cur_change.index_ver = header.ver;
    -        bufferlist cur_state_bl;
    -        encode(cur_change, cur_state_bl);
     
     	CLS_LOG_BITX(bitx_inst, 20,
     		     "INFO: %s: setting map entry at key=%s",
     		     __func__, escape_str(cur_change.key.to_string()).c_str());
    -        ret = cls_cxx_map_set_val(hctx, cur_change_key, &cur_state_bl);
    +        ret = write_entry(hctx, cur_change, cur_change_key, header);
             if (ret < 0) {
     	  CLS_LOG_BITX(bitx_inst, 0, "ERROR: %s: unable to set value for key, key=%s, error=%d",
     		       __func__, escape_str(cur_change_key).c_str(), ret);
    @@ -2698,15 +2851,122 @@ static int rgw_bi_put_op(cls_method_context_t hctx, bufferlist *in, bufferlist *
       }
     
       rgw_cls_bi_entry& entry = op.entry;
    -
    -  int r = cls_cxx_map_set_val(hctx, entry.idx, &entry.data);
    -  if (r < 0) {
    -    CLS_LOG(0, "ERROR: %s: cls_cxx_map_set_val() returned r=%d", __func__, r);
    +  if (entry.type == BIIndexType::ReshardDeleted) {
    +    int r = cls_cxx_map_remove_key(hctx, entry.idx);
    +    if (r < 0) {
    +      CLS_LOG(0, "ERROR: %s: cls_cxx_map_remove_key() returned r=%d", __func__, r);
    +    }
    +  } else {
    +    int r = cls_cxx_map_set_val(hctx, entry.idx, &entry.data);
    +    if (r < 0) {
    +      CLS_LOG(0, "ERROR: %s: cls_cxx_map_set_val() returned r=%d", __func__, r);
    +    }
       }
     
       return 0;
     }
     
    +static int rgw_bi_put_entries(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
    +{
    +  rgw_cls_bi_put_entries_op op;
    +  try {
    +    auto iter = in->cbegin();
    +    decode(op, iter);
    +  } catch (const ceph::buffer::error&) {
    +    CLS_LOG(0, "ERROR: %s: failed to decode request", __func__);
    +    return -EINVAL;
    +  }
    +
    +  const size_t limit = cls_get_config(hctx)->osd_max_omap_entries_per_request;
    +  if (op.entries.size() > limit) {
    +    int r = -E2BIG;
    +    CLS_LOG(0, "ERROR: %s: got too many entries (%zu > %zu), returning %d",
    +            __func__, op.entries.size(), limit, r);
    +    return r;
    +  }
    +
    +  rgw_bucket_dir_header header;
    +  int r = read_bucket_header(hctx, &header);
    +  if (r < 0) {
    +    CLS_LOG(1, "ERROR: %s: failed to read header", __func__);
    +    return r;
    +  }
    +
    +  r = guard_bucket_resharding(hctx, header);
    +  if (r < 0) {
    +    return r;
    +  }
    +
    +  if (op.check_existing) {
    +    // fetch any existing keys and decrement their stats before overwriting
     +    std::set<std::string> keys;
    +    for (const auto& entry : op.entries) {
    +      keys.insert(entry.idx);
    +    }
    +
     +    std::map<std::string, bufferlist> vals;
    +    r = cls_cxx_map_get_vals_by_keys(hctx, keys, &vals);
    +    if (r < 0) {
    +      CLS_LOG(0, "ERROR: %s: cls_cxx_map_get_vals_by_keys() returned r=%d",
    +              __func__, r);
    +      return r;
    +    }
    +
    +    for (auto& [idx, data] : vals) {
    +      rgw_cls_bi_entry entry;
    +      entry.type = bi_type(idx);
    +      entry.idx = std::move(idx);
    +      entry.data = std::move(data);
    +
    +      cls_rgw_obj_key key;
    +      RGWObjCategory category;
    +      rgw_bucket_category_stats stats;
    +      const bool account = entry.get_info(&key, &category, &stats);
    +      if (account) {
    +        auto& dest = header.stats[category];
    +        dest.total_size -= stats.total_size;
    +        dest.total_size_rounded -= stats.total_size_rounded;
    +        dest.num_entries -= stats.num_entries;
    +        dest.actual_size -= stats.actual_size;
    +      }
    +    } // foreach vals
    +  } // if op.check_existing
    +
     +  std::map<std::string, bufferlist> new_vals;
    +
    +  for (auto& entry : op.entries) {
    +    if (entry.type == BIIndexType::ReshardDeleted) {
    +      r = cls_cxx_map_remove_key(hctx, entry.idx);
    +      if (r < 0) {
    +        CLS_LOG(0, "WARNING: %s: cls_cxx_map_remove_key(%s) returned r=%d",
    +                __func__, entry.idx.c_str(), r);
    +      } // not fatal
    +      continue;
    +    }
    +
    +    cls_rgw_obj_key key;
    +    RGWObjCategory category;
    +    rgw_bucket_category_stats stats;
    +    const bool account = entry.get_info(&key, &category, &stats);
    +    if (account) {
    +      auto& dest = header.stats[category];
    +      dest.total_size += stats.total_size;
    +      dest.total_size_rounded += stats.total_size_rounded;
    +      dest.num_entries += stats.num_entries;
    +      dest.actual_size += stats.actual_size;
    +    }
    +
    +    new_vals.emplace(std::move(entry.idx), std::move(entry.data));
    +  }
    +
    +  r = cls_cxx_map_set_vals(hctx, &new_vals);
    +  if (r < 0) {
    +    CLS_LOG(0, "ERROR: %s: cls_cxx_map_set_vals() returned r=%d", __func__, r);
    +    return r;
    +  }
    +
    +  return write_bucket_header(hctx, &header);
    +}
     
     /* The plain entries in the bucket index are divided into two regions
      * divided by the special entries that begin with 0x80. Those below
    @@ -3053,19 +3313,64 @@ static int list_olh_entries(cls_method_context_t hctx,
       return count;
     }
     
    -static int check_index(cls_method_context_t hctx,
    -		       rgw_bucket_dir_header *existing_header,
    -		       rgw_bucket_dir_header *calc_header)
    +static int reshard_log_list_entries(cls_method_context_t hctx, const string& marker,
     +                                    uint32_t max, list<rgw_cls_bi_entry>& entries, bool *truncated)
     {
    -  int rc = read_bucket_header(hctx, existing_header);
    -  if (rc < 0) {
    -    CLS_LOG(1, "ERROR: check_index(): failed to read header\n");
    -    return rc;
    +  string start_key, end_key;
    +  start_key = BI_PREFIX_CHAR;
    +  start_key.append(bucket_index_prefixes[BI_BUCKET_RESHARD_LOG_INDEX]);
    +
    +  string bi_type_marker = start_key;
    +
    +  end_key = BI_PREFIX_CHAR;
    +  end_key.append(bucket_index_prefixes[BI_BUCKET_RESHARD_LOG_INDEX + 1]);
    +
    +  if (!marker.empty()) {
    +    start_key.append(marker);
    +  }
    +
     +  map<string, bufferlist> keys;
    +  int ret = cls_cxx_map_get_vals(hctx, start_key, string(), max, &keys, truncated);
    +  CLS_LOG(20, "%s(): start_key=%s keys.size()=%d", __func__, escape_str(start_key).c_str(), (int)keys.size());
    +  if (ret < 0) {
    +    return ret;
    +  }
    +
     +  map<string, bufferlist>::iterator iter;
    +  for (iter = keys.begin(); iter != keys.end(); ++iter) {
    +    if (iter->first.compare(end_key) >= 0) {
    +      if (truncated) {
    +        *truncated = false;
    +      }
    +      return 0;
    +    }
    +
    +    rgw_cls_bi_entry entry;
    +    auto biter = iter->second.cbegin();
    +    try {
    +      decode(entry, biter);
    +    } catch (ceph::buffer::error& err) {
    +      CLS_LOG(0, "ERROR: %s: failed to decode buffer for rgw_cls_bi_entry \"%s\"",
    +	      __func__, escape_str(iter->first).c_str());
    +      return -EIO;
    +    }
    +    if (entry.type != BIIndexType::ReshardDeleted)
    +      entry.type = bi_type(iter->first, bi_type_marker);
    +
    +    CLS_LOG(20, "reshard_log_list_entries key=%s bl.length=%d\n", entry.idx.c_str(), (int)iter->second.length());
    +
    +    entries.push_back(entry);
       }
    +  return 0;
    +}
     
    -  calc_header->tag_timeout = existing_header->tag_timeout;
    -  calc_header->ver = existing_header->ver;
    -  calc_header->syncstopped = existing_header->syncstopped;
    +static int check_index(cls_method_context_t hctx,
    +		       const rgw_bucket_dir_header& existing_header,
    +		       rgw_bucket_dir_header *calc_header)
    +{
    +  calc_header->tag_timeout = existing_header.tag_timeout;
    +  calc_header->ver = existing_header.ver;
    +  calc_header->syncstopped = existing_header.syncstopped;
     
        std::list<rgw_cls_bi_entry> entries;
       string start_obj;
    @@ -3075,7 +3380,7 @@ static int check_index(cls_method_context_t hctx,
       bool more;
     
       do {
    -    rc = list_plain_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more);
    +    int rc = list_plain_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more);
         if (rc < 0) {
           return rc;
         }
    @@ -3104,7 +3409,7 @@ static int check_index(cls_method_context_t hctx,
     
       start_obj = "";
       do {
    -    rc = list_instance_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more);
    +    int rc = list_instance_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more);
         if (rc < 0) {
           return rc;
         }
    @@ -3137,9 +3442,21 @@ static int check_index(cls_method_context_t hctx,
     int rgw_bucket_rebuild_index(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
     {
       CLS_LOG(10, "entered %s", __func__);
    +
       rgw_bucket_dir_header existing_header;
    +  int rc = read_bucket_header(hctx, &existing_header);
    +  if (rc < 0) {
    +    CLS_LOG(1, "ERROR: check_index(): failed to read header\n");
    +    return rc;
    +  }
    +
    +  rc = guard_bucket_resharding(hctx, existing_header);
    +  if (rc < 0) {
    +    return rc;
    +  }
    +
       rgw_bucket_dir_header calc_header;
    -  int rc = check_index(hctx, &existing_header, &calc_header);
    +  rc = check_index(hctx, existing_header, &calc_header);
       if (rc < 0)
         return rc;
     
    @@ -3151,8 +3468,13 @@ int rgw_bucket_check_index(cls_method_context_t hctx, bufferlist *in, bufferlist
     {
       CLS_LOG(10, "entered %s", __func__);
       rgw_cls_check_index_ret ret;
    +  int rc = read_bucket_header(hctx, &ret.existing_header);
    +  if (rc < 0) {
    +    CLS_LOG(1, "ERROR: check_index(): failed to read header\n");
    +    return rc;
    +  }
     
    -  int rc = check_index(hctx, &ret.existing_header, &ret.calculated_header);
    +  rc = check_index(hctx, ret.existing_header, &ret.calculated_header);
       if (rc < 0)
         return rc;
     
    @@ -3162,7 +3484,8 @@ int rgw_bucket_check_index(cls_method_context_t hctx, bufferlist *in, bufferlist
     }
     
     
    -/* Lists all the entries that appear in a bucket index listing.
    +/* Lists all the entries that appear in a bucket index listing,
    + * or list all the entries in reshardlog namespace.
      *
      * It may not be obvious why this function calls three other "segment"
      * functions (list_plain_entries (twice), list_instance_entries,
    @@ -3181,7 +3504,7 @@ int rgw_bucket_check_index(cls_method_context_t hctx, bufferlist *in, bufferlist
      * Additionally, each of the three segment functions, if successful,
      * is expected to return the number of entries added to the output
      * list as a non-negative value. As per usual, negative return values
    - * indicate error condtions.
    + * indicate error conditions.
      */
     static int rgw_bi_list_op(cls_method_context_t hctx,
     			  bufferlist *in,
    @@ -3201,15 +3524,24 @@ static int rgw_bi_list_op(cls_method_context_t hctx,
       constexpr uint32_t MAX_BI_LIST_ENTRIES = 1000;
       const uint32_t max = std::min(op.max, MAX_BI_LIST_ENTRIES);
     
    -  CLS_LOG(20, "%s: op.marker=\"%s\", op.name_filter=\"%s\", op.max=%u max=%u",
    +  CLS_LOG(20, "%s: op.marker=\"%s\", op.name_filter=\"%s\", op.max=%u max=%u, op.reshardlog=%d",
     	  __func__, escape_str(op.marker).c_str(), escape_str(op.name_filter).c_str(),
    -	  op.max, max);
    +	  op.max, max, op.reshardlog);
     
       int ret;
       uint32_t count = 0;
       bool more = false;
       rgw_cls_bi_list_ret op_ret;
     
    +  if (op.reshardlog) {
    +    ret = reshard_log_list_entries(hctx, op.marker, op.max, op_ret.entries, &op_ret.is_truncated);
    +    if (ret < 0)
    +      return ret;
    +    CLS_LOG(20, "%s: returning %lu entries, is_truncated=%d", __func__, op_ret.entries.size(), op_ret.is_truncated);
    +    encode(op_ret, *out);
    +    return 0;
    +  }
    +
       ret = list_plain_entries(hctx, op.name_filter, op.marker, max,
     			   &op_ret.entries, &more, PlainEntriesRegion::Low);
       if (ret < 0) {
    @@ -3539,6 +3871,61 @@ static int rgw_bi_log_stop(cls_method_context_t hctx, bufferlist *in, bufferlist
       return write_bucket_header(hctx, &header);
     }
     
    +static int rgw_reshard_log_trim_op(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
    +{
    +  string key_begin(1, BI_PREFIX_CHAR);
    +  key_begin.append(bucket_index_prefixes[BI_BUCKET_RESHARD_LOG_INDEX]);
    +
    +  string key_end;
    +  key_end = BI_PREFIX_CHAR;
    +  key_end.append(bucket_index_prefixes[BI_BUCKET_RESHARD_LOG_INDEX + 1]);
    +
    +  // list a single key to detect whether the range is empty
    +  const size_t max_entries = 1;
     +  std::set<std::string> keys;
    +  bool more = false;
    +
    +  rgw_bucket_dir_header header;
    +  int rc = read_bucket_header(hctx, &header);
    +  if (rc < 0) {
    +    CLS_LOG(0, "ERROR: rgw_reshard_log_trim_op(): failed to read header\n");
    +    return rc;
    +  }
    +
    +  rc = cls_cxx_map_get_keys(hctx, key_begin, max_entries, &keys, &more);
    +  if (rc < 0) {
    +    CLS_LOG(1, "ERROR: cls_cxx_map_get_keys failed rc=%d", rc);
    +    return rc;
    +  }
    +
    +  if (keys.empty()) {
    +    CLS_LOG(20, "range is empty key_begin=%s", key_begin.c_str());
    +    return -ENODATA;
    +  }
    +
    +  const std::string& first_key = *keys.begin();
    +  if (key_end < first_key) {
    +    CLS_LOG(20, "listed key %s past key_end=%s", first_key.c_str(), key_end.c_str());
    +    return -ENODATA;
    +  }
    +
    +  CLS_LOG(20, "listed key %s, removing through %s",
    +          first_key.c_str(), key_end.c_str());
    +
    +  rc = cls_cxx_map_remove_range(hctx, first_key, key_end);
    +  if (rc < 0) {
    +    CLS_LOG(1, "ERROR: cls_cxx_map_remove_range failed rc=%d", rc);
    +    return rc;
    +  }
    +
    +  header.reshardlog_entries = 0;
    +  rc = write_bucket_header(hctx, &header);
    +  if (rc < 0) {
    +    CLS_LOG(0, "ERROR: rgw_reshard_log_trim_op(): failed to write header\n");
    +    return rc;
    +  }
    +  return 0;
    +}
     
     static void usage_record_prefix_by_time(uint64_t epoch, string& key)
     {
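
rgw_reshard_log_trim_op() removes the reshard-log key range, resets the header counter, and answers -ENODATA once nothing is left, so a driver can simply repeat the op until that sentinel appears. A sketch under the assumption that the method is exposed under RGW_CLASS as "reshard_log_trim" (the string value of RGW_RESHARD_LOG_TRIM is not shown in this patch):

    int trim_reshard_log(librados::IoCtx& ioctx, const std::string& index_oid) {
      while (true) {
        librados::ObjectWriteOperation op;
        bufferlist in;
        op.exec("rgw", "reshard_log_trim", in);   // assumed name for RGW_RESHARD_LOG_TRIM
        int r = ioctx.operate(index_oid, &op);
        if (r == -ENODATA) {
          return 0;        // reshard log is empty
        }
        if (r < 0) {
          return r;
        }
      }
    }
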
    @@ -4434,15 +4821,31 @@ static int rgw_reshard_add(cls_method_context_t hctx, bufferlist *in, bufferlist
         return -EINVAL;
       }
     
    -
    -  string key;
    +  std::string key;
       op.entry.get_key(&key);
     
    +  int ret;
       bufferlist bl;
    +
    +  if (op.create_only) {
    +    ret = cls_cxx_map_get_val(hctx, key, &bl);
    +    if (ret == 0) {
    +      // entry already exists; make no changes
    +      return -EEXIST;
    +    } else if (ret != -ENOENT) {
    +      CLS_ERR("error accessing reshard queue for %s with key %s",
    +	      op.entry.bucket_name.c_str(), key.c_str());
    +      return ret;
    +    }
    +
    +    // we got a -ENOENT and can just fall through...
    +  }
    +
       encode(op.entry, bl);
    -  int ret = cls_cxx_map_set_val(hctx, key, &bl);
    +  ret = cls_cxx_map_set_val(hctx, key, &bl);
       if (ret < 0) {
    -    CLS_ERR("error adding reshard job for bucket %s with key %s",op.entry.bucket_name.c_str(), key.c_str());
    +    CLS_ERR("error adding reshard job for bucket %s with key %s",
    +	    op.entry.bucket_name.c_str(), key.c_str());
         return ret;
       }
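
With create_only, a duplicate enqueue now surfaces as -EEXIST instead of silently overwriting the queued entry. A hedged caller-side sketch; the create_only argument on the client wrapper is an assumption mirroring op.create_only, and the queue object name is a placeholder:

    int queue_bucket_for_reshard(librados::IoCtx& ioctx,
                                 const std::string& reshard_queue_oid,
                                 const cls_rgw_reshard_entry& entry) {
      librados::ObjectWriteOperation op;
      cls_rgw_reshard_add(op, entry, true /* create_only, assumed parameter */);
      int r = ioctx.operate(reshard_queue_oid, &op);
      return (r == -EEXIST) ? 0 : r;   // already queued counts as success here
    }
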
     
    @@ -4457,7 +4860,7 @@ static int rgw_reshard_list(cls_method_context_t hctx, bufferlist *in, bufferlis
       try {
         decode(op, in_iter);
       } catch (ceph::buffer::error& err) {
    -    CLS_LOG(1, "ERROR: rgw_cls_rehard_list(): failed to decode entry\n");
    +    CLS_LOG(1, "ERROR: rgw_cls_reshard_list(): failed to decode entry\n");
         return -EINVAL;
       }
       cls_rgw_reshard_list_ret op_ret;
    @@ -4476,7 +4879,7 @@ static int rgw_reshard_list(cls_method_context_t hctx, bufferlist *in, bufferlis
         try {
           decode(entry, iter);
         } catch (ceph::buffer::error& err) {
    -      CLS_LOG(1, "ERROR: rgw_cls_rehard_list(): failed to decode entry\n");
    +      CLS_LOG(1, "ERROR: rgw_cls_reshard_list(): failed to decode entry\n");
           return -EIO;
        }
         op_ret.entries.push_back(entry);
    @@ -4521,7 +4924,7 @@ static int rgw_reshard_remove(cls_method_context_t hctx, bufferlist *in, bufferl
       try {
         decode(op, in_iter);
       } catch (ceph::buffer::error& err) {
    -    CLS_LOG(1, "ERROR: rgw_cls_rehard_remove: failed to decode entry\n");
    +    CLS_LOG(1, "ERROR: rgw_cls_reshard_remove: failed to decode entry\n");
         return -EINVAL;
       }
     
    @@ -4598,10 +5001,10 @@ static int rgw_clear_bucket_resharding(cls_method_context_t hctx, bufferlist *in
     static int rgw_guard_bucket_resharding(cls_method_context_t hctx, bufferlist *in,  bufferlist *out)
     {
       CLS_LOG(10, "entered %s", __func__);
    -  cls_rgw_guard_bucket_resharding_op op;
     
    -  auto in_iter = in->cbegin();
    +  cls_rgw_guard_bucket_resharding_op op;
       try {
    +    auto in_iter = in->cbegin();
         decode(op, in_iter);
       } catch (ceph::buffer::error& err) {
         CLS_LOG(1, "ERROR: %s: failed to decode entry", __func__);
    @@ -4615,11 +5018,7 @@ static int rgw_guard_bucket_resharding(cls_method_context_t hctx, bufferlist *in
         return rc;
       }
     
    -  if (header.resharding()) {
    -    return op.ret_err;
    -  }
    -
    -  return 0;
    +  return guard_bucket_resharding(hctx, header, op.ret_err);
     }
     
     static int rgw_get_bucket_resharding(cls_method_context_t hctx,
    @@ -4675,7 +5074,9 @@ CLS_INIT(rgw)
       cls_method_handle_t h_rgw_obj_check_mtime;
       cls_method_handle_t h_rgw_bi_get_op;
       cls_method_handle_t h_rgw_bi_put_op;
    +  cls_method_handle_t h_rgw_bi_put_entries_op;
       cls_method_handle_t h_rgw_bi_list_op;
    +  cls_method_handle_t h_rgw_reshard_log_trim_op;
       cls_method_handle_t h_rgw_bi_log_list_op;
       cls_method_handle_t h_rgw_bi_log_trim_op;
       cls_method_handle_t h_rgw_bi_log_resync_op;
    @@ -4710,6 +5111,7 @@ CLS_INIT(rgw)
     
       /* bucket index */
       cls_register_cxx_method(h_class, RGW_BUCKET_INIT_INDEX, CLS_METHOD_RD | CLS_METHOD_WR, rgw_bucket_init_index, &h_rgw_bucket_init_index);
    +  cls_register_cxx_method(h_class, RGW_BUCKET_INIT_INDEX2, CLS_METHOD_RD | CLS_METHOD_WR, rgw_bucket_init_index, &h_rgw_bucket_init_index);
       cls_register_cxx_method(h_class, RGW_BUCKET_SET_TAG_TIMEOUT, CLS_METHOD_RD | CLS_METHOD_WR, rgw_bucket_set_tag_timeout, &h_rgw_bucket_set_tag_timeout);
       cls_register_cxx_method(h_class, RGW_BUCKET_LIST, CLS_METHOD_RD, rgw_bucket_list, &h_rgw_bucket_list);
       cls_register_cxx_method(h_class, RGW_BUCKET_CHECK_INDEX, CLS_METHOD_RD, rgw_bucket_check_index, &h_rgw_bucket_check_index);
    @@ -4730,7 +5132,9 @@ CLS_INIT(rgw)
     
       cls_register_cxx_method(h_class, RGW_BI_GET, CLS_METHOD_RD, rgw_bi_get_op, &h_rgw_bi_get_op);
       cls_register_cxx_method(h_class, RGW_BI_PUT, CLS_METHOD_RD | CLS_METHOD_WR, rgw_bi_put_op, &h_rgw_bi_put_op);
    +  cls_register_cxx_method(h_class, RGW_BI_PUT_ENTRIES, CLS_METHOD_RD | CLS_METHOD_WR, rgw_bi_put_entries, &h_rgw_bi_put_entries_op);
       cls_register_cxx_method(h_class, RGW_BI_LIST, CLS_METHOD_RD, rgw_bi_list_op, &h_rgw_bi_list_op);
    +  cls_register_cxx_method(h_class, RGW_RESHARD_LOG_TRIM, CLS_METHOD_RD | CLS_METHOD_WR, rgw_reshard_log_trim_op, &h_rgw_reshard_log_trim_op);
     
       cls_register_cxx_method(h_class, RGW_BI_LOG_LIST, CLS_METHOD_RD, rgw_bi_log_list, &h_rgw_bi_log_list_op);
       cls_register_cxx_method(h_class, RGW_BI_LOG_TRIM, CLS_METHOD_RD | CLS_METHOD_WR, rgw_bi_log_trim, &h_rgw_bi_log_trim_op);
    diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc
    index 5e7fba88f24a..bb6eb4d13e7c 100644
    --- a/src/cls/rgw/cls_rgw_client.cc
    +++ b/src/cls/rgw/cls_rgw_client.cc
    @@ -79,7 +79,7 @@ int CLSRGWConcurrentIO::operator()() {
         cleanup();
       }
       return ret;
    -} // CLSRGWConcurrintIO::operator()()
    +} // CLSRGWConcurrentIO::operator()()
     
     
     /**
    @@ -186,7 +186,6 @@ bool BucketIndexAioManager::wait_for_completions(int valid_ret_code,
       return true;
     }
     
    -// note: currently only called by tesing code
     void cls_rgw_bucket_init_index(ObjectWriteOperation& o)
     {
       bufferlist in;
    @@ -200,7 +199,24 @@ static bool issue_bucket_index_init_op(librados::IoCtx& io_ctx,
       bufferlist in;
       librados::ObjectWriteOperation op;
       op.create(true);
    -  op.exec(RGW_CLASS, RGW_BUCKET_INIT_INDEX, in);
    +  cls_rgw_bucket_init_index(op);
    +  return manager->aio_operate(io_ctx, shard_id, oid, &op);
    +}
    +
    +void cls_rgw_bucket_init_index2(ObjectWriteOperation& o)
    +{
    +  bufferlist in;
    +  o.exec(RGW_CLASS, RGW_BUCKET_INIT_INDEX2, in);
    +}
    +
    +static bool issue_bucket_index_init_op2(librados::IoCtx& io_ctx,
    +				       const int shard_id,
    +				       const string& oid,
    +				       BucketIndexAioManager *manager) {
    +  bufferlist in;
    +  librados::ObjectWriteOperation op;
    +  op.create(true);
    +  cls_rgw_bucket_init_index2(op);
       return manager->aio_operate(io_ctx, shard_id, oid, &op);
     }
     
    @@ -233,6 +249,11 @@ int CLSRGWIssueBucketIndexInit::issue_op(const int shard_id, const string& oid)
       return issue_bucket_index_init_op(io_ctx, shard_id, oid, &manager);
     }
     
    +int CLSRGWIssueBucketIndexInit2::issue_op(const int shard_id, const string& oid)
    +{
    +  return issue_bucket_index_init_op2(io_ctx, shard_id, oid, &manager);
    +}
    +
     void CLSRGWIssueBucketIndexInit::cleanup()
     {
       // Do best effort removal
    @@ -241,6 +262,14 @@ void CLSRGWIssueBucketIndexInit::cleanup()
       }
     }
     
    +void CLSRGWIssueBucketIndexInit2::cleanup()
    +{
    +  // Do best effort removal
    +  for (auto citer = objs_container.begin(); citer != iter; ++citer) {
    +    io_ctx.remove(citer->second);
    +  }
    +}
    +
     int CLSRGWIssueBucketIndexClean::issue_op(const int shard_id, const string& oid)
     {
       return issue_bucket_index_clean_op(io_ctx, shard_id, oid, &manager);
    @@ -253,11 +282,14 @@ int CLSRGWIssueSetTagTimeout::issue_op(const int shard_id, const string& oid)
     
     void cls_rgw_bucket_update_stats(librados::ObjectWriteOperation& o,
     				 bool absolute,
    -                                 const map<RGWObjCategory, rgw_bucket_category_stats>& stats)
    +                                 const map<RGWObjCategory, rgw_bucket_category_stats>& stats,
    +                                 const map<RGWObjCategory, rgw_bucket_category_stats>* dec_stats)
     {
       rgw_cls_bucket_update_stats_op call;
       call.absolute = absolute;
       call.stats = stats;
    +  if (dec_stats != NULL)
    +    call.dec_stats = *dec_stats;
       bufferlist in;
       encode(call, in);
       o.exec(RGW_CLASS, RGW_BUCKET_UPDATE_STATS, in);
    @@ -465,18 +497,34 @@ void cls_rgw_bi_put(ObjectWriteOperation& op, const string oid, const rgw_cls_bi
       op.exec(RGW_CLASS, RGW_BI_PUT, in);
     }
     
    +void cls_rgw_bi_put_entries(librados::ObjectWriteOperation& op,
    +                            std::vector<rgw_cls_bi_entry> entries,
    +                            bool check_existing)
    +{
    +  const auto call = rgw_cls_bi_put_entries_op{
    +    .entries = std::move(entries),
    +    .check_existing = check_existing
    +  };
    +
    +  bufferlist in;
    +  encode(call, in);
    +
    +  op.exec(RGW_CLASS, RGW_BI_PUT_ENTRIES, in);
    +}
    +
     /* nb: any entries passed in are replaced with the results of the cls
      * call, so caller does not need to clear entries between calls
      */
     int cls_rgw_bi_list(librados::IoCtx& io_ctx, const std::string& oid,
     		    const std::string& name_filter, const std::string& marker, uint32_t max,
    -		    std::list<rgw_cls_bi_entry> *entries, bool *is_truncated)
    +		    std::list<rgw_cls_bi_entry> *entries, bool *is_truncated, bool reshardlog)
     {
       bufferlist in, out;
       rgw_cls_bi_list_op call;
       call.name_filter = name_filter;
       call.marker = marker;
       call.max = max;
    +  call.reshardlog = reshardlog;
       encode(call, in);
       int r = io_ctx.exec(oid, RGW_CLASS, RGW_BI_LIST, in, out);
       if (r < 0)
    @@ -535,10 +583,11 @@ void cls_rgw_bucket_link_olh(librados::ObjectWriteOperation& op, const cls_rgw_o
     
     int cls_rgw_bucket_unlink_instance(librados::IoCtx& io_ctx, const string& oid,
                                        const cls_rgw_obj_key& key, const string& op_tag,
    -                                   const string& olh_tag, uint64_t olh_epoch, bool log_op, const rgw_zone_set& zones_trace)
    +                                   const string& olh_tag, uint64_t olh_epoch, bool log_op,
    +                                   uint16_t bilog_flags, const rgw_zone_set& zones_trace)
     {
       librados::ObjectWriteOperation op;
    -  cls_rgw_bucket_unlink_instance(op, key, op_tag, olh_tag, olh_epoch, log_op, zones_trace);
    +  cls_rgw_bucket_unlink_instance(op, key, op_tag, olh_tag, olh_epoch, log_op, bilog_flags, zones_trace);
       int r = io_ctx.operate(oid, &op);
       if (r < 0)
         return r;
    @@ -548,7 +597,8 @@ int cls_rgw_bucket_unlink_instance(librados::IoCtx& io_ctx, const string& oid,
     
     void cls_rgw_bucket_unlink_instance(librados::ObjectWriteOperation& op,
                                        const cls_rgw_obj_key& key, const string& op_tag,
    -                                   const string& olh_tag, uint64_t olh_epoch, bool log_op, const rgw_zone_set& zones_trace)
    +                                   const string& olh_tag, uint64_t olh_epoch, bool log_op,
    +                                   uint16_t bilog_flags, const rgw_zone_set& zones_trace)
     {
       bufferlist in, out;
       rgw_cls_unlink_instance_op call;
    @@ -558,6 +608,7 @@ void cls_rgw_bucket_unlink_instance(librados::ObjectWriteOperation& op,
       call.olh_tag = olh_tag;
       call.log_op = log_op;
       call.zones_trace = zones_trace;
    +  call.bilog_flags = bilog_flags;
       encode(call, in);
       op.exec(RGW_CLASS, RGW_BUCKET_UNLINK_INSTANCE, in);
     }
    @@ -676,6 +727,19 @@ int CLSRGWIssueBILogTrim::issue_op(const int shard_id, const string& oid)
       return issue_bi_log_trim(io_ctx, oid, shard_id, start_marker_mgr, end_marker_mgr, &manager);
     }
     
    +static bool issue_reshard_log_trim(librados::IoCtx& io_ctx, const string& oid, int shard_id,
    +                                   BucketIndexAioManager *manager) {
    +  bufferlist in;
    +  ObjectWriteOperation op;
    +  op.exec(RGW_CLASS, RGW_RESHARD_LOG_TRIM, in);
    +  return manager->aio_operate(io_ctx, shard_id, oid, &op);
    +}
    +
    +int CLSRGWIssueReshardLogTrim::issue_op(int shard_id, const string& oid)
    +{
    +  return issue_reshard_log_trim(io_ctx, oid, shard_id, &manager);
    +}
    +
     static bool issue_bucket_check_index_op(IoCtx& io_ctx, const int shard_id, const string& oid, BucketIndexAioManager *manager,
         rgw_cls_check_index_ret *pdata) {
       bufferlist in;
    @@ -751,12 +815,11 @@ int CLSRGWIssueBucketBILogStop::issue_op(const int shard_id, const string& oid)
     }
     
     class GetDirHeaderCompletion : public ObjectOperationCompletion {
    -  RGWGetDirHeader_CB *ret_ctx;
    +  boost::intrusive_ptr<RGWGetDirHeader_CB> cb;
     public:
    -  explicit GetDirHeaderCompletion(RGWGetDirHeader_CB *_ctx) : ret_ctx(_ctx) {}
    -  ~GetDirHeaderCompletion() override {
    -    ret_ctx->put();
    -  }
    +  explicit GetDirHeaderCompletion(boost::intrusive_ptr<RGWGetDirHeader_CB> cb)
    +    : cb(std::move(cb)) {}
    +
       void handle_completion(int r, bufferlist& outbl) override {
         rgw_cls_list_ret ret;
         try {
    @@ -765,20 +828,20 @@ class GetDirHeaderCompletion : public ObjectOperationCompletion {
         } catch (ceph::buffer::error& err) {
           r = -EIO;
         }
    -
    -    ret_ctx->handle_response(r, ret.dir.header);
    +    cb->handle_response(r, ret.dir.header);
       }
     };
     
    -int cls_rgw_get_dir_header_async(IoCtx& io_ctx, string& oid, RGWGetDirHeader_CB *ctx)
    +int cls_rgw_get_dir_header_async(IoCtx& io_ctx, const string& oid,
    +                                 boost::intrusive_ptr<RGWGetDirHeader_CB> cb)
     {
       bufferlist in, out;
       rgw_cls_list_op call;
       call.num_entries = 0;
       encode(call, in);
       ObjectReadOperation op;
    -  GetDirHeaderCompletion *cb = new GetDirHeaderCompletion(ctx);
    -  op.exec(RGW_CLASS, RGW_BUCKET_LIST, in, cb);
    +  op.exec(RGW_CLASS, RGW_BUCKET_LIST, in,
    +          new GetDirHeaderCompletion(std::move(cb)));
       AioCompletion *c = librados::Rados::aio_create_completion(nullptr, nullptr);
       int r = io_ctx.aio_operate(oid, c, &op, NULL);
       c->release();
    @@ -900,19 +963,22 @@ void cls_rgw_gc_defer_entry(ObjectWriteOperation& op, uint32_t expiration_secs,
       op.exec(RGW_CLASS, RGW_GC_DEFER_ENTRY, in);
     }
     
    -int cls_rgw_gc_list(IoCtx& io_ctx, string& oid, string& marker, uint32_t max, bool expired_only,
    -                    list<cls_rgw_gc_obj_info>& entries, bool *truncated, string& next_marker)
    +void cls_rgw_gc_list(ObjectReadOperation& op, const string& marker,
    +                     uint32_t max, bool expired_only, bufferlist& out)
     {
    -  bufferlist in, out;
    +  bufferlist in;
       cls_rgw_gc_list_op call;
       call.marker = marker;
       call.max = max;
       call.expired_only = expired_only;
       encode(call, in);
    -  int r = io_ctx.exec(oid, RGW_CLASS, RGW_GC_LIST, in, out);
    -  if (r < 0)
    -    return r;
    +  op.exec(RGW_CLASS, RGW_GC_LIST, in, &out, nullptr);
    +}
     
    +int cls_rgw_gc_list_decode(const bufferlist& out,
    +                           std::list<cls_rgw_gc_obj_info>& entries,
    +                           bool *truncated, std::string& next_marker)
    +{
       cls_rgw_gc_list_ret ret;
       try {
         auto iter = out.cbegin();
    @@ -926,7 +992,7 @@ int cls_rgw_gc_list(IoCtx& io_ctx, string& oid, string& marker, uint32_t max, bo
       if (truncated)
         *truncated = ret.truncated;
       next_marker = std::move(ret.next_marker);
    -  return r;
    +  return 0;
     }
     
     void cls_rgw_gc_remove(librados::ObjectWriteOperation& op, const vector<string>& tags)
    @@ -938,13 +1004,14 @@ void cls_rgw_gc_remove(librados::ObjectWriteOperation& op, const vector<string>&
       op.exec(RGW_CLASS, RGW_GC_REMOVE, in);
     }
     
    -int cls_rgw_lc_get_head(IoCtx& io_ctx, const string& oid, cls_rgw_lc_obj_head& head)
    +void cls_rgw_lc_get_head(ObjectReadOperation& op, bufferlist& out)
     {
    -  bufferlist in, out;
    -  int r = io_ctx.exec(oid, RGW_CLASS, RGW_LC_GET_HEAD, in, out);
    -  if (r < 0)
    -    return r;
    +  bufferlist in;
    +  op.exec(RGW_CLASS, RGW_LC_GET_HEAD, in, &out, nullptr);
    +}
     
    +int cls_rgw_lc_get_head_decode(const bufferlist& out, cls_rgw_lc_obj_head& head)
    +{
       cls_rgw_lc_get_head_ret ret;
       try {
         auto iter = out.cbegin();
    @@ -952,32 +1019,32 @@ int cls_rgw_lc_get_head(IoCtx& io_ctx, const string& oid, cls_rgw_lc_obj_head& h
       } catch (ceph::buffer::error& err) {
         return -EIO;
       }
    -  head = ret.head;
    +  head = std::move(ret.head);
     
    - return r;
    +  return 0;
     }
     
    -int cls_rgw_lc_put_head(IoCtx& io_ctx, const string& oid, cls_rgw_lc_obj_head& head)
    +void cls_rgw_lc_put_head(ObjectWriteOperation& op, const cls_rgw_lc_obj_head& head)
     {
    -  bufferlist in, out;
    +  bufferlist in;
       cls_rgw_lc_put_head_op call;
       call.head = head;
       encode(call, in);
    -  int r = io_ctx.exec(oid, RGW_CLASS, RGW_LC_PUT_HEAD, in, out);
    -  return r;
    +  op.exec(RGW_CLASS, RGW_LC_PUT_HEAD, in);
     }
     
    -int cls_rgw_lc_get_next_entry(IoCtx& io_ctx, const string& oid, const string& marker,
    -			      cls_rgw_lc_entry& entry)
    +void cls_rgw_lc_get_next_entry(ObjectReadOperation& op, const string& marker,
    +                               bufferlist& out)
     {
    -  bufferlist in, out;
    +  bufferlist in;
       cls_rgw_lc_get_next_entry_op call;
       call.marker = marker;
       encode(call, in);
    -  int r = io_ctx.exec(oid, RGW_CLASS, RGW_LC_GET_NEXT_ENTRY, in, out);
    -  if (r < 0)
    -    return r;
    +  op.exec(RGW_CLASS, RGW_LC_GET_NEXT_ENTRY, in, &out, nullptr);
    +}
     
    +int cls_rgw_lc_get_next_entry_decode(const bufferlist& out, cls_rgw_lc_entry& entry)
    +{
       cls_rgw_lc_get_next_entry_ret ret;
       try {
         auto iter = out.cbegin();
    @@ -985,45 +1052,42 @@ int cls_rgw_lc_get_next_entry(IoCtx& io_ctx, const string& oid, const string& ma
       } catch (ceph::buffer::error& err) {
         return -EIO;
       }
    -  entry = ret.entry;
    +  entry = std::move(ret.entry);
     
    - return r;
    +  return 0;
     }
     
    -int cls_rgw_lc_rm_entry(IoCtx& io_ctx, const string& oid,
    -			const cls_rgw_lc_entry& entry)
    +void cls_rgw_lc_rm_entry(ObjectWriteOperation& op,
    +                         const cls_rgw_lc_entry& entry)
     {
    -  bufferlist in, out;
    +  bufferlist in;
       cls_rgw_lc_rm_entry_op call;
       call.entry = entry;
       encode(call, in);
    -  int r = io_ctx.exec(oid, RGW_CLASS, RGW_LC_RM_ENTRY, in, out);
    - return r;
    +  op.exec(RGW_CLASS, RGW_LC_RM_ENTRY, in);
     }
     
    -int cls_rgw_lc_set_entry(IoCtx& io_ctx, const string& oid,
    -			 const cls_rgw_lc_entry& entry)
    +void cls_rgw_lc_set_entry(ObjectWriteOperation& op,
    +                          const cls_rgw_lc_entry& entry)
     {
       bufferlist in, out;
       cls_rgw_lc_set_entry_op call;
       call.entry = entry;
       encode(call, in);
    -  int r = io_ctx.exec(oid, RGW_CLASS, RGW_LC_SET_ENTRY, in, out);
    -  return r;
    +  op.exec(RGW_CLASS, RGW_LC_SET_ENTRY, in);
     }
     
    -int cls_rgw_lc_get_entry(IoCtx& io_ctx, const string& oid,
    -			 const std::string& marker, cls_rgw_lc_entry& entry)
    +void cls_rgw_lc_get_entry(ObjectReadOperation& op, const std::string& marker,
    +                          bufferlist& out)
     {
    -  bufferlist in, out;
    -  cls_rgw_lc_get_entry_op call{marker};;
    +  bufferlist in;
    +  cls_rgw_lc_get_entry_op call{marker};
       encode(call, in);
    -  int r = io_ctx.exec(oid, RGW_CLASS, RGW_LC_GET_ENTRY, in, out);
    -
    -  if (r < 0) {
    -    return r;
    -  }
    +  op.exec(RGW_CLASS, RGW_LC_GET_ENTRY, in, &out, nullptr);
    +}
     
    +int cls_rgw_lc_get_entry_decode(const bufferlist& out, cls_rgw_lc_entry& entry)
    +{
       cls_rgw_lc_get_entry_ret ret;
       try {
         auto iter = out.cbegin();
    @@ -1033,28 +1097,24 @@ int cls_rgw_lc_get_entry(IoCtx& io_ctx, const string& oid,
       }
     
       entry = std::move(ret.entry);
    -  return r;
    +  return 0;
     }
     
    -int cls_rgw_lc_list(IoCtx& io_ctx, const string& oid,
    -                    const string& marker,
    -                    uint32_t max_entries,
    -                    vector<cls_rgw_lc_entry>& entries)
    +void cls_rgw_lc_list(ObjectReadOperation& op, const string& marker,
    +                     uint32_t max_entries, bufferlist& out)
     {
    -  bufferlist in, out;
    -  cls_rgw_lc_list_entries_op op;
    -
    -  entries.clear();
    -
    -  op.marker = marker;
    -  op.max_entries = max_entries;
    +  bufferlist in;
    +  cls_rgw_lc_list_entries_op call;
    +  call.marker = marker;
    +  call.max_entries = max_entries;
     
    -  encode(op, in);
    +  encode(call, in);
     
    -  int r = io_ctx.exec(oid, RGW_CLASS, RGW_LC_LIST_ENTRIES, in, out);
    -  if (r < 0)
    -    return r;
    +  op.exec(RGW_CLASS, RGW_LC_LIST_ENTRIES, in, &out, nullptr);
    +}
     
    +int cls_rgw_lc_list_decode(const bufferlist& out, std::vector<cls_rgw_lc_entry>& entries)
    +{
       cls_rgw_lc_list_entries_ret ret;
       try {
         auto iter = out.cbegin();
    @@ -1067,7 +1127,7 @@ int cls_rgw_lc_list(IoCtx& io_ctx, const string& oid,
     	    [](const cls_rgw_lc_entry& a, const cls_rgw_lc_entry& b)
     	      { return a.bucket < b.bucket; });
       entries = std::move(ret.entries);
    -  return r;
    +  return 0;
     }
     
     void cls_rgw_mp_upload_part_info_update(librados::ObjectWriteOperation& op,
    @@ -1084,11 +1144,14 @@ void cls_rgw_mp_upload_part_info_update(librados::ObjectWriteOperation& op,
       op.exec(RGW_CLASS, RGW_MP_UPLOAD_PART_INFO_UPDATE, in);
     }
     
    -void cls_rgw_reshard_add(librados::ObjectWriteOperation& op, const cls_rgw_reshard_entry& entry)
    +void cls_rgw_reshard_add(librados::ObjectWriteOperation& op,
    +			 const cls_rgw_reshard_entry& entry,
    +			 const bool create_only)
     {
       bufferlist in;
       cls_rgw_reshard_add_op call;
       call.entry = entry;
    +  call.create_only = create_only;
       encode(call, in);
       op.exec(RGW_CLASS, RGW_RESHARD_ADD, in);
     }
    @@ -1221,3 +1284,4 @@ int CLSRGWIssueSetBucketResharding::issue_op(const int shard_id, const string& o
     {
       return issue_set_bucket_resharding(io_ctx, shard_id, oid, entry, &manager);
     }
    +
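
Editor's note: a caller-side sketch (not part of the patch) of the op-builder/decode split introduced above for GC listing; the LC helpers follow the same pattern. RGW itself would submit the read through rgw_rados_operate() so it can be made asynchronous; plain io_ctx.operate() is used here only to keep the example self-contained, and the helper name is invented.

#include "cls/rgw/cls_rgw_client.h"

// Hypothetical helper for illustration.
int list_expired_gc_entries(librados::IoCtx& io_ctx, const std::string& oid,
                            const std::string& marker,
                            std::list<cls_rgw_gc_obj_info>& entries)
{
  bufferlist out;
  librados::ObjectReadOperation op;
  cls_rgw_gc_list(op, marker, 100 /* max */, true /* expired_only */, out);

  int r = io_ctx.operate(oid, &op, nullptr);   // sends RGW_GC_LIST to the OSD
  if (r < 0) {
    return r;
  }

  bool truncated = false;
  std::string next_marker;
  return cls_rgw_gc_list_decode(out, entries, &truncated, next_marker);
}
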
    diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h
    index 1ae49c877bb4..f14380b29199 100644
    --- a/src/cls/rgw/cls_rgw_client.h
    +++ b/src/cls/rgw/cls_rgw_client.h
    @@ -3,6 +3,8 @@
     
     #pragma once
     
    +#include <boost/intrusive_ptr.hpp>
    +#include <boost/smart_ptr/intrusive_ref_counter.hpp>
     #include "include/str_list.h"
     #include "include/rados/librados.hpp"
     #include "cls_rgw_ops.h"
    @@ -151,10 +153,10 @@ class BucketIndexAioManager {
       }
     };
     
    -class RGWGetDirHeader_CB : public RefCountedObject {
    +class RGWGetDirHeader_CB : public boost::intrusive_ref_counter<RGWGetDirHeader_CB> {
     public:
    -  ~RGWGetDirHeader_CB() override {}
    -  virtual void handle_response(int r, rgw_bucket_dir_header& header) = 0;
    +  virtual ~RGWGetDirHeader_CB() {}
    +  virtual void handle_response(int r, const rgw_bucket_dir_header& header) = 0;
     };
     
     class BucketIndexShardsManager {
    @@ -262,6 +264,7 @@ class BucketIndexShardsManager {
     
     /* bucket index */
     void cls_rgw_bucket_init_index(librados::ObjectWriteOperation& o);
    +void cls_rgw_bucket_init_index2(librados::ObjectWriteOperation& o);
     
     class CLSRGWConcurrentIO {
     protected:
    @@ -314,6 +317,20 @@ class CLSRGWIssueBucketIndexInit : public CLSRGWConcurrentIO {
     };
     
     
    +class CLSRGWIssueBucketIndexInit2 : public CLSRGWConcurrentIO {
    +protected:
    +  int issue_op(int shard_id, const std::string& oid) override;
    +  int valid_ret_code() override { return -EEXIST; }
    +  void cleanup() override;
    +public:
    +  CLSRGWIssueBucketIndexInit2(librados::IoCtx& ioc,
    +			     std::map<int, std::string>& _bucket_objs,
    +			     uint32_t _max_aio) :
    +    CLSRGWConcurrentIO(ioc, _bucket_objs, _max_aio) {}
    +  virtual ~CLSRGWIssueBucketIndexInit2() override {}
    +};
    +
    +
     class CLSRGWIssueBucketIndexClean : public CLSRGWConcurrentIO {
     protected:
       int issue_op(int shard_id, const std::string& oid) override;
    @@ -344,7 +361,8 @@ class CLSRGWIssueSetTagTimeout : public CLSRGWConcurrentIO {
     
     void cls_rgw_bucket_update_stats(librados::ObjectWriteOperation& o,
                                      bool absolute,
    -                                 const std::map<RGWObjCategory, rgw_bucket_category_stats>& stats);
    +                                 const std::map<RGWObjCategory, rgw_bucket_category_stats>& stats,
    +                                 const std::map<RGWObjCategory, rgw_bucket_category_stats>* dec_stats = nullptr);
     
     void cls_rgw_bucket_prepare_op(librados::ObjectWriteOperation& o, RGWModifyOp op, const std::string& tag,
                                    const cls_rgw_obj_key& key, const std::string& locator, bool log_op,
    @@ -368,10 +386,15 @@ int cls_rgw_bi_get(librados::IoCtx& io_ctx, const std::string oid,
                        rgw_cls_bi_entry *entry);
     int cls_rgw_bi_put(librados::IoCtx& io_ctx, const std::string oid, const rgw_cls_bi_entry& entry);
     void cls_rgw_bi_put(librados::ObjectWriteOperation& op, const std::string oid, const rgw_cls_bi_entry& entry);
    +// Write the given array of index entries and update bucket stats accordingly.
    +// If existing entries may be overwritten, pass check_existing=true to decrement
    +// their stats first.
    +void cls_rgw_bi_put_entries(librados::ObjectWriteOperation& op,
    +                            std::vector<rgw_cls_bi_entry> entries,
    +                            bool check_existing);
     int cls_rgw_bi_list(librados::IoCtx& io_ctx, const std::string& oid,
                        const std::string& name, const std::string& marker, uint32_t max,
    -                   std::list<rgw_cls_bi_entry> *entries, bool *is_truncated);
    -
    +                   std::list<rgw_cls_bi_entry> *entries, bool *is_truncated, bool reshardlog = false);
     
     void cls_rgw_bucket_link_olh(librados::ObjectWriteOperation& op,
                                 const cls_rgw_obj_key& key, const ceph::buffer::list& olh_tag,
    @@ -379,7 +402,7 @@ void cls_rgw_bucket_link_olh(librados::ObjectWriteOperation& op,
                                 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time, bool log_op, const rgw_zone_set& zones_trace);
     void cls_rgw_bucket_unlink_instance(librados::ObjectWriteOperation& op,
                                        const cls_rgw_obj_key& key, const std::string& op_tag,
    -                                   const std::string& olh_tag, uint64_t olh_epoch, bool log_op, const rgw_zone_set& zones_trace);
    +                                   const std::string& olh_tag, uint64_t olh_epoch, bool log_op, uint16_t bilog_flags, const rgw_zone_set& zones_trace);
     void cls_rgw_get_olh_log(librados::ObjectReadOperation& op, const cls_rgw_obj_key& olh, uint64_t ver_marker, const std::string& olh_tag, rgw_cls_read_olh_log_ret& log_ret, int& op_ret);
     void cls_rgw_trim_olh_log(librados::ObjectWriteOperation& op, const cls_rgw_obj_key& olh, uint64_t ver, const std::string& olh_tag);
     void cls_rgw_clear_olh(librados::ObjectWriteOperation& op, const cls_rgw_obj_key& olh, const std::string& olh_tag);
    @@ -393,7 +416,8 @@ int cls_rgw_bucket_link_olh(librados::IoCtx& io_ctx, const std::string& oid,
                                 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time, bool log_op, const rgw_zone_set& zones_trace);
     int cls_rgw_bucket_unlink_instance(librados::IoCtx& io_ctx, const std::string& oid,
                                        const cls_rgw_obj_key& key, const std::string& op_tag,
    -                                   const std::string& olh_tag, uint64_t olh_epoch, bool log_op, const rgw_zone_set& zones_trace);
    +                                   const std::string& olh_tag, uint64_t olh_epoch, bool log_op,
    +                                   uint16_t bilog_flags, const rgw_zone_set& zones_trace);
     int cls_rgw_get_olh_log(librados::IoCtx& io_ctx, std::string& oid, const cls_rgw_obj_key& olh, uint64_t ver_marker,
                             const std::string& olh_tag, rgw_cls_read_olh_log_ret& log_ret);
     int cls_rgw_clear_olh(librados::IoCtx& io_ctx, std::string& oid, const cls_rgw_obj_key& olh, const std::string& olh_tag);
    @@ -405,7 +429,7 @@ int cls_rgw_usage_log_trim(librados::IoCtx& io_ctx, const std::string& oid, cons
     /**
      * Std::list the bucket with the starting object and filter prefix.
      * NOTE: this method do listing requests for each bucket index shards identified by
    - *       the keys of the *list_results* std::map, which means the std::map should be popludated
    + *       the keys of the *list_results* std::map, which means the std::map should be populated
      *       by the caller to fill with each bucket index object id.
      *
      * io_ctx        - IO context for rados.
    @@ -502,6 +526,23 @@ class CLSRGWIssueBILogTrim : public CLSRGWConcurrentIO {
       virtual ~CLSRGWIssueBILogTrim() override {}
     };
     
    +class CLSRGWIssueReshardLogTrim : public CLSRGWConcurrentIO {
    +protected:
    +  int issue_op(int shard_id, const std::string& oid) override;
    +  // Trim until -ENODATA is returned.
    +  int valid_ret_code() override { return -ENODATA; }
    +  bool need_multiple_rounds() override { return true; }
    +  void add_object(int shard, const std::string& oid) override { objs_container[shard] = oid; }
    +  void reset_container(std::map<int, std::string>& objs) override {
    +    objs_container.swap(objs);
    +    iter = objs_container.begin();
    +    objs.clear();
    +  }
    +public:
    +  CLSRGWIssueReshardLogTrim(librados::IoCtx& io_ctx, std::map<int, std::string>& _bucket_objs, uint32_t max_aio) :
    +      CLSRGWConcurrentIO(io_ctx, _bucket_objs, max_aio) {}
    +};
    +
     /**
      * Check the bucket index.
      *
    @@ -572,7 +613,8 @@ class CLSRGWIssueBucketBILogStop : public CLSRGWConcurrentIO {
       virtual ~CLSRGWIssueBucketBILogStop() override {}
     };
     
    -int cls_rgw_get_dir_header_async(librados::IoCtx& io_ctx, std::string& oid, RGWGetDirHeader_CB *ctx);
    +int cls_rgw_get_dir_header_async(librados::IoCtx& io_ctx, const std::string& oid,
    +                                 boost::intrusive_ptr<RGWGetDirHeader_CB> cb);
     
     void cls_rgw_encode_suggestion(char op, rgw_bucket_dir_entry& dirent, ceph::buffer::list& updates);
     
    @@ -596,34 +638,34 @@ void cls_rgw_usage_log_add(librados::ObjectWriteOperation& op, rgw_usage_log_inf
     void cls_rgw_gc_set_entry(librados::ObjectWriteOperation& op, uint32_t expiration_secs, cls_rgw_gc_obj_info& info);
     void cls_rgw_gc_defer_entry(librados::ObjectWriteOperation& op, uint32_t expiration_secs, const std::string& tag);
     void cls_rgw_gc_remove(librados::ObjectWriteOperation& op, const std::vector<std::string>& tags);
    -
    -// these overloads which call io_ctx.operate() should not be called in the rgw.
    -// rgw_rados_operate() should be called after the overloads w/o calls to io_ctx.operate()
    -#ifndef CLS_CLIENT_HIDE_IOCTX
    -int cls_rgw_gc_list(librados::IoCtx& io_ctx, std::string& oid, std::string& marker, uint32_t max, bool expired_only,
    -                    std::list<cls_rgw_gc_obj_info>& entries, bool *truncated, std::string& next_marker);
    -#endif
    +void cls_rgw_gc_list(librados::ObjectReadOperation& op, const std::string& marker,
    +                     uint32_t max, bool expired_only, bufferlist& bl);
    +int cls_rgw_gc_list_decode(const bufferlist& bl,
    +                           std::list<cls_rgw_gc_obj_info>& entries,
    +                           bool *truncated, std::string& next_marker);
     
     /* lifecycle */
    -// these overloads which call io_ctx.operate() should not be called in the rgw.
    -// rgw_rados_operate() should be called after the overloads w/o calls to io_ctx.operate()
    -#ifndef CLS_CLIENT_HIDE_IOCTX
    -int cls_rgw_lc_get_head(librados::IoCtx& io_ctx, const std::string& oid, cls_rgw_lc_obj_head& head);
    -int cls_rgw_lc_put_head(librados::IoCtx& io_ctx, const std::string& oid, cls_rgw_lc_obj_head& head);
    -int cls_rgw_lc_get_next_entry(librados::IoCtx& io_ctx, const std::string& oid, const std::string& marker, cls_rgw_lc_entry& entry);
    -int cls_rgw_lc_rm_entry(librados::IoCtx& io_ctx, const std::string& oid, const cls_rgw_lc_entry& entry);
    -int cls_rgw_lc_set_entry(librados::IoCtx& io_ctx, const std::string& oid, const cls_rgw_lc_entry& entry);
    -int cls_rgw_lc_get_entry(librados::IoCtx& io_ctx, const std::string& oid, const std::string& marker, cls_rgw_lc_entry& entry);
    -int cls_rgw_lc_list(librados::IoCtx& io_ctx, const std::string& oid,
    -		    const std::string& marker, uint32_t max_entries,
    -                    std::vector<cls_rgw_lc_entry>& entries);
    -#endif
    +void cls_rgw_lc_get_head(librados::ObjectReadOperation& op, bufferlist& bl);
    +int cls_rgw_lc_get_head_decode(const bufferlist& bl, cls_rgw_lc_obj_head& head);
    +void cls_rgw_lc_put_head(librados::ObjectWriteOperation& op, const cls_rgw_lc_obj_head& head);
    +void cls_rgw_lc_get_next_entry(librados::ObjectReadOperation& op, const std::string& marker, bufferlist& bl);
    +int cls_rgw_lc_get_next_entry_decode(const bufferlist& bl, cls_rgw_lc_entry& entry);
    +void cls_rgw_lc_rm_entry(librados::ObjectWriteOperation& op, const cls_rgw_lc_entry& entry);
    +void cls_rgw_lc_set_entry(librados::ObjectWriteOperation& op, const cls_rgw_lc_entry& entry);
    +void cls_rgw_lc_get_entry(librados::ObjectReadOperation& op, const std::string& marker, bufferlist& bl);
    +int cls_rgw_lc_get_entry_decode(const bufferlist& bl, cls_rgw_lc_entry& entry);
    +void cls_rgw_lc_list(librados::ObjectReadOperation& op,
    +                     const std::string& marker, uint32_t max_entries,
    +                     bufferlist& bl);
    +int cls_rgw_lc_list_decode(const bufferlist& bl, std::vector<cls_rgw_lc_entry>& entries);
     
     /* multipart */
     void cls_rgw_mp_upload_part_info_update(librados::ObjectWriteOperation& op, const std::string& part_key, const RGWUploadPartInfo& info);
     
     /* resharding */
    -void cls_rgw_reshard_add(librados::ObjectWriteOperation& op, const cls_rgw_reshard_entry& entry);
    +void cls_rgw_reshard_add(librados::ObjectWriteOperation& op,
    +			 const cls_rgw_reshard_entry& entry,
    +			 const bool create_only);
     void cls_rgw_reshard_remove(librados::ObjectWriteOperation& op, const cls_rgw_reshard_entry& entry);
     // these overloads which call io_ctx.operate() should not be called in the rgw.
     // rgw_rados_operate() should be called after the overloads w/o calls to io_ctx.operate()
    @@ -633,8 +675,16 @@ int cls_rgw_reshard_list(librados::IoCtx& io_ctx, const std::string& oid, std::s
     int cls_rgw_reshard_get(librados::IoCtx& io_ctx, const std::string& oid, cls_rgw_reshard_entry& entry);
     #endif
     
    -/* resharding attribute on bucket index shard headers */
    +// If writes to the bucket index should be blocked during resharding, fail with
    +// the given error code. RGWRados::guard_reshard() calls this in a loop to retry
    +// the write until the reshard completes.
    +//
    +// As of the T release, all index write ops in cls_rgw perform this check
    +// themselves. RGW can stop issuing this call in the T+2 (V) release once it
    +// knows that OSDs are running T at least. The call can be safely removed from
    +// cls_rgw in the T+4 (X) release.
     void cls_rgw_guard_bucket_resharding(librados::ObjectOperation& op, int ret_err);
    +
     // these overloads which call io_ctx.operate() should not be called in the rgw.
     // rgw_rados_operate() should be called after the overloads w/o calls to io_ctx.operate()
     #ifndef CLS_CLIENT_HIDE_IOCTX
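
Editor's note: a sketch (not part of the patch) of queueing a batched index write with the new cls_rgw_bi_put_entries() declared above. The entry contents and function name are invented for the example; with check_existing=true the objclass first decrements stats for any entries being overwritten, per the comment in the header.

// Hypothetical snippet for illustration.
void queue_bi_put_entries_example(librados::ObjectWriteOperation& op)
{
  std::vector<rgw_cls_bi_entry> entries;

  rgw_cls_bi_entry e;
  e.type = BIIndexType::Plain;
  e.idx = "example-object";          // made-up index key
  // e.data would carry the encoded rgw_bucket_dir_entry payload

  entries.push_back(std::move(e));
  cls_rgw_bi_put_entries(op, std::move(entries), true /* check_existing */);
  // the caller then submits `op` against the bucket index shard object
}
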
    diff --git a/src/cls/rgw/cls_rgw_const.h b/src/cls/rgw/cls_rgw_const.h
    index 8595db3c9e8b..da5778cd544c 100644
    --- a/src/cls/rgw/cls_rgw_const.h
    +++ b/src/cls/rgw/cls_rgw_const.h
    @@ -6,13 +6,13 @@
     #define RGW_CLASS "rgw"
     
     /* Special error code returned by cls bucket list operation if it was
    - * unable to skip past enough not visibile entries to return any
    + * unable to skip past enough not visible entries to return any
      * entries in the call. */
     constexpr int RGWBIAdvanceAndRetryError = -EFBIG;
     
     /* bucket index */
     #define RGW_BUCKET_INIT_INDEX "bucket_init_index"
    -
    +#define RGW_BUCKET_INIT_INDEX2 "bucket_init_index2"
     
     #define RGW_BUCKET_SET_TAG_TIMEOUT "bucket_set_tag_timeout"
     #define RGW_BUCKET_LIST "bucket_list"
    @@ -34,8 +34,11 @@ constexpr int RGWBIAdvanceAndRetryError = -EFBIG;
     
     #define RGW_BI_GET "bi_get"
     #define RGW_BI_PUT "bi_put"
    +#define RGW_BI_PUT_ENTRIES "bi_put_entries"
     #define RGW_BI_LIST "bi_list"
     
    +#define RGW_RESHARD_LOG_TRIM "reshard_log_trim"
    +
     #define RGW_BI_LOG_LIST "bi_log_list"
     #define RGW_BI_LOG_TRIM "bi_log_trim"
     #define RGW_DIR_SUGGEST_CHANGES "dir_suggest_changes"
    @@ -75,6 +78,7 @@ constexpr int RGWBIAdvanceAndRetryError = -EFBIG;
     
     /* resharding attribute  */
     #define RGW_SET_BUCKET_RESHARDING "set_bucket_resharding"
    +#define RGW_SET_BUCKET_RESHARDING2 "set_bucket_resharding2"
     #define RGW_CLEAR_BUCKET_RESHARDING "clear_bucket_resharding"
     #define RGW_GUARD_BUCKET_RESHARDING "guard_bucket_resharding"
     #define RGW_GET_BUCKET_RESHARDING "get_bucket_resharding"
    diff --git a/src/cls/rgw/cls_rgw_ops.cc b/src/cls/rgw/cls_rgw_ops.cc
    index 15bcba33330d..2c33a2691b5b 100644
    --- a/src/cls/rgw/cls_rgw_ops.cc
    +++ b/src/cls/rgw/cls_rgw_ops.cc
    @@ -373,6 +373,10 @@ void rgw_cls_bucket_update_stats_op::generate_test_instances(list<rgw_cls_bucket_update_stats_op*>& o)
    +  rgw_bucket_category_stats& dec_s = r->dec_stats[RGWObjCategory::None];
    +  dec_s.total_size = 1;
    +  dec_s.total_size_rounded = 4096;
    +  dec_s.num_entries = 1;
       o.push_back(r);
     
       o.push_back(new rgw_cls_bucket_update_stats_op);
    @@ -386,6 +390,11 @@ void rgw_cls_bucket_update_stats_op::dump(Formatter *f) const
         s[(int)entry.first] = entry.second;
       }
       encode_json("stats", s, f);
    +  map<int, rgw_bucket_category_stats> dec_s;
    +  for (auto& entry : dec_stats) {
    +    dec_s[(int)entry.first] = entry.second;
    +  }
    +  encode_json("dec_stats", dec_s, f);
     }
     
     void cls_rgw_bi_log_list_op::dump(Formatter *f) const
    @@ -571,3 +580,9 @@ void cls_rgw_get_bucket_resharding_op::generate_test_instances(
     void cls_rgw_get_bucket_resharding_op::dump(Formatter *f) const
     {
     }
    +
    +void rgw_cls_bi_put_entries_op::dump(Formatter *f) const
    +{
    +  encode_json("entries", entries, f);
    +  encode_json("check_existing", check_existing, f);
    +}
    diff --git a/src/cls/rgw/cls_rgw_ops.h b/src/cls/rgw/cls_rgw_ops.h
    index 4d58909a7670..025faebe7d48 100644
    --- a/src/cls/rgw/cls_rgw_ops.h
    +++ b/src/cls/rgw/cls_rgw_ops.h
    @@ -430,7 +430,7 @@ struct rgw_cls_list_ret {
       // if is_truncated is true, starting marker for next iteration; this
       // is necessary as it's possible after maximum number of tries we
       // still might have zero entries to return, in which case we have to
    -  // at least move the ball foward
    +  // at least move the ball forward
       cls_rgw_obj_key marker;
     
       // cls_filtered is not transmitted; it is assumed true for versions
    @@ -493,19 +493,23 @@ struct rgw_cls_bucket_update_stats_op
     {
       bool absolute{false};
       std::map<RGWObjCategory, rgw_bucket_category_stats> stats;
    +  std::map<RGWObjCategory, rgw_bucket_category_stats> dec_stats;
     
       rgw_cls_bucket_update_stats_op() {}
     
       void encode(ceph::buffer::list &bl) const {
    -    ENCODE_START(1, 1, bl);
    +    ENCODE_START(2, 1, bl);
         encode(absolute, bl);
         encode(stats, bl);
    +    encode(dec_stats, bl);
         ENCODE_FINISH(bl);
       }
       void decode(ceph::buffer::list::const_iterator &bl) {
    -    DECODE_START(1, bl);
    +    DECODE_START(2, bl);
         decode(absolute, bl);
         decode(stats, bl);
    +    if (struct_v >= 2)
    +      decode(dec_stats, bl);
         DECODE_FINISH(bl);
       }
       void dump(ceph::Formatter *f) const;
    @@ -756,26 +760,60 @@ struct rgw_cls_bi_put_op {
     };
     WRITE_CLASS_ENCODER(rgw_cls_bi_put_op)
     
    +struct rgw_cls_bi_put_entries_op {
    +  std::vector entries;
    +  bool check_existing = false;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(entries, bl);
    +    encode(check_existing, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(entries, bl);
    +    decode(check_existing, bl);
    +    DECODE_FINISH(bl);
    +  }
    +
    +  void dump(ceph::Formatter *f) const;
    +
    +  static void generate_test_instances(std::list<rgw_cls_bi_put_entries_op*>& o) {
    +    o.push_back(new rgw_cls_bi_put_entries_op);
    +    o.push_back(new rgw_cls_bi_put_entries_op);
    +    o.back()->entries.push_back({.idx = "entry"});
    +    o.back()->check_existing = true;
    +  }
    +};
    +WRITE_CLASS_ENCODER(rgw_cls_bi_put_entries_op)
    +
     struct rgw_cls_bi_list_op {
       uint32_t max;
    -  std::string name_filter; // limit resultto one object and its instances
    +  std::string name_filter; // limit result to one object and its instances
       std::string marker;
    +  bool reshardlog;
     
    -  rgw_cls_bi_list_op() : max(0) {}
    +  rgw_cls_bi_list_op() : max(0), reshardlog(false) {}
     
       void encode(ceph::buffer::list& bl) const {
    -    ENCODE_START(1, 1, bl);
    +    ENCODE_START(2, 1, bl);
         encode(max, bl);
         encode(name_filter, bl);
         encode(marker, bl);
    +    encode(reshardlog, bl);
         ENCODE_FINISH(bl);
       }
     
       void decode(ceph::buffer::list::const_iterator& bl) {
    -    DECODE_START(1, bl);
    +    DECODE_START(2, bl);
         decode(max, bl);
         decode(name_filter, bl);
         decode(marker, bl);
    +    if (struct_v >= 2) {
    +      decode(reshardlog, bl);
    +    }
         DECODE_FINISH(bl);
       }
     
    @@ -783,6 +821,7 @@ struct rgw_cls_bi_list_op {
         f->dump_unsigned("max", max);
         f->dump_string("name_filter", name_filter);
         f->dump_string("marker", marker);
    +    f->dump_bool("reshardlog", reshardlog);
       }
     
      static void generate_test_instances(std::list<rgw_cls_bi_list_op*>& o) {
    @@ -791,6 +830,7 @@ struct rgw_cls_bi_list_op {
         o.back()->max = 100;
         o.back()->name_filter = "name_filter";
         o.back()->marker = "marker";
    +    o.back()->reshardlog = true;
       }
     };
     WRITE_CLASS_ENCODER(rgw_cls_bi_list_op)
    @@ -1480,19 +1520,27 @@ struct cls_rgw_mp_upload_part_info_update_op {
     WRITE_CLASS_ENCODER(cls_rgw_mp_upload_part_info_update_op)
     
     struct cls_rgw_reshard_add_op {
    - cls_rgw_reshard_entry entry;
    +  cls_rgw_reshard_entry entry;
    +
    +  // true -> will not overwrite existing entry
    +  bool create_only {false};
     
       cls_rgw_reshard_add_op() {}
     
       void encode(ceph::buffer::list& bl) const {
    -    ENCODE_START(1, 1, bl);
    +    ENCODE_START(2, 1, bl);
         encode(entry, bl);
    +    encode(create_only, bl);
         ENCODE_FINISH(bl);
       }
     
       void decode(ceph::buffer::list::const_iterator& bl) {
    -    DECODE_START(1, bl);
    +    DECODE_START(2, bl);
         decode(entry, bl);
    +    create_only = false;
    +    if (struct_v >= 2) {
    +      decode(create_only, bl);
    +    }
         DECODE_FINISH(bl);
       }
      static void generate_test_instances(std::list<cls_rgw_reshard_add_op*>& o);
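
Editor's note: a minimal sketch (not part of the patch) of the encoding-compatibility pattern the hunks above follow when appending fields such as reshardlog and create_only: bump the encoded version, keep compat at 1, and decode the new field only when the sender actually wrote it. The struct name is invented for illustration.

// Illustrative struct; relies on the ENCODE_START/DECODE_START macros from
// include/encoding.h, as the real op structs do.
struct example_versioned_op {
  uint32_t max = 0;
  bool reshardlog = false;            // field appended in v2

  void encode(ceph::buffer::list& bl) const {
    ENCODE_START(2, 1, bl);           // version 2, still decodable by v1 peers
    encode(max, bl);
    encode(reshardlog, bl);
    ENCODE_FINISH(bl);
  }
  void decode(ceph::buffer::list::const_iterator& bl) {
    DECODE_START(2, bl);
    decode(max, bl);
    if (struct_v >= 2) {              // older encoders never wrote this field
      decode(reshardlog, bl);
    }
    DECODE_FINISH(bl);
  }
};
WRITE_CLASS_ENCODER(example_versioned_op)
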
    diff --git a/src/cls/rgw/cls_rgw_types.cc b/src/cls/rgw/cls_rgw_types.cc
    index 1c232a576bba..d5f6ba4bdee9 100644
    --- a/src/cls/rgw/cls_rgw_types.cc
    +++ b/src/cls/rgw/cls_rgw_types.cc
    @@ -312,6 +312,13 @@ static void dump_bi_entry(bufferlist bl, BIIndexType index_type, Formatter *form
             encode_json("entry", entry, formatter);
           }
           break;
    +    case BIIndexType::ReshardDeleted:
    +      {
    +        rgw_bucket_deleted_entry entry;
    +        decode(entry, iter);
    +        encode_json("entry", entry, formatter);
    +      }
    +      break;
         default:
           break;
       }
    @@ -327,6 +334,8 @@ void rgw_cls_bi_entry::decode_json(JSONObj *obj, cls_rgw_obj_key *effective_key)
         type = BIIndexType::Instance;
       } else if (s == "olh") {
         type = BIIndexType::OLH;
    +  } else if (s == "resharddeleted") {
    +    type = BIIndexType::ReshardDeleted;
       } else {
         type = BIIndexType::Invalid;
       }
    @@ -355,6 +364,17 @@ void rgw_cls_bi_entry::decode_json(JSONObj *obj, cls_rgw_obj_key *effective_key)
             }
           }
           break;
    +      case BIIndexType::ReshardDeleted:
    +      {
    +        rgw_bucket_deleted_entry entry;
    +        JSONDecoder::decode_json("entry", entry, obj);
    +        encode(entry, data);
    +
    +        if (effective_key) {
    +          *effective_key = entry.key;
    +        }
    +      }
    +      break;
         default:
           break;
       }
    @@ -373,6 +393,9 @@ void rgw_cls_bi_entry::dump(Formatter *f) const
       case BIIndexType::OLH:
         type_str = "olh";
         break;
    +  case BIIndexType::ReshardDeleted:
    +    type_str = "resharddeleted";
    +    break;
       default:
         type_str = "invalid";
       }
    @@ -383,14 +406,20 @@ void rgw_cls_bi_entry::dump(Formatter *f) const
     
     bool rgw_cls_bi_entry::get_info(cls_rgw_obj_key *key,
                                     RGWObjCategory *category,
    -                                rgw_bucket_category_stats *accounted_stats)
    +                                rgw_bucket_category_stats *accounted_stats) const
     {
       using ceph::decode;
       auto iter = data.cbegin();
       if (type == BIIndexType::OLH) {
         rgw_bucket_olh_entry entry;
         decode(entry, iter);
    -    *key = entry.key;
    +    *key = std::move(entry.key);
    +    return false;
    +  }
    +  if (type == BIIndexType::ReshardDeleted) {
    +    rgw_bucket_deleted_entry entry;
    +    decode(entry, iter);
    +    *key = std::move(entry.key);
         return false;
       }
     
    @@ -465,6 +494,25 @@ void rgw_bucket_olh_entry::generate_test_instances(list<rgw_bucket_olh_entry*>&
       o.push_back(new rgw_bucket_olh_entry);
     }
     
    +void rgw_bucket_deleted_entry::dump(Formatter *f) const
    +{
    +  encode_json("key", key, f);
    +}
    +
    +void rgw_bucket_deleted_entry::decode_json(JSONObj *obj)
    +{
    +  JSONDecoder::decode_json("key", key, obj);
    +}
    +
    +void rgw_bucket_deleted_entry::generate_test_instances(list<rgw_bucket_deleted_entry*>& o)
    +{
    +  rgw_bucket_deleted_entry *entry = new rgw_bucket_deleted_entry;
    +  entry->key.name = "key.name";
    +  entry->key.instance = "key.instance";
    +  o.push_back(entry);
    +  o.push_back(new rgw_bucket_deleted_entry);
    +}
    +
     void rgw_bucket_olh_log_entry::generate_test_instances(list<rgw_bucket_olh_log_entry*>& o)
     {
       rgw_bucket_olh_log_entry *entry = new rgw_bucket_olh_log_entry;
    @@ -519,6 +567,7 @@ void rgw_bucket_olh_log_entry::decode_json(JSONObj *obj)
       JSONDecoder::decode_json("key", key, obj);
       JSONDecoder::decode_json("delete_marker", delete_marker, obj);
     }
    +
     void rgw_bi_log_entry::decode_json(JSONObj *obj)
     {
       JSONDecoder::decode_json("op_id", id, obj);
    @@ -648,6 +697,7 @@ void rgw_bucket_dir_header::dump(Formatter *f) const
       }
       f->close_section();
       ::encode_json("new_instance", new_instance, f);
    +  f->dump_int("reshardlog_entries", reshardlog_entries);
     }
     
     void rgw_bucket_dir::generate_test_instances(list<rgw_bucket_dir*>& o)
    @@ -693,6 +743,21 @@ void rgw_bucket_dir::dump(Formatter *f) const
       f->close_section();
     }
     
    +void rgw_s3select_usage_data::generate_test_instances(list<rgw_s3select_usage_data*>& o)
    +{
    +  rgw_s3select_usage_data *s = new rgw_s3select_usage_data;
    +  s->bytes_processed = 1024;
    +  s->bytes_returned = 512;
    +  o.push_back(s);
    +  o.push_back(new rgw_s3select_usage_data);
    +}
    +
    +void rgw_s3select_usage_data::dump(Formatter *f) const
    +{
    +  f->dump_unsigned("bytes_processed", bytes_processed);
    +  f->dump_unsigned("bytes_returned", bytes_returned);
    +}
    +
     void rgw_usage_data::generate_test_instances(list<rgw_usage_data*>& o)
     {
       rgw_usage_data *s = new rgw_usage_data;
    @@ -773,12 +838,18 @@ void rgw_usage_log_entry::dump(Formatter *f) const
         }
       }
       f->close_section();
    +
    +  f->open_object_section("s3select");
    +  f->dump_unsigned("bytes_processed", s3select_usage.bytes_processed);
    +  f->dump_unsigned("bytes_returned", s3select_usage.bytes_returned);
    +  f->close_section();
     }
     
     void rgw_usage_log_entry::generate_test_instances(list<rgw_usage_log_entry*> &o)
     {
       rgw_usage_log_entry *entry = new rgw_usage_log_entry;
       rgw_usage_data usage_data{1024, 2048};
    +  rgw_s3select_usage_data s3select_usage_data{8192, 4096};
       entry->owner = rgw_user("owner");
       entry->payer = rgw_user("payer");
       entry->bucket = "bucket";
    @@ -788,10 +859,24 @@ void rgw_usage_log_entry::generate_test_instances(list<rgw_usage_log_entry*> &o
       entry->total_usage.ops = usage_data.ops;
       entry->total_usage.successful_ops = usage_data.successful_ops;
       entry->usage_map["get_obj"] = usage_data;
    +  entry->s3select_usage = s3select_usage_data;
       o.push_back(entry);
       o.push_back(new rgw_usage_log_entry);
     }
     
    +std::string to_string(cls_rgw_reshard_initiator i) {
    +  switch (i) {
    +  case cls_rgw_reshard_initiator::Unknown:
    +    return "unknown";
    +  case cls_rgw_reshard_initiator::Admin:
    +    return "administrator";
    +  case cls_rgw_reshard_initiator::Dynamic:
    +    return "dynamic resharding";
    +  default:
    +    return "error";
    +  }
    +}
    +
     void cls_rgw_reshard_entry::generate_key(const string& tenant, const string& bucket_name, string *key)
     {
       *key = tenant + ":" + bucket_name;
    @@ -805,12 +890,13 @@ void cls_rgw_reshard_entry::get_key(string *key) const
     void cls_rgw_reshard_entry::dump(Formatter *f) const
     {
       utime_t ut(time);
    -  encode_json("time",ut, f);
    +  encode_json("time", ut, f);
       encode_json("tenant", tenant, f);
       encode_json("bucket_name", bucket_name, f);
       encode_json("bucket_id", bucket_id, f);
       encode_json("old_num_shards", old_num_shards, f);
       encode_json("tentative_new_num_shards", new_num_shards, f);
    +  encode_json("initiator", to_string(initiator), f);
     }
     
     void cls_rgw_reshard_entry::generate_test_instances(list<cls_rgw_reshard_entry*>& ls)
    @@ -870,6 +956,9 @@ std::ostream& operator<<(std::ostream& out, cls_rgw_reshard_status status) {
       case cls_rgw_reshard_status::NOT_RESHARDING:
         out << "NOT_RESHARDING";
         break;
    +  case cls_rgw_reshard_status::IN_LOGRECORD:
    +    out << "IN_LOGRECORD";
    +    break;
       case cls_rgw_reshard_status::IN_PROGRESS:
         out << "IN_PROGRESS";
         break;
    diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h
    index 5f94b9918fa4..1bfcbcc97b89 100644
    --- a/src/cls/rgw/cls_rgw_types.h
    +++ b/src/cls/rgw/cls_rgw_types.h
    @@ -5,6 +5,7 @@
     
     #include 
     #include 
    +#include 
     #include 
     #include "common/ceph_time.h"
     #include "common/Formatter.h"
    @@ -18,6 +19,8 @@
     #define CEPH_RGW_DIR_SUGGEST_LOG_OP  0x80
     #define CEPH_RGW_DIR_SUGGEST_OP_MASK 0x7f
     
    +#define CLS_RGW_ERR_BUSY_RESHARDING 2300 // also in rgw_common.h, don't change!
    +
     constexpr uint64_t CEPH_RGW_DEFAULT_TAG_TIMEOUT = 120; // in seconds
     
     class JSONObj;
    @@ -111,6 +114,7 @@ inline std::ostream& operator<<(std::ostream& out, RGWModifyOp op) {
     
     enum RGWBILogFlags {
       RGW_BILOG_FLAG_VERSIONED_OP = 0x1,
    +  RGW_BILOG_NULL_VERSION = 0X2,
     };
     
     enum RGWCheckMTimeType {
    @@ -132,7 +136,7 @@ inline uint64_t cls_rgw_get_rounded_size(uint64_t size) {
      * path that ends with a delimiter and appends a new character to the
      * end such that when a we request bucket-index entries *after* this,
      * we'll get the next object after the "subdirectory". This works
    - * because we append a '\xFF' charater, and no valid UTF-8 character
    + * because we append a '\xFF' character, and no valid UTF-8 character
      * can contain that byte, so no valid entries can be skipped.
      */
     inline std::string cls_rgw_after_delim(const std::string& path) {
    @@ -181,7 +185,7 @@ enum class RGWObjCategory : uint8_t {
     
       Main      = 1,  // b-i entries for standard objs
     
    -  Shadow    = 2,  // presumfably intended for multipart shadow
    +  Shadow    = 2,  // presumably intended for multipart shadow
                       // uploads; not currently used in the codebase
     
       MultiMeta = 3,  // b-i entries for multipart upload metadata objs
    @@ -196,20 +200,17 @@ inline std::ostream& operator<<(std::ostream& out, RGWObjCategory c) {
     }
     
     struct rgw_bucket_dir_entry_meta {
    -  RGWObjCategory category;
    -  uint64_t size;
    +  RGWObjCategory category = RGWObjCategory::None;
    +  uint64_t size = 0;
       ceph::real_time mtime;
       std::string etag;
       std::string owner;
       std::string owner_display_name;
       std::string content_type;
    -  uint64_t accounted_size;
    +  uint64_t accounted_size = 0;
       std::string user_data;
       std::string storage_class;
    -  bool appendable;
    -
    -  rgw_bucket_dir_entry_meta() :
    -    category(RGWObjCategory::None), size(0), accounted_size(0), appendable(false) { }
    +  bool appendable = false;
     
       void encode(ceph::buffer::list &bl) const {
         ENCODE_START(7, 3, bl);
    @@ -468,21 +469,20 @@ struct rgw_bucket_dir_entry {
     WRITE_CLASS_ENCODER(rgw_bucket_dir_entry)
     
     enum class BIIndexType : uint8_t {
    -  Invalid    = 0,
    -  Plain      = 1,
    -  Instance   = 2,
    -  OLH        = 3,
    +  Invalid        = 0,
    +  Plain          = 1,
    +  Instance       = 2,
    +  OLH            = 3,
    +  ReshardDeleted = 4,
     };
     
     struct rgw_bucket_category_stats;
     
     struct rgw_cls_bi_entry {
    -  BIIndexType type;
    +  BIIndexType type = BIIndexType::Invalid;
       std::string idx;
       ceph::buffer::list data;
     
    -  rgw_cls_bi_entry() : type(BIIndexType::Invalid) {}
    -
       void encode(ceph::buffer::list& bl) const {
         ENCODE_START(1, 1, bl);
         encode(type, bl);
    @@ -505,7 +505,7 @@ struct rgw_cls_bi_entry {
       void decode_json(JSONObj *obj, cls_rgw_obj_key *effective_key = NULL);
       static void generate_test_instances(std::list<rgw_cls_bi_entry*>& o);
       bool get_info(cls_rgw_obj_key *key, RGWObjCategory *category,
    -		rgw_bucket_category_stats *accounted_stats);
    +		rgw_bucket_category_stats *accounted_stats) const;
     };
     WRITE_CLASS_ENCODER(rgw_cls_bi_entry)
     
    @@ -591,6 +591,25 @@ struct rgw_bucket_olh_entry {
     };
     WRITE_CLASS_ENCODER(rgw_bucket_olh_entry)
     
    +struct rgw_bucket_deleted_entry {
    +  cls_rgw_obj_key key;
    +  rgw_bucket_deleted_entry() {}
    +  void encode(ceph::buffer::list &bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(key, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator &bl) {
    +    DECODE_START(1, bl);
    +    decode(key, bl);
    +    DECODE_FINISH(bl);
    +  }
    +  void dump(ceph::Formatter *f) const;
    +  void decode_json(JSONObj *obj);
    +  static void generate_test_instances(std::list<rgw_bucket_deleted_entry*>& o);
    +};
    +WRITE_CLASS_ENCODER(rgw_bucket_deleted_entry)
    +
     struct rgw_bi_log_entry {
       std::string id;
       std::string object;
    @@ -660,6 +679,11 @@ struct rgw_bi_log_entry {
       bool is_versioned() {
         return ((bilog_flags & RGW_BILOG_FLAG_VERSIONED_OP) != 0);
       }
    +
    +  bool is_null_verid() {
    +    return ((bilog_flags & RGW_BILOG_NULL_VERSION) != 0);
    +  }
    +
     };
     WRITE_CLASS_ENCODER(rgw_bi_log_entry)
     
    @@ -711,7 +735,8 @@ inline bool operator!=(const rgw_bucket_category_stats& lhs,
     enum class cls_rgw_reshard_status : uint8_t {
       NOT_RESHARDING  = 0,
       IN_PROGRESS     = 1,
    -  DONE            = 2
    +  DONE            = 2,
    +  IN_LOGRECORD    = 3
     };
     std::ostream& operator<<(std::ostream&, cls_rgw_reshard_status);
     
    @@ -720,6 +745,8 @@ inline std::string to_string(const cls_rgw_reshard_status status)
       switch (status) {
       case cls_rgw_reshard_status::NOT_RESHARDING:
         return "not-resharding";
    +  case cls_rgw_reshard_status::IN_LOGRECORD:
    +    return "in-logrecord";
       case cls_rgw_reshard_status::IN_PROGRESS:
         return "in-progress";
       case cls_rgw_reshard_status::DONE:
    @@ -774,6 +801,10 @@ struct cls_rgw_bucket_instance_entry {
         return reshard_status != RESHARD_STATUS::NOT_RESHARDING;
       }
     
    +  bool resharding_in_logrecord() const {
    +    return reshard_status == RESHARD_STATUS::IN_LOGRECORD;
    +  }
    +
       bool resharding_in_progress() const {
         return reshard_status == RESHARD_STATUS::IN_PROGRESS;
       }
    @@ -795,11 +826,13 @@ struct rgw_bucket_dir_header {
       std::string max_marker;
       cls_rgw_bucket_instance_entry new_instance;
       bool syncstopped;
    +  uint32_t reshardlog_entries;
     
    -  rgw_bucket_dir_header() : tag_timeout(0), ver(0), master_ver(0), syncstopped(false) {}
    +  rgw_bucket_dir_header() : tag_timeout(0), ver(0), master_ver(0), syncstopped(false),
    +                            reshardlog_entries(0) {}
     
       void encode(ceph::buffer::list &bl) const {
    -    ENCODE_START(7, 2, bl);
    +    ENCODE_START(8, 2, bl);
         encode(stats, bl);
         encode(tag_timeout, bl);
         encode(ver, bl);
    @@ -807,10 +840,11 @@ struct rgw_bucket_dir_header {
         encode(max_marker, bl);
         encode(new_instance, bl);
         encode(syncstopped,bl);
    +    encode(reshardlog_entries, bl);
         ENCODE_FINISH(bl);
       }
       void decode(ceph::buffer::list::const_iterator &bl) {
    -    DECODE_START_LEGACY_COMPAT_LEN(6, 2, 2, bl);
    +    DECODE_START_LEGACY_COMPAT_LEN(8, 2, 2, bl);
         decode(stats, bl);
         if (struct_v > 2) {
           decode(tag_timeout, bl);
    @@ -834,6 +868,11 @@ struct rgw_bucket_dir_header {
         if (struct_v >= 7) {
           decode(syncstopped,bl);
         }
    +    if (struct_v >= 8) {
    +      decode(reshardlog_entries, bl);
    +    } else {
    +      reshardlog_entries = 0;
    +    }
         DECODE_FINISH(bl);
       }
       void dump(ceph::Formatter *f) const;
    @@ -842,9 +881,15 @@ struct rgw_bucket_dir_header {
       bool resharding() const {
         return new_instance.resharding();
       }
    +
    +  bool resharding_in_logrecord() const {
    +    return new_instance.resharding_in_logrecord();
    +  }
    +
       bool resharding_in_progress() const {
         return new_instance.resharding_in_progress();
       }
    +
     };
     WRITE_CLASS_ENCODER(rgw_bucket_dir_header)
     
    @@ -869,6 +914,38 @@ struct rgw_bucket_dir {
     };
     WRITE_CLASS_ENCODER(rgw_bucket_dir)
     
    +struct rgw_s3select_usage_data {
    +  uint64_t bytes_processed;
    +  uint64_t bytes_returned;
    +
    +  rgw_s3select_usage_data() : bytes_processed(0), bytes_returned(0) {}
    +  rgw_s3select_usage_data(uint64_t processed, uint64_t returned)
    +    : bytes_processed(processed), bytes_returned(returned) {}
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(bytes_processed, bl);
    +    encode(bytes_returned, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(bytes_processed, bl);
    +    decode(bytes_returned, bl);
    +    DECODE_FINISH(bl);
    +  }
    +
    +  void aggregate(const rgw_s3select_usage_data& usage) {
    +    bytes_processed += usage.bytes_processed;
    +    bytes_returned += usage.bytes_returned;
    +  }
    +
    +  void dump(ceph::Formatter *f) const;
    +  static void generate_test_instances(std::list<rgw_s3select_usage_data*>& o);
    +};
    +WRITE_CLASS_ENCODER(rgw_s3select_usage_data)
    +
     struct rgw_usage_data {
       uint64_t bytes_sent;
       uint64_t bytes_received;
    @@ -915,13 +992,14 @@ struct rgw_usage_log_entry {
       uint64_t epoch;
       rgw_usage_data total_usage; /* this one is kept for backwards compatibility */
       std::map<std::string, rgw_usage_data> usage_map;
    +  rgw_s3select_usage_data s3select_usage;
     
       rgw_usage_log_entry() : epoch(0) {}
       rgw_usage_log_entry(std::string& o, std::string& b) : owner(o), bucket(b), epoch(0) {}
       rgw_usage_log_entry(std::string& o, std::string& p, std::string& b) : owner(o), payer(p), bucket(b), epoch(0) {}
     
       void encode(ceph::buffer::list& bl) const {
    -    ENCODE_START(3, 1, bl);
    +    ENCODE_START(4, 1, bl);
         encode(owner.to_str(), bl);
         encode(bucket, bl);
         encode(epoch, bl);
    @@ -931,12 +1009,13 @@ struct rgw_usage_log_entry {
         encode(total_usage.successful_ops, bl);
         encode(usage_map, bl);
         encode(payer.to_str(), bl);
    +    encode(s3select_usage, bl);
         ENCODE_FINISH(bl);
       }
     
     
        void decode(ceph::buffer::list::const_iterator& bl) {
    -    DECODE_START(3, bl);
    +    DECODE_START(4, bl);
         std::string s;
         decode(s, bl);
         owner.from_str(s);
    @@ -956,6 +1035,9 @@ struct rgw_usage_log_entry {
           decode(p, bl);
           payer.from_str(p);
         }
    +    if (struct_v >= 4) {
    +      decode(s3select_usage, bl);
    +    }
         DECODE_FINISH(bl);
       }
     
    @@ -970,9 +1052,13 @@ struct rgw_usage_log_entry {
     
         for (auto iter = e.usage_map.begin(); iter != e.usage_map.end(); ++iter) {
           if (!categories || !categories->size() || categories->count(iter->first)) {
    -        add(iter->first, iter->second);
    +        add_usage(iter->first, iter->second);
           }
         }
    +
    +    if (!categories || !categories->size() || categories->count("s3select")) {
    +      s3select_usage.aggregate(e.s3select_usage);
    +    }
       }
     
       void sum(rgw_usage_data& usage,
    @@ -985,7 +1071,7 @@ struct rgw_usage_log_entry {
         }
       }
     
    -  void add(const std::string& category, const rgw_usage_data& data) {
    +  void add_usage(const std::string& category, const rgw_usage_data& data) {
         usage_map[category].aggregate(data);
         total_usage.aggregate(data);
       }
    @@ -1112,16 +1198,14 @@ struct cls_rgw_obj {
     WRITE_CLASS_ENCODER(cls_rgw_obj)
     
     struct cls_rgw_obj_chain {
     -  std::list<cls_rgw_obj> objs;
    -
    -  cls_rgw_obj_chain() {}
     +  std::vector<cls_rgw_obj> objs;
     
       void push_obj(const std::string& pool, const cls_rgw_obj_key& key, const std::string& loc) {
         cls_rgw_obj obj;
         obj.pool = pool;
         obj.key = key;
         obj.loc = loc;
    -    objs.push_back(obj);
    +    objs.push_back(std::move(obj));
       }
     
       void encode(ceph::buffer::list& bl) const {
    @@ -1138,9 +1222,9 @@ struct cls_rgw_obj_chain {
     
       void dump(ceph::Formatter *f) const {
         f->open_array_section("objs");
     -    for (std::list<cls_rgw_obj>::const_iterator p = objs.begin(); p != objs.end(); ++p) {
    +    for (const auto& o : objs) {
           f->open_object_section("obj");
    -      p->dump(f);
    +      o.dump(f);
           f->close_section();
         }
         f->close_section();
    @@ -1284,30 +1368,45 @@ struct cls_rgw_lc_entry {
     };
     WRITE_CLASS_ENCODER(cls_rgw_lc_entry);
     
    +
    +// used to track the initiator of a reshard entry on the reshard queue (log)
    +enum class cls_rgw_reshard_initiator : uint8_t {
    +  Unknown = 0,
    +  Admin = 1,
    +  Dynamic = 2,
    +};
    +std::string to_string(cls_rgw_reshard_initiator i);
    +inline std::ostream& operator<<(std::ostream& out, cls_rgw_reshard_initiator i) {
    +  return out << to_string(i);
    +}
    +
    +
     struct cls_rgw_reshard_entry
     {
       ceph::real_time time;
       std::string tenant;
       std::string bucket_name;
       std::string bucket_id;
    -  uint32_t old_num_shards{0};
    -  uint32_t new_num_shards{0};
    +  uint32_t old_num_shards {0};
    +  uint32_t new_num_shards {0};
    +  cls_rgw_reshard_initiator initiator {cls_rgw_reshard_initiator::Unknown};
     
       cls_rgw_reshard_entry() {}
     
       void encode(ceph::buffer::list& bl) const {
    -    ENCODE_START(2, 1, bl);
    +    ENCODE_START(3, 1, bl);
         encode(time, bl);
         encode(tenant, bl);
         encode(bucket_name, bl);
         encode(bucket_id, bl);
         encode(old_num_shards, bl);
         encode(new_num_shards, bl);
    +    encode(initiator, bl);
         ENCODE_FINISH(bl);
       }
     
       void decode(ceph::buffer::list::const_iterator& bl) {
    -    DECODE_START(2, bl);
    +    DECODE_START(3, bl);
         decode(time, bl);
         decode(tenant, bl);
         decode(bucket_name, bl);
    @@ -1318,6 +1417,11 @@ struct cls_rgw_reshard_entry
         }
         decode(old_num_shards, bl);
         decode(new_num_shards, bl);
    +    if (struct_v >= 3) {
    +      decode(initiator, bl);
    +    } else {
    +      initiator = cls_rgw_reshard_initiator::Unknown;
    +    }
         DECODE_FINISH(bl);
       }
     
    diff --git a/src/cls/test_remote_reads/cls_test_remote_reads.cc b/src/cls/test_remote_reads/cls_test_remote_reads.cc
    deleted file mode 100644
    index 33b0e9dc1d50..000000000000
    --- a/src/cls/test_remote_reads/cls_test_remote_reads.cc
    +++ /dev/null
    @@ -1,87 +0,0 @@
    -/*
    - * This is an example RADOS object class that shows how to use remote reads.
    - */
    -
    -#include "common/ceph_json.h"
    -#include "objclass/objclass.h"
    -
    -CLS_VER(1,0)
    -CLS_NAME(test_remote_reads)
    -
    -cls_handle_t h_class;
    -cls_method_handle_t h_test_read;
    -cls_method_handle_t h_test_gather;
    -
    -/**
    - * read data
    - */
    -static int test_read(cls_method_context_t hctx, bufferlist *in, bufferlist *out) {
    -  int r = cls_cxx_read(hctx, 0, 0, out);
    -  if (r < 0) {
    -    CLS_ERR("%s: error reading data", __PRETTY_FUNCTION__);
    -    return r;
    -  }
    -  return 0;
    -}
    -
    -/**
    - * gather data from other objects using remote reads
    - */
    -static int test_gather(cls_method_context_t hctx, bufferlist *in, bufferlist *out) {
     -  std::map<std::string, bufferlist> src_obj_buffs;
    -  int r = cls_cxx_get_gathered_data(hctx, &src_obj_buffs);
    -  if (src_obj_buffs.empty()) {
    -    // start remote reads
    -    JSONParser parser;
    -    bool b = parser.parse(in->c_str(), in->length());
    -    if (!b) {
    -      CLS_ERR("%s: failed to parse json", __PRETTY_FUNCTION__);
    -      return -EBADMSG;
    -    }
    -    auto *o_cls = parser.find_obj("cls");
    -    ceph_assert(o_cls);
    -    std::string cls = o_cls->get_data_val().str;
    -
    -    auto *o_method = parser.find_obj("method");
    -    ceph_assert(o_method);
    -    std::string method = o_method->get_data_val().str;
    -
    -    auto *o_pool = parser.find_obj("pool");
    -    ceph_assert(o_pool);
    -    std::string pool = o_pool->get_data_val().str;
    -
    -    auto *o_src_objects = parser.find_obj("src_objects");
    -    ceph_assert(o_src_objects);
    -    auto src_objects_v = o_src_objects->get_array_elements();
     -    std::set<std::string> src_objects;
    -    for (auto it = src_objects_v.begin(); it != src_objects_v.end(); it++) {
    -      std::string oid_without_double_quotes = it->substr(1, it->size()-2);
    -      src_objects.insert(oid_without_double_quotes);
    -    }
    -    r = cls_cxx_gather(hctx, src_objects, pool, cls.c_str(), method.c_str(), *in);
    -  } else {
    -    // write data gathered using remote reads
    -    int offset = 0;
     -    for (std::map<std::string, bufferlist>::iterator it = src_obj_buffs.begin(); it != src_obj_buffs.end(); it++) {
    -      bufferlist bl= it->second;
    -      r = cls_cxx_write(hctx, offset, bl.length(), &bl);
    -      offset += bl.length();
    -    }
    -  }
    -  return r;
    -}
    -
    -CLS_INIT(test_remote_reads)
    -{
    -  CLS_LOG(0, "loading cls_test_remote_reads");
    -
    -  cls_register("test_remote_reads", &h_class);
    -  
    -  cls_register_cxx_method(h_class, "test_read",
    -			  CLS_METHOD_RD,
    -			  test_read, &h_test_read);
    -
    -  cls_register_cxx_method(h_class, "test_gather",
    -			  CLS_METHOD_RD | CLS_METHOD_WR,
    -			  test_gather, &h_test_gather);
    -}
    diff --git a/src/cls/timeindex/cls_timeindex_ops.h b/src/cls/timeindex/cls_timeindex_ops.h
    index f40058954dce..f0f0cc024751 100644
    --- a/src/cls/timeindex/cls_timeindex_ops.h
    +++ b/src/cls/timeindex/cls_timeindex_ops.h
    @@ -4,6 +4,7 @@
     #ifndef CEPH_CLS_TIMEINDEX_OPS_H
     #define CEPH_CLS_TIMEINDEX_OPS_H
     
    +#include "common/ceph_json.h"
     #include "cls_timeindex_types.h"
     
     struct cls_timeindex_add_op {
    @@ -51,6 +52,26 @@ struct cls_timeindex_list_op {
         decode(max_entries, bl);
         DECODE_FINISH(bl);
       }
    +
     +  void dump(ceph::Formatter *f) const {
    +    f->open_object_section("from_time");
    +    from_time.dump(f);
    +    f->close_section();
    +    f->dump_string("marker", marker);
    +    f->open_object_section("to_time");
    +    to_time.dump(f);
    +    f->close_section();
    +    f->dump_int("max_entries", max_entries);
    +  }
    +
     +  static void generate_test_instances(std::list<cls_timeindex_list_op*>& o) {
    +    o.push_back(new cls_timeindex_list_op);
    +    o.push_back(new cls_timeindex_list_op);
    +    o.back()->from_time = utime_t(1, 2);
    +    o.back()->marker = "marker";
    +    o.back()->to_time = utime_t(3, 4);
    +    o.back()->max_entries = 5;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_timeindex_list_op)
     
    @@ -76,6 +97,23 @@ struct cls_timeindex_list_ret {
         decode(truncated, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    encode_json("entries", entries, f);
    +    f->dump_string("marker", marker);
    +    f->dump_bool("truncated", truncated);
    +  }
    +
     +  static void generate_test_instances(std::list<cls_timeindex_list_ret*>& o) {
    +    o.push_back(new cls_timeindex_list_ret);
    +    o.push_back(new cls_timeindex_list_ret);
    +    o.back()->entries.push_back(cls_timeindex_entry());
    +    o.back()->entries.back().key_ts = utime_t(1, 2);
    +    o.back()->entries.back().key_ext = "key_ext";
    +    o.back()->entries.back().value.append("value");
    +    o.back()->marker = "marker";
    +    o.back()->truncated = true;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_timeindex_list_ret)
     
    diff --git a/src/cls/timeindex/cls_timeindex_types.h b/src/cls/timeindex/cls_timeindex_types.h
    index d33886881be5..ea8d6c93d2c8 100644
    --- a/src/cls/timeindex/cls_timeindex_types.h
    +++ b/src/cls/timeindex/cls_timeindex_types.h
    @@ -4,9 +4,9 @@
     #ifndef CEPH_CLS_TIMEINDEX_TYPES_H
     #define CEPH_CLS_TIMEINDEX_TYPES_H
     
    +#include "common/Formatter.h"
     #include "include/encoding.h"
     #include "include/types.h"
    -
     #include "include/utime.h"
     
     class JSONObj;
    diff --git a/src/cls/user/cls_user.cc b/src/cls/user/cls_user.cc
    index e278ad7fc128..592f304fc715 100644
    --- a/src/cls/user/cls_user.cc
    +++ b/src/cls/user/cls_user.cc
    @@ -2,11 +2,14 @@
     // vim: ts=8 sw=2 smarttab
     
      #include <errno.h>
     +#include <algorithm>
     +#include <cctype>
     
     #include "include/utime.h"
     #include "objclass/objclass.h"
     
     #include "cls_user_ops.h"
    +#include "rgw/rgw_string.h"
     
     using std::map;
     using std::string;
    @@ -71,7 +74,8 @@ static int get_existing_bucket_entry(cls_method_context_t hctx, const string& bu
       return 0;
     }
     
    -static int read_header(cls_method_context_t hctx, cls_user_header *header)
     +template <typename T>
    +static int read_header(cls_method_context_t hctx, T *header)
     {
       bufferlist bl;
     
    @@ -80,7 +84,7 @@ static int read_header(cls_method_context_t hctx, cls_user_header *header)
         return ret;
     
       if (bl.length() == 0) {
    -    *header = cls_user_header();
    +    *header = T();
         return 0;
       }
     
    @@ -478,10 +482,6 @@ static int cls_user_reset_stats2(cls_method_context_t hctx,
         add_header_stats(&ret.acc_stats, e);
       }
     
    -  /* try-update marker */
    -  if(!keys.empty())
    -    ret.marker = (--keys.cend())->first;
    -
       if (! ret.truncated) {
         buffer::list bl;
         header.last_stats_update = op.time;
    @@ -496,11 +496,230 @@ static int cls_user_reset_stats2(cls_method_context_t hctx,
         return rc;
       }
     
    +  /* try-update marker */
    +  if(!keys.empty())
    +    ret.marker = (--keys.cend())->first;
    +
       /* return partial result */
       encode(ret, *out);
       return 0;
     } /* cls_user_reset_stats2 */
     
    +
    +// account resource names must be unique and aren't distinguished by case, so
    +// convert all keys to lowercase
    +static std::string resource_key(std::string_view name)
    +{
    +  std::string key;
    +  key.resize(name.size());
    +  std::transform(name.begin(), name.end(), key.begin(),
    +                 [](unsigned char c) { return std::tolower(c); });
    +  return key;
    +}
    +
    +static int cls_account_resource_add(cls_method_context_t hctx,
    +                                    buffer::list *in, buffer::list *out)
    +{
    +  cls_user_account_resource_add_op op;
    +  try {
    +    auto bliter = in->cbegin();
    +    decode(op, bliter);
    +  } catch (const ceph::buffer::error& err) {
    +    CLS_LOG(0, "ERROR: %s failed to decode op", __func__);
    +    return -EINVAL;
    +  }
    +
    +  CLS_LOG(20, "adding account resource name=%s path=%s",
    +          op.entry.name.c_str(), op.entry.path.c_str());
    +
    +  const std::string key = resource_key(op.entry.name);
    +
    +  // does this resource entry exist?
    +  bufferlist readbl; // unused
    +  int ret = cls_cxx_map_get_val(hctx, key, &readbl);
    +  if (ret < 0 && ret != -ENOENT) {
    +    return ret;
    +  }
    +  const bool exists = (ret == 0);
    +
     +  std::optional<cls_user_account_header> header;
    +  if (!exists) {
    +    // if this is a new entry, update the resource count in the account header
    +    ret = read_header(hctx, &header.emplace());
    +    if (ret < 0) {
    +      CLS_LOG(0, "ERROR: failed to read account header ret=%d", ret);
    +      return ret;
    +    }
    +    if (header->count >= op.limit) {
    +      CLS_LOG(4, "account resource limit exceeded, %u >= %u",
    +              header->count, op.limit);
    +      return -EUSERS; // too many users
    +    }
    +    header->count++;
    +  } else if (op.exclusive) {
    +    return -EEXIST;
    +  }
    +
    +  // write/overwrite the entry
    +  bufferlist writebl;
    +  encode(op.entry, writebl);
    +  ret = cls_cxx_map_set_val(hctx, key, &writebl);
    +  if (ret < 0) {
    +    CLS_LOG(0, "ERROR: failed to write account resource: %d", ret);
    +    return ret;
    +  }
    +
    +  // write the updated account header
    +  if (header) {
    +    bufferlist headerbl;
    +    encode(*header, headerbl);
    +    return cls_cxx_map_write_header(hctx, &headerbl);
    +  }
    +  return 0;
    +} // cls_account_resource_add
    +
    +static int cls_account_resource_get(cls_method_context_t hctx,
    +                                    bufferlist *in, bufferlist *out)
    +{
    +  cls_user_account_resource_get_op op;
    +  try {
    +    auto p = in->cbegin();
    +    decode(op, p);
    +  } catch (const ceph::buffer::error& err) {
    +    CLS_LOG(0, "ERROR: %s failed to decode op", __func__);
    +    return -EINVAL;
    +  }
    +
    +  CLS_LOG(20, "reading account resource name=%s", op.name.c_str());
    +
    +  const std::string key = resource_key(op.name);
    +
    +  bufferlist bl;
    +  int r = cls_cxx_map_get_val(hctx, key, &bl);
    +  if (r < 0) {
    +    return r;
    +  }
    +
    +  cls_user_account_resource_get_ret ret;
    +  try {
    +    auto iter = bl.cbegin();
    +    decode(ret.entry, iter);
    +  } catch (ceph::buffer::error& err) {
    +    CLS_LOG(0, "ERROR: failed to decode entry %s", key.c_str());
    +    return -EIO;
    +  }
    +
    +  encode(ret, *out);
    +  return 0;
    +} // cls_account_resource_get
    +
    +static int cls_account_resource_rm(cls_method_context_t hctx,
    +                                   buffer::list *in, buffer::list *out)
    +{
    +  cls_user_account_resource_rm_op op;
    +  try {
    +    auto bliter = in->cbegin();
    +    decode(op, bliter);
    +  } catch (const ceph::buffer::error& err) {
    +    CLS_LOG(0, "ERROR: %s failed to decode op", __func__);
    +    return -EINVAL;
    +  }
    +
    +  CLS_LOG(20, "removing account resource name=%s", op.name.c_str());
    +
    +  const std::string key = resource_key(op.name);
    +
    +  // verify that the resource entry exists, so we can return ENOENT otherwise.
    +  // remove_key() alone would return success either way
    +  bufferlist readbl; // unused
    +  int ret = cls_cxx_map_get_val(hctx, key, &readbl);
    +  if (ret < 0) {
    +    return ret;
    +  }
    +
    +  // remove the resource entry
    +  ret = cls_cxx_map_remove_key(hctx, key);
    +  if (ret < 0) {
    +    CLS_LOG(0, "ERROR: failed to remove account resource: %d", ret);
    +    return ret;
    +  }
    +
    +  // update resource count in the account header
    +  cls_user_account_header header;
    +  ret = read_header(hctx, &header);
    +  if (ret < 0) {
    +    CLS_LOG(0, "ERROR: failed to read account header ret=%d", ret);
    +    return ret;
    +  }
    +  if (header.count) { // guard underflow
    +    header.count--;
    +  }
    +
    +  bufferlist headerbl;
    +  encode(header, headerbl);
    +  return cls_cxx_map_write_header(hctx, &headerbl);
    +} // cls_account_resource_rm
    +
    +static int cls_account_resource_list(cls_method_context_t hctx,
    +                                     bufferlist *in, bufferlist *out)
    +{
    +  cls_user_account_resource_list_op op;
    +  try {
    +    auto p = in->cbegin();
    +    decode(op, p);
    +  } catch (const ceph::buffer::error& err) {
    +    CLS_LOG(0, "ERROR: %s failed to decode op", __func__);
    +    return -EINVAL;
    +  }
    +  CLS_LOG(20, "listing account resources from marker=%s path_prefix=%s max_entries=%d",
    +          op.marker.c_str(), op.path_prefix.c_str(), (int)op.max_entries);
    +
    +  const std::string prefix; // empty
    +  const uint32_t max_entries = std::min(op.max_entries, 1000u);
     +  std::map<std::string, bufferlist> entries;
    +  bool truncated = false;
    +
    +  int rc = cls_cxx_map_get_vals(hctx, op.marker, prefix, max_entries,
    +                                &entries, &truncated);
    +  if (rc < 0) {
    +    return rc;
    +  }
    +
    +  cls_user_account_resource_list_ret ret;
    +
    +  // copy matching decoded omap values into a vector
    +  for (auto& [key, bl] : entries) {
    +    // decode as cls_user_account_resource
    +    cls_user_account_resource entry;
    +    try {
    +      auto p = bl.cbegin();
    +      decode(entry, p);
    +    } catch (const ceph::buffer::error& e) {
    +      CLS_LOG(1, "ERROR: %s failed to decode resource entry at key=%s",
    +              __func__, key.c_str());
    +      return -EIO;
    +    }
    +
    +    // filter entries by path prefix
    +    if (entry.path.starts_with(op.path_prefix)) {
    +      CLS_LOG(20, "included resource path=%s name=%s",
    +              entry.path.c_str(), entry.name.c_str());
    +      ret.entries.push_back(std::move(entry));
    +    }
    +  }
    +
    +  ret.truncated = truncated;
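     +  // advance the marker to the last omap key scanned, even if path filtering dropped it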
    +  if (!entries.empty()) {
    +    ret.marker = entries.rbegin()->first;
    +  }
    +  CLS_LOG(20, "entries=%d next_marker=%s truncated=%d",
    +          (int)ret.entries.size(), ret.marker.c_str(), (int)ret.truncated);
    +
    +  encode(ret, *out);
    +  return 0;
    +} // cls_account_resource_list
    +
    +
     CLS_INIT(user)
     {
       CLS_LOG(1, "Loaded user class!");
    @@ -527,5 +746,18 @@ CLS_INIT(user)
       cls_register_cxx_method(h_class, "reset_user_stats", CLS_METHOD_RD | CLS_METHOD_WR, cls_user_reset_stats, &h_user_reset_stats);
       cls_register_cxx_method(h_class, "reset_user_stats2", CLS_METHOD_RD | CLS_METHOD_WR, cls_user_reset_stats2, &h_user_reset_stats2);
     
    -  return;
    +  // account
    +  cls_method_handle_t h_account_resource_add;
    +  cls_method_handle_t h_account_resource_get;
    +  cls_method_handle_t h_account_resource_rm;
    +  cls_method_handle_t h_account_resource_list;
    +
    +  cls_register_cxx_method(h_class, "account_resource_add", CLS_METHOD_RD | CLS_METHOD_WR,
    +                          cls_account_resource_add, &h_account_resource_add);
    +  cls_register_cxx_method(h_class, "account_resource_get", CLS_METHOD_RD,
    +                          cls_account_resource_get, &h_account_resource_get);
    +  cls_register_cxx_method(h_class, "account_resource_rm", CLS_METHOD_RD | CLS_METHOD_WR,
    +                          cls_account_resource_rm, &h_account_resource_rm);
    +  cls_register_cxx_method(h_class, "account_resource_list", CLS_METHOD_RD,
    +                          cls_account_resource_list, &h_account_resource_list);
     }
    diff --git a/src/cls/user/cls_user_client.cc b/src/cls/user/cls_user_client.cc
    index b74f55b48b2f..acc94ca326a2 100644
    --- a/src/cls/user/cls_user_client.cc
    +++ b/src/cls/user/cls_user_client.cc
    @@ -162,3 +162,124 @@ int cls_user_get_header_async(IoCtx& io_ctx, string& oid, RGWGetUserHeader_CB *c
     
       return 0;
     }
    +
    +
    +void cls_user_account_resource_add(librados::ObjectWriteOperation& op,
    +                                   const cls_user_account_resource& entry,
    +                                   bool exclusive, uint32_t limit)
    +{
    +  cls_user_account_resource_add_op call;
    +  call.entry = entry;
    +  call.exclusive = exclusive;
    +  call.limit = limit;
    +
    +  bufferlist inbl;
    +  encode(call, inbl);
    +  op.exec("user", "account_resource_add", inbl);
    +}
    +
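     +// completion that decodes the account_resource_get reply into the caller's entry and return code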
    +class ResourceGetCB : public librados::ObjectOperationCompletion {
    +  cls_user_account_resource* entry;
    +  int* pret;
    +public:
    +  ResourceGetCB(cls_user_account_resource* entry, int* pret)
    +    : entry(entry), pret(pret)
    +  {}
    +  void handle_completion(int r, bufferlist& outbl) override {
    +    if (r >= 0) {
    +      cls_user_account_resource_get_ret ret;
    +      try {
    +        auto iter = outbl.cbegin();
    +        decode(ret, iter);
    +        if (entry) {
    +          *entry = std::move(ret.entry);
    +        }
    +      } catch (const ceph::buffer::error& err) {
    +        r = -EIO;
    +      }
    +    }
    +    if (pret) {
    +      *pret = r;
    +    }
    +  }
    +};
    +
    +void cls_user_account_resource_get(librados::ObjectReadOperation& op,
    +                                   std::string_view name,
    +                                   cls_user_account_resource& entry,
    +                                   int* pret)
    +{
    +  cls_user_account_resource_get_op call;
    +  call.name = name;
    +
    +  bufferlist inbl;
    +  encode(call, inbl);
    +  op.exec("user", "account_resource_get", inbl,
    +          new ResourceGetCB(&entry, pret));
    +}
    +
    +void cls_user_account_resource_rm(librados::ObjectWriteOperation& op,
    +                                  std::string_view name)
    +{
    +  cls_user_account_resource_rm_op call;
    +  call.name = name;
    +
    +  bufferlist inbl;
    +  encode(call, inbl);
    +  op.exec("user", "account_resource_rm", inbl);
    +}
    +
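     +// completion that decodes the account_resource_list reply into the caller's entries, marker and truncated flag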
    +class ResourceListCB : public librados::ObjectOperationCompletion {
     +  std::vector<cls_user_account_resource>* entries;
    +  bool* truncated;
    +  std::string* next_marker;
    +  int* pret;
    +public:
     +  ResourceListCB(std::vector<cls_user_account_resource>* entries,
    +                 bool* truncated, std::string* next_marker, int* pret)
    +    : entries(entries), truncated(truncated),
    +      next_marker(next_marker), pret(pret)
    +  {}
    +  void handle_completion(int r, bufferlist& outbl) override {
    +    if (r >= 0) {
    +      cls_user_account_resource_list_ret ret;
    +      try {
    +        auto iter = outbl.cbegin();
    +        decode(ret, iter);
    +        if (entries) {
    +          *entries = std::move(ret.entries);
    +        }
    +        if (next_marker) {
    +          *next_marker = std::move(ret.marker);
    +        }
    +        if (truncated) {
    +          *truncated = ret.truncated;
    +        }
    +      } catch (const ceph::buffer::error& err) {
    +        r = -EIO;
    +      }
    +    }
    +    if (pret) {
    +      *pret = r;
    +    }
    +  }
    +};
    +
    +void cls_user_account_resource_list(librados::ObjectReadOperation& op,
    +                                    std::string_view marker,
    +                                    std::string_view path_prefix,
    +                                    uint32_t max_entries,
     +                                    std::vector<cls_user_account_resource>& entries,
    +                                    bool* truncated, std::string* next_marker,
    +                                    int* pret)
    +{
    +  cls_user_account_resource_list_op call;
    +  call.marker = marker;
    +  call.path_prefix = path_prefix;
    +  call.max_entries = max_entries;
    +
    +  bufferlist inbl;
    +  encode(call, inbl);
    +  op.exec("user", "account_resource_list", inbl,
    +          new ResourceListCB(&entries, truncated, next_marker, pret));
    +}
    diff --git a/src/cls/user/cls_user_client.h b/src/cls/user/cls_user_client.h
    index 03d975c59cb5..a1120f86400b 100644
    --- a/src/cls/user/cls_user_client.h
    +++ b/src/cls/user/cls_user_client.h
    @@ -33,4 +33,31 @@ void cls_user_get_header(librados::ObjectReadOperation& op, cls_user_header *hea
     int cls_user_get_header_async(librados::IoCtx& io_ctx, std::string& oid, RGWGetUserHeader_CB *ctx);
     void cls_user_reset_stats(librados::ObjectWriteOperation& op);
     
    +// Account resources
    +
    +/// Add or overwrite an entry to the account's list of resources. Returns
    +/// -EUSERS (Too many users) if the resource count would exceed the given limit.
    +void cls_user_account_resource_add(librados::ObjectWriteOperation& op,
    +                                   const cls_user_account_resource& entry,
    +                                   bool exclusive, uint32_t limit);
    +
    +/// Look up an account resource by case-insensitive name.
    +void cls_user_account_resource_get(librados::ObjectReadOperation& op,
    +                                   std::string_view name,
    +                                   cls_user_account_resource& entry,
    +                                   int* pret);
    +
     +/// Remove an account resource by case-insensitive name.
    +void cls_user_account_resource_rm(librados::ObjectWriteOperation& op,
    +                                  std::string_view name);
    +
    +/// List the resources linked to an account.
    +void cls_user_account_resource_list(librados::ObjectReadOperation& op,
    +                                    std::string_view marker,
    +                                    std::string_view path_prefix,
    +                                    uint32_t max_entries,
     +                                    std::vector<cls_user_account_resource>& entries,
    +                                    bool* truncated, std::string* next_marker,
    +                                    int* pret);
    +
     #endif
    diff --git a/src/cls/user/cls_user_ops.cc b/src/cls/user/cls_user_ops.cc
    index 5ae9d2c93b82..f787c1eeb022 100644
    --- a/src/cls/user/cls_user_ops.cc
    +++ b/src/cls/user/cls_user_ops.cc
     @@ -116,3 +116,89 @@ void cls_user_complete_stats_sync_op::generate_test_instances(list<cls_user_complete_stats_sync_op*>& ls)
     +void cls_user_account_resource_add_op::generate_test_instances(std::list<cls_user_account_resource_add_op*>& ls)
     +{
    +  ls.push_back(new cls_user_account_resource_add_op);
    +  cls_user_account_resource_add_op *op = new cls_user_account_resource_add_op;
    +  cls_user_gen_test_resource(op->entry);
    +  ls.push_back(op);
    +}
    +
    +void cls_user_account_resource_get_op::dump(Formatter *f) const
    +{
    +  encode_json("name", name, f);
    +}
    +
     +void cls_user_account_resource_get_op::generate_test_instances(std::list<cls_user_account_resource_get_op*>& ls)
    +{
    +  ls.push_back(new cls_user_account_resource_get_op);
    +  cls_user_account_resource_get_op *op = new cls_user_account_resource_get_op;
    +  op->name = "name";
    +  ls.push_back(op);
    +}
    +
    +void cls_user_account_resource_get_ret::dump(Formatter *f) const
    +{
    +  encode_json("entry", entry, f);
    +}
    +
     +void cls_user_account_resource_get_ret::generate_test_instances(std::list<cls_user_account_resource_get_ret*>& ls)
    +{
    +  ls.push_back(new cls_user_account_resource_get_ret);
    +  cls_user_account_resource_get_ret *ret = new cls_user_account_resource_get_ret;
    +  cls_user_gen_test_resource(ret->entry);
    +  ls.push_back(ret);
    +}
    +
    +void cls_user_account_resource_rm_op::dump(Formatter *f) const
    +{
    +  encode_json("name", name, f);
    +}
    +
     +void cls_user_account_resource_rm_op::generate_test_instances(std::list<cls_user_account_resource_rm_op*>& ls)
    +{
    +  ls.push_back(new cls_user_account_resource_rm_op);
    +  cls_user_account_resource_rm_op *op = new cls_user_account_resource_rm_op;
    +  op->name = "name";
    +  ls.push_back(op);
    +}
    +
    +void cls_user_account_resource_list_op::dump(Formatter *f) const
    +{
    +  encode_json("marker", marker, f);
    +  encode_json("path_prefix", path_prefix, f);
    +  encode_json("max_entries", max_entries, f);
    +}
    +
     +void cls_user_account_resource_list_op::generate_test_instances(std::list<cls_user_account_resource_list_op*>& ls)
    +{
    +  ls.push_back(new cls_user_account_resource_list_op);
    +  cls_user_account_resource_list_op *op = new cls_user_account_resource_list_op;
    +  op->marker = "marker";
    +  op->path_prefix = "path";
    +  op->max_entries = 20;
    +  ls.push_back(op);
    +}
    +
    +void cls_user_account_resource_list_ret::dump(Formatter *f) const
    +{
    +  encode_json("entries", entries, f);
    +  encode_json("truncated", truncated, f);
    +  encode_json("marker", marker, f);
    +}
    +
     +void cls_user_account_resource_list_ret::generate_test_instances(std::list<cls_user_account_resource_list_ret*>& ls)
    +{
    +  ls.push_back(new cls_user_account_resource_list_ret);
    +  cls_user_account_resource_list_ret *ret = new cls_user_account_resource_list_ret;
    +  cls_user_gen_test_resource(ret->entries.emplace_back());
    +  ret->truncated = true;
    +  ls.push_back(ret);
    +}
    diff --git a/src/cls/user/cls_user_ops.h b/src/cls/user/cls_user_ops.h
    index 7edd1bc15cef..d638896340b8 100644
    --- a/src/cls/user/cls_user_ops.h
    +++ b/src/cls/user/cls_user_ops.h
    @@ -264,4 +264,136 @@ struct cls_user_complete_stats_sync_op {
     WRITE_CLASS_ENCODER(cls_user_complete_stats_sync_op)
     
     
    +struct cls_user_account_resource_add_op {
    +  cls_user_account_resource entry;
    +  bool exclusive = false;
    +  uint32_t limit = 0;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(entry, bl);
    +    encode(exclusive, bl);
    +    encode(limit, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(entry, bl);
    +    decode(exclusive, bl);
    +    decode(limit, bl);
    +    DECODE_FINISH(bl);
    +  }
    +
    +  void dump(ceph::Formatter* f) const;
     +  static void generate_test_instances(std::list<cls_user_account_resource_add_op*>& ls);
    +};
    +WRITE_CLASS_ENCODER(cls_user_account_resource_add_op)
    +
    +struct cls_user_account_resource_get_op {
    +  std::string name;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(name, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(name, bl);
    +    DECODE_FINISH(bl);
    +  }
    +
    +  void dump(ceph::Formatter* f) const;
     +  static void generate_test_instances(std::list<cls_user_account_resource_get_op*>& ls);
    +};
    +WRITE_CLASS_ENCODER(cls_user_account_resource_get_op)
    +
    +struct cls_user_account_resource_get_ret {
    +  cls_user_account_resource entry;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(entry, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(entry, bl);
    +    DECODE_FINISH(bl);
    +  }
    +
    +  void dump(ceph::Formatter* f) const;
     +  static void generate_test_instances(std::list<cls_user_account_resource_get_ret*>& ls);
    +};
    +WRITE_CLASS_ENCODER(cls_user_account_resource_get_ret)
    +
    +struct cls_user_account_resource_rm_op {
    +  std::string name;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(name, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(name, bl);
    +    DECODE_FINISH(bl);
    +  }
    +
    +  void dump(ceph::Formatter* f) const;
     +  static void generate_test_instances(std::list<cls_user_account_resource_rm_op*>& ls);
    +};
    +WRITE_CLASS_ENCODER(cls_user_account_resource_rm_op)
    +
    +struct cls_user_account_resource_list_op {
    +  std::string marker;
    +  std::string path_prefix;
    +  uint32_t max_entries = 0;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(marker, bl);
    +    encode(path_prefix, bl);
    +    encode(max_entries, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(marker, bl);
    +    decode(path_prefix, bl);
    +    decode(max_entries, bl);
    +    DECODE_FINISH(bl);
    +  }
    +
    +  void dump(ceph::Formatter* f) const;
     +  static void generate_test_instances(std::list<cls_user_account_resource_list_op*>& ls);
    +};
    +WRITE_CLASS_ENCODER(cls_user_account_resource_list_op)
    +
    +struct cls_user_account_resource_list_ret {
     +  std::vector<cls_user_account_resource> entries;
    +  bool truncated = false;
    +  std::string marker;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(entries, bl);
    +    encode(truncated, bl);
    +    encode(marker, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(entries, bl);
    +    decode(truncated, bl);
    +    decode(marker, bl);
    +    DECODE_FINISH(bl);
    +  }
    +
    +  void dump(ceph::Formatter* f) const;
     +  static void generate_test_instances(std::list<cls_user_account_resource_list_ret*>& ls);
    +};
    +WRITE_CLASS_ENCODER(cls_user_account_resource_list_ret)
    +
     #endif
    diff --git a/src/cls/user/cls_user_types.cc b/src/cls/user/cls_user_types.cc
    index 0d823f0bea20..23f2044e9636 100644
    --- a/src/cls/user/cls_user_types.cc
    +++ b/src/cls/user/cls_user_types.cc
     @@ -109,3 +109,35 @@ void cls_user_header::generate_test_instances(list<cls_user_header*>& ls)
       cls_user_gen_test_header(h);
       ls.push_back(h);
     }
    +
    +
    +void cls_user_account_header::dump(ceph::Formatter* f) const
    +{
    +  encode_json("count", count, f);
    +}
    +
     +void cls_user_account_header::generate_test_instances(std::list<cls_user_account_header*>& ls)
    +{
    +  ls.push_back(new cls_user_account_header);
    +}
    +
    +void cls_user_account_resource::dump(ceph::Formatter* f) const
    +{
    +  encode_json("name", name, f);
    +  encode_json("path", path, f);
    +  // skip metadata
    +}
    +
    +void cls_user_gen_test_resource(cls_user_account_resource& r)
    +{
    +  r.name = "name";
    +  r.path = "path";
    +}
    +
     +void cls_user_account_resource::generate_test_instances(std::list<cls_user_account_resource*>& ls)
    +{
    +  ls.push_back(new cls_user_account_resource);
    +  auto p = new cls_user_account_resource;
    +  cls_user_gen_test_resource(*p);
    +  ls.push_back(p);
    +}
    diff --git a/src/cls/user/cls_user_types.h b/src/cls/user/cls_user_types.h
    index a139449d3c3e..8193ff4139ac 100644
    --- a/src/cls/user/cls_user_types.h
    +++ b/src/cls/user/cls_user_types.h
    @@ -216,9 +216,57 @@ struct cls_user_header {
     };
     WRITE_CLASS_ENCODER(cls_user_header)
     
    +// omap header for an account index object
    +struct cls_user_account_header {
    +  uint32_t count = 0;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(count, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(count, bl);
    +    DECODE_FINISH(bl);
    +  }
    +  void dump(ceph::Formatter* f) const;
     +  static void generate_test_instances(std::list<cls_user_account_header*>& ls);
    +};
    +WRITE_CLASS_ENCODER(cls_user_account_header)
    +
    +// account resource entry
    +struct cls_user_account_resource {
    +  // index by name for put/delete
    +  std::string name;
    +  // index by path for listing by PathPrefix
    +  std::string path;
    +  // additional opaque metadata depending on resource type
    +  ceph::buffer::list metadata;
    +
    +  void encode(ceph::buffer::list& bl) const {
    +    ENCODE_START(1, 1, bl);
    +    encode(name, bl);
    +    encode(path, bl);
    +    encode(metadata, bl);
    +    ENCODE_FINISH(bl);
    +  }
    +  void decode(ceph::buffer::list::const_iterator& bl) {
    +    DECODE_START(1, bl);
    +    decode(name, bl);
    +    decode(path, bl);
    +    decode(metadata, bl);
    +    DECODE_FINISH(bl);
    +  }
    +  void dump(ceph::Formatter* f) const;
     +  static void generate_test_instances(std::list<cls_user_account_resource*>& ls);
    +};
    +WRITE_CLASS_ENCODER(cls_user_account_resource)
    +
     void cls_user_gen_test_bucket(cls_user_bucket *bucket, int i);
     void cls_user_gen_test_bucket_entry(cls_user_bucket_entry *entry, int i);
     void cls_user_gen_test_stats(cls_user_stats *stats);
     void cls_user_gen_test_header(cls_user_header *h);
    +void cls_user_gen_test_resource(cls_user_account_resource& r);
     
     #endif
    diff --git a/src/cls/version/cls_version_ops.h b/src/cls/version/cls_version_ops.h
    index 62cd1172982a..2eff788ce52c 100644
    --- a/src/cls/version/cls_version_ops.h
    +++ b/src/cls/version/cls_version_ops.h
    @@ -5,6 +5,7 @@
     #define CEPH_CLS_VERSION_OPS_H
     
     #include "cls_version_types.h"
    +#include "common/ceph_json.h"
     
     struct cls_version_set_op {
       obj_version objv;
    @@ -22,6 +23,17 @@ struct cls_version_set_op {
         decode(objv, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_object("objv", objv);
    +  }
    +
     +  static void generate_test_instances(std::list<cls_version_set_op*>& o) {
    +    o.push_back(new cls_version_set_op);
    +    o.push_back(new cls_version_set_op);
    +    o.back()->objv.ver = 123;
    +    o.back()->objv.tag = "foo";
    +  }
     };
     WRITE_CLASS_ENCODER(cls_version_set_op)
     
    @@ -44,6 +56,22 @@ struct cls_version_inc_op {
         decode(conds, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_object("objv", objv);
    +    encode_json("conds", conds, f);
    +  }
    +
     +  static void generate_test_instances(std::list<cls_version_inc_op*>& o) {
    +    o.push_back(new cls_version_inc_op);
    +    o.push_back(new cls_version_inc_op);
    +    o.back()->objv.ver = 123;
    +    o.back()->objv.tag = "foo";
    +    o.back()->conds.push_back(obj_version_cond());
    +    o.back()->conds.back().ver.ver = 123;
    +    o.back()->conds.back().ver.tag = "foo";
    +    o.back()->conds.back().cond = VER_COND_GE;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_version_inc_op)
     
    @@ -66,6 +94,22 @@ struct cls_version_check_op {
         decode(conds, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_object("objv", objv);
    +    encode_json("conds", conds, f);
    +  }
    +
     +  static void generate_test_instances(std::list<cls_version_check_op*>& o) {
    +    o.push_back(new cls_version_check_op);
    +    o.push_back(new cls_version_check_op);
    +    o.back()->objv.ver = 123;
    +    o.back()->objv.tag = "foo";
    +    o.back()->conds.push_back(obj_version_cond());
    +    o.back()->conds.back().ver.ver = 123;
    +    o.back()->conds.back().ver.tag = "foo";
    +    o.back()->conds.back().cond = VER_COND_GE;
    +  }
     };
     WRITE_CLASS_ENCODER(cls_version_check_op)
     
    @@ -85,6 +129,17 @@ struct cls_version_read_ret {
         decode(objv, bl);
         DECODE_FINISH(bl);
       }
    +
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_object("objv", objv);
    +  }
    +
     +  static void generate_test_instances(std::list<cls_version_read_ret*>& o) {
    +    o.push_back(new cls_version_read_ret);
    +    o.push_back(new cls_version_read_ret);
    +    o.back()->objv.ver = 123;
    +    o.back()->objv.tag = "foo";
    +  }
     };
     WRITE_CLASS_ENCODER(cls_version_read_ret)
     
    diff --git a/src/cls/version/cls_version_types.cc b/src/cls/version/cls_version_types.cc
    index b82f6aa8a5dd..735ef7c89857 100644
    --- a/src/cls/version/cls_version_types.cc
    +++ b/src/cls/version/cls_version_types.cc
    @@ -6,12 +6,6 @@
     #include "common/ceph_json.h"
     
     
    -void obj_version::dump(ceph::Formatter *f) const
    -{
    -  f->dump_int("ver", ver);
    -  f->dump_string("tag", tag);
    -}
    -
     void obj_version::decode_json(JSONObj *obj)
     {
       JSONDecoder::decode_json("ver", ver, obj);
    diff --git a/src/cls/version/cls_version_types.h b/src/cls/version/cls_version_types.h
    index 62cc16e33d52..dafa866e1f35 100644
    --- a/src/cls/version/cls_version_types.h
    +++ b/src/cls/version/cls_version_types.h
    @@ -53,7 +53,11 @@ struct obj_version {
                 tag.compare(v.tag) == 0);
       }
     
    -  void dump(ceph::Formatter *f) const;
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_int("ver", ver);
    +    f->dump_string("tag", tag);
    +  }
    +
       void decode_json(JSONObj *obj);
        static void generate_test_instances(std::list<obj_version*>& o);
     };
    @@ -91,6 +95,18 @@ struct obj_version_cond {
         DECODE_FINISH(bl);
       }
     
    +  void dump(ceph::Formatter *f) const {
    +    f->dump_object("ver", ver);
    +    f->dump_unsigned("cond", cond);
    +  }
    +
     +  static void generate_test_instances(std::list<obj_version_cond*>& o) {
    +    o.push_back(new obj_version_cond);
    +    o.push_back(new obj_version_cond);
    +    o.back()->ver.ver = 1;
    +    o.back()->ver.tag = "foo";
    +    o.back()->cond = VER_COND_EQ;
    +  }
     };
     WRITE_CLASS_ENCODER(obj_version_cond)
     
    diff --git a/src/common/AsyncReserver.h b/src/common/AsyncReserver.h
    index b80f9e7df8f6..b98e54ef767c 100644
    --- a/src/common/AsyncReserver.h
    +++ b/src/common/AsyncReserver.h
    @@ -16,6 +16,9 @@
     #define ASYNC_RESERVER_H
     
     #include "common/Formatter.h"
    +#include "common/ceph_context.h"
    +#include "common/ceph_mutex.h"
    +#include "include/Context.h"
     
     #define rdout(x) lgeneric_subdout(cct,reserver,x)
     
    @@ -110,8 +113,10 @@ class AsyncReserver {
           if (it->second.empty()) {
     	queues.erase(it);
           }
    -      f->queue(p.grant);
    -      p.grant = nullptr;
    +      if (p.grant) {
    +	f->queue(p.grant);
    +	p.grant = nullptr;
    +      }
           in_progress[p.item] = p;
           if (p.preempt) {
     	preempt_by_prio.insert(std::make_pair(p.prio, p.item));
    @@ -264,6 +269,38 @@ class AsyncReserver {
         do_queues();
       }
     
    +  /**
    +   * The synchronous version of request_reservation
    +   * Used to handle requests from OSDs that do not support the async interface
    +   * to scrub replica reservations, but still must count towards the max
    +   * active reservations.
    +   */
    +  bool request_reservation_or_fail(
    +      T item		     ///< [in] reservation key
    +  )
    +  {
    +    std::lock_guard l(lock);
    +    ceph_assert(!queue_pointers.count(item) && !in_progress.count(item));
    +
    +    if (in_progress.size() >= max_allowed) {
    +      rdout(10) << fmt::format("{}: request: {} denied", __func__, item)
    +		<< dendl;
    +      return false;
    +    }
    +
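     +    // queue at the highest priority so do_queues() grants this request immediately (capacity was checked above)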
    +    const unsigned prio = UINT_MAX;
    +    Reservation r(item, prio, nullptr, nullptr);
    +    queues[prio].push_back(r);
    +    queue_pointers.insert(std::make_pair(
    +	item, std::make_pair(prio, --(queues[prio]).end())));
    +    do_queues();
    +    // the new request should be in_progress now
    +    ceph_assert(in_progress.count(item));
    +    rdout(10) << fmt::format("{}: request: {} granted", __func__, item)
    +	      << dendl;
    +    return true;
    +  }
    +
       /**
        * Cancels reservation
        *
    diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
    index 695ea7a68b3e..ea3cce166092 100644
    --- a/src/common/CMakeLists.txt
    +++ b/src/common/CMakeLists.txt
    @@ -6,12 +6,13 @@ add_library(common_texttable_obj OBJECT
     
     add_library(common_prioritycache_obj OBJECT
       PriorityCache.cc)
    -add_dependencies(common_prioritycache_obj legacy-option-headers)
    +target_link_libraries(common_prioritycache_obj legacy-option-headers)
     
     if(WIN32)
       add_library(dlfcn_win32 STATIC win32/dlfcn.cc win32/errno.cc)
     endif()
     
    +add_subdirectory(io_exerciser)
     add_subdirectory(options)
     
     set(common_srcs
    @@ -83,6 +84,7 @@ set(common_srcs
       options.cc
       page.cc
       perf_counters.cc
    +  perf_counters_cache.cc
       perf_counters_collection.cc
       perf_counters_key.cc
       perf_histogram.cc
    @@ -191,8 +193,8 @@ target_compile_definitions(common-common-objs PRIVATE
       "CMAKE_INSTALL_LIBDIR=\"${CMAKE_INSTALL_LIBDIR}\""
       "CEPH_INSTALL_FULL_PKGLIBDIR=\"${CEPH_INSTALL_FULL_PKGLIBDIR}\""
       "CEPH_INSTALL_DATADIR=\"${CEPH_INSTALL_DATADIR}\""
    -  $)
    -add_dependencies(common-common-objs legacy-option-headers)
    +  $)
    +target_link_libraries(common-common-objs legacy-option-headers)
     
     set(common_mountcephfs_srcs
       armor.c
    @@ -214,6 +216,7 @@ if(HAVE_INTEL)
         set(CMAKE_ASM_FLAGS "-i ${PROJECT_SOURCE_DIR}/src/isa-l/include/ ${CMAKE_ASM_FLAGS}")
         list(APPEND crc32_srcs
           ${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_00.asm
    +      ${PROJECT_SOURCE_DIR}/src/isa-l/crc/crc32_iscsi_01.asm
           crc32c_intel_fast_zero_asm.s)
       endif(HAVE_NASM_X64)
     elseif(HAVE_POWER8)
    @@ -223,10 +226,16 @@ elseif(HAVE_POWER8)
         list(APPEND crc32_srcs
           crc32c_ppc_asm.S
           crc32c_ppc_fast_zero_asm.S)
    +    set_source_files_properties(crc32c_ppc_asm.S PROPERTIES COMPILE_FLAGS -D__ASSEMBLY__)
       endif(HAVE_PPC64LE)
     elseif(HAVE_ARMV8_CRC)
       list(APPEND crc32_srcs
         crc32c_aarch64.c)
    +elseif(HAVE_S390X)
    +  list(APPEND crc32_srcs
    +    crc32c_s390x.c
    +    crc32c_s390x_le-vx.S
    +  )
     endif(HAVE_INTEL)
     
     add_library(crc32 OBJECT ${crc32_srcs})
    diff --git a/src/common/Cond.h b/src/common/Cond.h
    index f41d0bf40702..2ea4b9e2de95 100644
    --- a/src/common/Cond.h
    +++ b/src/common/Cond.h
    @@ -74,6 +74,7 @@ class C_SafeCond : public Context {
      * until wait() returns.
      */
     class C_SaferCond : public Context {
    +protected:
       ceph::mutex lock;  ///< Mutex to take
       ceph::condition_variable cond;     ///< Cond to signal
       bool done = false; ///< true after finish() has been called
    diff --git a/src/common/DecayCounter.h b/src/common/DecayCounter.h
    index 9455ecc5a33d..30570c72a306 100644
    --- a/src/common/DecayCounter.h
    +++ b/src/common/DecayCounter.h
    @@ -16,7 +16,6 @@
     #define CEPH_DECAYCOUNTER_H
     
     #include "include/buffer.h"
    -#include "common/Formatter.h"
     #include "common/StackStringStream.h"
     #include "common/ceph_time.h"
     
    @@ -24,6 +23,8 @@
     #include 
     #include 
     
    +namespace ceph { class Formatter; }
    +
     /**
      *
      * TODO: normalize value based on some function of half_life, 
    diff --git a/src/common/Finisher.cc b/src/common/Finisher.cc
    index ff931faffc1a..43550f351973 100644
    --- a/src/common/Finisher.cc
    +++ b/src/common/Finisher.cc
    @@ -2,11 +2,40 @@
     // vim: ts=8 sw=2 smarttab
     
     #include "Finisher.h"
    +#include "common/perf_counters.h"
    +
     +#include <fmt/format.h>
     
     #define dout_subsys ceph_subsys_finisher
     #undef dout_prefix
     #define dout_prefix *_dout << "finisher(" << this << ") "
     
    +Finisher::Finisher(CephContext *cct_) :
    +  cct(cct_), finisher_lock(ceph::make_mutex("Finisher::finisher_lock")),
    +  thread_name("fn_anonymous"),
    +  finisher_thread(this) {}
    +
    +Finisher::Finisher(CephContext *cct_, std::string_view name, std::string &&tn) :
    +  cct(cct_), finisher_lock(ceph::make_mutex(fmt::format("Finisher::{}", name))),
    +  thread_name(std::move(tn)),
    +  finisher_thread(this) {
    +  PerfCountersBuilder b(cct, fmt::format("finisher-{}", name),
    +			l_finisher_first, l_finisher_last);
    +  b.add_u64(l_finisher_queue_len, "queue_len");
    +  b.add_time_avg(l_finisher_complete_lat, "complete_latency");
    +  logger = b.create_perf_counters();
    +  cct->get_perfcounters_collection()->add(logger);
    +  logger->set(l_finisher_queue_len, 0);
    +  logger->set(l_finisher_complete_lat, 0);
    +}
    +
    +Finisher::~Finisher() {
    +  if (logger && cct) {
    +    cct->get_perfcounters_collection()->remove(logger);
    +    delete logger;
    +  }
    +}
    +
     void Finisher::start()
     {
       ldout(cct, 10) << __func__ << dendl;
    @@ -20,7 +49,7 @@ void Finisher::stop()
       finisher_stop = true;
       // we don't have any new work to do, but we want the worker to wake up anyway
       // to process the stop condition.
    -  finisher_cond.notify_all();
    +  finisher_cond.notify_one();
       finisher_lock.unlock();
       finisher_thread.join(); // wait until the worker exits completely
       ldout(cct, 10) << __func__ << " finish" << dendl;
    @@ -40,7 +69,7 @@ void Finisher::wait_for_empty()
     
     bool Finisher::is_empty()
     {
    -  std::unique_lock ul(finisher_lock);
    +  const std::lock_guard l{finisher_lock};
       return finisher_queue.empty();
     }
     
    diff --git a/src/common/Finisher.h b/src/common/Finisher.h
    index 9091d0b892a6..acee6594ca4d 100644
    --- a/src/common/Finisher.h
    +++ b/src/common/Finisher.h
    @@ -19,10 +19,8 @@
     #include "include/common_fwd.h"
     #include "common/Thread.h"
     #include "common/ceph_mutex.h"
    -#include "common/perf_counters.h"
     #include "common/Cond.h"
     
    -
     /// Finisher queue length performance counter ID.
     enum {
       l_finisher_first = 997082,
    @@ -37,23 +35,23 @@ enum {
      * contexts to complete is thread-safe.
      */
     class Finisher {
    -  CephContext *cct;
    +  CephContext *const cct;
       ceph::mutex finisher_lock; ///< Protects access to queues and finisher_running.
       ceph::condition_variable finisher_cond; ///< Signaled when there is something to process.
       ceph::condition_variable finisher_empty_cond; ///< Signaled when the finisher has nothing more to process.
    -  bool         finisher_stop; ///< Set when the finisher should stop.
    -  bool         finisher_running; ///< True when the finisher is currently executing contexts.
    -  bool	       finisher_empty_wait; ///< True mean someone wait finisher empty.
    +  bool         finisher_stop = false; ///< Set when the finisher should stop.
    +  bool         finisher_running = false; ///< True when the finisher is currently executing contexts.
     +  bool         finisher_empty_wait = false; ///< True when someone is waiting for the queue to drain.
     
       /// Queue for contexts for which complete(0) will be called.
        std::vector<std::pair<Context*,int>> finisher_queue;
        std::vector<std::pair<Context*,int>> in_progress_queue;
     
    -  std::string thread_name;
    +  const std::string thread_name;
     
       /// Performance counter for the finisher's queue length.
       /// Only active for named finishers.
    -  PerfCounters *logger;
    +  PerfCounters *logger = nullptr;
     
       void *finisher_thread_entry();
     
    @@ -66,56 +64,34 @@ class Finisher {
      public:
       /// Add a context to complete, optionally specifying a parameter for the complete function.
       void queue(Context *c, int r = 0) {
    -    std::unique_lock ul(finisher_lock);
    -    bool was_empty = finisher_queue.empty();
    -    finisher_queue.push_back(std::make_pair(c, r));
    -    if (was_empty) {
    -      finisher_cond.notify_one();
    +    {
    +      const std::lock_guard l{finisher_lock};
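     +      // wake the worker only if it may be idle: nothing queued and nothing currently running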
    +      const bool should_notify = finisher_queue.empty() && !finisher_running;
    +      finisher_queue.push_back(std::make_pair(c, r));
    +      if (should_notify) {
    +	finisher_cond.notify_one();
    +      }
         }
    +
         if (logger)
           logger->inc(l_finisher_queue_len);
       }
     
     -  void queue(std::list<Context*>& ls) {
    +  // TODO use C++20 concept checks instead of SFINAE
     +  template<typename T>
    +  auto queue(T &ls) -> decltype(std::distance(ls.begin(), ls.end()), void()) {
         {
    -      std::unique_lock ul(finisher_lock);
    -      if (finisher_queue.empty()) {
    -	finisher_cond.notify_all();
    -      }
    -      for (auto i : ls) {
    -	finisher_queue.push_back(std::make_pair(i, 0));
    -      }
    -      if (logger)
    -	logger->inc(l_finisher_queue_len, ls.size());
    -    }
    -    ls.clear();
    -  }
     -  void queue(std::deque<Context*>& ls) {
    -    {
    -      std::unique_lock ul(finisher_lock);
    -      if (finisher_queue.empty()) {
    -	finisher_cond.notify_all();
    -      }
    -      for (auto i : ls) {
    +      const std::lock_guard l{finisher_lock};
    +      const bool should_notify = finisher_queue.empty() && !finisher_running;
    +      for (Context *i : ls) {
     	finisher_queue.push_back(std::make_pair(i, 0));
           }
    -      if (logger)
    -	logger->inc(l_finisher_queue_len, ls.size());
    -    }
    -    ls.clear();
    -  }
     -  void queue(std::vector<Context*>& ls) {
    -    {
    -      std::unique_lock ul(finisher_lock);
    -      if (finisher_queue.empty()) {
    -	finisher_cond.notify_all();
    +      if (should_notify) {
    +	finisher_cond.notify_one();
           }
    -      for (auto i : ls) {
    -	finisher_queue.push_back(std::make_pair(i, 0));
    -      }
    -      if (logger)
    -	logger->inc(l_finisher_queue_len, ls.size());
         }
    +    if (logger)
    +      logger->inc(l_finisher_queue_len, ls.size());
         ls.clear();
       }
     
    @@ -137,36 +113,17 @@ class Finisher {
     
       bool is_empty();
     
    +  std::string_view get_thread_name() const noexcept {
    +    return thread_name;
    +  }
    +
       /// Construct an anonymous Finisher.
       /// Anonymous finishers do not log their queue length.
    -  explicit Finisher(CephContext *cct_) :
    -    cct(cct_), finisher_lock(ceph::make_mutex("Finisher::finisher_lock")),
    -    finisher_stop(false), finisher_running(false), finisher_empty_wait(false),
    -    thread_name("fn_anonymous"), logger(0),
    -    finisher_thread(this) {}
    +  explicit Finisher(CephContext *cct_);
     
       /// Construct a named Finisher that logs its queue length.
    -  Finisher(CephContext *cct_, std::string name, std::string tn) :
    -    cct(cct_), finisher_lock(ceph::make_mutex("Finisher::" + name)),
    -    finisher_stop(false), finisher_running(false), finisher_empty_wait(false),
    -    thread_name(tn), logger(0),
    -    finisher_thread(this) {
    -    PerfCountersBuilder b(cct, std::string("finisher-") + name,
    -			  l_finisher_first, l_finisher_last);
    -    b.add_u64(l_finisher_queue_len, "queue_len");
    -    b.add_time_avg(l_finisher_complete_lat, "complete_latency");
    -    logger = b.create_perf_counters();
    -    cct->get_perfcounters_collection()->add(logger);
    -    logger->set(l_finisher_queue_len, 0);
    -    logger->set(l_finisher_complete_lat, 0);
    -  }
    -
    -  ~Finisher() {
    -    if (logger && cct) {
    -      cct->get_perfcounters_collection()->remove(logger);
    -      delete logger;
    -    }
    -  }
    +  Finisher(CephContext *cct_, std::string_view name, std::string &&tn);
    +  ~Finisher();
     };
     
     /// Context that is completed asynchronously on the supplied finisher.
    diff --git a/src/common/Formatter.cc b/src/common/Formatter.cc
    index f121afa07a3e..fd3b2be02214 100644
    --- a/src/common/Formatter.cc
    +++ b/src/common/Formatter.cc
    @@ -16,6 +16,7 @@
     
     #include "HTMLFormatter.h"
     #include "common/escape.h"
    +#include "common/StackStringStream.h"
     #include "include/buffer.h"
     
     #include 
    @@ -29,27 +30,39 @@ namespace ceph {
     std::string
     fixed_u_to_string(uint64_t num, int scale)
     {
    -	std::ostringstream t;
    +  CachedStackStringStream css;
     
    -	t.fill('0');
    -	t.width(scale + 1);
    -	t << num;
    -	int len = t.str().size();
    -	return t.str().substr(0,len - scale) + "." + t.str().substr(len - scale);
    +  css->fill('0');
    +  css->width(scale + 1);
    +  *css << num;
    +  auto len = css->strv().size();
    +
    +  CachedStackStringStream css2;
    +  *css2 << css->strv().substr(0, len - scale)
    +        << "."
    +        << css->strv().substr(len - scale);
    +  return css2->str();
     }
     
     std::string
     fixed_to_string(int64_t num, int scale)
     {
    -	std::ostringstream t;
    -	bool neg = num < 0;
    -	if (neg) num = -num;
    +  CachedStackStringStream css;
    +
    +  bool neg = num < 0;
    +  if (neg) num = -num;
    +
    +  css->fill('0');
    +  css->width(scale + 1);
    +  *css << num;
    +  auto len = css->strv().size();
     
    -	t.fill('0');
    -	t.width(scale + 1);
    -	t << num;
    -	int len = t.str().size();
    -	return (neg ? "-" : "") + t.str().substr(0,len - scale) + "." + t.str().substr(len - scale);
    +  CachedStackStringStream css2;
    +  *css2 << (neg ? "-" : "")
    +        << css->strv().substr(0, len - scale)
    +        << "."
    +        << css->strv().substr(len - scale);
    +  return css2->str();
     }
     
     /*
    @@ -78,10 +91,6 @@ FormatterAttrs::FormatterAttrs(const char *attr, ...)
     
     void Formatter::write_bin_data(const char*, int){}
     
    -Formatter::Formatter() { }
    -
    -Formatter::~Formatter() { }
    -
     Formatter *Formatter::create(std::string_view type,
     			     std::string_view default_type,
     			     std::string_view fallback)
    @@ -116,9 +125,9 @@ Formatter *Formatter::create(std::string_view type,
     
     void Formatter::flush(bufferlist &bl)
     {
    -  std::stringstream os;
    -  flush(os);
    -  bl.append(os.str());
    +  CachedStackStringStream css;
    +  flush(*css);
    +  bl.append(css->strv());
     }
     
     void Formatter::dump_format(std::string_view name, const char *fmt, ...)
    @@ -148,12 +157,6 @@ void Formatter::dump_format_unquoted(std::string_view name, const char *fmt, ...
     
     // -----------------------
     
    -JSONFormatter::JSONFormatter(bool p)
    -: m_pretty(p), m_is_pending_string(false)
    -{
    -  reset();
    -}
    -
     void JSONFormatter::flush(std::ostream& os)
     {
       finish_pending_string();
    @@ -175,30 +178,33 @@ void JSONFormatter::reset()
     
     void JSONFormatter::print_comma(json_formatter_stack_entry_d& entry)
     {
    +  auto& ss = get_ss();
       if (entry.size) {
         if (m_pretty) {
    -      m_ss << ",\n";
    +      ss << ",\n";
           for (unsigned i = 1; i < m_stack.size(); i++)
    -        m_ss << "    ";
    +        ss << "    ";
         } else {
    -      m_ss << ",";
    +      ss << ",";
         }
       } else if (m_pretty) {
    -    m_ss << "\n";
    +    ss << "\n";
         for (unsigned i = 1; i < m_stack.size(); i++)
    -      m_ss << "    ";
    +      ss << "    ";
       }
       if (m_pretty && entry.is_array)
    -    m_ss << "    ";
    +    ss << "    ";
     }
     
     void JSONFormatter::print_quoted_string(std::string_view s)
     {
    -  m_ss << '\"' << json_stream_escaper(s) << '\"';
    +  auto& ss = get_ss();
    +  ss << '\"' << json_stream_escaper(s) << '\"';
     }
     
     void JSONFormatter::print_name(std::string_view name)
     {
    +  auto& ss = get_ss();
       finish_pending_string();
       if (m_stack.empty())
         return;
    @@ -206,19 +212,20 @@ void JSONFormatter::print_name(std::string_view name)
       print_comma(entry);
       if (!entry.is_array) {
         if (m_pretty) {
    -      m_ss << "    ";
    +      ss << "    ";
         }
    -    m_ss << "\"" << name << "\"";
    +    ss << "\"" << name << "\"";
         if (m_pretty)
    -      m_ss << ": ";
    +      ss << ": ";
         else
    -      m_ss << ':';
    +      ss << ':';
       }
       ++entry.size;
     }
     
     void JSONFormatter::open_section(std::string_view name, const char *ns, bool is_array)
     {
    +  auto& ss = get_ss();
       if (handle_open_section(name, ns, is_array)) {
         return;
       }
    @@ -230,9 +237,9 @@ void JSONFormatter::open_section(std::string_view name, const char *ns, bool is_
         print_name(name);
       }
       if (is_array)
    -    m_ss << '[';
    +    ss << '[';
       else
    -    m_ss << '{';
    +    ss << '{';
     
       json_formatter_stack_entry_d n;
       n.is_array = is_array;
    @@ -261,7 +268,7 @@ void JSONFormatter::open_object_section_in_ns(std::string_view name, const char
     
     void JSONFormatter::close_section()
     {
    -
    +  auto& ss = get_ss();
       if (handle_close_section()) {
         return;
       }
    @@ -270,14 +277,14 @@ void JSONFormatter::close_section()
     
       struct json_formatter_stack_entry_d& entry = m_stack.back();
       if (m_pretty && entry.size) {
    -    m_ss << "\n";
    +    ss << "\n";
         for (unsigned i = 1; i < m_stack.size(); i++)
    -      m_ss << "    ";
    +      ss << "    ";
       }
    -  m_ss << (entry.is_array ? ']' : '}');
    +  ss << (entry.is_array ? ']' : '}');
       m_stack.pop_back();
       if (m_pretty && m_stack.empty())
    -    m_ss << "\n";
    +    ss << "\n";
     }
     
     void JSONFormatter::finish_pending_string()
    @@ -289,23 +296,35 @@ void JSONFormatter::finish_pending_string()
       }
     }
     
    +void JSONFormatter::add_value(std::string_view name, double val) {
    +  CachedStackStringStream css;
    +  if (!std::isfinite(val) || std::isnan(val)) {
    +    *css << "null";
    +  } else {
     +    css->precision(std::numeric_limits<double>::max_digits10);
    +    *css << val;
    +  }
    +  add_value(name, css->strv(), false);
    +}
    +
     template 
     void JSONFormatter::add_value(std::string_view name, T val)
     {
    -  std::stringstream ss;
     -  ss.precision(std::numeric_limits<T>::max_digits10);
    -  ss << val;
    -  add_value(name, ss.str(), false);
    +  CachedStackStringStream css;
     +  css->precision(std::numeric_limits<T>::max_digits10);
    +  *css << val;
    +  add_value(name, css->strv(), false);
     }
     
     void JSONFormatter::add_value(std::string_view name, std::string_view val, bool quoted)
     {
    +  auto& ss = get_ss();
       if (handle_value(name, val, quoted)) {
         return;
       }
       print_name(name);
       if (!quoted) {
    -    m_ss << val;
    +    ss << val;
       } else {
         print_quoted_string(val);
       }
    @@ -354,12 +373,12 @@ void JSONFormatter::dump_format_va(std::string_view name, const char *ns, bool q
     
     int JSONFormatter::get_len() const
     {
    -  return m_ss.str().size();
    +  return m_ss.tellp();
     }
     
     void JSONFormatter::write_raw_data(const char *data)
     {
    -  m_ss << data;
    +  get_ss() << data;
     }
     
     const char *XMLFormatter::XML_1_DTD =
    @@ -565,15 +584,15 @@ void XMLFormatter::write_bin_data(const char* buff, int buf_len)
     
     void XMLFormatter::get_attrs_str(const FormatterAttrs *attrs, std::string& attrs_str)
     {
    -  std::stringstream attrs_ss;
    +  CachedStackStringStream css;
     
       for (std::list<std::pair<std::string, std::string> >::const_iterator iter = attrs->attrs.begin();
            iter != attrs->attrs.end(); ++iter) {
         std::pair<std::string, std::string> p = *iter;
    -    attrs_ss << " " << p.first << "=" << "\"" << p.second << "\"";
    +    *css << " " << p.first << "=" << "\"" << p.second << "\"";
       }
     
    -  attrs_str = attrs_ss.str();
    +  attrs_str = css->strv();
     }
     
     void XMLFormatter::open_section_in_ns(std::string_view name, const char *ns, const FormatterAttrs *attrs)
    @@ -942,15 +961,15 @@ void TableFormatter::write_raw_data(const char *data) {
     
     void TableFormatter::get_attrs_str(const FormatterAttrs *attrs, std::string& attrs_str)
     {
    -  std::stringstream attrs_ss;
    +  CachedStackStringStream css;
     
       for (std::list<std::pair<std::string, std::string> >::const_iterator iter = attrs->attrs.begin();
            iter != attrs->attrs.end(); ++iter) {
         std::pair<std::string, std::string> p = *iter;
    -    attrs_ss << " " << p.first << "=" << "\"" << p.second << "\"";
    +    *css << " " << p.first << "=" << "\"" << p.second << "\"";
       }
     
    -  attrs_str = attrs_ss.str();
    +  attrs_str = css->strv();
     }
     
     void TableFormatter::finish_pending_string()
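
The fixed_u_to_string()/fixed_to_string() rewrites above only swap the string machinery (CachedStackStringStream instead of std::ostringstream); the formatting contract is unchanged: zero-pad the integer to scale+1 digits, then insert a decimal point scale digits from the right. A reference version of that contract, for illustration only (not the patched code):

  #include <cstdint>
  #include <sstream>
  #include <string>

  std::string fixed_u_to_string_ref(uint64_t num, int scale) {
    std::ostringstream t;
    t.fill('0');
    t.width(scale + 1);        // at least one digit left of the decimal point
    t << num;
    std::string s = t.str();
    return s.substr(0, s.size() - scale) + "." + s.substr(s.size() - scale);
  }
  // fixed_u_to_string_ref(5, 2)    == "0.05"
  // fixed_u_to_string_ref(1234, 2) == "12.34"
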
    diff --git a/src/common/Formatter.h b/src/common/Formatter.h
    index 1919b018a67c..c237e8ea207d 100644
    --- a/src/common/Formatter.h
    +++ b/src/common/Formatter.h
    @@ -7,12 +7,14 @@
     #include "include/buffer_fwd.h"
     
     #include 
    +#include 
     #include 
     #include 
     #include 
     #include 
     #include 
     #include 
    +#include 
     
     namespace ceph {
     
    @@ -70,8 +72,8 @@ namespace ceph {
     	  Formatter::create(std::forward(params)...));
         }
     
    -    Formatter();
    -    virtual ~Formatter();
    +    Formatter() = default;
    +    virtual ~Formatter() = default;
     
         virtual void enable_line_break() = 0;
         virtual void flush(std::ostream& os) = 0;
    @@ -129,21 +131,52 @@ namespace ceph {
         virtual void write_bin_data(const char* buff, int buf_len);
       };
     
    -  class copyable_sstream : public std::stringstream {
    +  class JSONFormatter : public Formatter {
       public:
    -    copyable_sstream() {}
    -    copyable_sstream(const copyable_sstream& rhs) {
    -      str(rhs.str());
    +    explicit JSONFormatter(bool p = false) : m_pretty(p) {}
    +    JSONFormatter(const JSONFormatter& f) :
    +      m_pretty(f.m_pretty),
    +      m_pending_name(f.m_pending_name),
    +      m_stack(f.m_stack),
    +      m_is_pending_string(f.m_is_pending_string),
    +      m_line_break_enabled(f.m_line_break_enabled)
    +    {
    +      m_ss.str(f.m_ss.str());
    +      m_pending_string.str(f.m_pending_string.str());
    +    }
    +    JSONFormatter(JSONFormatter&& f) :
    +      m_pretty(f.m_pretty),
    +      m_ss(std::move(f.m_ss)),
    +      m_pending_string(std::move(f.m_pending_string)),
    +      m_pending_name(f.m_pending_name),
    +      m_stack(std::move(f.m_stack)),
    +      m_is_pending_string(f.m_is_pending_string),
    +      m_line_break_enabled(f.m_line_break_enabled)
    +    {
         }
    -    copyable_sstream& operator=(const copyable_sstream& rhs) {
    -      str(rhs.str());
    +    JSONFormatter& operator=(const JSONFormatter& f)
    +    {
    +      m_pretty = f.m_pretty;
    +      m_ss.str(f.m_ss.str());
    +      m_pending_string.str(f.m_pending_string.str());
    +      m_pending_name = f.m_pending_name;
    +      m_stack = f.m_stack;
    +      m_is_pending_string = f.m_is_pending_string;
    +      m_line_break_enabled = f.m_line_break_enabled;
           return *this;
         }
    -  };
     
    -  class JSONFormatter : public Formatter {
    -  public:
    -    explicit JSONFormatter(bool p = false);
    +    JSONFormatter& operator=(JSONFormatter&& f)
    +    {
    +      m_pretty = f.m_pretty;
    +      m_ss = std::move(f.m_ss);
    +      m_pending_string = std::move(f.m_pending_string);
    +      m_pending_name = f.m_pending_name;
    +      m_stack = std::move(f.m_stack);
    +      m_is_pending_string = f.m_is_pending_string;
    +      m_line_break_enabled = f.m_line_break_enabled;
    +      return *this;
    +    }
     
         void set_status(int status, const char* status_name) override {};
         void output_header() override {};
    @@ -167,7 +200,7 @@ namespace ceph {
         int get_len() const override;
         void write_raw_data(const char *data) override;
     
    -  protected:
    +protected:
         virtual bool handle_value(std::string_view name, std::string_view s, bool quoted) {
           return false; /* is handling done? */
         }
    @@ -182,33 +215,78 @@ namespace ceph {
     
         int stack_size() { return m_stack.size(); }
     
    -  private:
    +    virtual std::ostream& get_ss() {
    +      return m_ss;
    +    }
    +
    +    void finish_pending_string();
     
    +private:
         struct json_formatter_stack_entry_d {
    -      int size;
    -      bool is_array;
    -      json_formatter_stack_entry_d() : size(0), is_array(false) { }
    +      int size = 0;
    +      bool is_array = false;
         };
     
    -    bool m_pretty;
    +    bool m_pretty = false;
         void open_section(std::string_view name, const char *ns, bool is_array);
         void print_quoted_string(std::string_view s);
         void print_name(std::string_view name);
         void print_comma(json_formatter_stack_entry_d& entry);
    -    void finish_pending_string();
    +    void add_value(std::string_view name, double val);
     
         template 
         void add_value(std::string_view name, T val);
         void add_value(std::string_view name, std::string_view val, bool quoted);
     
    -    copyable_sstream m_ss;
    -    copyable_sstream m_pending_string;
    +    mutable std::stringstream m_ss; // mutable for get_len
    +    std::stringstream m_pending_string;
         std::string m_pending_name;
     -    std::list<json_formatter_stack_entry_d> m_stack;
    -    bool m_is_pending_string;
     +    std::vector<json_formatter_stack_entry_d> m_stack;
    +    bool m_is_pending_string = false;
         bool m_line_break_enabled = false;
       };
     
    +  class JSONFormatterFile : public JSONFormatter {
    +public:
    +    JSONFormatterFile(const std::string& path, bool pretty=false) :
    +      JSONFormatter(pretty),
    +      path(path),
    +      file(path, std::ios::out | std::ios::trunc)
    +    {
    +    }
    +    ~JSONFormatterFile() {
    +      flush();
    +    }
    +
    +    void flush(std::ostream& os) override {
    +      flush();
    +    }
    +    void flush() {
    +      JSONFormatter::finish_pending_string();
    +      file.flush();
    +    }
    +
    +    void reset() override {
    +      JSONFormatter::reset();
    +      file = std::ofstream(path, std::ios::out | std::ios::trunc);
    +    }
    +    int get_len() const override {
    +      return file.tellp();
    +    }
    +    std::ofstream const& get_ofstream() const {
    +      return file;
    +    }
    +
    +protected:
    +    std::ostream& get_ss() override {
    +      return file;
    +    }
    +
    +private:
    +    std::string path;
    +    mutable std::ofstream file; // mutable for get_len
    +  };
    +
       template 
       void add_value(std::string_view name, T val);
     
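
JSONFormatterFile above reuses all of JSONFormatter's emission code by overriding the protected get_ss() hook, so every write lands in an std::ofstream instead of the in-memory stringstream. A stripped-down sketch of that indirection, with hypothetical class names:

  #include <fstream>
  #include <sstream>
  #include <string>

  class Emitter {
  public:
    void emit(const char* s) { get_ss() << s; }   // all writers go through the hook
  protected:
    virtual std::ostream& get_ss() { return buf; }
  private:
    std::stringstream buf;
  };

  class FileEmitter : public Emitter {
  public:
    explicit FileEmitter(const std::string& path)
      : file(path, std::ios::out | std::ios::trunc) {}
  protected:
    std::ostream& get_ss() override { return file; }  // redirect to the file
  private:
    std::ofstream file;
  };
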
    diff --git a/src/common/Graylog.cc b/src/common/Graylog.cc
    index cbd63fab25fd..099acacd803b 100644
    --- a/src/common/Graylog.cc
    +++ b/src/common/Graylog.cc
    @@ -2,6 +2,9 @@
     // vim: ts=8 sw=2 smarttab
     
     #include "Graylog.h"
    +
    +#include  // for std::cerr
    +
     #include "common/Formatter.h"
     #include "common/LogEntry.h"
     #include "log/Entry.h"
    diff --git a/src/common/Graylog.h b/src/common/Graylog.h
    index c8c50131999d..f70ac754cfa7 100644
    --- a/src/common/Graylog.h
    +++ b/src/common/Graylog.h
    @@ -4,7 +4,8 @@
     #ifndef __CEPH_LOG_GRAYLOG_H
     #define __CEPH_LOG_GRAYLOG_H
     
    -#include 
    +#include 
    +#include 
     #include 
     #include 
     
    @@ -66,7 +67,7 @@ class Graylog
       std::string m_logger;
     
       boost::asio::ip::udp::endpoint m_endpoint;
    -  boost::asio::io_service m_io_service;
    +  boost::asio::io_context m_io_service;
     
       std::unique_ptr m_formatter;
       std::unique_ptr m_formatter_section;
    diff --git a/src/common/HeartbeatMap.cc b/src/common/HeartbeatMap.cc
    index 544427092295..246cec9460b1 100644
    --- a/src/common/HeartbeatMap.cc
    +++ b/src/common/HeartbeatMap.cc
    @@ -43,11 +43,11 @@ HeartbeatMap::~HeartbeatMap()
       ceph_assert(m_workers.empty());
     }
     
    -heartbeat_handle_d *HeartbeatMap::add_worker(const string& name, pthread_t thread_id)
    +heartbeat_handle_d *HeartbeatMap::add_worker(string&& name, pthread_t thread_id)
     {
       std::unique_lock locker{m_rwlock};
       ldout(m_cct, 10) << "add_worker '" << name << "'" << dendl;
    -  heartbeat_handle_d *h = new heartbeat_handle_d(name);
    +  heartbeat_handle_d *h = new heartbeat_handle_d(std::move(name));
       ANNOTATE_BENIGN_RACE_SIZED(&h->timeout, sizeof(h->timeout),
                                  "heartbeat_handle_d timeout");
       ANNOTATE_BENIGN_RACE_SIZED(&h->suicide_timeout, sizeof(h->suicide_timeout),
    diff --git a/src/common/HeartbeatMap.h b/src/common/HeartbeatMap.h
    index 6f486b21ca86..401042cc2717 100644
    --- a/src/common/HeartbeatMap.h
    +++ b/src/common/HeartbeatMap.h
    @@ -48,15 +48,15 @@ struct heartbeat_handle_d {
       ceph::timespan suicide_grace = ceph::timespan::zero();
       std::list<heartbeat_handle_d*>::iterator list_item;
     
    -  explicit heartbeat_handle_d(const std::string& n)
    -    : name(n)
    +  explicit heartbeat_handle_d(std::string&& n)
    +    : name(std::move(n))
       { }
     };
     
     class HeartbeatMap {
      public:
       // register/unregister
    -  heartbeat_handle_d *add_worker(const std::string& name, pthread_t thread_id);
    +  heartbeat_handle_d *add_worker(std::string&& name, pthread_t thread_id);
       void remove_worker(const heartbeat_handle_d *h);
     
       // reset the timeout so that it expects another touch within grace amount of time
    diff --git a/src/common/Journald.cc b/src/common/Journald.cc
    index a1321c7eea8e..12e1a97e9983 100644
    --- a/src/common/Journald.cc
    +++ b/src/common/Journald.cc
    @@ -14,6 +14,9 @@
     #include 
     #include 
     #include 
    +
    +#include  // for std::cerr
    +
     #include 
     #include 
     
    @@ -23,7 +26,6 @@
     #include "log/SubsystemMap.h"
     #include "msg/msg_fmt.h"
     
    -
     namespace ceph::logging {
     
     namespace {
    @@ -87,6 +89,8 @@ class EntryEncoderBase {
         m_msg_vec[0].iov_len = static_segment.size();
       }
     
    +  EntryEncoderBase(const EntryEncoderBase&) = delete; // we have self-referencing pointers
    +
       constexpr struct iovec *iovec() { return this->m_msg_vec; }
       constexpr std::size_t iovec_len()
       {
    @@ -125,7 +129,7 @@ MESSAGE
     
         uint64_t msg_len = htole64(e.size());
         meta_buf.resize(meta_buf.size() + sizeof(msg_len));
     -    *(reinterpret_cast<uint64_t*>(meta_buf.end()) - 1) = htole64(e.size());
    +    memcpy(meta_buf.end() - sizeof(msg_len), &msg_len, sizeof(msg_len));
     
         meta_vec().iov_base = meta_buf.data();
         meta_vec().iov_len = meta_buf.size();
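
The Journald change above replaces a cast-and-store through meta_buf.end() with memcpy into the freshly resized tail, avoiding a potentially misaligned write. A hedged sketch of the same append-a-length pattern (the endian conversion is left to the caller, since htole64 is platform-specific):

  #include <cstdint>
  #include <cstring>
  #include <vector>

  // Append a 64-bit length (already converted to little-endian by the caller,
  // e.g. via htole64) to the end of a byte buffer without reinterpret_cast.
  void append_len(std::vector<char>& buf, uint64_t len_le) {
    buf.resize(buf.size() + sizeof(len_le));
    std::memcpy(buf.data() + buf.size() - sizeof(len_le), &len_le, sizeof(len_le));
  }
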
    diff --git a/src/common/LRUSet.h b/src/common/LRUSet.h
    index b62956ba460f..c8c66e854582 100644
    --- a/src/common/LRUSet.h
    +++ b/src/common/LRUSet.h
    @@ -43,6 +43,7 @@ class LRUSet {
       // lru
       boost::intrusive::list<
         Node,
    +    boost::intrusive::constant_time_size,
         boost::intrusive::member_hook<Node, boost::intrusive::list_member_hook<>,
     				  &Node::lru_item>
    diff --git a/src/common/LogClient.cc b/src/common/LogClient.cc
    index 1ba363da790e..d5ae6b753216 100644
    --- a/src/common/LogClient.cc
    +++ b/src/common/LogClient.cc
    @@ -186,13 +186,11 @@ void LogChannel::do_log(clog_type prio, const std::string& s)
     
       // log to syslog?
       if (do_log_to_syslog()) {
    -    ldout(cct,0) << __func__ << " log to syslog"  << dendl;
         e.log_to_syslog(get_log_prio(), get_syslog_facility());
       }
     
       // log to graylog?
       if (do_log_to_graylog()) {
    -    ldout(cct,0) << __func__ << " log to graylog"  << dendl;
         graylog->log_log_entry(&e);
       }
     }
    diff --git a/src/common/LogEntry.cc b/src/common/LogEntry.cc
    index d7b44a2110bd..7bb49432268b 100644
    --- a/src/common/LogEntry.cc
    +++ b/src/common/LogEntry.cc
    @@ -183,7 +183,6 @@ string clog_type_to_string(clog_type t)
           return "crit";
         default:
           ceph_abort();
    -      return 0;
       }
     }
     
    diff --git a/src/common/LogEntry.h b/src/common/LogEntry.h
    index 3ddebbd3043c..f79b76debaf0 100644
    --- a/src/common/LogEntry.h
    +++ b/src/common/LogEntry.h
    @@ -18,6 +18,7 @@
     #include 
     
     #include "include/utime.h"
    +#include "include/utime_fmt.h"
     #include "msg/msg_fmt.h"
     #include "msg/msg_types.h"
     #include "common/entity_name.h"
    @@ -125,6 +126,23 @@ struct LogEntry {
       void dump(ceph::Formatter *f) const;
       static void generate_test_instances(std::list& o);
       static clog_type str_to_level(std::string const &str);
    +  static std::string_view level_to_str(clog_type t) {
    +    switch (t) {
    +    case CLOG_DEBUG:
    +      return "DBG";
    +    case CLOG_INFO:
    +      return "INF";
    +    case CLOG_SEC:
    +      return "SEC";
    +    case CLOG_WARN:
    +      return "WRN";
    +    case CLOG_ERROR:
    +      return "ERR";
    +    case CLOG_UNKNOWN:
    +      return "UNKNOWN";
    +    }
    +    return "???";
    +  }
     };
     WRITE_CLASS_ENCODER_FEATURES(LogEntry)
     
    @@ -194,18 +212,23 @@ inline std::ostream& operator<<(std::ostream& out, const LogEntry& e)
                  << e.channel << " " << e.prio << " " << e.msg;
     }
     
    +#if FMT_VERSION >= 90000
    +template <> struct fmt::formatter : fmt::ostream_formatter {};
    +#endif
    +
     template <> struct fmt::formatter : fmt::formatter {
       template 
    -  auto format(const EntityName& e, FormatContext& ctx) {
    +  auto format(const EntityName& e, FormatContext& ctx) const {
         return formatter::format(e.to_str(), ctx);
       }
     };
     
     template <> struct fmt::formatter : fmt::formatter {
       template 
    -  auto format(const LogEntry& e, FormatContext& ctx) {
    -    return fmt::format_to(ctx.out(), "{} {} ({}) {} : {} {} {}",
    -			  e.stamp, e.name, e.rank, e.seq, e.channel, e.prio, e.msg);
    +  auto format(const LogEntry& e, FormatContext& ctx) const {
    +    return fmt::format_to(ctx.out(), "{} {} ({}) {} : {} [{}] {}",
    +                          e.stamp, e.name, e.rank, e.seq, e.channel,
    +                          LogEntry::level_to_str(e.prio), e.msg);
       }
     };
     
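
Two things happen in the LogEntry.h hunk above: clog priorities gain a compact level_to_str() tag used by the fmt-based formatter, and the formatters are adjusted for fmt 9+, which no longer formats ostream-insertable types implicitly. The opt-in pattern looks roughly like this, shown for a hypothetical type rather than the Ceph ones:

  #include <fmt/format.h>
  #include <fmt/ostream.h>
  #include <ostream>

  struct Level { int v; };
  std::ostream& operator<<(std::ostream& os, const Level& l) {
    return os << "level(" << l.v << ")";
  }

  #if FMT_VERSION >= 90000
  // fmt 9+ requires an explicit opt-in for operator<<-based formatting.
  template <> struct fmt::formatter<Level> : fmt::ostream_formatter {};
  #endif

  // fmt::format("{}", Level{3}) -> "level(3)"
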
    diff --git a/src/common/MemoryModel.cc b/src/common/MemoryModel.cc
    index 0f6ab986f5aa..0f659aca9583 100644
    --- a/src/common/MemoryModel.cc
    +++ b/src/common/MemoryModel.cc
    @@ -1,96 +1,140 @@
    -#include "MemoryModel.h"
    -#include "include/compat.h"
     #include "debug.h"
    +
    +#include "include/compat.h"
    +
    +#include "MemoryModel.h"
     #if defined(__linux__)
     #include 
     #endif
     
    -#include 
    +#include 
    +
    +#include "common/fmt_common.h"
    +
     
     #define dout_subsys ceph_subsys_
     
     using namespace std;
    +using mem_snap_t = MemoryModel::mem_snap_t;
     
    -MemoryModel::MemoryModel(CephContext *cct_)
    -  : cct(cct_)
    +inline bool MemoryModel::cmp_against(
    +    const std::string &ln,
    +    std::string_view param,
    +    long &v) const
     {
    +  if (ln.size() < (param.size() + 10)) {
    +    return false;
    +  }
    +  if (ln.starts_with(param)) {
    +    auto p = ln.c_str();
    +    auto s = p + param.size();
    +    // charconv does not like leading spaces
    +    while (*s && isblank(*s)) {
    +      s++;
    +    }
    +    from_chars(s, p + ln.size(), v);
    +    return true;
    +  }
    +  return false;
     }
     
    -void MemoryModel::_sample(snap *psnap)
    -{
    -  ifstream f;
     
    -  f.open(PROCPREFIX "/proc/self/status");
    -  if (!f.is_open()) {
    -    ldout(cct, 0) << "check_memory_usage unable to open " PROCPREFIX "/proc/self/status" << dendl;
    -    return;
    -  }
    -  while (!f.eof()) {
    -    string line;
    -    getline(f, line);
    -    
    -    if (strncmp(line.c_str(), "VmSize:", 7) == 0)
    -      psnap->size = atol(line.c_str() + 7);
    -    else if (strncmp(line.c_str(), "VmRSS:", 6) == 0)
    -      psnap->rss = atol(line.c_str() + 7);
    -    else if (strncmp(line.c_str(), "VmHWM:", 6) == 0)
    -      psnap->hwm = atol(line.c_str() + 7);
    -    else if (strncmp(line.c_str(), "VmLib:", 6) == 0)
    -      psnap->lib = atol(line.c_str() + 7);
    -    else if (strncmp(line.c_str(), "VmPeak:", 7) == 0)
    -      psnap->peak = atol(line.c_str() + 7);
    -    else if (strncmp(line.c_str(), "VmData:", 7) == 0)
    -      psnap->data = atol(line.c_str() + 7);
    +tl::expected MemoryModel::get_mapped_heap()
    +{
    +  if (!proc_maps.is_open()) {
    +    return tl::unexpected("unable to open proc/maps");
       }
    -  f.close();
    +  // always rewind before reading
    +  proc_maps.clear();
    +  proc_maps.seekg(0);
     
    -  f.open(PROCPREFIX "/proc/self/maps");
    -  if (!f.is_open()) {
    -    ldout(cct, 0) << "check_memory_usage unable to open " PROCPREFIX "/proc/self/maps" << dendl;
    -    return;
    -  }
    +  int64_t heap = 0;
     
    -  long heap = 0;
    -  while (f.is_open() && !f.eof()) {
    +  while (proc_maps.is_open() && !proc_maps.eof()) {
         string line;
    -    getline(f, line);
    -    //ldout(cct, 0) << "line is " << line << dendl;
    +    getline(proc_maps, line);
     
    -    const char *start = line.c_str();
    -    const char *dash = start;
    -    while (*dash && *dash != '-') dash++;
    +    if (line.length() < 48) {
    +      // a malformed line. We expect at least
    +      // '560c03f8d000-560c03fae000 rw-p 00000000 00:00 0'
    +      continue;
    +    }
    +
    +    const char* start = line.c_str();
    +    const char* dash = start;
    +    while (*dash && *dash != '-')
    +      dash++;
         if (!*dash)
           continue;
    -    const char *end = dash + 1;
    -    while (*end && *end != ' ') end++;
    +    const char* end = dash + 1;
    +    while (*end && *end != ' ')
    +      end++;
         if (!*end)
           continue;
    -    unsigned long long as = strtoll(start, 0, 16);
    -    unsigned long long ae = strtoll(dash+1, 0, 16);
    -
    -    //ldout(cct, 0) << std::hex << as << " to " << ae << std::dec << dendl;
     
    +    auto addr_end = end;
         end++;
    -    const char *mode = end;
    -
    -    int skip = 4;
    -    while (skip--) {
    -      end++;
    -      while (*end && *end != ' ') end++;
    -    }
    -    if (*end)
    -      end++;
    -
    -    long size = ae - as;
    -    //ldout(cct, 0) << "size " << size << " mode is '" << mode << "' end is '" << end << "'" << dendl;
    +    const char* mode = end;
     
         /*
          * anything 'rw' and anon is assumed to be heap.
    +     * But we should count lines with inode '0' and '[heap]' as well
          */
    -    if (mode[0] == 'r' && mode[1] == 'w' && !*end)
    +    if (mode[0] != 'r' || mode[1] != 'w') {
    +      continue;
    +    }
    +
    +    auto the_rest = line.substr(5 + end - start);
    +    if (!the_rest.starts_with("00000000 00:00 0")) {
    +      continue;
    +    }
    +
    +    std::string_view final_token{the_rest.begin() + sizeof("00000000 00:00 0") - 1,
    +                                 the_rest.end()};
    +    if (final_token.size() < 3 ||
    +        final_token.ends_with("[heap]") || final_token.ends_with("[stack]")) {
    +      // calculate and sum the size of the heap segment
    +      uint64_t as{0ull};
    +      from_chars(start, dash, as, 16);
    +      uint64_t ae{0ull};
    +      from_chars(dash + 1, addr_end, ae, 16);
    +      //     fmt::print("\t\tas:{:x} ae:{:x} -> {}\n", as, ae, ((ae - as) >> 10));
    +      long size = ae - as;
           heap += size;
    +    }
       }
     
    -  psnap->heap = heap >> 10;
    +  return heap;
    +}
    +
    +
    +tl::expected MemoryModel::full_sample()
    +{
    +  if (!proc_status.is_open()) {
    +    return tl::unexpected("unable to open proc/status");
    +  }
    +  // always rewind before reading
    +  proc_status.clear();
    +  proc_status.seekg(0);
    +
    +  mem_snap_t s;
    +  // we will be looking for 6 entries
    +  int yet_to_find = 6;
    +
    +  while (!proc_status.eof() && yet_to_find > 0) {
    +    string ln;
    +    getline(proc_status, ln);
    +
    +    if (cmp_against(ln, "VmSize:", s.size) ||
    +	cmp_against(ln, "VmRSS:", s.rss) || cmp_against(ln, "VmHWM:", s.hwm) ||
    +	cmp_against(ln, "VmLib:", s.lib) ||
    +	cmp_against(ln, "VmPeak:", s.peak) ||
    +	cmp_against(ln, "VmData:", s.data)) {
    +      yet_to_find--;
    +    }
    +  }
     
    +  // get heap size
    +  s.heap = static_cast(get_mapped_heap().value_or(0));
    +  return s;
     }
    diff --git a/src/common/MemoryModel.h b/src/common/MemoryModel.h
    index ee87c6f3bb50..6cfe3c7acaf9 100644
    --- a/src/common/MemoryModel.h
    +++ b/src/common/MemoryModel.h
    @@ -15,40 +15,67 @@
     #ifndef CEPH_MEMORYMODEL_H
     #define CEPH_MEMORYMODEL_H
     
    +#include 
    +#include 
    +#include 
     #include "include/common_fwd.h"
    +#include "include/compat.h"
    +#include "include/expected.hpp"
    +
     
     class MemoryModel {
     public:
    -  struct snap {
    -    long peak;
    -    long size;
    -    long hwm;
    -    long rss;
    -    long data;
    -    long lib;
    -    
    -    long heap;
    -
    -    snap() : peak(0), size(0), hwm(0), rss(0), data(0), lib(0),
    -	     heap(0)
    -    {}
    -
    -    long get_total() { return size; }
    -    long get_rss() { return rss; }
    -    long get_heap() { return heap; }
    -  } last;
    +  struct mem_snap_t {
    +    long peak{0};
    +    long size{0};
    +    long hwm{0};
    +    long rss{0};
    +    long data{0};
    +    long lib{0};
    +    long heap{0};
    +
    +    long get_total() const { return size; }
    +    long get_rss() const { return rss; }
    +    long get_heap() const { return heap; }
    +  };
     
     private:
    -  CephContext *cct;
    -  void _sample(snap *p);
    +  static inline constexpr const char* proc_stat_fn = PROCPREFIX "/proc/self/status";
    +  static inline constexpr const char* proc_maps_fn = PROCPREFIX "/proc/self/maps";
    +
    +  std::ifstream proc_status{proc_stat_fn};
    +  std::ifstream proc_maps{proc_maps_fn};
    +
    +  /**
    +   * @brief Get the mapped heap size
    +   *
    +   * Read /proc/self/maps to get the heap size.
    +   * \retval the mapped heap size, or an error message if the file had not been opened
    +   *    when the object was constructed.
    +   */
    +  tl::expected get_mapped_heap();
    +
    +  /**
    +   * @brief Compare a line against an expected data label
    +   *
    +   * If the line starts with the expected label, extract the value and store it in v.
    +   * \retval true if the line starts with the expected label
    +   */
    +  bool cmp_against(const std::string& ln, std::string_view param, long& v) const;
     
     public:
    -  explicit MemoryModel(CephContext *cct);
    -  void sample(snap *p = 0) {
    -    _sample(&last);
    -    if (p)
    -      *p = last;
    -  }
    +  /**
    +   * @brief extract memory usage information from /proc/self/status &
    +   *        /proc/self/maps
    +   *
    +   * Read /proc/self/status and /proc/self/maps to get memory usage information.
    +   * \retval a structure containing the memory usage information, or an error
    +   *    message if /proc/status had not been opened when the object was
    +   *    constructed.
    +   *    Note that no error is returned if only /proc/maps is not open (the heap
    +   *    size will be reported as 0).
    +   */
    +  tl::expected full_sample();
     };
     
     #endif
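
MemoryModel::full_sample() now reports failure through tl::expected instead of silently leaving the snapshot untouched, so callers decide how to react to a missing /proc file. A hedged sketch of the calling convention, assuming the standalone tl::expected header and a simplified snapshot type:

  #include <string>
  #include <tl/expected.hpp>

  struct Snap { long rss = 0; long heap = 0; };

  tl::expected<Snap, std::string> sample(bool proc_ok) {
    if (!proc_ok)
      return tl::unexpected(std::string("unable to open proc/status"));
    return Snap{1234, 5678};
  }

  // Callers branch on the result instead of trusting a possibly-stale snapshot:
  //   if (auto s = sample(true); s) { /* use s->rss, s->heap */ }
  //   else                          { /* log s.error() */ }
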
    diff --git a/src/common/OpQueue.h b/src/common/OpQueue.h
    index 0204f4b44039..07104b21f538 100644
    --- a/src/common/OpQueue.h
    +++ b/src/common/OpQueue.h
    @@ -16,6 +16,7 @@
     #define OP_QUEUE_H
     
     #include "include/msgr.h"
    +#include "osd/osd_types.h"
     
     #include 
     #include 
    @@ -66,6 +67,9 @@ class OpQueue {
       // Human readable brief description of queue and relevant parameters
       virtual void print(std::ostream &f) const = 0;
     
    +  // Get the type of OpQueue implementation
    +  virtual op_queue_type_t get_type() const = 0;
    +
       // Don't leak resources on destruction
       virtual ~OpQueue() {};
     };
    diff --git a/src/common/Preforker.h b/src/common/Preforker.h
    index d34179b40204..d25d5dd5adae 100644
    --- a/src/common/Preforker.h
    +++ b/src/common/Preforker.h
    @@ -126,7 +126,7 @@ class Preforker {
         }
         return r;
       }
    -  void exit(int r) {
    +  [[noreturn]] void exit(int r) {
         if (is_child())
             signal_exit(r);
         ::exit(r);
    diff --git a/src/common/PrioritizedQueue.h b/src/common/PrioritizedQueue.h
    index 9adf21aafe11..0c006795eb85 100644
    --- a/src/common/PrioritizedQueue.h
    +++ b/src/common/PrioritizedQueue.h
    @@ -345,7 +345,11 @@ class PrioritizedQueue : public OpQueue  {
       }
     
       void print(std::ostream &ostream) const final {
    -    ostream << "PrioritizedQueue";
    +    ostream << get_op_queue_type_name(get_type());
    +  }
    +
    +  op_queue_type_t get_type() const final {
    +    return op_queue_type_t::PrioritizedQueue;
       }
     };
     
    diff --git a/src/common/RefCountedObj.h b/src/common/RefCountedObj.h
    index ef966463cda9..a26677573996 100644
    --- a/src/common/RefCountedObj.h
    +++ b/src/common/RefCountedObj.h
    @@ -180,6 +180,12 @@ struct RefCountedWaitObject {
       }
     };
     
    +static inline void intrusive_ptr_add_ref(RefCountedWaitObject *p) {
    +  p->get();
    +}
    +static inline void intrusive_ptr_release(RefCountedWaitObject *p) {
    +  p->put();
    +}
     #endif // !defined(WITH_SEASTAR)|| defined(WITH_ALIEN)
     
     static inline void intrusive_ptr_add_ref(const RefCountedObject *p) {
    @@ -196,7 +202,7 @@ struct UniquePtrDeleter
         p->put();
       }
     };
    -}
    +} // namespace TOPNSPC::common
     using RefCountedPtr = ceph::ref_t;
     
     #endif
    diff --git a/src/common/SloppyCRCMap.cc b/src/common/SloppyCRCMap.cc
    index ec9cbdf53a6a..f82a70701d21 100644
    --- a/src/common/SloppyCRCMap.cc
    +++ b/src/common/SloppyCRCMap.cc
    @@ -73,7 +73,7 @@ void SloppyCRCMap::truncate(uint64_t offset)
       offset -= offset % block_size;
       std::map<uint64_t,uint32_t>::iterator p = crc_map.lower_bound(offset);
       while (p != crc_map.end())
    -    crc_map.erase(p++);
    +    p = crc_map.erase(p);
     }
     
     void SloppyCRCMap::zero(uint64_t offset, uint64_t len)
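
The truncate() fix above switches from the erase(p++) post-increment pattern to the C++11 idiom where erase() returns the next valid iterator. Both are legal for std::map, but the returned-iterator form also generalizes to containers where erasing invalidates more than the erased element. A minimal illustration:

  #include <cstdint>
  #include <map>

  // Drop every entry at or above `offset` (same shape as SloppyCRCMap::truncate).
  void erase_from(std::map<uint64_t, uint32_t>& m, uint64_t offset) {
    auto p = m.lower_bound(offset);
    while (p != m.end())
      p = m.erase(p);          // erase() hands back the next valid iterator
  }
  // Equivalent, and simpler still: m.erase(m.lower_bound(offset), m.end());
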
    diff --git a/src/common/StackStringStream.h b/src/common/StackStringStream.h
    index 3324e7add353..6a144fb938af 100644
    --- a/src/common/StackStringStream.h
    +++ b/src/common/StackStringStream.h
    @@ -18,10 +18,9 @@
     #include 
     
     #include 
    -#include 
     #include 
     #include 
    -#include 
    +#include 
     #include 
     #include 
     
    @@ -77,6 +76,8 @@ class StackStringBuf : public std::basic_streambuf
         if (traits_type::not_eof(c)) {
           char str = traits_type::to_char_type(c);
           vec.push_back(str);
    +      setp(vec.data(), vec.data() + vec.size());
    +      pbump(vec.size());
           return c;
         } else {
           return traits_type::eof();
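
The StackStringBuf::overflow() fix above matters because push_back() may reallocate the underlying storage: after growing, the put area has to be re-pointed at the new buffer and pbump() advanced past the bytes already written, or the next insertion would write through stale pointers. A hedged sketch of a vector-backed streambuf with that handling (a plain std::vector here, not the Ceph stack-allocated buffer):

  #include <streambuf>
  #include <vector>

  class VecBuf : public std::streambuf {
    std::vector<char> vec;
  protected:
    int_type overflow(int_type c) override {
      if (traits_type::eq_int_type(c, traits_type::eof()))
        return traits_type::eof();
      vec.push_back(traits_type::to_char_type(c));  // may reallocate
      // Re-establish the put area over the (possibly moved) storage and
      // mark everything already in the vector as written.
      setp(vec.data(), vec.data() + vec.size());
      pbump(static_cast<int>(vec.size()));
      return c;
    }
  };
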
    diff --git a/src/common/SubProcess.cc b/src/common/SubProcess.cc
    index 1faf33e36eee..8f28ff25ca21 100644
    --- a/src/common/SubProcess.cc
    +++ b/src/common/SubProcess.cc
    @@ -4,6 +4,9 @@
     #include 
     #include 
     #endif
    +#ifdef __linux__
    +#include 
    +#endif
     #include 
     #include 
     #include 
    @@ -200,6 +203,12 @@ int SubProcess::spawn() {
         int maxfd = sysconf(_SC_OPEN_MAX);
         if (maxfd == -1)
           maxfd = 16384;
    +
    +#if defined(__linux__) && defined(SYS_close_range)
    +    if (::syscall(SYS_close_range, STDERR_FILENO + 1, ~0U, 0) == 0)
    +      maxfd = STDERR_FILENO;
    +#endif
    +
         for (int fd = 0; fd <= maxfd; fd++) {
           if (fd == STDIN_FILENO && stdin_op != CLOSE)
     	continue;
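
The SubProcess change above uses the Linux close_range(2) syscall, when available, to close every descriptor above stderr in one call instead of looping up to sysconf(_SC_OPEN_MAX); the loop remains as a fallback. A hedged sketch of the same guard (Linux-only, and assumes <sys/syscall.h> defines SYS_close_range):

  #include <unistd.h>
  #if defined(__linux__)
  #include <sys/syscall.h>
  #endif

  // Close all fds above stderr; fall back to a bounded loop if the
  // syscall is unavailable or fails.
  static void close_high_fds() {
    long maxfd = sysconf(_SC_OPEN_MAX);
    if (maxfd == -1)
      maxfd = 16384;
  #if defined(__linux__) && defined(SYS_close_range)
    if (::syscall(SYS_close_range, STDERR_FILENO + 1, ~0U, 0) == 0)
      return;            // everything above stderr is already closed
  #endif
    for (int fd = STDERR_FILENO + 1; fd <= maxfd; fd++)
      close(fd);
  }
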
    diff --git a/src/common/Thread.cc b/src/common/Thread.cc
    index 9a7a31923c1b..c714aa0aa879 100644
    --- a/src/common/Thread.cc
    +++ b/src/common/Thread.cc
    @@ -83,7 +83,7 @@ void *Thread::entry_wrapper()
       if (pid && cpuid >= 0)
         _set_affinity(cpuid);
     
    -  ceph_pthread_setname(pthread_self(), thread_name.c_str());
    +  ceph_pthread_setname(thread_name.c_str());
       return entry();
     }
     
    @@ -203,24 +203,6 @@ int Thread::set_affinity(int id)
     // Functions for std::thread
     // =========================
     
    -void set_thread_name(std::thread& t, const std::string& s) {
    -  int r = ceph_pthread_setname(t.native_handle(), s.c_str());
    -  if (r != 0) {
    -    throw std::system_error(r, std::generic_category());
    -  }
    -}
    -std::string get_thread_name(const std::thread& t) {
    -  std::string s(256, '\0');
    -
    -  int r = ceph_pthread_getname(const_cast(t).native_handle(),
    -			       s.data(), s.length());
    -  if (r != 0) {
    -    throw std::system_error(r, std::generic_category());
    -  }
    -  s.resize(std::strlen(s.data()));
    -  return s;
    -}
    -
     void kill(std::thread& t, int signal)
     {
       auto r = ceph_pthread_kill(t.native_handle(), signal);
    diff --git a/src/common/Thread.h b/src/common/Thread.h
    index 5242fb5f3075..8dc0e6c3cbed 100644
    --- a/src/common/Thread.h
    +++ b/src/common/Thread.h
    @@ -17,13 +17,15 @@
     #define CEPH_THREAD_H
     
     #include 
    +#include 
     #include 
    -#include 
     #include 
    +#include 
     
     #include 
     #include 
     
    +#include "include/ceph_assert.h"
     #include "include/compat.h"
     
     extern pid_t ceph_gettid();
    @@ -65,8 +67,6 @@ class Thread {
     
     // Functions for with std::thread
     
    -void set_thread_name(std::thread& t, const std::string& s);
    -std::string get_thread_name(const std::thread& t);
     void kill(std::thread& t, int signal);
     
     template
    @@ -75,7 +75,7 @@ std::thread make_named_thread(std::string_view n,
     			      Args&& ...args) {
     
       return std::thread([n = std::string(n)](auto&& fun, auto&& ...args) {
    -		       ceph_pthread_setname(pthread_self(), n.data());
    +		       ceph_pthread_setname(n.data());
     		       std::invoke(std::forward(fun),
     				   std::forward(args)...);
     		     }, std::forward(fun), std::forward(args)...);
    diff --git a/src/common/Throttle.h b/src/common/Throttle.h
    index e190b946c458..fb5d949b438b 100644
    --- a/src/common/Throttle.h
    +++ b/src/common/Throttle.h
    @@ -6,7 +6,7 @@
     
     #include 
     #include 
    -#include 
    +#include 
     #include 
     #include 
     
    diff --git a/src/common/Timer.cc b/src/common/Timer.cc
    index 2a3277a27659..a7c35fb8ef52 100644
    --- a/src/common/Timer.cc
    +++ b/src/common/Timer.cc
    @@ -102,6 +102,7 @@ void CommonSafeTimer::timer_thread()
           }
           #endif
     
    +      ldout(cct, 20) << "timer_thread going to execute and remove the top of a schedule sized " << schedule.size() << dendl;
           Context *callback = p->second;
           events.erase(callback);
           schedule.erase(p);
    @@ -120,10 +121,11 @@ void CommonSafeTimer::timer_thread()
         if (!safe_callbacks && stopping)
           break;
     
    -    ldout(cct,20) << "timer_thread going to sleep" << dendl;
         if (schedule.empty()) {
    +      ldout(cct, 20) << "timer_thread going to sleep with an empty schedule" << dendl;
           cond.wait(l);
         } else {
    +      ldout(cct, 20) << "timer_thread going to sleep with a schedule size " << schedule.size() << dendl;
           auto when = schedule.begin()->first;
           cond.wait_until(l, when);
         }
    diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc
    index 32a1ab472a89..b888d9334808 100644
    --- a/src/common/TrackedOp.cc
    +++ b/src/common/TrackedOp.cc
    @@ -90,7 +90,7 @@ void OpHistory::_insert_delayed(const utime_t& now, TrackedOpRef op)
       arrived.insert(make_pair(op->get_initiated(), op));
       if (opduration >= history_slow_op_threshold.load()) {
         slow_op.insert(make_pair(op->get_initiated(), op));
    -    logger->inc(l_osd_slow_op_count);
    +    logger->inc(l_trackedop_slow_op_count);
       }
       cleanup(now);
     }
    @@ -204,17 +204,14 @@ void OpHistory::dump_slow_ops(utime_t now, Formatter *f, set filters)
       cleanup(now);
       f->open_object_section("OpHistory slow ops");
       f->dump_int("num to keep", history_slow_op_size.load());
    -  f->dump_int("threshold to keep", history_slow_op_threshold.load());
    +  f->dump_float("threshold to keep", history_slow_op_threshold.load());
       {
         f->open_array_section("Ops");
     -    for (set<pair<utime_t, TrackedOpRef> >::const_iterator i =
    -	   slow_op.begin();
    -	 i != slow_op.end();
    -	 ++i) {
    -      if (!i->second->filter_out(filters))
    +    for ([[maybe_unused]] const auto& [t, op] : slow_op) {
    +      if (!op->filter_out(filters))
             continue;
           f->open_object_section("Op");
    -      i->second->dump(now, f, OpTracker::default_dumper);
    +      op->dump(now, f, OpTracker::default_dumper);
           f->close_section();
         }
         f->close_section();
    @@ -342,12 +339,15 @@ bool OpTracker::visit_ops_in_flight(utime_t* oldest_secs,
       for (const auto sdata : sharded_in_flight_list) {
         ceph_assert(sdata);
         std::lock_guard locker(sdata->ops_in_flight_lock_sharded);
    -    if (!sdata->ops_in_flight_sharded.empty()) {
    -      utime_t oldest_op_tmp =
    -	sdata->ops_in_flight_sharded.front().get_initiated();
    +    for (auto& op : sdata->ops_in_flight_sharded) {
    +      if (!op.warn_interval_multiplier || op.is_continuous())
    +	continue;
    +
    +      utime_t oldest_op_tmp = op.get_initiated();
           if (oldest_op_tmp < oldest_op) {
             oldest_op = oldest_op_tmp;
           }
    +      break;
         }
         std::transform(std::begin(sdata->ops_in_flight_sharded),
                        std::end(sdata->ops_in_flight_sharded),
    @@ -391,6 +391,9 @@ bool OpTracker::with_slow_ops_in_flight(utime_t* oldest_secs,
           // no more slow ops in flight
           return false;
         }
    +    if (op.is_continuous()) {
    +      return true; /* skip reporting */
    +    }
         if (!op.warn_interval_multiplier)
           return true;
         slow++;
    @@ -505,6 +508,7 @@ void TrackedOp::dump(utime_t now, Formatter *f, OpTracker::dumper lambda) const
       f->dump_stream("initiated_at") << get_initiated();
       f->dump_float("age", now - get_initiated());
       f->dump_float("duration", get_duration());
    +  f->dump_bool("continuous", is_continuous());
       {
         f->open_object_section("type_data");
         lambda(*this, f);
    diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h
    index 238f1c7ac7c5..57d73038364d 100644
    --- a/src/common/TrackedOp.h
    +++ b/src/common/TrackedOp.h
    @@ -53,9 +53,9 @@ class OpHistoryServiceThread : public Thread
     };
     
     enum {
    -  l_osd_slow_op_first = 1000,
    -  l_osd_slow_op_count,
    -  l_osd_slow_op_last,
    +  l_trackedop_slow_op_first = 1000,
    +  l_trackedop_slow_op_count,
    +  l_trackedop_slow_op_last,
     };
     
     class OpHistory {
    @@ -68,7 +68,7 @@ class OpHistory {
       std::atomic_size_t history_size{0};
       std::atomic_uint32_t history_duration{0};
       std::atomic_size_t history_slow_op_size{0};
    -  std::atomic_uint32_t history_slow_op_threshold{0};
     +  std::atomic<float> history_slow_op_threshold{0};
       std::atomic_bool shutdown{false};
       OpHistoryServiceThread opsvc;
       friend class OpHistoryServiceThread;
    @@ -76,9 +76,11 @@ class OpHistory {
     
     public:
       OpHistory(CephContext *c) : cct(c), opsvc(this) {
    -    PerfCountersBuilder b(cct, "osd-slow-ops",
    -                         l_osd_slow_op_first, l_osd_slow_op_last);
    -    b.add_u64_counter(l_osd_slow_op_count, "slow_ops_count",
    +    PerfCountersBuilder b(cct, "trackedop",
    +                         l_trackedop_slow_op_first, l_trackedop_slow_op_last);
    +    b.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
    +
    +    b.add_u64_counter(l_trackedop_slow_op_count, "slow_ops_count",
                           "Number of operations taking over ten second");
     
         logger.reset(b.create_perf_counters());
    @@ -111,7 +113,7 @@ class OpHistory {
         history_size = new_size;
         history_duration = new_duration;
       }
    -  void set_slow_op_size_and_threshold(size_t new_size, uint32_t new_threshold) {
    +  void set_slow_op_size_and_threshold(size_t new_size, float new_threshold) {
         history_slow_op_size = new_size;
         history_slow_op_threshold = new_threshold;
       }
    @@ -142,7 +144,7 @@ class OpTracker {
       void set_history_size_and_duration(uint32_t new_size, uint32_t new_duration) {
         history.set_size_and_duration(new_size, new_duration);
       }
    -  void set_history_slow_op_size_and_threshold(uint32_t new_size, uint32_t new_threshold) {
    +  void set_history_slow_op_size_and_threshold(uint32_t new_size, float new_threshold) {
         history.set_slow_op_size_and_threshold(new_size, new_threshold);
       }
       bool is_tracking() const {
    @@ -204,10 +206,15 @@ class OpTracker {
       }
       ~OpTracker();
     
     -  template <typename T, typename U>
    -  typename T::Ref create_request(U params)
    +  // NB: P is ref-like, i.e. `params` should be dereferenced for members
     +  template <typename R, typename P>
    +  typename R::Ref create_request(P params)
       {
    -    typename T::Ref retval(new T(params, this));
    +    constexpr bool enable_mark_continuous = requires(typename R::Ref r, P p) {
     +      { p->is_continuous() } -> std::same_as<bool>;
    +      r->mark_continuous();
    +    };
    +    typename R::Ref retval(new R(params, this));
         retval->tracking_start();
         if (is_tracking()) {
           retval->mark_event("header_read", params->get_recv_stamp());
    @@ -215,21 +222,28 @@ class OpTracker {
           retval->mark_event("all_read", params->get_recv_complete_stamp());
           retval->mark_event("dispatched", params->get_dispatch_stamp());
         }
    -
    +    if constexpr (enable_mark_continuous) {
    +      if (params->is_continuous()) {
    +        retval->mark_continuous();
    +      }
    +    }
         return retval;
       }
     };
     
     class TrackedOp : public boost::intrusive::list_base_hook<> {
    -private:
    +public:
       friend class OpHistory;
       friend class OpTracker;
     
    -  boost::intrusive::list_member_hook<> tracker_item;
    +  static const uint64_t FLAG_CONTINUOUS = (1<<1);
     
    +private:
    +  boost::intrusive::list_member_hook<> tracker_item;
     public:
       typedef boost::intrusive::list<
       TrackedOp,
    +  boost::intrusive::constant_time_size,
       boost::intrusive::member_hook<
         TrackedOp,
         boost::intrusive::list_member_hook<>,
    @@ -243,6 +257,7 @@ class TrackedOp : public boost::intrusive::list_base_hook<> {
         }
       };
     
    +
     protected:
       OpTracker *tracker;          ///< the tracker we are associated with
       std::atomic_int nref = {0};  ///< ref count
    @@ -281,6 +296,14 @@ class TrackedOp : public boost::intrusive::list_base_hook<> {
         STATE_HISTORY
       };
       std::atomic state = {STATE_UNTRACKED};
    +  uint64_t flags = 0;
    +
    +  void mark_continuous() {
    +    flags |= FLAG_CONTINUOUS;
    +  }
    +  bool is_continuous() const {
    +    return flags & FLAG_CONTINUOUS;
    +  }
     
       TrackedOp(OpTracker *_tracker, const utime_t& initiated) :
         tracker(_tracker),
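
create_request() above detects at compile time whether the message type exposes is_continuous() (and only then propagates the flag to the new op) using a requires-expression evaluated into a constexpr bool. A minimal sketch of that detection idiom, with hypothetical types rather than the OpTracker templates:

  #include <concepts>

  struct PlainMsg {};
  struct StreamMsg { bool is_continuous() const { return true; } };

  struct Op {
    bool continuous = false;
    void mark_continuous() { continuous = true; }
  };

  template <typename P>
  Op make_op(const P& params) {
    Op op;
    // True only if P has a `bool is_continuous() const` we can call.
    constexpr bool has_continuous = requires(const P& p) {
      { p.is_continuous() } -> std::same_as<bool>;
    };
    if constexpr (has_continuous) {
      if (params.is_continuous())
        op.mark_continuous();
    }
    return op;   // make_op(PlainMsg{}) never touches mark_continuous()
  }
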
    diff --git a/src/common/WeightedPriorityQueue.h b/src/common/WeightedPriorityQueue.h
    index cf34709b9794..c8d92b5e05f2 100644
    --- a/src/common/WeightedPriorityQueue.h
    +++ b/src/common/WeightedPriorityQueue.h
    @@ -346,7 +346,11 @@ class WeightedPriorityQueue :  public OpQueue 
         }
     
         void print(std::ostream &ostream) const final {
    -      ostream << "WeightedPriorityQueue";
    +      ostream << get_op_queue_type_name(get_type());
    +    }
    +
    +    op_queue_type_t get_type() const final {
    +      return op_queue_type_t::WeightedPriorityQueue;
         }
     };
     
    diff --git a/src/common/admin_finisher.h b/src/common/admin_finisher.h
    new file mode 100644
    index 000000000000..f087b5edf474
    --- /dev/null
    +++ b/src/common/admin_finisher.h
    @@ -0,0 +1,21 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright (C) 2024 IBM, Inc.
    + *
    + * This is free software; you can redistribute it and/or modify it under the
    + * terms of the GNU Lesser General Public License version 2.1, as published by
    + * the Free Software Foundation.  See file COPYING.
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +
    +#include "include/buffer.h"
    +
     +typedef std::function<void(int, std::string_view, ceph::bufferlist&)> asok_finisher;
    diff --git a/src/common/admin_socket.cc b/src/common/admin_socket.cc
    index 8a7e0c721971..55b87de32072 100644
    --- a/src/common/admin_socket.cc
    +++ b/src/common/admin_socket.cc
    @@ -12,8 +12,17 @@
      *
      */
     #include 
    +#include 
     #include 
     
    +#ifndef WIN32
    +#include 
    +#endif
    +
    +#include 
    +
    +#include 
    +
     #include "common/admin_socket.h"
     #include "common/admin_socket_client.h"
     #include "common/dout.h"
    @@ -36,6 +45,7 @@
     #include "include/ceph_assert.h"
     #include "include/compat.h"
     #include "include/sock_compat.h"
    +#include "fmt/format.h"
     
     #define dout_subsys ceph_subsys_asok
     #undef dout_prefix
    @@ -421,7 +431,7 @@ void AdminSocket::do_tell_queue()
         execute_command(
           m->cmd,
           m->get_data(),
    -      [m](int r, const std::string& err, bufferlist& outbl) {
    +      [m](int r, std::string_view err, bufferlist& outbl) {
     	auto reply = new MCommandReply(r, err);
     	reply->set_tid(m->get_tid());
     	reply->set_data(outbl);
    @@ -437,7 +447,7 @@ void AdminSocket::do_tell_queue()
         execute_command(
           m->cmd,
           m->get_data(),
    -      [m](int r, const std::string& err, bufferlist& outbl) {
    +      [m](int r, std::string_view err, bufferlist& outbl) {
     	auto reply = new MMonCommandAck(m->cmd, r, err, 0);
     	reply->set_tid(m->get_tid());
     	reply->set_data(outbl);
    @@ -468,7 +478,7 @@ int AdminSocket::execute_command(
       execute_command(
         cmd,
         inbl,
    -    [&errss, outbl, &fin](int r, const std::string& err, bufferlist& out) {
    +    [&errss, outbl, &fin](int r, std::string_view err, bufferlist& out) {
           errss << err;
           *outbl = std::move(out);
           fin.finish(r);
    @@ -484,7 +494,7 @@ int AdminSocket::execute_command(
     void AdminSocket::execute_command(
       const std::vector& cmdvec,
       const bufferlist& inbl,
    -  std::function on_finish)
    +  asok_finisher on_finish)
     {
       cmdmap_t cmdmap;
       string format;
    @@ -504,7 +514,46 @@ void AdminSocket::execute_command(
     		     empty);
       }
     
    -  auto f = Formatter::create(format, "json-pretty", "json-pretty");
    +  ldout(m_cct, 20) << __func__ << ": format is " << format << " prefix is " << prefix << dendl;
    +
    +  string output;
    +  try {
    +    cmd_getval(cmdmap, "output-file", output);
    +    if (!output.empty()) {
    +      ldout(m_cct, 20) << __func__ << ": output file is " << output << dendl;
    +    }
    +  } catch (const bad_cmd_get& e) {
    +    output = "";
    +  }
    +
    +  if (output == ":tmp:") {
     +    auto path = m_cct->_conf.get_val<std::string>("tmp_file_template");
    +    if (int fd = mkstemp(path.data()); fd >= 0) {
    +      close(fd);
    +      output = path;
    +      ldout(m_cct, 20) << __func__ << ": output file created in tmp_dir is " << output << dendl;
    +    } else {
    +      return on_finish(-errno, "temporary output file could not be opened", empty);
    +    }
    +  }
    +
    +  Formatter* f;
    +  if (!output.empty()) {
    +    if (!(format == "json" || format == "json-pretty")) {
    +      return on_finish(-EINVAL, "unsupported format for --output-file", empty);
    +    }
    +    ldout(m_cct, 10) << __func__ << ": opening file for json output: " << output << dendl;
    +    bool pretty = (format == "json-pretty");
    +    auto* jff = new JSONFormatterFile(output, pretty);
    +    auto&& of = jff->get_ofstream();
    +    if (!of.is_open()) {
    +      delete jff;
    +      return on_finish(-EIO, "output file could not be opened", empty);
    +    }
    +    f = jff;
    +  } else {
    +    f = Formatter::create(format, "json-pretty", "json-pretty");
    +  }
     
       auto [retval, hook] = find_matched_hook(prefix, cmdmap);
       switch (retval) {
    @@ -522,10 +571,27 @@ void AdminSocket::execute_command(
     
       hook->call_async(
         prefix, cmdmap, f, inbl,
    -    [f, on_finish](int r, const std::string& err, bufferlist& out) {
    +    [f, output, on_finish, m_cct=m_cct](int r, std::string_view err, bufferlist& out) {
           // handle either existing output in bufferlist *or* via formatter
    -      if (r >= 0 && out.length() == 0) {
    -	f->flush(out);
    +      ldout(m_cct, 10) << __func__ << ": command completed with result " << r << dendl;
     +      if (auto* jff = dynamic_cast<JSONFormatterFile*>(f); jff != nullptr) {
    +        ldout(m_cct, 25) << __func__ << ": flushing file" << dendl;
    +        jff->flush();
    +        auto* outf = new JSONFormatter(true);
    +        outf->open_object_section("result");
    +        outf->dump_string("path", output);
    +        outf->dump_int("result", r);
    +        outf->dump_string("output", out.to_str());
    +        outf->dump_int("len", jff->get_len());
    +        outf->close_section();
    +        CachedStackStringStream css;
    +        outf->flush(*css);
    +        delete outf;
    +        out.clear();
    +        out.append(css->strv());
    +      } else if (r >= 0 && out.length() == 0) {
    +        ldout(m_cct, 25) << __func__ << ": out is empty, dumping formatter" << dendl;
    +        f->flush(out);
           }
           delete f;
           on_finish(r, err, out);
    @@ -693,6 +759,303 @@ class GetdescsHook : public AdminSocketHook {
       }
     };
     
    +// Define a macro to simplify adding signals to the map
    +#define ADD_SIGNAL(signalName)                 \
    +  {                                            \
    +    ((const char*)#signalName) + 3, signalName \
    +  }
    +
     +static const std::map<std::string, int> known_signals = {
    +  // the following 6 signals are recognized in windows according to
    +  // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/raise?view=msvc-170
    +  ADD_SIGNAL(SIGABRT),
    +  ADD_SIGNAL(SIGFPE),
    +  ADD_SIGNAL(SIGILL),
    +  ADD_SIGNAL(SIGINT),
    +  ADD_SIGNAL(SIGSEGV),
    +  ADD_SIGNAL(SIGTERM),
    +#ifndef WIN32
    +  ADD_SIGNAL(SIGTRAP),
    +  ADD_SIGNAL(SIGHUP),
    +  ADD_SIGNAL(SIGBUS),
    +  ADD_SIGNAL(SIGQUIT),
    +  ADD_SIGNAL(SIGKILL),
    +  ADD_SIGNAL(SIGUSR1),
    +  ADD_SIGNAL(SIGUSR2),
    +  ADD_SIGNAL(SIGPIPE),
    +  ADD_SIGNAL(SIGALRM),
    +  ADD_SIGNAL(SIGCHLD),
    +  ADD_SIGNAL(SIGCONT),
    +  ADD_SIGNAL(SIGSTOP),
    +  ADD_SIGNAL(SIGTSTP),
    +  ADD_SIGNAL(SIGTTIN),
    +  ADD_SIGNAL(SIGTTOU),
    +#endif
    +  // Add more signals as needed...
    +};
    +
    +#undef ADD_SIGNAL
    +
    +static std::string strsignal_compat(int signal) {
    +#ifndef WIN32
    +  return strsignal(signal);
    +#else
    +  switch (signal) {
    +    case SIGABRT: return "SIGABRT";
    +    case SIGFPE: return "SIGFPE";
    +    case SIGILL: return "SIGILL";
    +    case SIGINT: return "SIGINT";
    +    case SIGSEGV: return "SIGSEGV";
    +    case SIGTERM: return "SIGTERM";
    +    default: return fmt::format("Signal #{}", signal);
    +  }
    +#endif
    +}
    +
    +class RaiseHook: public AdminSocketHook {
    +  using clock = ceph::coarse_mono_clock;
    +  struct Killer {
    +    CephContext* m_cct;
    +    pid_t pid;
    +    int signal;
    +    clock::time_point due;
    +
    +    std::string describe()
    +    {
    +      using std::chrono::duration_cast;
    +      using std::chrono::seconds;
    +      auto remaining = (due - clock::now());
    +      return fmt::format(
    +        "pending signal ({}) due in {}", 
    +        strsignal_compat(signal),
     +        duration_cast<seconds>(remaining).count());
    +    }
    +
    +    bool cancel()
    +    {
    +#   ifndef WIN32
    +      int wstatus;
    +      int status;
    +      if (0 == (status = waitpid(pid, &wstatus, WNOHANG))) {
    +        status = kill(pid, SIGKILL);
    +        if (status) {
    +          ldout(m_cct, 5) << __func__ << "couldn't kill the killer. Error: " << strerror(errno) << dendl;
    +          return false;
    +        }
    +        while (pid == waitpid(pid, &wstatus, 0)) {
    +          if (WIFEXITED(wstatus)) {
    +            return false;
    +          }
    +          if (WIFSIGNALED(wstatus)) {
    +            return true;
    +          }
    +        }
    +      }
    +      if (status < 0) {
    +        ldout(m_cct, 5) << __func__ << "waitpid(killer, NOHANG) returned " << status << "; " << strerror(errno) << dendl;
    +      } else {
    +        ldout(m_cct, 20) << __func__ << "killer process " << pid << "\"" << describe() << "\" reaped. "
    +                         << "WIFEXITED: " << WIFEXITED(wstatus)
    +                         << "WIFSIGNALED: " << WIFSIGNALED(wstatus)
    +                         << dendl;
    +      }
    +#   endif
    +      return false;
    +    }
    +
    +    static std::optional<Killer> fork(CephContext *m_cct, int signal_to_send, double delay) {
    +#   ifndef WIN32
    +      pid_t victim = getpid();
    +      clock::time_point until = clock::now() + ceph::make_timespan(delay);
    +
    +      int fresult = ::fork();
    +      if (fresult < 0) {
    +        ldout(m_cct, 5) << __func__ << "couldn't fork the killer. Error: " << strerror(errno) << dendl;
    +        return std::nullopt;
    +      }
    +
    +      if (fresult) {
    +        // this is parent
    +        return {{m_cct, fresult, signal_to_send, until}};
    +      }
    +
    +      const ceph::signedspan poll_interval = ceph::make_timespan(0.1);
    +      while (getppid() == victim) {
    +        ceph::signedspan remaining = (until - clock::now());
    +        if (remaining.count() > 0) {
    +          using std::chrono::duration_cast;
    +          using std::chrono::nanoseconds;
    +          std::this_thread::sleep_for(duration_cast<nanoseconds>(std::min(remaining, poll_interval)));
    +        } else {
    +          break;
    +        }
    +      }
    +
    +      if (getppid() != victim) {
    +        // suicide if my parent has changed
    +        // this means that the original parent process has terminated
    +        ldout(m_cct, 5) << __func__ << "my parent isn't what it used to be, i'm out" << strerror(errno) << dendl;
    +        _exit(1);
    +      }
    +
    +      int status = kill(victim, signal_to_send);
    +      if (0 != status) {
    +        ldout(m_cct, 5) << __func__ << "couldn't kill the victim: " << strerror(errno) << dendl;
    +      }
    +      _exit(status);
    +#   endif
    +      return std::nullopt;
    +    }
    +  };
    +
    +  CephContext* m_cct;
    +  std::optional<Killer> killer;
    +
    +  int parse_signal(std::string&& sigdesc, Formatter* f, std::ostream& errss)
    +  {
    +    int result = 0;
    +    std::transform(sigdesc.begin(), sigdesc.end(), sigdesc.begin(),
    +        [](unsigned char c) { return std::toupper(c); });
    +    if (0 == sigdesc.find("-")) {
    +      sigdesc.erase(0, 1);
    +    }
    +    if (0 == sigdesc.find("SIG")) {
    +      sigdesc.erase(0, 3);
    +    }
    +
    +    if (sigdesc == "L") {
    +      f->open_object_section("known_signals");
    +      for (auto& [name, num] : known_signals) {
    +        f->dump_int(name, num);
    +      }
    +      f->close_section();
    +    } else {
    +      try {
    +        result = std::stoi(sigdesc);
    +        if (result < 1 || result > 64) {
    +          errss << "signal number should be an integer in the range [1..64]" << std::endl;
    +          return -EINVAL;
    +        }
    +      } catch (const std::invalid_argument&) {
    +        auto sig_it = known_signals.find(sigdesc);
    +        if (sig_it == known_signals.end()) {
    +          errss << "unknown signal name; use -l to see recognized names" << std::endl;
    +          return -EINVAL;
    +        }
    +        result = sig_it->second;
    +      }
    +    }
    +    return result;
    +  }
    +
    +public:
    +  RaiseHook(CephContext* cct) : m_cct(cct) { }
    +  static const char* get_cmddesc()
    +  {
    +    return "raise "
    +           "name=signal,type=CephString,req=false "
    +           "name=cancel,type=CephBool,req=false "
    +           "name=after,type=CephFloat,range=0.0,req=false ";
    +  }
    +
    +  static const char* get_help()
    +  {
    +    return "deliver the  to the daemon process, optionally delaying  seconds; "
    +           "when --after is used, the program will fork before sleeping, which allows to "
    +           "schedule signal delivery to a stopped daemon; it's possible to --cancel a pending signal delivery. "
    +           " can be in the forms '9', '-9', 'kill', '-KILL'. Use `raise -l` to list known signal names.";
    +  }
    +
    +  int call(std::string_view command, const cmdmap_t& cmdmap,
    +      const bufferlist&,
    +      Formatter* f,
    +      std::ostream& errss,
    +      bufferlist& out) override
    +  {
    +    using std::endl;
    +    string sigdesc;
    +    bool cancel = cmd_getval_or(cmdmap, "cancel", false);
    +    int signal_to_send = 0;
    +
    +    if (cmd_getval(cmdmap, "signal", sigdesc)) {
    +      signal_to_send = parse_signal(std::move(sigdesc), f, errss);
    +      if (signal_to_send < 0) {
    +        return signal_to_send;
    +      }
    +    } else if (!cancel) {
    +      errss << "signal name or number is required" << endl;
    +      return -EINVAL;
    +    }
    +
    +    if (cancel) {
    +      if (killer) {
    +        if (signal_to_send == 0 || signal_to_send == killer->signal) {
    +          if (killer->cancel()) {
    +            errss << "cancelled " << killer->describe() << endl;
    +            return 0;
    +          }
    +          killer = std::nullopt;
    +        }
    +        if (signal_to_send) {
    +          errss << "signal " << signal_to_send << " is not pending" << endl;
    +        }
    +      } else {
    +        errss << "no pending signal" << endl;
    +      }
    +      return 1;
    +    }
    +
    +    if (!signal_to_send) {
    +      return 0;
    +    }
    +
    +    double delay = 0;
    +    if (cmd_getval(cmdmap, "after", delay)) {
    +      #ifdef WIN32
    +        errss << "'--after' functionality is unsupported on Windows" << endl;
    +        return -ENOTSUP;
    +      #endif
    +      if (killer) {
    +        if (killer->cancel()) {
    +          errss << "cancelled " << killer->describe() << endl;
    +        }
    +      }
    +
    +      killer = Killer::fork(m_cct, signal_to_send, delay);
    +
    +      if (killer) {
    +        errss << "scheduled " << killer->describe() << endl;
    +        ldout(m_cct, 20) << __func__ << "scheduled " << killer->describe() << dendl;
    +      } else {
    +        errss << "couldn't fork the killer" << std::endl;
    +        return -EAGAIN;
    +      }
    +    } else {
    +      ldout(m_cct, 20) << __func__ << "raising "
    +                      << " (" << strsignal_compat(signal_to_send) << ")" << dendl;
    +      // raise the signal immediately
    +      int status = raise(signal_to_send);
    +
    +      if (0 == status) {
    +        errss << "raised signal "
    +              << " (" << strsignal_compat(signal_to_send) << ")" << endl;
    +      } else {
    +        errss << "couldn't raise signal "
    +              << " (" << strsignal_compat(signal_to_send) << ")."
    +              << " Error: " << strerror(errno) << endl;
    +
    +        ldout(m_cct, 5) << __func__ << "couldn't raise signal "
    +                << " (" << strsignal_compat(signal_to_send) << ")."
    +                << " Error: " << strerror(errno) << dendl;
    +
    +        return 1;
    +      }
    +    }
    +
    +    return 0;
    +  }
    +};
    +
     bool AdminSocket::init(const std::string& path)
     {
       ldout(m_cct, 5) << "init " << path << dendl;
    @@ -745,6 +1108,12 @@ bool AdminSocket::init(const std::string& path)
       register_command("get_command_descriptions",
     		   getdescs_hook.get(), "list available commands");
     
    +  raise_hook = std::make_unique<RaiseHook>(m_cct);
    +  register_command(
    +      RaiseHook::get_cmddesc(),
    +      raise_hook.get(),
    +      RaiseHook::get_help());
    +
       th = make_named_thread("admin_socket", &AdminSocket::entry, this);
       add_cleanup_file(m_path.c_str());
       return true;
    @@ -777,6 +1146,9 @@ void AdminSocket::shutdown()
       unregister_commands(getdescs_hook.get());
       getdescs_hook.reset();
     
    +  unregister_commands(raise_hook.get());
    +  raise_hook.reset();
    +
       remove_cleanup_file(m_path);
       m_path.clear();
     }
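With the hook registered above, the `raise` command is driven through the daemon's admin socket like any other asok command. As a rough usage sketch (daemon name and option spelling are taken from the command descriptor, not from verified CLI output): `raise SIGTERM` delivers the signal immediately, `raise -9 --after 30` forks a helper that delivers SIGKILL in 30 seconds, `raise --cancel` cancels a pending delivery, and `raise -l` lists the recognized signal names.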
    diff --git a/src/common/admin_socket.h b/src/common/admin_socket.h
    index 3f364a5b711c..4e2f26834666 100644
    --- a/src/common/admin_socket.h
    +++ b/src/common/admin_socket.h
    @@ -27,6 +27,7 @@
     
     #include "include/buffer.h"
     #include "include/common_fwd.h"
    +#include "common/admin_finisher.h"
     #include "common/ref.h"
     #include "common/cmdparse.h"
     
    @@ -35,6 +36,8 @@ class MMonCommand;
     
     inline constexpr auto CEPH_ADMIN_SOCK_VERSION = std::string_view("2");
     
    +typedef std::function<void(int, std::string_view, ceph::buffer::list&)> asok_finisher;
    +
     class AdminSocketHook {
     public:
       /**
    @@ -93,7 +96,7 @@ class AdminSocketHook {
         const cmdmap_t& cmdmap,
         ceph::Formatter *f,
         const ceph::buffer::list& inbl,
    -    std::function on_finish) {
    +    asok_finisher on_finish) {
         // by default, call the synchronous handler and then finish
         ceph::buffer::list out;
         std::ostringstream errss;
    @@ -151,7 +154,7 @@ class AdminSocket
       void execute_command(
         const std::vector<std::string>& cmd,
         const ceph::buffer::list& inbl,
    -    std::function on_fin);
    +    asok_finisher on_fin);
     
       /// execute (blocking)
       int execute_command(
    @@ -190,6 +193,7 @@ class AdminSocket
       std::unique_ptr<AdminSocketHook> version_hook;
       std::unique_ptr<AdminSocketHook> help_hook;
       std::unique_ptr<AdminSocketHook> getdescs_hook;
    +  std::unique_ptr<AdminSocketHook> raise_hook;
     
       std::mutex tell_lock;
       std::list> tell_queue;
    diff --git a/src/common/assert.cc b/src/common/assert.cc
    index 7fb4c2d726b0..68ad99c878e2 100644
    --- a/src/common/assert.cc
    +++ b/src/common/assert.cc
    @@ -44,8 +44,7 @@ namespace ceph {
         g_assert_line = line;
         g_assert_func = func;
         g_assert_thread = (unsigned long long)pthread_self();
    -    ceph_pthread_getname(pthread_self(), g_assert_thread_name,
    -		       sizeof(g_assert_thread_name));
    +    ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name));
     
         ostringstream tss;
         tss << ceph_clock_now();
    @@ -122,8 +121,7 @@ namespace ceph {
         g_assert_line = line;
         g_assert_func = func;
         g_assert_thread = (unsigned long long)pthread_self();
    -    ceph_pthread_getname(pthread_self(), g_assert_thread_name,
    -		       sizeof(g_assert_thread_name));
    +    ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name));
     
         BufAppender ba(g_assert_msg, sizeof(g_assert_msg));
         BackTrace *bt = new ClibBackTrace(1);
    @@ -168,8 +166,7 @@ namespace ceph {
         g_assert_line = line;
         g_assert_func = func;
         g_assert_thread = (unsigned long long)pthread_self();
    -    ceph_pthread_getname(pthread_self(), g_assert_thread_name,
    -		       sizeof(g_assert_thread_name));
    +    ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name));
     
         BackTrace *bt = new ClibBackTrace(1);
         snprintf(g_assert_msg, sizeof(g_assert_msg),
    @@ -210,8 +207,7 @@ namespace ceph {
         g_assert_line = line;
         g_assert_func = func;
         g_assert_thread = (unsigned long long)pthread_self();
    -    ceph_pthread_getname(pthread_self(), g_assert_thread_name,
    -		       sizeof(g_assert_thread_name));
    +    ceph_pthread_getname(g_assert_thread_name, sizeof(g_assert_thread_name));
     
         BufAppender ba(g_assert_msg, sizeof(g_assert_msg));
         BackTrace *bt = new ClibBackTrace(1);
    diff --git a/src/common/async/bind_handler.h b/src/common/async/bind_handler.h
    index 516d8a5e8b41..69128501a07f 100644
    --- a/src/common/async/bind_handler.h
    +++ b/src/common/async/bind_handler.h
    @@ -16,7 +16,8 @@
     #define CEPH_ASYNC_BIND_HANDLER_H
     
     #include 
    -#include 
    +#include 
    +#include 
     
     namespace ceph::async {
     
    diff --git a/src/common/async/cancel_on_error.h b/src/common/async/cancel_on_error.h
    new file mode 100644
    index 000000000000..fd3752d2f824
    --- /dev/null
    +++ b/src/common/async/cancel_on_error.h
    @@ -0,0 +1,29 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab ft=cpp
    +
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation. See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +
    +namespace ceph::async {
    +
    +/// Error handling strategy for concurrent operations.
    +enum class cancel_on_error : uint8_t {
    +  none, //< No spawned coroutines are canceled on failure.
    +  after, //< Cancel coroutines spawned after the failed coroutine.
    +  all, //< Cancel all spawned coroutines on failure.
    +};
    +
    +} // namespace ceph::async
    diff --git a/src/common/async/co_spawn_group.h b/src/common/async/co_spawn_group.h
    new file mode 100644
    index 000000000000..e30d20cdb4d4
    --- /dev/null
    +++ b/src/common/async/co_spawn_group.h
    @@ -0,0 +1,101 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include "cancel_on_error.h"
    +#include "detail/co_spawn_group.h"
    +
    +namespace ceph::async {
    +
    +/// \brief Tracks a group of coroutines to await all of their completions.
    +///
    +/// The wait() function can be used to await the completion of all children.
    +/// If any child coroutines exit with an exception, the first such exception
    +/// is rethrown by wait(). The cancel_on_error option controls whether these
    +/// exceptions trigger the cancellation of other children.
    +///
    +/// All child coroutines are canceled by cancel() or co_spawn_group destruction.
    +/// This allows the parent coroutine to share memory with its child coroutines
    +/// without fear of dangling references.
    +///
    +/// This class is not thread-safe, so a strand executor should be used in
    +/// multi-threaded contexts.
    +///
    +/// Example:
    +/// \code
    +/// awaitable<void> child(task& t);
    +///
    +/// awaitable<void> parent(std::span<task> tasks)
    +/// {
    +///   // process all tasks in parallel
    +///   auto ex = co_await boost::asio::this_coro::executor;
    +///   auto group = co_spawn_group{ex, tasks.size()};
    +///
    +///   for (auto& t : tasks) {
    +///     group.spawn(child(t));
    +///   }
    +///   co_await group.wait();
    +/// }
    +/// \endcode
    +template <typename Executor>
    +class co_spawn_group {
    +  using impl_type = detail::co_spawn_group_impl<Executor>;
    +  boost::intrusive_ptr impl;
    +
    + public:
    +  co_spawn_group(Executor ex, size_t limit,
    +                 cancel_on_error on_error = cancel_on_error::none)
    +    : impl(new impl_type(ex, limit, on_error))
    +  {
    +  }
    +
    +  ~co_spawn_group()
    +  {
    +    impl->cancel();
    +  }
    +
    +  using executor_type = Executor;
    +  executor_type get_executor() const
    +  {
    +    return impl->get_executor();
    +  }
    +
    +  /// Spawn the given coroutine \ref cr on the group's executor. Throws a
    +  /// std::length_error exception if the number of outstanding coroutines
    +  /// would exceed the group's limit.
    +  void spawn(boost::asio::awaitable<void> cr)
    +  {
    +    impl->spawn(std::move(cr));
    +  }
    +
    +  /// Wait for all outstanding coroutines before returning. If any of the
    +  /// spawned coroutines exit with an exception, the first exception is
    +  /// rethrown.
    +  ///
    +  /// After wait() completes, whether by exception or co_return, the spawn
    +  /// group can be reused to spawn and await additional coroutines.
    +  boost::asio::awaitable<void> wait()
    +  {
    +    return impl->wait();
    +  }
    +
    +  /// Cancel all outstanding coroutines.
    +  void cancel()
    +  {
    +    impl->cancel();
    +  }
    +};
    +
    +} // namespace ceph::async
    diff --git a/src/common/async/co_throttle.h b/src/common/async/co_throttle.h
    new file mode 100644
    index 000000000000..880ffc96ce9b
    --- /dev/null
    +++ b/src/common/async/co_throttle.h
    @@ -0,0 +1,113 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright (C) 2023 Red Hat 
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include "common/async/cancel_on_error.h"
    +#include "common/async/detail/co_throttle_impl.h"
    +
    +namespace ceph::async {
    +
    +/// A coroutine throttle that allows a parent coroutine to spawn and manage
    +/// multiple child coroutines, while enforcing an upper bound on concurrency.
    +///
    +/// Child coroutines must be of type awaitable<void>. Exceptions thrown by
    +/// children are rethrown to the parent on its next call to spawn() or wait().
    +/// The cancel_on_error option controls whether these exceptions trigger
    +/// the cancellation of other children.
    +///
    +/// All child coroutines are canceled by cancel() or co_throttle destruction.
    +/// This allows the parent coroutine to share memory with its child coroutines
    +/// without fear of dangling references.
    +///
    +/// This class is not thread-safe, so a strand executor should be used in
    +/// multi-threaded contexts.
    +///
    +/// Example:
    +/// \code
    +/// awaitable<void> child(task& t);
    +///
    +/// awaitable<void> parent(std::span<task> tasks)
    +/// {
    +///   // process all tasks, up to 10 at a time
    +///   auto ex = co_await boost::asio::this_coro::executor;
    +///   auto throttle = co_throttle{ex, 10};
    +///
    +///   for (auto& t : tasks) {
    +///     co_await throttle.spawn(child(t));
    +///   }
    +///   co_await throttle.wait();
    +/// }
    +/// \endcode
    +template <typename Executor>
    +class co_throttle {
    +  using impl_type = detail::co_throttle_impl<Executor>;
    +  boost::intrusive_ptr impl;
    +
    + public:
    +  using executor_type = Executor;
    +  executor_type get_executor() const noexcept { return impl->get_executor(); }
    +
    +  static constexpr size_t max_limit = std::numeric_limits<size_t>::max();
    +
    +  co_throttle(const executor_type& ex, size_t limit,
    +              cancel_on_error on_error = cancel_on_error::none)
    +    : impl(new impl_type(ex, limit, on_error))
    +  {
    +  }
    +
    +  ~co_throttle()
    +  {
    +    cancel();
    +  }
    +
    +  co_throttle(const co_throttle&) = delete;
    +  co_throttle& operator=(const co_throttle&) = delete;
    +
    +  /// Try to spawn the given coroutine \ref cr. If this would exceed the
    +  /// concurrency limit, wait for another coroutine to complete first. This
    +  /// default limit can be overridden with the optional \ref smaller_limit
    +  /// argument.
    +  ///
    +  /// If any spawned coroutines exit with an exception, the first exception is
    +  /// rethrown by the next call to spawn() or wait(). If spawn() has an
    +  /// exception to rethrow, it will spawn \ref cr first only in the case of
    +  /// cancel_on_error::none. New coroutines can be spawned by later calls to
    +  /// spawn() regardless of cancel_on_error.
    +  auto spawn(boost::asio::awaitable<void> cr,
    +             size_t smaller_limit = max_limit)
    +      -> boost::asio::awaitable<void>
    +  {
    +    return impl->spawn(std::move(cr), smaller_limit);
    +  }
    +
    +  /// Wait for all associated coroutines to complete. If any of these coroutines
    +  /// exit with an exception, the first of those exceptions is rethrown.
    +  auto wait()
    +      -> boost::asio::awaitable<void>
    +  {
    +    return impl->wait();
    +  }
    +
    +  /// Cancel all associated coroutines.
    +  void cancel()
    +  {
    +    impl->cancel();
    +  }
    +};
    +
    +} // namespace ceph::async
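For reference, the error-handling strategy is just the optional third constructor argument. A minimal sketch (not part of the patch), continuing the child()/tasks names from the header's example above, that cancels only the children spawned after the first failure:

  // inside a coroutine
  auto ex = co_await boost::asio::this_coro::executor;
  auto throttle = ceph::async::co_throttle{ex, 10, ceph::async::cancel_on_error::after};
  for (auto& t : tasks) {
    co_await throttle.spawn(child(t)); // may rethrow an earlier child's exception
  }
  co_await throttle.wait();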
    diff --git a/src/common/async/co_waiter.h b/src/common/async/co_waiter.h
    new file mode 100644
    index 000000000000..098ff1f26b67
    --- /dev/null
    +++ b/src/common/async/co_waiter.h
    @@ -0,0 +1,166 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation. See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include "include/ceph_assert.h"
    +
    +namespace ceph::async {
    +
    +/// Captures an awaitable handler for deferred completion or cancellation.
    +template <typename Ret, typename Executor>
    +class co_waiter {
    +  using signature = void(std::exception_ptr, Ret);
    +  using token_type = boost::asio::use_awaitable_t<Executor>;
    +  using handler_type = typename boost::asio::async_result<
    +      token_type, signature>::handler_type;
    +  std::optional handler;
    +
    +  struct op_cancellation {
    +    co_waiter* self;
    +    op_cancellation(co_waiter* self) : self(self) {}
    +    void operator()(boost::asio::cancellation_type_t type) {
    +      if (type != boost::asio::cancellation_type::none) {
    +        self->cancel();
    +      }
    +    }
    +  };
    + public:
    +  co_waiter() = default;
    +
    +  // copy and move are disabled because the cancellation handler captures 'this'
    +  co_waiter(const co_waiter&) = delete;
    +  co_waiter& operator=(const co_waiter&) = delete;
    +
    +  /// Returns true if there's a handler awaiting completion.
    +  bool waiting() const { return handler.has_value(); }
    +
    +  /// Returns an awaitable that blocks until complete() or cancel().
    +  boost::asio::awaitable<Ret, Executor> get()
    +  {
    +    ceph_assert(!handler);
    +    token_type token;
    +    return boost::asio::async_initiate<token_type, signature>(
    +        [this] (handler_type h) {
    +          auto slot = boost::asio::get_associated_cancellation_slot(h);
    +          if (slot.is_connected()) {
    +            slot.template emplace<op_cancellation>(this);
    +          }
    +          handler.emplace(std::move(h));
    +        }, token);
    +  }
    +
    +  /// Schedule the completion handler with the given arguments.
    +  void complete(std::exception_ptr eptr, Ret value)
    +  {
    +    ceph_assert(handler);
    +    auto h = boost::asio::append(std::move(*handler), eptr, std::move(value));
    +    handler.reset();
    +    boost::asio::dispatch(std::move(h));
    +  }
    +
    +  /// Cancel the coroutine with an operation_aborted exception.
    +  void cancel()
    +  {
    +    if (handler) {
    +      auto eptr = std::make_exception_ptr(
    +          boost::system::system_error(
    +              boost::asio::error::operation_aborted));
    +      complete(eptr, Ret{});
    +    }
    +  }
    +
    +  /// Destroy the completion handler.
    +  void shutdown()
    +  {
    +    handler.reset();
    +  }
    +};
    +
    +// specialization for Ret=void
    +template <typename Executor>
    +class co_waiter<void, Executor> {
    +  using signature = void(std::exception_ptr);
    +  using token_type = boost::asio::use_awaitable_t<Executor>;
    +  using handler_type = typename boost::asio::async_result<
    +      token_type, signature>::handler_type;
    +  std::optional handler;
    +
    +  struct op_cancellation {
    +    co_waiter* self;
    +    op_cancellation(co_waiter* self) : self(self) {}
    +    void operator()(boost::asio::cancellation_type_t type) {
    +      if (type != boost::asio::cancellation_type::none) {
    +        self->cancel();
    +      }
    +    }
    +  };
    + public:
    +  co_waiter() = default;
    +
    +  // copy and move are disabled because the cancellation handler captures 'this'
    +  co_waiter(const co_waiter&) = delete;
    +  co_waiter& operator=(const co_waiter&) = delete;
    +
    +  /// Returns true if there's a handler awaiting completion.
    +  bool waiting() const { return handler.has_value(); }
    +
    +  /// Returns an awaitable that blocks until complete() or cancel().
    +  boost::asio::awaitable<void, Executor> get()
    +  {
    +    ceph_assert(!handler);
    +    token_type token;
    +    return boost::asio::async_initiate<token_type, signature>(
    +        [this] (handler_type h) {
    +          auto slot = boost::asio::get_associated_cancellation_slot(h);
    +          if (slot.is_connected()) {
    +            slot.template emplace<op_cancellation>(this);
    +          }
    +          handler.emplace(std::move(h));
    +        }, token);
    +  }
    +
    +  /// Schedule the completion handler with the given arguments.
    +  void complete(std::exception_ptr eptr)
    +  {
    +    ceph_assert(handler);
    +    auto h = boost::asio::append(std::move(*handler), eptr);
    +    handler.reset();
    +    boost::asio::dispatch(std::move(h));
    +  }
    +
    +  /// Cancel the coroutine with an operation_aborted exception.
    +  void cancel()
    +  {
    +    if (handler) {
    +      auto eptr = std::make_exception_ptr(
    +          boost::system::system_error(
    +              boost::asio::error::operation_aborted));
    +      complete(eptr);
    +    }
    +  }
    +
    +  /// Destroy the completion handler.
    +  void shutdown()
    +  {
    +    handler.reset();
    +  }
    +};
    +
    +} // namespace ceph::async
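co_waiter's header carries no usage example; a minimal sketch under assumed names (Reply, Request and the any_io_executor choice are illustrative, not part of the patch): one coroutine parks on get() while a separate completion path finishes or cancels it.

  #include <utility>
  #include <boost/asio/any_io_executor.hpp>
  #include <boost/asio/awaitable.hpp>
  #include "common/async/co_waiter.h"

  struct Reply { int code = 0; };

  class Request {
    ceph::async::co_waiter<Reply, boost::asio::any_io_executor> waiter;
   public:
    // the caller co_awaits this until finish() or abort() runs
    boost::asio::awaitable<Reply> wait() { return waiter.get(); }
    // completion path: wake the waiter with a value and no exception
    void finish(Reply r) { waiter.complete(nullptr, std::move(r)); }
    // abort path: the waiter resumes with an operation_aborted exception
    void abort() { waiter.cancel(); }
  };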
    diff --git a/src/common/async/completion.h b/src/common/async/completion.h
    index 6af9109d5479..d8065934e016 100644
    --- a/src/common/async/completion.h
    +++ b/src/common/async/completion.h
    @@ -17,6 +17,12 @@
     
     #include 
     
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +
     #include "bind_handler.h"
     #include "forward_handler.h"
     
    @@ -181,33 +187,38 @@ class CompletionImpl final : public Completion {
         RebindTraits2::deallocate(alloc2, static_cast(p), 1);
       }
     
    -  static auto bind_and_forward(Handler&& h, std::tuple&& args) {
    -    return forward_handler(CompletionHandler{std::move(h), std::move(args)});
    +  static auto bind_and_forward(const Executor2& ex, Handler&& h,
    +                               std::tuple&& args) {
    +    return forward_handler(CompletionHandler{
    +        boost::asio::bind_executor(ex, std::move(h)), std::move(args)});
       }
     
       void destroy_defer(std::tuple&& args) override {
         auto w = std::move(work);
    -    auto f = bind_and_forward(std::move(handler), std::move(args));
    +    auto ex2 = w.second.get_executor();
         RebindAlloc2 alloc2 = boost::asio::get_associated_allocator(handler);
    +    auto f = bind_and_forward(ex2, std::move(handler), std::move(args));
         RebindTraits2::destroy(alloc2, this);
         RebindTraits2::deallocate(alloc2, this, 1);
    -    w.second.get_executor().defer(std::move(f), alloc2);
    +    boost::asio::defer(boost::asio::bind_executor(ex2, std::move(f)));
       }
       void destroy_dispatch(std::tuple&& args) override {
         auto w = std::move(work);
    -    auto f = bind_and_forward(std::move(handler), std::move(args));
    +    auto ex2 = w.second.get_executor();
         RebindAlloc2 alloc2 = boost::asio::get_associated_allocator(handler);
    +    auto f = bind_and_forward(ex2, std::move(handler), std::move(args));
         RebindTraits2::destroy(alloc2, this);
         RebindTraits2::deallocate(alloc2, this, 1);
    -    w.second.get_executor().dispatch(std::move(f), alloc2);
    +    boost::asio::dispatch(std::move(f));
       }
       void destroy_post(std::tuple&& args) override {
         auto w = std::move(work);
    -    auto f = bind_and_forward(std::move(handler), std::move(args));
    +    auto ex2 = w.second.get_executor();
         RebindAlloc2 alloc2 = boost::asio::get_associated_allocator(handler);
    +    auto f = bind_and_forward(ex2, std::move(handler), std::move(args));
         RebindTraits2::destroy(alloc2, this);
         RebindTraits2::deallocate(alloc2, this, 1);
    -    w.second.get_executor().post(std::move(f), alloc2);
    +    boost::asio::post(std::move(f));
       }
       void destroy() override {
         RebindAlloc2 alloc2 = boost::asio::get_associated_allocator(handler);
    diff --git a/src/common/async/context_pool.h b/src/common/async/context_pool.h
    index 9c6cab7677db..5bfaf2be51eb 100644
    --- a/src/common/async/context_pool.h
    +++ b/src/common/async/context_pool.h
    @@ -16,6 +16,7 @@
     #ifndef CEPH_COMMON_ASYNC_CONTEXT_POOL_H
     #define CEPH_COMMON_ASYNC_CONTEXT_POOL_H
     
    +#include 
     #include 
     #include 
     #include 
    @@ -46,9 +47,14 @@ class io_context_pool {
       }
     public:
       io_context_pool() noexcept {}
    -  io_context_pool(std::int16_t threadcnt) noexcept {
    +
    +  io_context_pool(std::int64_t threadcnt) noexcept {
         start(threadcnt);
       }
    +  template Init>
    +  io_context_pool(std::int64_t threadcnt, Init&& init) noexcept {
    +    start(threadcnt, std::move(init));
    +  }
       ~io_context_pool() {
         stop();
       }
    @@ -59,7 +65,22 @@ class io_context_pool {
           ioctx.restart();
           for (std::int16_t i = 0; i < threadcnt; ++i) {
     	threadvec.emplace_back(make_named_thread("io_context_pool",
    -						 [this]() {
    +						 [this] {
    +						   ioctx.run();
    +						 }));
    +      }
    +    }
    +  }
    +  template Init>
    +  void start(std::int16_t threadcnt, Init&& init) noexcept {
    +    auto l = std::scoped_lock(m);
    +    if (threadvec.empty()) {
    +      guard.emplace(boost::asio::make_work_guard(ioctx));
    +      ioctx.restart();
    +      for (std::int16_t i = 0; i < threadcnt; ++i) {
    +	threadvec.emplace_back(make_named_thread("io_context_pool",
    +						 [this, init=std::move(init)] {
    +						   std::move(init)();
     						   ioctx.run();
     						 }));
           }
    @@ -85,6 +106,7 @@ class io_context_pool {
       operator boost::asio::io_context&() {
         return ioctx;
       }
    +  using executor_type = boost::asio::io_context::executor_type;
       boost::asio::io_context::executor_type get_executor() {
         return ioctx.get_executor();
       }
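The new start() overload threads a per-thread init callable through to each worker, which runs it once before entering ioctx.run(). A minimal sketch, assuming any no-argument invocable satisfies the Init constraint (the lambda body is illustrative):

  #include "common/async/context_pool.h"

  int main() {
    ceph::async::io_context_pool pool;
    // each of the 4 worker threads runs the init functor once, then ioctx.run()
    pool.start(4, [] {
      // e.g. block signals or set up thread-local state here
    });
    // the destructor stops the pool and joins the worker threads
  }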
    diff --git a/src/common/async/detail/co_spawn_group.h b/src/common/async/detail/co_spawn_group.h
    new file mode 100644
    index 000000000000..bfdb2ded54f7
    --- /dev/null
    +++ b/src/common/async/detail/co_spawn_group.h
    @@ -0,0 +1,182 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include "common/async/cancel_on_error.h"
    +#include "common/async/co_waiter.h"
    +#include "common/async/service.h"
    +#include "include/scope_guard.h"
    +
    +namespace ceph::async::detail {
    +
    +template <typename Executor>
    +class co_spawn_group_impl;
    +
    +// A cancellable co_spawn() completion handler that notifies the co_spawn_group
    +// upon completion. This holds a reference to the implementation in order to
    +// extend its lifetime. This is required for per-op cancellation because the
    +// cancellation_signals must outlive these coroutine frames.
    +template <typename Executor>
    +class co_spawn_group_handler {
    +  using impl_type = co_spawn_group_impl<Executor>;
    +  using size_type = typename impl_type::size_type;
    +  boost::intrusive_ptr impl;
    +  boost::asio::cancellation_slot slot;
    +  size_type index;
    + public:
    +  co_spawn_group_handler(boost::intrusive_ptr impl,
    +                         boost::asio::cancellation_slot slot, size_type index)
    +      : impl(std::move(impl)), slot(std::move(slot)), index(index)
    +  {}
    +
    +  using executor_type = typename impl_type::executor_type;
    +  executor_type get_executor() const noexcept
    +  {
    +    return impl->get_executor();
    +  }
    +
    +  using cancellation_slot_type = boost::asio::cancellation_slot;
    +  cancellation_slot_type get_cancellation_slot() const noexcept
    +  {
    +    return slot;
    +  }
    +
    +  void operator()(std::exception_ptr eptr)
    +  {
    +    impl->child_complete(index, eptr);
    +  }
    +};
    +
    +// Reference-counted spawn group implementation.
    +template <typename Executor>
    +class co_spawn_group_impl :
    +    public boost::intrusive_ref_counter<co_spawn_group_impl<Executor>,
    +        boost::thread_unsafe_counter>,
    +    public service_list_base_hook
    +{
    + public:
    +  using size_type = uint16_t;
    +
    +  co_spawn_group_impl(Executor ex, size_type limit,
    +                      cancel_on_error on_error)
    +    : svc(boost::asio::use_service<service<co_spawn_group_impl>>(
    +            boost::asio::query(ex, boost::asio::execution::context))),
    +      ex(ex),
    +      signals(std::make_unique<boost::asio::cancellation_signal[]>(limit)),
    +      limit(limit), on_error(on_error)
    +  {
    +    // register for service_shutdown() notifications
    +    svc.add(*this);
    +  }
    +  ~co_spawn_group_impl()
    +  {
    +    svc.remove(*this);
    +  }
    +
    +  using executor_type = Executor;
    +  executor_type get_executor() const noexcept
    +  {
    +    return ex;
    +  }
    +
    +  void child_complete(size_type index, std::exception_ptr e)
    +  {
    +    if (e) {
    +      if (!eptr) {
    +        eptr = e;
    +      }
    +      if (on_error == cancel_on_error::all) {
    +        cancel_from(0);
    +      } else if (on_error == cancel_on_error::after) {
    +        cancel_from(index + 1);
    +      }
    +    }
    +    if (++completed == spawned) {
    +      complete();
    +    }
    +  }
    +
    +  void spawn(boost::asio::awaitable<void> cr)
    +  {
    +    boost::asio::co_spawn(get_executor(), std::move(cr), completion());
    +  }
    +
    +  boost::asio::awaitable<void> wait()
    +  {
    +    if (completed < spawned) {
    +      co_await waiter.get();
    +    }
    +
    +    // clear for reuse
    +    completed = 0;
    +    spawned = 0;
    +
    +    if (eptr) {
    +      std::rethrow_exception(std::exchange(eptr, nullptr));
    +    }
    +  }
    +
    +  void cancel()
    +  {
    +    cancel_from(0);
    +  }
    +
    +  void service_shutdown()
    +  {
    +    waiter.shutdown();
    +  }
    +
    + private:
    +  service<co_spawn_group_impl>& svc;
    +  co_waiter<void, Executor> waiter;
    +  executor_type ex;
    +  std::unique_ptr<boost::asio::cancellation_signal[]> signals;
    +  std::exception_ptr eptr;
    +  const size_type limit;
    +  size_type spawned = 0;
    +  size_type completed = 0;
    +  const cancel_on_error on_error;
    +
    +  void cancel_from(size_type begin)
    +  {
    +    for (size_type i = begin; i < spawned; i++) {
    +      signals[i].emit(boost::asio::cancellation_type::terminal);
    +    }
    +  }
    +
    +  void complete()
    +  {
    +    if (waiter.waiting()) {
    +      waiter.complete(nullptr);
    +    }
    +  }
    +
    +  co_spawn_group_handler<Executor> completion()
    +  {
    +    if (spawned >= limit) {
    +      throw std::length_error("spawn group maximum size exceeded");
    +    }
    +    const size_type index = spawned++;
    +    return {boost::intrusive_ptr{this}, signals[index].slot(), index};
    +  }
    +};
    +
    +} // namespace ceph::async::detail
    diff --git a/src/common/async/detail/co_throttle_impl.h b/src/common/async/detail/co_throttle_impl.h
    new file mode 100644
    index 000000000000..f2f17a043abe
    --- /dev/null
    +++ b/src/common/async/detail/co_throttle_impl.h
    @@ -0,0 +1,222 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright (C) 2023 Red Hat 
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include "common/async/cancel_on_error.h"
    +#include "common/async/co_waiter.h"
    +#include "common/async/service.h"
    +#include "include/ceph_assert.h"
    +
    +namespace ceph::async::detail {
    +
    +// Coroutine throttle implementation. This is reference-counted so the
    +// co_spawn() completion handlers can extend the implementation's lifetime.
    +// This is required for per-op cancellation because the cancellation_signals
    +// must outlive their coroutine frames.
    +template <typename Executor>
    +class co_throttle_impl :
    +    public boost::intrusive_ref_counter,
    +        boost::thread_unsafe_counter>,
    +    public service_list_base_hook
    +{
    + public:
    +  using executor_type = Executor;
    +  executor_type get_executor() const { return ex; }
    +
    +  co_throttle_impl(const executor_type& ex, size_t limit,
    +                   cancel_on_error on_error)
    +    : svc(boost::asio::use_service<service<co_throttle_impl>>(
    +            boost::asio::query(ex, boost::asio::execution::context))),
    +      ex(ex), limit(limit), on_error(on_error),
    +      children(new child[limit])
    +  {
    +    // register for service_shutdown() notifications
    +    svc.add(*this);
    +
    +    // initialize the free list
    +    for (size_t i = 0; i < limit; i++) {
    +      free.push_back(children[i]);
    +    }
    +  }
    +  ~co_throttle_impl()
    +  {
    +    svc.remove(*this);
    +  }
    +
    +  auto spawn(boost::asio::awaitable<void> cr,
    +             size_t smaller_limit)
    +      -> boost::asio::awaitable<void>
    +  {
    +    if (unreported_exception && on_error != cancel_on_error::none) {
    +      std::rethrow_exception(std::exchange(unreported_exception, nullptr));
    +    }
    +
    +    const size_t current_limit = std::min(smaller_limit, limit);
    +    if (count >= current_limit) {
    +      co_await wait_for(current_limit - 1);
    +      if (unreported_exception && on_error != cancel_on_error::none) {
    +        std::rethrow_exception(std::exchange(unreported_exception, nullptr));
    +      }
    +    }
    +
    +    ++count;
    +
    +    // move a free child to the outstanding list
    +    ceph_assert(!free.empty());
    +    child& c = free.front();
    +    free.pop_front();
    +    outstanding.push_back(c);
    +
    +    // spawn the coroutine with its associated cancellation signal
    +    c.signal.emplace();
    +    c.canceled = false;
    +
    +    boost::asio::co_spawn(get_executor(), std::move(cr),
    +        boost::asio::bind_cancellation_slot(c.signal->slot(),
    +            child_completion{this, c}));
    +
    +    if (unreported_exception) {
    +      std::rethrow_exception(std::exchange(unreported_exception, nullptr));
    +    }
    +  }
    +
    +  auto wait()
    +      -> boost::asio::awaitable<void>
    +  {
    +    if (count > 0) {
    +      co_await wait_for(0);
    +    }
    +    if (unreported_exception) {
    +      std::rethrow_exception(std::exchange(unreported_exception, nullptr));
    +    }
    +  }
    +
    +  void cancel()
    +  {
    +    while (!outstanding.empty()) {
    +      child& c = outstanding.front();
    +      outstanding.pop_front();
    +
    +      c.canceled = true;
    +      c.signal->emit(boost::asio::cancellation_type::terminal);
    +    }
    +  }
    +
    +  void service_shutdown()
    +  {
    +    waiter.shutdown();
    +  }
    +
    + private:
    +  service<co_throttle_impl>& svc;
    +  executor_type ex;
    +  const size_t limit;
    +  const cancel_on_error on_error;
    +
    +  size_t count = 0;
    +  size_t wait_for_count = 0;
    +
    +  std::exception_ptr unreported_exception;
    +
    +  // track each spawned coroutine for cancellation. these are stored in an
    +  // array, and recycled after each use via the free list
    +  struct child : boost::intrusive::list_base_hook<> {
    +    std::optional<boost::asio::cancellation_signal> signal;
    +    bool canceled = false;
    +  };
    +  std::unique_ptr<child[]> children;
    +
    +  using child_list = boost::intrusive::list<child, boost::intrusive::constant_time_size<false>>;
    +  child_list outstanding;
    +  child_list free;
    +
    +  co_waiter<void, Executor> waiter;
    +
    +  // return an awaitable that completes once count <= target_count
    +  auto wait_for(size_t target_count)
    +      -> boost::asio::awaitable<void>
    +  {
    +    wait_for_count = target_count;
    +    return waiter.get();
    +  }
    +
    +  void on_complete(child& c, std::exception_ptr eptr)
    +  {
    +    --count;
    +
    +    if (c.canceled) {
    +      // if the child was canceled, it was already removed from outstanding
    +      ceph_assert(!c.is_linked());
    +      c.canceled = false;
    +      c.signal.reset();
    +      free.push_back(c);
    +    } else {
    +      // move back to the free list
    +      ceph_assert(c.is_linked());
    +      auto next = outstanding.erase(outstanding.iterator_to(c));
    +      c.signal.reset();
    +      free.push_back(c);
    +
    +      if (eptr) {
    +        if (eptr && !unreported_exception) {
    +          unreported_exception = eptr;
    +        }
    +
    +        // handle cancel_on_error. cancellation signals may recurse into
    +        // on_complete(), so move the entries into a separate list first
    +        child_list to_cancel;
    +        if (on_error == cancel_on_error::after) {
    +          to_cancel.splice(to_cancel.end(), outstanding,
    +                           next, outstanding.end());
    +        } else if (on_error == cancel_on_error::all) {
    +          to_cancel = std::move(outstanding);
    +        }
    +
    +        for (auto i = to_cancel.begin(); i != to_cancel.end(); ++i) {
    +          child& c = *i;
    +          i = to_cancel.erase(i);
    +
    +          c.canceled = true;
    +          c.signal->emit(boost::asio::cancellation_type::terminal);
    +        }
    +      }
    +    }
    +
    +    // maybe wake the waiter
    +    if (waiter.waiting() && count <= wait_for_count) {
    +      waiter.complete(nullptr);
    +    }
    +  }
    +
    +  struct child_completion {
    +    boost::intrusive_ptr<co_throttle_impl> impl;
    +    child& c;
    +
    +    void operator()(std::exception_ptr eptr) {
    +      impl->on_complete(c, eptr);
    +    }
    +  };
    +};
    +
    +} // namespace ceph::async::detail
    diff --git a/src/common/async/detail/shared_mutex.h b/src/common/async/detail/shared_mutex.h
    index 8e5436350cfd..6eae25b430d8 100644
    --- a/src/common/async/detail/shared_mutex.h
    +++ b/src/common/async/detail/shared_mutex.h
    @@ -123,30 +123,28 @@ auto SharedMutexImpl::async_lock(Mutex& mtx, CompletionToken&& token)
     {
       using Request = AsyncRequest;
       using Signature = typename Request::Signature;
    -  boost::asio::async_completion init(token);
    -  auto& handler = init.completion_handler;
    -  auto ex1 = mtx.get_executor();
    -  {
    -    std::lock_guard lock{mutex};
    -
    -    boost::system::error_code ec;
    -    if (state == Unlocked) {
    -      state = Exclusive;
    -
    -      // post a successful completion
    -      auto ex2 = boost::asio::get_associated_executor(handler, ex1);
    -      auto alloc2 = boost::asio::get_associated_allocator(handler);
    -      auto b = bind_handler(std::move(handler), ec,
    -                            std::unique_lock{mtx, std::adopt_lock});
    -      ex2.post(forward_handler(std::move(b)), alloc2);
    -    } else {
    -      // create a request and add it to the exclusive list
    -      using LockCompletion = typename Request::LockCompletion;
    -      auto request = LockCompletion::create(ex1, std::move(handler), mtx);
    -      exclusive_queue.push_back(*request.release());
    -    }
    -  }
    -  return init.result.get();
    +  return boost::asio::async_initiate<CompletionToken, Signature>(
    +      [this] (auto handler, Mutex& mtx) {
    +        auto ex1 = mtx.get_executor();
    +
    +        std::lock_guard lock{mutex};
    +
    +        boost::system::error_code ec;
    +        if (state == Unlocked) {
    +          state = Exclusive;
    +
    +          // post a successful completion
    +          auto ex2 = boost::asio::get_associated_executor(handler, ex1);
    +          auto h = boost::asio::bind_executor(ex2, std::move(handler));
    +          boost::asio::post(bind_handler(std::move(h), ec,
    +                                         std::unique_lock{mtx, std::adopt_lock}));
    +        } else {
    +          // create a request and add it to the exclusive list
    +          using LockCompletion = typename Request::LockCompletion;
    +          auto request = LockCompletion::create(ex1, std::move(handler), mtx);
    +          exclusive_queue.push_back(*request.release());
    +        }
    +      }, token, mtx);
     }
     
     inline void SharedMutexImpl::lock()
    @@ -158,7 +156,7 @@ inline void SharedMutexImpl::lock()
       }
     }
     
    -void SharedMutexImpl::lock(boost::system::error_code& ec)
    +inline void SharedMutexImpl::lock(boost::system::error_code& ec)
     {
       std::unique_lock lock{mutex};
     
    @@ -183,7 +181,7 @@ inline bool SharedMutexImpl::try_lock()
       return false;
     }
     
    -void SharedMutexImpl::unlock()
    +inline void SharedMutexImpl::unlock()
     {
       RequestList granted;
       {
    @@ -216,28 +214,26 @@ auto SharedMutexImpl::async_lock_shared(Mutex& mtx, CompletionToken&& token)
     {
       using Request = AsyncRequest;
       using Signature = typename Request::Signature;
    -  boost::asio::async_completion init(token);
    -  auto& handler = init.completion_handler;
    -  auto ex1 = mtx.get_executor();
    -  {
    -    std::lock_guard lock{mutex};
    -
    -    boost::system::error_code ec;
    -    if (exclusive_queue.empty() && state < MaxShared) {
    -      state++;
    -
    -      auto ex2 = boost::asio::get_associated_executor(handler, ex1);
    -      auto alloc2 = boost::asio::get_associated_allocator(handler);
    -      auto b = bind_handler(std::move(handler), ec,
    -                            std::shared_lock{mtx, std::adopt_lock});
    -      ex2.post(forward_handler(std::move(b)), alloc2);
    -    } else {
    -      using LockCompletion = typename Request::LockCompletion;
    -      auto request = LockCompletion::create(ex1, std::move(handler), mtx);
    -      shared_queue.push_back(*request.release());
    -    }
    -  }
    -  return init.result.get();
    +  return boost::asio::async_initiate<CompletionToken, Signature>(
    +      [this] (auto handler, Mutex& mtx) {
    +        auto ex1 = mtx.get_executor();
    +
    +        std::lock_guard lock{mutex};
    +
    +        boost::system::error_code ec;
    +        if (exclusive_queue.empty() && state < MaxShared) {
    +          state++;
    +
    +          auto ex2 = boost::asio::get_associated_executor(handler, ex1);
    +          auto h = boost::asio::bind_executor(ex2, std::move(handler));
    +          boost::asio::post(bind_handler(std::move(h), ec,
    +                                         std::shared_lock{mtx, std::adopt_lock}));
    +        } else {
    +          using LockCompletion = typename Request::LockCompletion;
    +          auto request = LockCompletion::create(ex1, std::move(handler), mtx);
    +          shared_queue.push_back(*request.release());
    +        }
    +      }, token, mtx);
     }
     
     inline void SharedMutexImpl::lock_shared()
    @@ -249,7 +245,7 @@ inline void SharedMutexImpl::lock_shared()
       }
     }
     
    -void SharedMutexImpl::lock_shared(boost::system::error_code& ec)
    +inline void SharedMutexImpl::lock_shared(boost::system::error_code& ec)
     {
       std::unique_lock lock{mutex};
     
    @@ -307,8 +303,8 @@ inline void SharedMutexImpl::cancel()
       complete(std::move(canceled), boost::asio::error::operation_aborted);
     }
     
    -void SharedMutexImpl::complete(RequestList&& requests,
    -                               boost::system::error_code ec)
    +inline void SharedMutexImpl::complete(RequestList&& requests,
    +                                      boost::system::error_code ec)
     {
       while (!requests.empty()) {
         auto& request = requests.front();
    diff --git a/src/common/async/detail/spawn_throttle_impl.h b/src/common/async/detail/spawn_throttle_impl.h
    new file mode 100644
    index 000000000000..9030f2662335
    --- /dev/null
    +++ b/src/common/async/detail/spawn_throttle_impl.h
    @@ -0,0 +1,360 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab ft=cpp
    +
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation. See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include "common/async/cancel_on_error.h"
    +#include "common/async/service.h"
    +#include "common/async/yield_context.h"
    +
    +namespace ceph::async::detail {
    +
    +struct spawn_throttle_handler;
    +
    +// Reference-counted spawn throttle interface.
    +class spawn_throttle_impl :
    +    public boost::intrusive_ref_counter<spawn_throttle_impl>
    +{
    + public:
    +  spawn_throttle_impl(size_t limit, cancel_on_error on_error)
    +    : limit(limit), on_error(on_error),
    +      children(std::make_unique<child[]>(limit))
    +  {
    +    // initialize the free list
    +    for (size_t i = 0; i < limit; i++) {
    +      free.push_back(children[i]);
    +    }
    +  }
    +  virtual ~spawn_throttle_impl() {}
    +
    +  // factory function
    +  static auto create(optional_yield y, size_t limit, cancel_on_error on_error)
    +      -> boost::intrusive_ptr<spawn_throttle_impl>;
    +
    +  // return the completion handler for a new child. may block due to throttling
    +  // or rethrow an exception from a previously-spawned child
    +  spawn_throttle_handler get();
    +
    +  // track each spawned coroutine for cancellation. these are stored in an
    +  // array, and recycled after each use via the free list
    +  struct child : boost::intrusive::list_base_hook<> {
    +    std::optional<boost::asio::cancellation_signal> signal;
    +  };
    +
    +  using executor_type = boost::asio::any_io_executor;
    +  virtual executor_type get_executor() = 0;
    +
    +  // wait until count <= target_count
    +  virtual void wait_for(size_t target_count) = 0;
    +
    +  // cancel outstanding coroutines
    +  virtual void cancel(bool shutdown)
    +  {
    +    cancel_outstanding_from(outstanding.begin());
    +  }
    +
    +  // complete the given child coroutine
    +  virtual void on_complete(child& c, std::exception_ptr eptr)
    +  {
    +    --count;
    +
    +    // move back to the free list
    +    auto next = outstanding.erase(outstanding.iterator_to(c));
    +    c.signal.reset();
    +    free.push_back(c);
    +
    +    if (eptr && !unreported_exception) {
    +      // hold on to the first child exception until we can report it in wait()
    +      // or completion()
    +      unreported_exception = eptr;
    +
    +      // handle cancel_on_error
    +      auto cancel_from = outstanding.end();
    +      if (on_error == cancel_on_error::after) {
    +        cancel_from = next;
    +      } else if (on_error == cancel_on_error::all) {
    +        cancel_from = outstanding.begin();
    +      }
    +      cancel_outstanding_from(cancel_from);
    +    }
    +  }
    +
    + protected:
    +  const size_t limit;
    +  const cancel_on_error on_error;
    +  size_t count = 0;
    +
    +  void report_exception()
    +  {
    +    if (unreported_exception) {
    +      std::rethrow_exception(std::exchange(unreported_exception, nullptr));
    +    }
    +  }
    +
    + private:
    +  std::exception_ptr unreported_exception;
    +  std::unique_ptr<child[]> children;
    +
    +  using child_list = boost::intrusive::list<child, boost::intrusive::constant_time_size<false>>;
    +  child_list outstanding;
    +  child_list free;
    +
    +  void cancel_outstanding_from(child_list::iterator i)
    +  {
    +    while (i != outstanding.end()) {
    +      // increment before cancellation, which may invoke on_complete()
    +      // directly and remove the child from this list
    +      child& c = *i++;
    +      c.signal->emit(boost::asio::cancellation_type::terminal);
    +    }
    +  }
    +};
    +
    +// A cancellable spawn() completion handler that notifies the spawn_throttle
    +// upon completion. This holds a reference to the implementation in order to
    +// extend its lifetime. This is required for per-op cancellation because the
    +// cancellation_signals must outlive these coroutine stacks.
    +struct spawn_throttle_handler {
    +  boost::intrusive_ptr<spawn_throttle_impl> impl;
    +  spawn_throttle_impl::child& c;
    +  boost::asio::cancellation_slot slot;
    +
    +  spawn_throttle_handler(boost::intrusive_ptr<spawn_throttle_impl> impl,
    +                         spawn_throttle_impl::child& c)
    +    : impl(std::move(impl)), c(c), slot(c.signal->slot())
    +  {}
    +
    +  using executor_type = spawn_throttle_impl::executor_type;
    +  executor_type get_executor() const noexcept
    +  {
    +    return impl->get_executor();
    +  }
    +
    +  using cancellation_slot_type = boost::asio::cancellation_slot;
    +  cancellation_slot_type get_cancellation_slot() const noexcept
    +  {
    +    return slot;
    +  }
    +
    +  void operator()(std::exception_ptr eptr)
    +  {
    +    impl->on_complete(c, eptr);
    +  }
    +};
    +
    +spawn_throttle_handler spawn_throttle_impl::get()
    +{
    +  report_exception(); // throw unreported exception
    +
    +  if (count >= limit) {
    +    wait_for(limit - 1);
    +  }
    +
    +  ++count;
    +
    +  // move a free child to the outstanding list
    +  child& c = free.front();
    +  free.pop_front();
    +  outstanding.push_back(c);
    +
    +  // spawn the coroutine with its associated cancellation signal
    +  c.signal.emplace();
    +  return {this, c};
    +}
    +
    +
    +// Spawn throttle implementation for use in synchronous contexts where wait()
    +// blocks the calling thread until completion.
    +class sync_spawn_throttle_impl final : public spawn_throttle_impl {
    +  static constexpr int concurrency = 1; // only run from a single thread
    + public:
    +  sync_spawn_throttle_impl(size_t limit, cancel_on_error on_error)
    +    : spawn_throttle_impl(limit, on_error),
    +      ctx(std::in_place, concurrency)
    +  {}
    +
    +  executor_type get_executor() override
    +  {
    +    return ctx->get_executor();
    +  }
    +
    +  void wait_for(size_t target_count) override
    +  {
    +    while (count > target_count) {
    +      if (ctx->stopped()) {
    +        ctx->restart();
    +      }
    +      ctx->run_one();
    +    }
    +
    +    report_exception(); // throw unreported exception
    +  }
    +
    +  void cancel(bool shutdown) override
    +  {
    +    spawn_throttle_impl::cancel(shutdown);
    +
    +    if (shutdown) {
    +      // destroy the io_context to trigger two-phase shutdown which
    +      // destroys any completion handlers with a reference to 'this'
    +      ctx.reset();
    +      count = 0;
    +    }
    +  }
    +
    + private:
    +  std::optional<boost::asio::io_context> ctx;
    +};
    +
    +// Spawn throttle implementation for use in asynchronous contexts where wait()
    +// suspends the calling stackful coroutine.
    +class async_spawn_throttle_impl final :
    +    public spawn_throttle_impl,
    +    public service_list_base_hook
    +{
    + public:
    +  async_spawn_throttle_impl(boost::asio::yield_context yield,
    +                            size_t limit, cancel_on_error on_error)
    +    : spawn_throttle_impl(limit, on_error),
    +      svc(boost::asio::use_service<service<async_spawn_throttle_impl>>(
    +              boost::asio::query(yield.get_executor(),
    +                                 boost::asio::execution::context))),
    +      yield(yield)
    +  {
    +    // register for service_shutdown() notifications
    +    svc.add(*this);
    +  }
    +
    +  ~async_spawn_throttle_impl()
    +  {
    +    svc.remove(*this);
    +  }
    +
    +  executor_type get_executor() override
    +  {
    +    return yield.get_executor();
    +  }
    +
    +  void service_shutdown()
    +  {
    +    waiter.reset();
    +  }
    +
    + private:
    +  service<async_spawn_throttle_impl>& svc;
    +  boost::asio::yield_context yield;
    +
    +  using WaitSignature = void(boost::system::error_code);
    +  struct wait_state {
    +    using Work = boost::asio::executor_work_guard<
    +        boost::asio::any_io_executor>;
    +    using Handler = typename boost::asio::async_result<
    +        boost::asio::yield_context, WaitSignature>::handler_type;
    +
    +    Work work;
    +    Handler handler;
    +
    +    explicit wait_state(Handler&& h)
    +      : work(make_work_guard(h)),
    +        handler(std::move(h))
    +    {}
    +  };
    +  std::optional<wait_state> waiter;
    +  size_t wait_for_count = 0;
    +
    +  struct op_cancellation {
    +    async_spawn_throttle_impl* self;
    +    explicit op_cancellation(async_spawn_throttle_impl* self) noexcept
    +      : self(self) {}
    +    void operator()(boost::asio::cancellation_type type) {
    +      if (type != boost::asio::cancellation_type::none) {
    +        self->cancel(false);
    +      }
    +    }
    +  };
    +
    +  void wait_for(size_t target_count) override
    +  {
    +    if (count > target_count) {
    +      wait_for_count = target_count;
    +
    +      boost::asio::async_initiate<boost::asio::yield_context, WaitSignature>(
    +          [this] (auto handler) {
    +            auto slot = get_associated_cancellation_slot(handler);
    +            if (slot.is_connected()) {
    +              slot.template emplace<op_cancellation>(this);
    +            }
    +            waiter.emplace(std::move(handler));
    +          }, yield);
    +      // this is a coroutine, so the wait has completed by this point
    +    }
    +
    +    report_exception(); // throw unreported exception
    +  }
    +
    +  void wait_complete(boost::system::error_code ec)
    +  {
    +    auto w = std::move(*waiter);
    +    waiter.reset();
    +    boost::asio::dispatch(boost::asio::append(std::move(w.handler), ec));
    +  }
    +
    +  void on_complete(child& c, std::exception_ptr eptr) override
    +  {
    +    spawn_throttle_impl::on_complete(c, eptr);
    +
    +    if (waiter && count <= wait_for_count) {
    +      wait_complete({});
    +    }
    +  }
    +
    +  void cancel(bool shutdown) override
    +  {
    +    spawn_throttle_impl::cancel(shutdown);
    +
    +    if (waiter) {
    +      wait_complete(make_error_code(boost::asio::error::operation_aborted));
    +    }
    +  }
    +};
    +
    +auto spawn_throttle_impl::create(optional_yield y, size_t limit,
    +                                 cancel_on_error on_error)
    +    -> boost::intrusive_ptr<spawn_throttle_impl>
    +{
    +  if (y) {
    +    auto yield = y.get_yield_context();
    +    return new async_spawn_throttle_impl(yield, limit, on_error);
    +  } else {
    +    return new sync_spawn_throttle_impl(limit, on_error);
    +  }
    +}
    +
    +} // namespace ceph::async::detail
    diff --git a/src/common/async/forward_handler.h b/src/common/async/forward_handler.h
    index ae88cc83f464..1491ef6085d4 100644
    --- a/src/common/async/forward_handler.h
    +++ b/src/common/async/forward_handler.h
    @@ -15,7 +15,8 @@
     #ifndef CEPH_ASYNC_FORWARD_HANDLER_H
     #define CEPH_ASYNC_FORWARD_HANDLER_H
     
    -#include 
    +#include 
    +#include 
     
     namespace ceph::async {
     
    diff --git a/src/common/async/max_concurrent_for_each.h b/src/common/async/max_concurrent_for_each.h
    new file mode 100644
    index 000000000000..dd272b957eb0
    --- /dev/null
    +++ b/src/common/async/max_concurrent_for_each.h
    @@ -0,0 +1,136 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab ft=cpp
    +
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation. See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include "cancel_on_error.h"
    +#include "co_throttle.h"
    +#include "yield_context.h"
    +#include "spawn_throttle.h"
    +
    +namespace ceph::async {
    +
    +/// Call a coroutine with each element in the given range then wait for all of
    +/// them to complete. The first exception is rethrown to the caller. The
    +/// cancel_on_error option controls whether these exceptions trigger the
    +/// cancellation of other children. The number of outstanding coroutines
    +/// is limited by the max_concurrent argument.
    +///
    +/// Example:
    +/// \code
    +/// void child(task& t, boost::asio::yield_context yield);
    +///
    +/// void parent(std::span<task> tasks, optional_yield y)
    +/// {
    +///   // process all tasks, up to 10 at a time
    +///   max_concurrent_for_each(tasks, 10, y, child);
    +/// }
    +/// \endcode
    +template <typename Iterator, typename Sentinel, typename Func,
    +          typename Value = std::iter_value_t<Iterator>>
    +    requires (std::input_iterator<Iterator> &&
    +              std::sentinel_for<Sentinel, Iterator> &&
    +              std::invocable<Func, Value&, boost::asio::yield_context>)
    +void max_concurrent_for_each(Iterator begin,
    +                             Sentinel end,
    +                             size_t max_concurrent,
    +                             optional_yield y,
    +                             Func&& func,
    +                             cancel_on_error on_error = cancel_on_error::none)
    +{
    +  if (begin == end) {
    +    return;
    +  }
    +  auto throttle = spawn_throttle{y, max_concurrent, on_error};
    +  for (Iterator i = begin; i != end; ++i) {
    +    throttle.spawn([&func, &val = *i] (boost::asio::yield_context yield) {
    +        func(val, yield);
    +      });
    +  }
    +  throttle.wait();
    +}
    +
    +/// \overload
    +template <typename Range, typename Func,
    +          typename Value = std::ranges::range_value_t<Range>>
    +    requires (std::ranges::range<Range> &&
    +              std::invocable<Func, Value&, boost::asio::yield_context>)
    +auto max_concurrent_for_each(Range&& range,
    +                             size_t max_concurrent,
    +                             optional_yield y,
    +                             Func&& func,
    +                             cancel_on_error on_error = cancel_on_error::none)
    +{
    +  return max_concurrent_for_each(std::begin(range), std::end(range),
    +                                 max_concurrent, y, std::forward<Func>(func),
    +                                 on_error);
    +}
    +
    +/// \overload
    +template <typename Iterator, typename Sentinel, typename VoidAwaitableFactory,
    +          typename Value = std::iter_value_t<Iterator>,
    +          typename VoidAwaitable = std::invoke_result_t<
    +              VoidAwaitableFactory, Value>,
    +          typename AwaitableT = typename VoidAwaitable::value_type,
    +          typename AwaitableExecutor = typename VoidAwaitable::executor_type>
    +    requires (std::input_iterator<Iterator> &&
    +              std::sentinel_for<Sentinel, Iterator> &&
    +              std::same_as<AwaitableT, void> &&
    +              boost::asio::execution::executor<AwaitableExecutor>)
    +auto max_concurrent_for_each(Iterator begin,
    +                             Sentinel end,
    +                             size_t max_concurrent,
    +                             VoidAwaitableFactory&& factory,
    +                             cancel_on_error on_error = cancel_on_error::none)
    +    -> boost::asio::awaitable<void, AwaitableExecutor>
    +{
    +  if (begin == end) {
    +    co_return;
    +  }
    +  auto ex = co_await boost::asio::this_coro::executor;
    +  auto throttle = co_throttle{ex, max_concurrent, on_error};
    +  for (Iterator i = begin; i != end; ++i) {
    +    co_await throttle.spawn(factory(*i));
    +  }
    +  co_await throttle.wait();
    +}
    +
    +/// \overload
    +template <typename Range, typename VoidAwaitableFactory,
    +          typename Value = std::ranges::range_value_t<Range>,
    +          typename VoidAwaitable = std::invoke_result_t<
    +              VoidAwaitableFactory, Value>,
    +          typename AwaitableT = typename VoidAwaitable::value_type,
    +          typename AwaitableExecutor = typename VoidAwaitable::executor_type>
    +    requires (std::ranges::range<Range> &&
    +              std::same_as<AwaitableT, void> &&
    +              boost::asio::execution::executor<AwaitableExecutor>)
    +auto max_concurrent_for_each(Range&& range,
    +                             size_t max_concurrent,
    +                             VoidAwaitableFactory&& factory,
    +                             cancel_on_error on_error = cancel_on_error::none)
    +    -> boost::asio::awaitable<void, AwaitableExecutor>
    +{
    +  return max_concurrent_for_each(
    +      std::begin(range), std::end(range), max_concurrent,
    +      std::forward<VoidAwaitableFactory>(factory), on_error);
    +}
    +
    +} // namespace ceph::async
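    The header's own doc comment only illustrates the yield-based overload. A minimal
    sketch of the awaitable-factory overload under the same assumptions (a hypothetical
    task type and child() coroutine, not part of this patch):

        boost::asio::awaitable<void> child(task& t);

        boost::asio::awaitable<void> parent(std::span<task> tasks)
        {
          // run child() for every task, with at most 10 coroutines in flight
          co_await ceph::async::max_concurrent_for_each(
              tasks, 10, [] (task& t) { return child(t); });
        }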
    diff --git a/src/common/async/parallel_for_each.h b/src/common/async/parallel_for_each.h
    new file mode 100644
    index 000000000000..cb4970378e3a
    --- /dev/null
    +++ b/src/common/async/parallel_for_each.h
    @@ -0,0 +1,86 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation. See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include "co_spawn_group.h"
    +
    +namespace ceph::async {
    +
    +/// Call a coroutine with each element in the given range then wait for all
    +/// of them to complete. The first exception is rethrown to the caller. The
    +/// cancel_on_error option controls whether these exceptions trigger the
    +/// cancellation of other children.
    +///
    +/// Example:
    +/// \code
    +/// awaitable<void> child(task& t);
    +///
    +/// awaitable<void> parent(std::span<task> tasks)
    +/// {
    +///   co_await parallel_for_each(tasks.begin(), tasks.end(), child);
    +/// }
    +/// \endcode
    +template <typename Iterator, typename Sentinel, typename VoidAwaitableFactory,
    +          typename Value = std::iter_value_t<Iterator>,
    +          typename VoidAwaitable = std::invoke_result_t<
    +              VoidAwaitableFactory, Value>,
    +          typename AwaitableT = typename VoidAwaitable::value_type,
    +          typename AwaitableExecutor = typename VoidAwaitable::executor_type>
    +    requires (std::input_iterator<Iterator> &&
    +              std::sentinel_for<Sentinel, Iterator> &&
    +              std::same_as<AwaitableT, void> &&
    +              boost::asio::execution::executor<AwaitableExecutor>)
    +auto parallel_for_each(Iterator begin, Sentinel end,
    +                       VoidAwaitableFactory&& factory,
    +                       cancel_on_error on_error = cancel_on_error::none)
    +    -> boost::asio::awaitable<void, AwaitableExecutor>
    +{
    +  const size_t count = std::ranges::distance(begin, end);
    +  if (!count) {
    +    co_return;
    +  }
    +  auto ex = co_await boost::asio::this_coro::executor;
    +  auto group = co_spawn_group{ex, count, on_error};
    +  for (Iterator i = begin; i != end; ++i) {
    +    group.spawn(factory(*i));
    +  }
    +  co_await group.wait();
    +}
    +
    +/// \overload
    +template <typename Range, typename VoidAwaitableFactory,
    +          typename Value = std::ranges::range_value_t<Range>,
    +          typename VoidAwaitable = std::invoke_result_t<
    +              VoidAwaitableFactory, Value>,
    +          typename AwaitableT = typename VoidAwaitable::value_type,
    +          typename AwaitableExecutor = typename VoidAwaitable::executor_type>
    +    requires (std::ranges::range<Range> &&
    +              std::same_as<AwaitableT, void> &&
    +              boost::asio::execution::executor<AwaitableExecutor>)
    +auto parallel_for_each(Range&& range, VoidAwaitableFactory&& factory,
    +                       cancel_on_error on_error = cancel_on_error::none)
    +    -> boost::asio::awaitable<void, AwaitableExecutor>
    +{
    +  return parallel_for_each(std::begin(range), std::end(range),
    +                           std::move(factory), on_error);
    +}
    +
    +} // namespace ceph::async
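    A minimal sketch of the range overload, reusing the hypothetical task type and
    child() coroutine from the doc comment above; cancel_on_error::all asks the group
    to cancel the remaining children once one of them throws:

        boost::asio::awaitable<void> child(task& t);

        boost::asio::awaitable<void> parent(std::vector<task>& tasks)
        {
          co_await ceph::async::parallel_for_each(
              tasks, [] (task& t) { return child(t); },
              ceph::async::cancel_on_error::all);
        }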
    diff --git a/src/common/async/service.h b/src/common/async/service.h
    new file mode 100644
    index 000000000000..f611e7c81298
    --- /dev/null
    +++ b/src/common/async/service.h
    @@ -0,0 +1,75 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include <mutex>
    +#include <boost/asio/execution_context.hpp>
    +#include <boost/intrusive/list.hpp>
    +
    +namespace ceph::async {
    +
    +struct service_tag {};
    +using service_list_base_hook = boost::intrusive::list_base_hook<
    +    boost::intrusive::tag<service_tag>>;
    +
    +/// Service for two-phase execution_context shutdown, which breaks ownership
    +/// cycles between completion handlers and their io objects. Tracks objects
    +/// which may have outstanding completion handlers, and calls their member
    +/// function service_shutdown() when the execution_context is shutting down.
    +/// This member function should destroy any memory associated with its
    +/// outstanding completion handlers.
    +///
    +/// Requirements for IoObject:
    +/// * Inherits publicly from service_list_base_hook
    +/// * Has public member function service_shutdown()
    +/// * Calls add(*this) on construction and remove(*this) on destruction.
    +template <typename IoObject>
    +class service : public boost::asio::execution_context::service {
    +  using base_hook = boost::intrusive::base_hook<service_list_base_hook>;
    +  boost::intrusive::list<IoObject, base_hook> entries;
    +  std::mutex mutex;
    +
    +  /// Called by the execution_context on shutdown
    +  void shutdown() override {
    +    while (!entries.empty()) {
    +      auto& entry = entries.front();
    +      entries.pop_front();
    +      entry.service_shutdown();
    +    }
    +  }
    + public:
    +  using key_type = service;
    +  static inline boost::asio::execution_context::id id;
    +
    +  explicit service(boost::asio::execution_context& ctx)
    +      : boost::asio::execution_context::service(ctx) {}
    +
    +  /// Register an io object for notification of service_shutdown()
    +  void add(IoObject& entry) {
    +    auto lock = std::scoped_lock{mutex};
    +    entries.push_back(entry);
    +  }
    +  /// Unregister an object
    +  void remove(IoObject& entry) {
    +    auto lock = std::scoped_lock{mutex};
    +    if (entries.empty()) {
    +      // already shut down
    +    } else {
    +      entries.erase(entries.iterator_to(entry));
    +    }
    +  }
    +};
    +
    +} // namespace ceph::async
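    A hedged sketch of an io object that satisfies the documented requirements;
    my_io_object is illustrative and mirrors how async_spawn_throttle_impl above uses
    the service:

        #include <boost/asio/execution_context.hpp>
        #include "common/async/service.h"

        class my_io_object : public ceph::async::service_list_base_hook {
          ceph::async::service<my_io_object>& svc;
         public:
          explicit my_io_object(boost::asio::execution_context& ctx)
            : svc(boost::asio::use_service<ceph::async::service<my_io_object>>(ctx))
          {
            svc.add(*this); // register for service_shutdown() notifications
          }
          ~my_io_object()
          {
            svc.remove(*this);
          }
          void service_shutdown()
          {
            // drop any stored completion handlers that reference this object
          }
        };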
    diff --git a/src/common/async/spawn_throttle.h b/src/common/async/spawn_throttle.h
    new file mode 100644
    index 000000000000..1fdff1928c7f
    --- /dev/null
    +++ b/src/common/async/spawn_throttle.h
    @@ -0,0 +1,126 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab ft=cpp
    +
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation. See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include "detail/spawn_throttle_impl.h"
    +
    +#include <boost/asio/spawn.hpp>
    +#include "cancel_on_error.h"
    +#include "yield_context.h"
    +
    +namespace ceph::async {
    +
    +/// A coroutine throttle that allows a thread of execution to spawn and manage
    +/// multiple child coroutines, while enforcing an upper bound on concurrency.
    +/// The parent may either be a synchronous function or a stackful coroutine,
    +/// depending on the optional_yield constructor argument.
    +///
    +/// Child coroutines take boost::asio::yield_context as the only argument.
    +/// Exceptions thrown by children are reported to the caller on its next call
    +/// to spawn() or wait(). The cancel_on_error option controls whether these
    +/// exceptions trigger the cancellation of other children.
    +///
    +/// All child coroutines are canceled by cancel() or spawn_throttle destruction.
    +/// This allows a parent function to share memory with its child coroutines
    +/// without fear of dangling references.
    +///
    +/// This class is not thread-safe. Member functions should be called from the
    +/// parent thread of execution only.
    +///
    +/// Example:
    +/// @code
    +/// void child(boost::asio::yield_context yield);
    +///
    +/// void parent(size_t count, optional_yield y)
    +/// {
    +///   // spawn all children, up to 10 at a time
    +///   auto throttle = ceph::async::spawn_throttle{y, 10};
    +///
    +///   for (size_t i = 0; i < count; i++) {
    +///     throttle.spawn(child);
    +///   }
    +///   throttle.wait();
    +/// }
    +/// @endcode
    +class spawn_throttle {
    +  using impl_type = detail::spawn_throttle_impl;
    +  boost::intrusive_ptr impl;
    +
    + public:
    +  spawn_throttle(optional_yield y, size_t limit,
    +                 cancel_on_error on_error = cancel_on_error::none)
    +    : impl(detail::spawn_throttle_impl::create(y, limit, on_error))
    +  {}
    +
    +  spawn_throttle(spawn_throttle&&) = default;
    +  spawn_throttle& operator=(spawn_throttle&&) = default;
    +  // disable copy for unique ownership
    +  spawn_throttle(const spawn_throttle&) = delete;
    +  spawn_throttle& operator=(const spawn_throttle&) = delete;
    +
    +  /// Cancel outstanding coroutines on destruction.
    +  ~spawn_throttle()
    +  {
    +    if (impl) {
    +      impl->cancel(true);
    +    }
    +  }
    +
    +  using executor_type = impl_type::executor_type;
    +  executor_type get_executor()
    +  {
    +    return impl->get_executor();
    +  }
    +
    +  /// Spawn a cancellable coroutine to call the given function, passing its
    +  /// boost::asio::yield_context as the only argument.
    +  ///
    +  /// Before spawning, this function may block until a throttle unit becomes
    +  /// available. If one or more previously-spawned coroutines exit with an
    +  /// exception, the first such exception is rethrown here.
    +  template <typename F>
    +  void spawn(F&& f)
    +  {
    +    boost::asio::spawn(get_executor(), std::forward<F>(f), impl->get());
    +  }
    +
    +  /// \overload
    +  template <typename StackAllocator, typename F>
    +  void spawn(std::allocator_arg_t arg, StackAllocator&& alloc, F&& f)
    +  {
    +    boost::asio::spawn(get_executor(), arg, std::forward<StackAllocator>(alloc),
    +                       std::forward<F>(f), impl->get());
    +  }
    +
    +  /// Wait for all outstanding completions before returning. If any
    +  /// of the spawned coroutines exits with an exception, the first exception
    +  /// is rethrown.
    +  ///
    +  /// After wait() completes, whether successfully or by exception, the
    +  /// throttle can be reused to spawn and await additional coroutines.
    +  void wait()
    +  {
    +    impl->wait_for(0);
    +  }
    +
    +  /// Cancel all outstanding coroutines.
    +  void cancel()
    +  {
    +    impl->cancel(false);
    +  }
    +};
    +
    +} // namespace ceph::async
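    A minimal sketch of how the optional_yield argument selects between the two
    implementations, reusing the parent() function from the class comment above; the
    null_yield caller blocks its thread inside wait(), while the coroutine caller
    suspends instead:

        void call_synchronously(size_t count)
        {
          parent(count, null_yield);
        }

        void call_from_coroutine(boost::asio::io_context& ioc, size_t count)
        {
          boost::asio::spawn(ioc, [count] (boost::asio::yield_context yield) {
              parent(count, optional_yield{yield});
            }, [] (std::exception_ptr eptr) {
              if (eptr) std::rethrow_exception(eptr);
            });
        }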
    diff --git a/src/common/async/yield_context.h b/src/common/async/yield_context.h
    index 05e6ca6140c5..fd9a20901aa5 100644
    --- a/src/common/async/yield_context.h
    +++ b/src/common/async/yield_context.h
    @@ -17,29 +17,18 @@
     #include 
     #include 
     #include 
    +#include 
     
     #include "acconfig.h"
     
    -#include 
    -
    -// use explicit executor types instead of the type-erased boost::asio::executor.
    -// coroutines wrap the default io_context executor with a strand executor
    -using yield_context = spawn::basic_yield_context<
    -    boost::asio::executor_binder<void(*)(),
    -        boost::asio::strand<boost::asio::io_context::executor_type>>>;
    -
    -/// optional-like wrapper for a spawn::yield_context and its associated
    -/// boost::asio::io_context. operations that take an optional_yield argument
    -/// will, when passed a non-empty yield context, suspend this coroutine instead
    -/// of the blocking the thread of execution
    +/// optional-like wrapper for a boost::asio::yield_context. operations that take
    +/// an optional_yield argument will, when passed a non-empty yield context,
    +/// suspend this coroutine instead of blocking the thread of execution
     class optional_yield {
    -  boost::asio::io_context *c = nullptr;
    -  yield_context *y = nullptr;
    +  boost::asio::yield_context *y = nullptr;
      public:
       /// construct with a valid io and yield_context
    -  explicit optional_yield(boost::asio::io_context& c,
    -                          yield_context& y) noexcept
    -    : c(&c), y(&y) {}
    +  optional_yield(boost::asio::yield_context& y) noexcept : y(&y) {}
     
       /// type tag to construct an empty object
       struct empty_t {};
    @@ -48,11 +37,8 @@ class optional_yield {
       /// implicit conversion to bool, returns true if non-empty
       operator bool() const noexcept { return y; }
     
    -  /// return a reference to the associated io_context. only valid if non-empty
    -  boost::asio::io_context& get_io_context() const noexcept { return *c; }
    -
       /// return a reference to the yield_context. only valid if non-empty
    -  yield_context& get_yield_context() const noexcept { return *y; }
    +  boost::asio::yield_context& get_yield_context() const noexcept { return *y; }
     };
     
     // type tag object to construct an empty optional_yield
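    A hedged sketch of a callee after this change: get_yield_context() now hands back
    a plain boost::asio::yield_context that can be used directly as a completion
    token, and the old get_io_context() accessor is gone. The timer argument is
    illustrative:

        void wait_a_bit(boost::asio::steady_timer& timer, optional_yield y)
        {
          if (y) {
            boost::system::error_code ec;
            timer.async_wait(y.get_yield_context()[ec]); // suspends the coroutine
          } else {
            timer.wait(); // blocks the thread
          }
        }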
    diff --git a/src/common/async/yield_waiter.h b/src/common/async/yield_waiter.h
    new file mode 100644
    index 000000000000..9c14d9bafe4b
    --- /dev/null
    +++ b/src/common/async/yield_waiter.h
    @@ -0,0 +1,191 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab ft=cpp
    +
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation. See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +
    +namespace ceph::async {
    +
    +/// Captures a yield_context handler for deferred completion or cancellation.
    +template <typename Ret>
    +class yield_waiter {
    + public:
    +  /// Function signature for the completion handler.
    +  using Signature = void(boost::system::error_code, Ret);
    +
    +  yield_waiter() = default;
    +
    +  // copy and move are disabled because the cancellation handler captures 'this'
    +  yield_waiter(const yield_waiter&) = delete;
    +  yield_waiter& operator=(const yield_waiter&) = delete;
    +
    +  /// Returns true if there's a handler awaiting completion.
    +  operator bool() const { return state.has_value(); }
    +
    +  /// Suspends the given yield_context until the captured handler is invoked
    +  /// via complete() or cancel().
    +  template <typename CompletionToken>
    +  auto async_wait(CompletionToken&& token)
    +  {
    +    return boost::asio::async_initiate<CompletionToken, Signature>(
    +        [this] (handler_type h) {
    +          auto slot = get_associated_cancellation_slot(h);
    +          if (slot.is_connected()) {
    +            slot.template emplace<op_cancellation>(this);
    +          }
    +          state.emplace(std::move(h));
    +        }, token);
    +  }
    +
    +  /// Schedule the completion handler with the given arguments.
    +  void complete(boost::system::error_code ec, Ret value)
    +  {
    +    auto s = std::move(*state);
    +    state.reset();
    +    auto h = boost::asio::append(std::move(s.handler), ec, std::move(value));
    +    boost::asio::dispatch(std::move(h));
    +  }
    +
    +  /// Destroy the completion handler.
    +  void shutdown()
    +  {
    +    state.reset();
    +  }
    +
    + private:
    +  using handler_type = typename boost::asio::async_result<
    +      boost::asio::yield_context, Signature>::handler_type;
    +  using work_guard = boost::asio::executor_work_guard<
    +      boost::asio::any_io_executor>;
    +
    +  struct handler_state {
    +    handler_type handler;
    +    work_guard work;
    +
    +    explicit handler_state(handler_type&& h)
    +      : handler(std::move(h)),
    +        work(make_work_guard(handler))
    +    {}
    +  };
    +  std::optional state;
    +
    +  struct op_cancellation {
    +    yield_waiter* self;
    +    op_cancellation(yield_waiter* self) : self(self) {}
    +    void operator()(boost::asio::cancellation_type type) {
    +      if (type != boost::asio::cancellation_type::none) {
    +        self->cancel();
    +      }
    +    }
    +  };
    +
    +  // Cancel the coroutine with an operation_aborted error.
    +  void cancel()
    +  {
    +    if (state) {
    +      complete(make_error_code(boost::asio::error::operation_aborted), Ret{});
    +    }
    +  }
    +};
    +
    +// specialization for Ret=void
    +template <>
    +class yield_waiter<void> {
    + public:
    +  /// Function signature for the completion handler.
    +  using Signature = void(boost::system::error_code);
    +
    +  yield_waiter() = default;
    +
    +  // copy and move are disabled because the cancellation handler captures 'this'
    +  yield_waiter(const yield_waiter&) = delete;
    +  yield_waiter& operator=(const yield_waiter&) = delete;
    +
    +  /// Returns true if there's a handler awaiting completion.
    +  operator bool() const { return state.has_value(); }
    +
    +  /// Suspends the given yield_context until the captured handler is invoked
    +  /// via complete() or cancel().
    +  template <typename CompletionToken>
    +  auto async_wait(CompletionToken&& token)
    +  {
    +    return boost::asio::async_initiate<CompletionToken, Signature>(
    +        [this] (handler_type h) {
    +          auto slot = get_associated_cancellation_slot(h);
    +          if (slot.is_connected()) {
    +            slot.template emplace<op_cancellation>(this);
    +          }
    +          state.emplace(std::move(h));
    +        }, token);
    +  }
    +
    +  /// Schedule the completion handler with the given arguments.
    +  void complete(boost::system::error_code ec)
    +  {
    +    auto s = std::move(*state);
    +    state.reset();
    +    boost::asio::dispatch(boost::asio::append(std::move(s.handler), ec));
    +  }
    +
    +  /// Destroy the completion handler.
    +  void shutdown()
    +  {
    +    state.reset();
    +  }
    +
    + private:
    +  using handler_type = typename boost::asio::async_result<
    +      boost::asio::yield_context, Signature>::handler_type;
    +  using work_guard = boost::asio::executor_work_guard<
    +      boost::asio::any_io_executor>;
    +
    +  struct handler_state {
    +    handler_type handler;
    +    work_guard work;
    +
    +    explicit handler_state(handler_type&& h)
    +      : handler(std::move(h)),
    +        work(make_work_guard(handler))
    +    {}
    +  };
    +  std::optional state;
    +
    +  struct op_cancellation {
    +    yield_waiter* self;
    +    op_cancellation(yield_waiter* self) : self(self) {}
    +    void operator()(boost::asio::cancellation_type type) {
    +      if (type != boost::asio::cancellation_type::none) {
    +        self->cancel();
    +      }
    +    }
    +  };
    +
    +  // Cancel the coroutine with an operation_aborted error.
    +  void cancel()
    +  {
    +    if (state) {
    +      complete(make_error_code(boost::asio::error::operation_aborted));
    +    }
    +  }
    +};
    +
    +} // namespace ceph::async
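    A hedged usage sketch for yield_waiter<void>: one coroutine parks itself in
    async_wait() until some other completion path calls complete(). The yield_event
    wrapper is illustrative, not part of the patch:

        class yield_event {
          ceph::async::yield_waiter<void> waiter;
         public:
          // suspend the calling coroutine until notify() or cancellation
          void wait(boost::asio::yield_context yield) {
            boost::system::error_code ec;
            waiter.async_wait(yield[ec]);
          }
          // wake the waiter, if any
          void notify() {
            if (waiter) {
              waiter.complete(boost::system::error_code{});
            }
          }
        };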
    diff --git a/src/common/bit_vector.hpp b/src/common/bit_vector.hpp
    index 9ce3e8b1ebb2..961d9a0192ee 100644
    --- a/src/common/bit_vector.hpp
    +++ b/src/common/bit_vector.hpp
    @@ -83,7 +83,7 @@ class BitVector
       };
     
     public:
    -  template <typename BitVectorT, typename DataIterator>
    +  template <typename BitVectorT, typename DataIteratorT, typename ReferenceT>
       class IteratorImpl {
       private:
         friend class BitVector;
    @@ -94,7 +94,7 @@ class BitVector
         // cached derived values
         uint64_t m_index = 0;
         uint64_t m_shift = 0;
    -    DataIterator m_data_iterator;
    +    DataIteratorT m_data_iterator;
     
         IteratorImpl(BitVectorT *bit_vector, uint64_t offset)
           : m_bit_vector(bit_vector),
    @@ -129,7 +129,7 @@ class BitVector
     
         inline IteratorImpl operator++(int) {
           IteratorImpl iterator_impl(*this);
    -      ++iterator_impl;
    +      ++*this;
           return iterator_impl;
         }
         inline IteratorImpl operator+(uint64_t offset) {
    @@ -145,17 +145,15 @@ class BitVector
           return (m_offset != rhs.m_offset || m_bit_vector != rhs.m_bit_vector);
         }
     
    -    inline ConstReference operator*() const {
    -      return ConstReference(m_data_iterator, m_shift);
    -    }
    -    inline Reference operator*() {
    -      return Reference(m_data_iterator, m_shift);
    +    inline ReferenceT operator*() const {
    +      return ReferenceT(m_data_iterator, m_shift);
         }
       };
     
       typedef IteratorImpl<const BitVector,
    -                       bufferlist::const_iterator> ConstIterator;
    -  typedef IteratorImpl<BitVector, bufferlist::iterator> Iterator;
    +                       bufferlist::const_iterator,
    +                       ConstReference> ConstIterator;
    +  typedef IteratorImpl<BitVector, bufferlist::iterator, Reference> Iterator;
     
       static const uint32_t BLOCK_SIZE;
       static const uint8_t BIT_COUNT = _bit_count;
    diff --git a/src/common/buffer.cc b/src/common/buffer.cc
    index b363b99573f6..4443ef141249 100644
    --- a/src/common/buffer.cc
    +++ b/src/common/buffer.cc
    @@ -19,6 +19,8 @@
     
     #include 
     
    +#include 
    +
     #include "include/ceph_assert.h"
     #include "include/types.h"
     #include "include/buffer_raw.h"
    @@ -827,8 +829,9 @@ static ceph::spinlock debug_lock;
       {
         length = std::min(length, get_remaining());
         while (length > 0) {
    -      const char *p;
    +      const char *p = nullptr;
           size_t l = get_ptr_and_advance(length, &p);
    +      ceph_assert(p);
           crc = ceph_crc32c(crc, (unsigned char*)p, l);
           length -= l;
         }
    diff --git a/src/common/ceph_argparse.cc b/src/common/ceph_argparse.cc
    index 9b989fe7270a..ad12e0b67641 100644
    --- a/src/common/ceph_argparse.cc
    +++ b/src/common/ceph_argparse.cc
    @@ -16,6 +16,7 @@
     #include "auth/Auth.h"
     #include "common/ceph_argparse.h"
     #include "common/config.h"
    +#include "common/strtol.h" // for strict_strtof()
     #include "common/version.h"
     #include "include/str_list.h"
     
    diff --git a/src/common/ceph_argparse.h b/src/common/ceph_argparse.h
    index d63a2bdd796a..5a160dd0b797 100644
    --- a/src/common/ceph_argparse.h
    +++ b/src/common/ceph_argparse.h
    @@ -29,6 +29,8 @@
     #include "common/entity_name.h"
     #include "include/encoding.h"
     
    +class entity_addrvec_t;
    +
     /////////////////////// Types ///////////////////////
     class CephInitParameters
     {
    diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc
    index d26f24511d22..68b92c45d37e 100644
    --- a/src/common/ceph_context.cc
    +++ b/src/common/ceph_context.cc
    @@ -1042,7 +1042,7 @@ void CephContext::notify_pre_fork()
     
     void CephContext::notify_post_fork()
     {
    -  ceph::spin_unlock(&_fork_watchers_lock);
    +  std::lock_guard lg(_fork_watchers_lock);
       for (auto &&t : _fork_watchers)
         t->handle_post_fork();
     }
    diff --git a/src/common/ceph_context.h b/src/common/ceph_context.h
    index f1877647877a..6a02d5c5bf1f 100644
    --- a/src/common/ceph_context.h
    +++ b/src/common/ceph_context.h
    @@ -282,10 +282,20 @@ class CephContext {
       void set_mon_addrs(const MonMap& mm);
       void set_mon_addrs(const std::vector<entity_addrvec_t>& in) {
         auto ptr = std::make_shared<std::vector<entity_addrvec_t>>(in);
    +#if defined(__GNUC__) && __GNUC__ < 12
    +    // workaround for GCC 11 bug
         atomic_store_explicit(&_mon_addrs, std::move(ptr), std::memory_order_relaxed);
    +#else
    +    _mon_addrs.store(std::move(ptr), std::memory_order_relaxed);
    +#endif
       }
       std::shared_ptr<std::vector<entity_addrvec_t>> get_mon_addrs() const {
    +#if defined(__GNUC__) && __GNUC__ < 12
    +    // workaround for GCC 11 bug
         auto ptr = atomic_load_explicit(&_mon_addrs, std::memory_order_relaxed);
    +#else
    +    auto ptr = _mon_addrs.load(std::memory_order_relaxed);
    +#endif
         return ptr;
       }
     
    @@ -306,7 +316,12 @@ class CephContext {
     
       int _crypto_inited;
     
    +#if defined(__GNUC__) && __GNUC__ < 12
    +  // workaround for GCC 11 bug
       std::shared_ptr<std::vector<entity_addrvec_t>> _mon_addrs;
    +#else
    +  std::atomic<std::shared_ptr<std::vector<entity_addrvec_t>>> _mon_addrs;
    +#endif
     
       /* libcommon service thread.
        * SIGHUP wakes this thread, which then reopens logfiles */
    diff --git a/src/common/ceph_crypto.h b/src/common/ceph_crypto.h
    index 5beda7a12522..6b2fa50dc2aa 100644
    --- a/src/common/ceph_crypto.h
    +++ b/src/common/ceph_crypto.h
    @@ -14,6 +14,7 @@
     #define CEPH_CRYPTO_SHA1_DIGESTSIZE 20
     #define CEPH_CRYPTO_HMACSHA256_DIGESTSIZE 32
     #define CEPH_CRYPTO_SHA256_DIGESTSIZE 32
    +#define CEPH_CRYPTO_HMACSHA512_DIGESTSIZE 64
     #define CEPH_CRYPTO_SHA512_DIGESTSIZE 64
     
     #include 
    @@ -90,7 +91,6 @@ namespace TOPNSPC::crypto {
             SHA512 () : OpenSSLDigest(EVP_sha512()) { }
         };
     
    -
     # if OPENSSL_VERSION_NUMBER < 0x10100000L
       class HMAC {
       private:
    @@ -187,6 +187,12 @@ namespace TOPNSPC::crypto {
           : HMAC(EVP_sha256(), key, length) {
         }
       };
    +
    +  struct HMACSHA512 : public HMAC {
    +    HMACSHA512 (const unsigned char *key, size_t length)
    +      : HMAC(EVP_sha512(), key, length) {
    +    }
    +  };
     }
     
     
    @@ -197,6 +203,7 @@ namespace TOPNSPC::crypto {
     
       using ssl::HMACSHA256;
       using ssl::HMACSHA1;
    +  using ssl::HMACSHA512;
     
     template<class Digest>
     auto digest(const ceph::buffer::list& bl)
    diff --git a/src/common/ceph_json.h b/src/common/ceph_json.h
    index 08e8d9e46623..f5898496e6f4 100644
    --- a/src/common/ceph_json.h
    +++ b/src/common/ceph_json.h
    @@ -582,6 +582,17 @@ static void encode_json(const char *name, const std::vector& l, ceph::Formatt
       f->close_section();
     }
     
    +template<class T, std::size_t N>
    +static void encode_json(const char *name, const std::array<T, N>& l,
    +                        ceph::Formatter *f)
    +{
    +  f->open_array_section(name);
    +  for (auto iter = l.cbegin(); iter != l.cend(); ++iter) {
    +    encode_json("obj", *iter, f);
    +  }
    +  f->close_section();
    +}
    +
     template<class K, class V, class C = std::less<K>>
     static void encode_json(const char *name, const std::map<K, V, C>& m, ceph::Formatter *f)
     {
    @@ -836,6 +847,61 @@ class JSONFormattable : public ceph::JSONFormatter {
         DECODE_FINISH(bl);
       }
     
    +  void dump(ceph::Formatter *f) const {
    +    switch (type) {
    +      case FMT_VALUE:
    +        if (value.quoted) {
    +          f->dump_string("value", value.str);
    +        } else {
    +          f->dump_format_unquoted("value", "%s", value.str.c_str());
    +        }
    +        break;
    +      case FMT_ARRAY:
    +        f->open_array_section("array");
    +        for (auto& i : arr) {
    +          i.dump(f);
    +        }
    +        f->close_section();
    +        break;
    +      case FMT_OBJ:
    +        f->open_object_section("object");
    +        for (auto& i : obj) {
    +          f->dump_object(i.first.c_str(), i.second);
    +        }
    +        f->close_section();
    +        break;
    +      default:
    +        break;
    +    }
    +  }
    +  static void generate_test_instances(std::list<JSONFormattable*>& o) {
    +    o.push_back(new JSONFormattable);
    +    o.push_back(new JSONFormattable);
    +    o.back()->set_type(FMT_VALUE);
    +    o.back()->value.str = "foo";
    +    o.back()->value.quoted = true;
    +    o.push_back(new JSONFormattable);
    +    o.back()->set_type(FMT_VALUE);
    +    o.back()->value.str = "foo";
    +    o.back()->value.quoted = false;
    +    o.push_back(new JSONFormattable);
    +    o.back()->set_type(FMT_ARRAY);
    +    o.back()->arr.push_back(JSONFormattable());
    +    o.back()->arr.back().set_type(FMT_VALUE);
    +    o.back()->arr.back().value.str = "foo";
    +    o.back()->arr.back().value.quoted = true;
    +    o.back()->arr.push_back(JSONFormattable());
    +    o.back()->arr.back().set_type(FMT_VALUE);
    +    o.back()->arr.back().value.str = "bar";
    +    o.back()->arr.back().value.quoted = true;
    +    o.push_back(new JSONFormattable);
    +    o.back()->set_type(FMT_OBJ);
    +    o.back()->obj["foo"] = JSONFormattable();
    +    o.back()->obj["foo"].set_type(FMT_VALUE);
    +    o.back()->obj["foo"].value.str = "bar";
    +    o.back()->obj["foo"].value.quoted = true;
    +  }
    +
       const std::string& val() const {
         return value.str;
       }
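    A small hedged example of the new std::array overload; the formatter pointer f and
    the values are illustrative:

        std::array<int, 3> ids = {1, 2, 3};
        encode_json("ids", ids, f); // emits a JSON array section named "ids"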
    diff --git a/src/common/ceph_releases.h b/src/common/ceph_releases.h
    index e09e191e5ec7..6d330b5d5b68 100644
    --- a/src/common/ceph_releases.h
    +++ b/src/common/ceph_releases.h
    @@ -30,6 +30,7 @@ enum class ceph_release_t : std::uint8_t {
       pacific,
       quincy,
       reef,
    +  squid,
       max,
     };
     
    diff --git a/src/common/ceph_strings.cc b/src/common/ceph_strings.cc
    index 18dcc701b31d..6204a9ca3b89 100644
    --- a/src/common/ceph_strings.cc
    +++ b/src/common/ceph_strings.cc
    @@ -112,6 +112,8 @@ const char *ceph_release_name(int r)
     		return "quincy";
     	case CEPH_RELEASE_REEF:
     		return "reef";
    +	case CEPH_RELEASE_SQUID:
    +		return "squid";
     	default:
     		if (r < 0)
     			return "unspecified";
    @@ -151,7 +153,15 @@ uint64_t ceph_release_features(int r)
     		return req;
     
     	req |= CEPH_FEATUREMASK_CRUSH_CHOOSE_ARGS; // and overlaps
    -	if (r <= CEPH_RELEASE_LUMINOUS)
    +	if (r <= CEPH_RELEASE_QUINCY)
    +		return req;
    +
    +	req |= CEPH_FEATUREMASK_SERVER_REEF; // upmap-primary
    +	if (r <= CEPH_RELEASE_REEF)
    +		return req;
    +
    +	req |= CEPH_FEATUREMASK_CRUSH_MSR;
    +	if (r <= CEPH_RELEASE_SQUID)
     		return req;
     
     	return req;
    @@ -309,6 +319,8 @@ const char *ceph_mds_op_name(int op)
     	case CEPH_MDS_OP_ENQUEUE_SCRUB: return "enqueue_scrub";
     	case CEPH_MDS_OP_REPAIR_FRAGSTATS: return "repair_fragstats";
     	case CEPH_MDS_OP_REPAIR_INODESTATS: return "repair_inodestats";
    +	case CEPH_MDS_OP_QUIESCE_PATH: return "quiesce_path";
    +	case CEPH_MDS_OP_QUIESCE_INODE: return "quiesce_inode";
     	}
     	return "???";
     }
    diff --git a/src/common/ceph_time.h b/src/common/ceph_time.h
    index 6ada4d8944cd..01feff4c063b 100644
    --- a/src/common/ceph_time.h
    +++ b/src/common/ceph_time.h
    @@ -16,7 +16,7 @@
     #define COMMON_CEPH_TIME_H
     
     #include 
    -#include 
    +#include 
     #include 
     #include 
     #include 
    @@ -529,6 +529,9 @@ struct converts_to_timespec
     constexpr bool converts_to_timespec_v = converts_to_timespec<Clock>::value;
     
    +template <typename Clock>
    +concept clock_with_timespec = converts_to_timespec_v<Clock>;
    +
     template
     static Rep to_seconds(T t) {
       return std::chrono::duration_cast<
    diff --git a/src/common/ceph_timer.h b/src/common/ceph_timer.h
    index bc324bfa2437..7fb2c7bac125 100644
    --- a/src/common/ceph_timer.h
    +++ b/src/common/ceph_timer.h
    @@ -98,6 +98,7 @@ class timer {
       std::thread thread;
     
       void timer_thread() {
    +    ceph_pthread_setname("ceph_timer");
         std::unique_lock l(lock);
         while (!suspended) {
           auto now = TC::now();
    @@ -155,7 +156,6 @@ class timer {
     public:
       timer() : suspended(false) {
         thread = std::thread(&timer::timer_thread, this);
    -    set_thread_name(thread, "ceph_timer");
       }
     
       // Create a suspended timer, jobs will be executed in order when
    diff --git a/src/common/code_environment.cc b/src/common/code_environment.cc
    index 14d55f60c304..21633fc5d41b 100644
    --- a/src/common/code_environment.cc
    +++ b/src/common/code_environment.cc
    @@ -11,6 +11,7 @@
      * Foundation.  See file COPYING.
      *
      */
    +#include "include/compat.h"
     
     #include "common/code_environment.h"
     
    @@ -18,10 +19,6 @@
     
     #include "acconfig.h"
     
    -#ifdef HAVE_PTHREAD_GETNAME_NP
    -#include 
    -#endif
    -
     #include 
     
     code_environment_t g_code_env = CODE_ENVIRONMENT_UTILITY;
    @@ -57,7 +54,7 @@ int get_process_name(char *buf, int len)
       }
       // FIPS zeroization audit 20191115: this memset is not security related.
       memset(buf, 0, len);
    -  return pthread_getname_np(pthread_self(), buf, len);
    +  return ceph_pthread_getname(buf, len);
     }
     
     #elif defined(HAVE_GETPROGNAME)
    diff --git a/src/common/cohort_lru.h b/src/common/cohort_lru.h
    index af2baaa5c67b..86ced8d183c7 100644
    --- a/src/common/cohort_lru.h
    +++ b/src/common/cohort_lru.h
    @@ -15,6 +15,12 @@
     
     #include 
     #include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
    +#include 
     
     #ifdef __CEPH__
     # include "include/ceph_assert.h"
    diff --git a/src/common/compat.cc b/src/common/compat.cc
    index 82b57ad94b53..84a395c5a19a 100644
    --- a/src/common/compat.cc
    +++ b/src/common/compat.cc
    @@ -565,3 +565,66 @@ ssize_t get_self_exe_path(char* path, int buff_length) {
     }
     
     #endif /* _WIN32 */
    +
    +
    +static thread_local char cached_thread_name[256]{};
    +
    +int ceph_pthread_setname(char const* name)
    +{
    +  strncpy(cached_thread_name, name, sizeof cached_thread_name - 1);
    +#if defined(_WIN32) && defined(__clang__) && \
    +    !defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
    +  // In this case, llvm doesn't use the pthread api for std::thread.
    +  // We cannot use native_handle() with the pthread api, nor can we pass
    +  // it to Windows API functions.
    +  return 0;
    +#elif defined(HAVE_PTHREAD_SETNAME_NP)
    +  #if defined(__APPLE__)
    +      return pthread_setname_np(name);
    +  #else
    +      return pthread_setname_np(pthread_self(), name);
    +  #endif
    +#elif defined(HAVE_PTHREAD_SET_NAME_NP)
    +  pthread_set_name_np(pthread_self(), name);
    +  return 0;
    +#else
    +  return 0;
    +#endif
    +}
    +
    +int ceph_pthread_getname(char* name, size_t len)
    +{
    +  if (cached_thread_name[0]) {
    +    if (len > 0) {
    +      strncpy(name, cached_thread_name, len);
    +      name[len-1] = 0;
    +    }
    +    return 0;
    +  } else {
    +#if defined(_WIN32) && defined(__clang__) && \
    +    !defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
    +    if (len > 0) {
    +      strcpy(name, "");
    +    }
    +    return 0;
    +#elif defined(HAVE_PTHREAD_GETNAME_NP) || defined(HAVE_PTHREAD_GET_NAME_NP)
    +#  if defined(HAVE_PTHREAD_GETNAME_NP)
    +    int rc = pthread_getname_np(pthread_self(), cached_thread_name, sizeof cached_thread_name);
    +#  else
    +    int rc = pthread_get_name_np(pthread_self(), cached_thread_name, sizeof cached_thread_name);
    +#  endif
    +    if (rc == 0) {
    +      strncpy(name, cached_thread_name, len);
    +      name[len-1] = 0;
    +      return 0;
    +    } else {
    +      return rc;
    +    }
    +#else
    +    if (len > 0) {
    +      strcpy(name, "");
    +    }
    +    return 0;
    +#endif
    +  }
    +}
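    A short hedged sketch of the new helpers: ceph_pthread_setname() records the name
    in a thread_local cache, so ceph_pthread_getname() can return it even on platforms
    without pthread_getname_np():

        void worker_entry()
        {
          ceph_pthread_setname("my_worker");

          char name[16] = {0};
          ceph_pthread_getname(name, sizeof(name)); // returns "my_worker" from the cache
        }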
    diff --git a/src/common/config.cc b/src/common/config.cc
    index c8101587b719..3a5ee91c3472 100644
    --- a/src/common/config.cc
    +++ b/src/common/config.cc
    @@ -24,6 +24,8 @@
     #include "common/hostname.h"
     #include "common/dout.h"
     
    +#include 
    +
     /* Don't use standard Ceph logging in this file.
      * We can't use logging until it's initialized, and a lot of the necessary
      * initialization happens here.
    @@ -55,7 +57,7 @@ using ceph::decode;
     using ceph::encode;
     using ceph::Formatter;
     
    -static const char *CEPH_CONF_FILE_DEFAULT = "$data_dir/config,/etc/ceph/$cluster.conf,$home/.ceph/$cluster.conf,$cluster.conf"
    +const char *CEPH_CONF_FILE_DEFAULT = "$data_dir/config,/etc/ceph/$cluster.conf,$home/.ceph/$cluster.conf,$cluster.conf"
     #if defined(__FreeBSD__)
         ",/usr/local/etc/ceph/$cluster.conf"
     #elif defined(_WIN32)
    @@ -131,14 +133,11 @@ md_config_t::md_config_t(ConfigValues& values,
       // Define the debug_* options as well.
       subsys_options.reserve(values.subsys.get_num());
       for (unsigned i = 0; i < values.subsys.get_num(); ++i) {
    -    string name = string("debug_") + values.subsys.get_name(i);
    -    subsys_options.push_back(
    -      Option(name, Option::TYPE_STR, Option::LEVEL_ADVANCED));
    +    subsys_options.emplace_back(
    +      fmt::format("debug_{}", values.subsys.get_name(i)), Option::TYPE_STR, Option::LEVEL_ADVANCED);
         Option& opt = subsys_options.back();
    -    opt.set_default(stringify(values.subsys.get_log_level(i)) + "/" +
    -		    stringify(values.subsys.get_gather_level(i)));
    -    string desc = string("Debug level for ") + values.subsys.get_name(i);
    -    opt.set_description(desc.c_str());
    +    opt.set_default(fmt::format("{}/{}", values.subsys.get_log_level(i), values.subsys.get_gather_level(i)));
    +    opt.set_description(fmt::format("Debug level for {}", values.subsys.get_name(i)).c_str());
         opt.set_flag(Option::FLAG_RUNTIME);
         opt.set_long_description("The value takes the form 'N' or 'N/M' where N and M are values between 0 and 99.  N is the debug level to log (all values below this are included), and M is the level to gather and buffer in memory.  In the event of a crash, the most recent items <= M are dumped to the log file.");
         opt.set_subsys(i);
    @@ -158,7 +157,7 @@ md_config_t::md_config_t(ConfigValues& values,
     	  } else {
     	    // normalize to M/N
     	    n = m;
    -	    *value = stringify(m) + "/" + stringify(n);
    +	    *value = fmt::format("{}/{}", m, n);
     	  }
     	} else {
     	  *error_message = "value must take the form N or N/M, where N and M are integers";
    @@ -493,6 +492,11 @@ void md_config_t::parse_env(unsigned entity_type,
         }
       }
     
    +  if (auto s = getenv("TMPDIR"); s) {
    +    string err;
    +    _set_val(values, tracker, s, *find_option("tmp_dir"), CONF_ENV, &err);
    +  }
    +
       // Apply pod memory limits:
       //
       // There are two types of resource requests: `limits` and `requests`.
    @@ -770,7 +774,7 @@ int md_config_t::parse_option(ConfigValues& values,
         option_name = opt.name;
         if (ceph_argparse_witharg(
     	  args, i, &val, err,
    -	  string(string("--default-") + opt.name).c_str(), (char*)NULL)) {
    +	  fmt::format("--default-{}", opt.name).c_str(), (char*)NULL)) {
           if (!err.str().empty()) {
             error_message = err.str();
     	ret = -EINVAL;
    @@ -1263,7 +1267,7 @@ Option::value_t md_config_t::_expand_meta(
     		     << Option::to_str(*i->second) << "\n";
     	      }
     	    }
    -	    return Option::value_t(std::string("$") + o->name);
    +	    return Option::value_t(fmt::format("${}", o->name));
     	  } else {
     	    // recursively evaluate!
     	    string n;
    diff --git a/src/common/config.h b/src/common/config.h
    index ff7bc20f613d..3e7b51d987d2 100644
    --- a/src/common/config.h
    +++ b/src/common/config.h
    @@ -39,6 +39,8 @@ enum {
     
     extern const char *ceph_conf_level_name(int level);
     
    +extern const char *CEPH_CONF_FILE_DEFAULT;
    +
     /** This class represents the current Ceph configuration.
      *
      * For Ceph daemons, this is the daemon configuration.  Log levels, caching
    diff --git a/src/common/config_cacher.h b/src/common/config_cacher.h
    index a84bad08eee8..91b8152dde10 100644
    --- a/src/common/config_cacher.h
    +++ b/src/common/config_cacher.h
    @@ -50,7 +50,7 @@ class md_config_cacher_t : public md_config_obs_t {
         conf.remove_observer(this);
       }
     
    -  operator ValueT() const {
    +  ValueT operator*() const {
         return value_cache.load();
       }
     };
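    A hedged sketch of the md_config_cacher_t interface change: cached values are now
    read through operator*() rather than an implicit conversion. The option name and
    constructor arguments are illustrative:

        md_config_cacher_t<uint64_t> cached_limit{conf, "some_option"};

        uint64_t limit = *cached_limit;  // previously: uint64_t limit = cached_limit;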
    diff --git a/src/common/config_obs_mgr.h b/src/common/config_obs_mgr.h
    index 06b3cf934a53..5336538e4387 100644
    --- a/src/common/config_obs_mgr.h
    +++ b/src/common/config_obs_mgr.h
    @@ -14,13 +14,11 @@ class ConfigValues;
     // the changes of settings at runtime.
     template<typename ConfigObs>
     class ObserverMgr : public ConfigTracker {
    -  // Maps configuration options to the observer listening for them.
    -  using obs_map_t = std::multimap<std::string, ConfigObs*>;
    -  obs_map_t observers;
    -
     public:
    -  typedef std::map<ConfigObs*, std::set<std::string>> rev_obs_map;
    -  typedef std::function<void(ConfigObs*, const std::string&)> config_gather_cb;
    +  using config_obs_ptr = std::shared_ptr<ConfigObs*>;
    +  using config_obs_wptr = std::weak_ptr<ConfigObs*>;
    +  typedef std::map<config_obs_ptr, std::set<std::string>> rev_obs_map;
    +  typedef std::function<void(config_obs_ptr, const std::string&)> config_gather_cb;
     
       // Adds a new observer to this configuration. You can do this at any time,
       // but it will only receive notifications for the changes that happen after
    @@ -37,15 +35,18 @@ class ObserverMgr : public ConfigTracker {
       // you need to delete it yourself.
       // This function will assert if you try to delete an observer that isn't
       // there.
    -  void remove_observer(ConfigObs* observer);
    +  config_obs_wptr remove_observer(ConfigObs* observer);
       // invoke callback for every observers tracking keys
       void for_each_observer(config_gather_cb callback);
       // invoke callback for observers keys tracking the provided change set
    -  template<typename ConfigProxyT>
    -  void for_each_change(const std::set<std::string>& changes,
    -                       ConfigProxyT& proxy,
    +  void for_each_change(const std::map<std::string,bool>& changes,
                            config_gather_cb callback, std::ostream *oss);
       bool is_tracking(const std::string& name) const override;
    +
    +private:
    +  // Maps configuration options to the observer listening for them.
    +  using obs_map_t = std::multimap<std::string, config_obs_ptr>;
    +  obs_map_t observers;
     };
     
     // we could put the implementations in a .cc file, and only instantiate the
     template<typename ConfigObs>
     void ObserverMgr<ConfigObs>::add_observer(ConfigObs* observer)
     {
       const char **keys = observer->get_tracked_conf_keys();
    +  auto ptr = std::make_shared<ConfigObs*>(observer);
       for (const char ** k = keys; *k; ++k) {
    -    observers.emplace(*k, observer);
    +    observers.emplace(*k, ptr);
       }
     }
     
     template<typename ConfigObs>
    -void ObserverMgr<ConfigObs>::remove_observer(ConfigObs* observer)
    +typename ObserverMgr<ConfigObs>::config_obs_wptr ObserverMgr<ConfigObs>::remove_observer(ConfigObs* observer)
     {
       [[maybe_unused]] bool found_obs = false;
    +  config_obs_ptr ptr;
       for (auto o = observers.begin(); o != observers.end(); ) {
    -    if (o->second == observer) {
    -      observers.erase(o++);
    +    if (*o->second == observer) {
    +      ptr = std::move(o->second);
    +      o = observers.erase(o);
           found_obs = true;
         } else {
           ++o;
         }
       }
       ceph_assert(found_obs);
    +  return config_obs_wptr(ptr);
     }
     
     template<typename ConfigObs>
     void ObserverMgr<ConfigObs>::for_each_observer(config_gather_cb callback)
     }
     
     template<typename ConfigObs>
    -template<typename ConfigProxyT>
    -void ObserverMgr<ConfigObs>::for_each_change(const std::set<std::string>& changes,
    -                                             ConfigProxyT& proxy,
    +void ObserverMgr<ConfigObs>::for_each_change(const std::map<std::string,bool>& changes,
                                                  config_gather_cb callback, std::ostream *oss)
     {
       // create the reverse observer mapping, mapping observers to the set of
       // changed keys that they'll get.
       std::string val;
    -  for (auto& key : changes) {
    +  for (auto& [key, present] : changes) {
         auto [first, last] = observers.equal_range(key);
    -    if ((oss) && !proxy.get_val(key, &val)) {
    +    if ((oss) && present) {
           (*oss) << key << " = '" << val << "' ";
           if (first == last) {
             (*oss) << "(not observed, change may require restart) ";
    diff --git a/src/common/config_proxy.h b/src/common/config_proxy.h
    index 02c670f60277..12a273b8c84f 100644
    --- a/src/common/config_proxy.h
    +++ b/src/common/config_proxy.h
    @@ -18,91 +18,50 @@ class ConfigProxy {
        */
       ConfigValues values;
       using md_config_obs_t = ceph::md_config_obs_impl<ConfigProxy>;
    -  ObserverMgr<md_config_obs_t> obs_mgr;
    +  using ObsMgr = ObserverMgr<md_config_obs_t>;
    +  ObsMgr obs_mgr;
       md_config_t config;
       /** A lock that protects the md_config_t internals. It is
        * recursive, for simplicity.
        * It is best if this lock comes first in the lock hierarchy. We will
        * hold this lock when calling configuration observers.  */
    -  mutable ceph::recursive_mutex lock =
    -    ceph::make_recursive_mutex("ConfigProxy::lock");
    +  mutable ceph::mutex lock = ceph::make_mutex("ConfigProxy::lock");
    +  ceph::condition_variable cond;
     
    -  class CallGate {
    -  private:
    -    uint32_t call_count = 0;
    -    ceph::mutex lock;
    -    ceph::condition_variable cond;
    -  public:
    -    CallGate()
    -      : lock(ceph::make_mutex("call::gate::lock")) {
    -    }
    +  using rev_obs_map_t = ObsMgr::rev_obs_map;
     
    -    void enter() {
    -      std::lock_guard locker(lock);
    -      ++call_count;
    +  void _call_observers(rev_obs_map_t& rev_obs) {
    +    for (auto& [obs, keys] : rev_obs) {
    +      (*obs)->handle_conf_change(*this, keys);
         }
    -    void leave() {
    -      std::lock_guard locker(lock);
    -      ceph_assert(call_count > 0);
    -      if (--call_count == 0) {
    -        cond.notify_all();
    -      }
    +    rev_obs.clear(); // drop shared_ptrs
    +    {
    +      std::lock_guard l{lock};
    +      cond.notify_all();
         }
    -    void close() {
    -      std::unique_lock locker(lock);
    -      while (call_count != 0) {
    -        cond.wait(locker);
    -      }
    -    }
    -  };
    -
    -  void call_gate_enter(md_config_obs_t *obs) {
    -    auto p = obs_call_gate.find(obs);
    -    ceph_assert(p != obs_call_gate.end());
    -    p->second->enter();
    -  }
    -  void call_gate_leave(md_config_obs_t *obs) {
    -    auto p = obs_call_gate.find(obs);
    -    ceph_assert(p != obs_call_gate.end());
    -    p->second->leave();
       }
    -  void call_gate_close(md_config_obs_t *obs) {
    -    auto p = obs_call_gate.find(obs);
    -    ceph_assert(p != obs_call_gate.end());
    -    p->second->close();
    -  }
    -
     -  using rev_obs_map_t = ObserverMgr<md_config_obs_t>::rev_obs_map;
     -  typedef std::unique_ptr<CallGate> CallGateRef;
    -
     -  std::map<md_config_obs_t*, CallGateRef> obs_call_gate;
    -
    -  void call_observers(std::unique_lock& locker,
    -                      rev_obs_map_t& rev_obs) {
    -    // observers are notified outside of lock
    -    locker.unlock();
    -    for (auto& [obs, keys] : rev_obs) {
    -      obs->handle_conf_change(*this, keys);
    -    }
    -    locker.lock();
    -
    -    for (auto& rev_ob : rev_obs) {
    -      call_gate_leave(rev_ob.first);
     +  void _gather_changes(std::set<std::string> &changes,
    +                       rev_obs_map_t *rev_obs, std::ostream* oss) {
    +    ceph_assert(ceph_mutex_is_locked_by_me(lock));
     +    std::map<std::string,bool> changes_present;
    +    for (auto& change : changes) {
    +      std::string dummy;
    +      changes_present[change] = (0 == config.get_val(values, change, &dummy));
         }
    +    obs_mgr.for_each_change(
    +      changes_present,
    +      [this, rev_obs](auto obs, const std::string &key) {
    +        _map_observer_changes(obs, key, rev_obs);
    +      }, oss);
    +    changes.clear();
       }
     
    -  void map_observer_changes(md_config_obs_t *obs, const std::string &key,
    +  void _map_observer_changes(ObsMgr::config_obs_ptr obs, const std::string& key,
                                 rev_obs_map_t *rev_obs) {
    -    ceph_assert(ceph_mutex_is_locked(lock));
    +    ceph_assert(ceph_mutex_is_locked_by_me(lock));
     
          auto [it, new_entry] = rev_obs->emplace(obs, std::set<std::string>{});
         it->second.emplace(key);
    -    if (new_entry) {
    -      // this needs to be done under lock as once this lock is
    -      // dropped (before calling observers) a remove_observer()
    -      // can sneak in and cause havoc.
    -      call_gate_enter(obs);
    -    }
       }
     
     public:
    @@ -150,12 +109,15 @@ class ConfigProxy {
      				       std::forward<Args>(args)...);
       }
       void config_options(ceph::Formatter *f) const {
    +    std::lock_guard l{lock};
         config.config_options(f);
       }
       const decltype(md_config_t::schema)& get_schema() const {
    +    std::lock_guard l{lock};
         return config.schema;
       }
       const Option* get_schema(const std::string_view key) const {
    +    std::lock_guard l{lock};
         auto found = config.schema.find(key);
         if (found == config.schema.end()) {
           return nullptr;
    @@ -164,6 +126,7 @@ class ConfigProxy {
         }
       }
       const Option *find_option(const std::string& name) const {
    +    std::lock_guard l{lock};
         return config.find_option(name);
       }
       void diff(ceph::Formatter *f, const std::string& name = {}) const {
    @@ -186,6 +149,7 @@ class ConfigProxy {
     					 sections, key, out, emeta);
       }
       unsigned get_osd_pool_default_min_size(uint8_t size) const {
    +    std::lock_guard l{lock};
         return config.get_osd_pool_default_min_size(values, size);
       }
       void early_expand_meta(std::string &val,
    @@ -195,39 +159,46 @@ class ConfigProxy {
       }
       // for those want to reexpand special meta, e.g, $pid
       void finalize_reexpand_meta() {
    -    std::unique_lock locker(lock);
         rev_obs_map_t rev_obs;
    -    if (config.finalize_reexpand_meta(values, obs_mgr)) {
    -      _gather_changes(values.changed, &rev_obs, nullptr);
    +    {
    +      std::lock_guard locker(lock);
    +      if (config.finalize_reexpand_meta(values, obs_mgr)) {
    +        _gather_changes(values.changed, &rev_obs, nullptr);
    +      }
         }
     
    -    call_observers(locker, rev_obs);
    +    _call_observers(rev_obs);
       }
       void add_observer(md_config_obs_t* obs) {
         std::lock_guard l(lock);
         obs_mgr.add_observer(obs);
    -    obs_call_gate.emplace(obs, std::make_unique());
    +    cond.notify_all();
       }
       void remove_observer(md_config_obs_t* obs) {
    -    std::lock_guard l(lock);
    -    call_gate_close(obs);
    -    obs_call_gate.erase(obs);
    -    obs_mgr.remove_observer(obs);
    +    std::unique_lock l(lock);
    +    auto wptr = obs_mgr.remove_observer(obs);
    +    while (!wptr.expired()) {
    +      cond.wait(l);
    +    }
       }
       void call_all_observers() {
    -    std::unique_lock locker(lock);
         rev_obs_map_t rev_obs;
    -    obs_mgr.for_each_observer(
    -      [this, &rev_obs](md_config_obs_t *obs, const std::string &key) {
    -        map_observer_changes(obs, key, &rev_obs);
    -      });
    +    {
    +      std::lock_guard locker(lock);
    +      obs_mgr.for_each_observer(
    +        [this, &rev_obs](auto obs, const std::string& key) {
    +          _map_observer_changes(obs, key, &rev_obs);
    +        });
    +    }
     
    -    call_observers(locker, rev_obs);
    +    _call_observers(rev_obs);
       }
       void set_safe_to_start_threads() {
    +    std::lock_guard l(lock);
         config.set_safe_to_start_threads();
       }
       void _clear_safe_to_start_threads() {
    +    std::lock_guard l(lock);
         config._clear_safe_to_start_threads();
       }
       void show_config(std::ostream& out) {
    @@ -248,25 +219,18 @@ class ConfigProxy {
       }
       // Expand all metavariables. Make any pending observer callbacks.
       void apply_changes(std::ostream* oss) {
    -    std::unique_lock locker(lock);
         rev_obs_map_t rev_obs;
     
    -    // apply changes until the cluster name is assigned
    -    if (!values.cluster.empty()) {
    -      // meta expands could have modified anything.  Copy it all out again.
    -      _gather_changes(values.changed, &rev_obs, oss);
    +    {
    +      std::lock_guard locker(lock);
    +      // apply changes until the cluster name is assigned
    +      if (!values.cluster.empty()) {
    +        // meta expands could have modified anything.  Copy it all out again.
    +        _gather_changes(values.changed, &rev_obs, oss);
    +      }
         }
     
    -    call_observers(locker, rev_obs);
    -  }
     -  void _gather_changes(std::set<std::string> &changes,
    -                       rev_obs_map_t *rev_obs, std::ostream* oss) {
    -    obs_mgr.for_each_change(
    -      changes, *this,
    -      [this, rev_obs](md_config_obs_t *obs, const std::string &key) {
    -        map_observer_changes(obs, key, rev_obs);
    -      }, oss);
    -      changes.clear();
    +    _call_observers(rev_obs);
       }
       int set_val(const std::string_view key, const std::string& s,
                   std::stringstream* err_ss=nullptr) {
    @@ -284,23 +248,27 @@ class ConfigProxy {
       int set_mon_vals(CephContext *cct,
      		   const std::map<std::string,std::string,std::less<>>& kv,
     		   md_config_t::config_callback config_cb) {
    -    std::unique_lock locker(lock);
    -    int ret = config.set_mon_vals(cct, values, obs_mgr, kv, config_cb);
    -
    +    int ret;
         rev_obs_map_t rev_obs;
    -    _gather_changes(values.changed, &rev_obs, nullptr);
     
    -    call_observers(locker, rev_obs);
    +    {
    +      std::lock_guard locker(lock);
    +      ret = config.set_mon_vals(cct, values, obs_mgr, kv, config_cb);
    +      _gather_changes(values.changed, &rev_obs, nullptr);
    +    }
    +
    +    _call_observers(rev_obs);
         return ret;
       }
       int injectargs(const std::string &s, std::ostream *oss) {
    -    std::unique_lock locker(lock);
    -    int ret = config.injectargs(values, obs_mgr, s, oss);
    -
    +    int ret;
         rev_obs_map_t rev_obs;
    -    _gather_changes(values.changed, &rev_obs, oss);
    -
    -    call_observers(locker, rev_obs);
    +    {
    +      std::lock_guard locker(lock);
    +      ret = config.injectargs(values, obs_mgr, s, oss);
    +      _gather_changes(values.changed, &rev_obs, oss);
    +    }
    +    _call_observers(rev_obs);
         return ret;
       }
       void parse_env(unsigned entity_type,
    @@ -319,12 +287,15 @@ class ConfigProxy {
     				     conf_files, warnings, flags);
       }
       bool has_parse_error() const {
    +    std::lock_guard l(lock);
         return !config.parse_error.empty();
       }
       std::string get_parse_error() {
    +    std::lock_guard l(lock);
         return config.parse_error;
       }
       void complain_about_parse_error(CephContext *cct) {
    +    std::lock_guard l(lock);
         return config.complain_about_parse_error(cct);
       }
       void do_argv_commands() const {
    @@ -342,9 +313,11 @@ class ConfigProxy {
         config.get_defaults_bl(values, bl);
       }
       const std::string& get_conf_path() const {
    +    std::lock_guard l(lock);
         return config.get_conf_path();
       }
        std::optional<std::string> get_val_default(std::string_view key) {
    +    std::lock_guard l(lock);
         return config.get_val_default(key);
       }
     };
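
Taken together, the config_proxy.h changes all follow one pattern: work out which observers are affected while holding the single mutex, invoke their callbacks only after that lock scope has closed, and then signal the condition variable so `remove_observer()` can block until the `weak_ptr` it received has expired. A stripped-down sketch of that pattern with placeholder names (not the Ceph classes):

```cpp
#include <condition_variable>
#include <functional>
#include <mutex>
#include <vector>

class Notifier {
  std::mutex lock;
  std::condition_variable cond;
  std::vector<std::function<void()>> pending;  // stands in for rev_obs

public:
  void queue(std::function<void()> cb) {
    std::lock_guard l{lock};
    pending.push_back(std::move(cb));
  }

  void apply_changes() {
    std::vector<std::function<void()>> to_call;
    {
      std::lock_guard l{lock};
      to_call.swap(pending);        // gather while holding the mutex
    }
    for (auto& cb : to_call) cb();  // call observers with the mutex released
    {
      std::lock_guard l{lock};
      cond.notify_all();            // wake anyone waiting in wait_until()
    }
  }

  // remove_observer() in the patch is this side: drop the registration,
  // then wait until the predicate (e.g. "weak_ptr expired") holds.
  template <typename Pred>
  void wait_until(Pred done) {
    std::unique_lock l{lock};
    cond.wait(l, done);
  }
};
```
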
    diff --git a/src/common/crc32c.cc b/src/common/crc32c.cc
    index e4a77ae99aeb..2fe511818d60 100644
    --- a/src/common/crc32c.cc
    +++ b/src/common/crc32c.cc
    @@ -6,10 +6,12 @@
     #include "arch/intel.h"
     #include "arch/arm.h"
     #include "arch/ppc.h"
    +#include "arch/s390x.h"
     #include "common/sctp_crc32.h"
     #include "common/crc32c_intel_fast.h"
     #include "common/crc32c_aarch64.h"
     #include "common/crc32c_ppc.h"
    +#include "common/crc32c_s390x.h"
     
     /*
      * choose best implementation based on the CPU architecture.
    @@ -24,6 +26,9 @@ ceph_crc32c_func_t ceph_choose_crc32(void)
       // use that.
     #if defined(__i386__) || defined(__x86_64__)
       if (ceph_arch_intel_sse42 && ceph_crc32c_intel_fast_exists()) {
    +    if (ceph_arch_intel_pclmul) {
    +      return ceph_crc32c_intel_fast_pclmul;
    +    }
         return ceph_crc32c_intel_fast;
       }
     #elif defined(__arm__) || defined(__aarch64__)
    @@ -36,6 +41,10 @@ ceph_crc32c_func_t ceph_choose_crc32(void)
       if (ceph_arch_ppc_crc32) {
         return ceph_crc32c_ppc;
       }
    +#elif defined(__s390__)
    +  if (ceph_arch_s390x_crc32) {
    +    return ceph_crc32c_s390x;
    +  }
     #endif
       // default
       return ceph_crc32c_sctp;
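
The dispatcher above selects a CRC-32C kernel once, based on the CPU features detected at startup, and falls back to the portable sctp implementation. A self-contained sketch of that selection shape, using a bitwise reference kernel and a placeholder feature flag (none of these symbols are the real Ceph ones):

```cpp
#include <cstdint>
#include <cstdio>

using crc32c_fn = uint32_t (*)(uint32_t, const unsigned char*, unsigned);

// Portable bit-at-a-time CRC-32C (reflected polynomial 0x82F63B78).
static uint32_t crc32c_portable(uint32_t crc, const unsigned char* p, unsigned n) {
  while (n--) {
    crc ^= *p++;
    for (int k = 0; k < 8; ++k)
      crc = (crc >> 1) ^ (0x82F63B78u & (0u - (crc & 1u)));
  }
  return crc;
}

// Pretend "accelerated" kernel; a real one would use SSE4.2/PCLMUL, NEON,
// VPMSUM or the s390x vector facility, as in the dispatcher above.
static uint32_t crc32c_vectorized(uint32_t crc, const unsigned char* p, unsigned n) {
  return crc32c_portable(crc, p, n);
}

static bool cpu_has_vector_crc = false;  // would come from an arch probe

static crc32c_fn choose_crc32c() {
  return cpu_has_vector_crc ? crc32c_vectorized : crc32c_portable;
}

int main() {
  const unsigned char msg[] = "123456789";
  uint32_t crc = ~choose_crc32c()(~0u, msg, 9);
  std::printf("%08x\n", crc);  // well-known CRC-32C check value: e3069283
}
```
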
    diff --git a/src/common/crc32c_intel_fast.c b/src/common/crc32c_intel_fast.c
    index 28bd93416519..3fbb63e2812d 100644
    --- a/src/common/crc32c_intel_fast.c
    +++ b/src/common/crc32c_intel_fast.c
    @@ -2,10 +2,25 @@
     #include "common/crc32c_intel_baseline.h"
     
     extern unsigned int crc32_iscsi_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_00");
    +extern unsigned int crc32_iscsi_01(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_01");
     extern unsigned int crc32_iscsi_zero_00(unsigned char const *buffer, uint64_t len, uint64_t crc) asm("crc32_iscsi_zero_00");
     
     #ifdef HAVE_NASM_X64
     
    +uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
    +{
    +	if (!buffer)
    +	{
    +	  return crc32_iscsi_zero_00(buffer, len, crc);
    +	}
    +
    +	/* Unlike crc32_iscsi_00, crc32_iscsi_01 handles the case where the
    +	 * input buffer is less than 8 bytes in its prelude, and does not
    +	 * prefetch beyond said buffer.
    +	 */
    +	return crc32_iscsi_01(buffer, len, crc);
    +}
    +
     uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
     {
     	uint32_t v;
    @@ -43,6 +58,11 @@ int ceph_crc32c_intel_fast_exists(void)
     	return 0;
     }
     
    +uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
    +{
    +	return 0;
    +}
    +
     uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
     {
     	return 0;
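
The `!buffer` branches above exist because Ceph's crc32c interface treats a null buffer as "len bytes of zeros", so callers can extend a checksum across a hole without materialising a zero-filled buffer; the dedicated `*_zero_*` kernels avoid reading memory at all. A small sketch of the equivalence, with `crc_update` standing in for whichever kernel was selected:

```cpp
#include <cstdint>
#include <vector>

using crc_fn = uint32_t (*)(uint32_t, const unsigned char*, unsigned);

// Equivalent but slow: build the zero buffer we are trying to avoid.
uint32_t crc_of_hole_slow(crc_fn crc_update, uint32_t crc, unsigned len) {
  std::vector<unsigned char> zeros(len, 0);
  return crc_update(crc, zeros.data(), len);
}

// Fast path: the null pointer tells the kernel "len zero bytes".
uint32_t crc_of_hole_fast(crc_fn crc_update, uint32_t crc, unsigned len) {
  return crc_update(crc, nullptr, len);
}
```
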
    diff --git a/src/common/crc32c_intel_fast.h b/src/common/crc32c_intel_fast.h
    index 26a444f60615..81c6e494f0c3 100644
    --- a/src/common/crc32c_intel_fast.h
    +++ b/src/common/crc32c_intel_fast.h
    @@ -10,10 +10,16 @@ extern int ceph_crc32c_intel_fast_exists(void);
     
     #ifdef __x86_64__
     
    +extern uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len);
     extern uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len);
     
     #else
     
    +static inline uint32_t ceph_crc32c_intel_fast_pclmul(uint32_t crc, unsigned char const *buffer, unsigned len)
    +{
    +	return 0;
    +}
    +
     static inline uint32_t ceph_crc32c_intel_fast(uint32_t crc, unsigned char const *buffer, unsigned len)
     {
     	return 0;
    diff --git a/src/common/crc32c_ppc_fast_zero_asm.S b/src/common/crc32c_ppc_fast_zero_asm.S
    index cff9cce7fd40..3defa6deca68 100644
    --- a/src/common/crc32c_ppc_fast_zero_asm.S
    +++ b/src/common/crc32c_ppc_fast_zero_asm.S
    @@ -25,6 +25,23 @@
     #endif
     #include "ppc-opcode.h"
     
    +/*
    + * The following line is required because toc is defined as 2 in
    + * ppc-asm.h. This definition will break @toc in the assembly code,
    + * hence toc should be undefined.
    + */
    +#undef toc
    +
    +/* If we do not define r2 as 2, the assembler throws errors.
    + * This is because the assembler has no builtin support for
    + * registers, and we should either define them ourselves or
    + * use their indexes explicitly like:
    + *       addis   4,2,.bit_reflected_constants@toc@ha
    + */
    +#ifndef r2
    +#define r2 2
    +#endif
    +
     	.section	.data
     .balign 16
     .constants:
    @@ -45,8 +62,8 @@
     
     /* unsigned int barrett_reduction(unsigned long val) */
     FUNC_START(barrett_reduction)
    -	lis	r4,.constants@ha
    -	la	r4,.constants@l(r4)
    +	addis   r4,r2,.constants@toc@ha
    +	addi    r4,r4,.constants@toc@l
     
     	li	r5,16
     	vxor	v1,v1,v1	/* zero v1 */
    @@ -83,8 +100,8 @@ FUNC_END(barrett_reduction)
     
     /* unsigned int barrett_reduction_reflected(unsigned long val) */
     FUNC_START(barrett_reduction_reflected)
    -	lis	r4,.bit_reflected_constants@ha
    -	la	r4,.bit_reflected_constants@l(r4)
    +	addis   r4,r2,.bit_reflected_constants@toc@ha
    +	addi    r4,r4,.bit_reflected_constants@toc@l
     
     	li	r5,16
     	vxor	v1,v1,v1	/* zero v1 */
    diff --git a/src/common/crc32c_s390x.c b/src/common/crc32c_s390x.c
    new file mode 100644
    index 000000000000..6966f41c85e8
    --- /dev/null
    +++ b/src/common/crc32c_s390x.c
    @@ -0,0 +1,606 @@
    +/*
    + * CRC-32 algorithm implemented with the z/Architecture Vector Extension
    + * Facility.
    + *
    + * Copyright 2024 IBM Corporation
    + *
    + * Licensed under the Apache License, Version 2.0 (the "License"); you may not
    + * use this file except in compliance with the License.  You may obtain a copy
    + * of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
    + * License for the specific language governing permissions and limitations
    + * under the License.
    + *
    + *
    + * Author(s): Hendrik Brueckner 
    + *            Anton Blanchard 
    + *            Bryan Chan 
    + *            Chris Zou 
    + *            Aliaksei Makarau 
    + */
    +
     +#include <stdint.h>
     +#include <endian.h>
    +#include "crc32c_s390x.h"
    +
    +#define VX_MIN_LEN		64
    +#define VX_ALIGNMENT		16L
    +#define VX_ALIGN_MASK		(VX_ALIGNMENT - 1)
    +
    +/* CRC-32C slicing-by-8 constants, for use on big-endian systems */
    +static const unsigned int __attribute__((aligned(128))) crc32ctable_le[8][256] = {
    +    {
    +    0x00000000, 0x03836bf2, 0xf7703be1, 0xf4f35013,
    +    0x1f979ac7, 0x1c14f135, 0xe8e7a126, 0xeb64cad4,
    +    0xcf58d98a, 0xccdbb278, 0x3828e26b, 0x3bab8999,
    +    0xd0cf434d, 0xd34c28bf, 0x27bf78ac, 0x243c135e,
    +    0x6fc75e10, 0x6c4435e2, 0x98b765f1, 0x9b340e03,
    +    0x7050c4d7, 0x73d3af25, 0x8720ff36, 0x84a394c4,
    +    0xa09f879a, 0xa31cec68, 0x57efbc7b, 0x546cd789,
    +    0xbf081d5d, 0xbc8b76af, 0x487826bc, 0x4bfb4d4e,
    +    0xde8ebd20, 0xdd0dd6d2, 0x29fe86c1, 0x2a7ded33,
    +    0xc11927e7, 0xc29a4c15, 0x36691c06, 0x35ea77f4,
    +    0x11d664aa, 0x12550f58, 0xe6a65f4b, 0xe52534b9,
    +    0x0e41fe6d, 0x0dc2959f, 0xf931c58c, 0xfab2ae7e,
    +    0xb149e330, 0xb2ca88c2, 0x4639d8d1, 0x45bab323,
    +    0xaede79f7, 0xad5d1205, 0x59ae4216, 0x5a2d29e4,
    +    0x7e113aba, 0x7d925148, 0x8961015b, 0x8ae26aa9,
    +    0x6186a07d, 0x6205cb8f, 0x96f69b9c, 0x9575f06e,
    +    0xbc1d7b41, 0xbf9e10b3, 0x4b6d40a0, 0x48ee2b52,
    +    0xa38ae186, 0xa0098a74, 0x54fada67, 0x5779b195,
    +    0x7345a2cb, 0x70c6c939, 0x8435992a, 0x87b6f2d8,
    +    0x6cd2380c, 0x6f5153fe, 0x9ba203ed, 0x9821681f,
    +    0xd3da2551, 0xd0594ea3, 0x24aa1eb0, 0x27297542,
    +    0xcc4dbf96, 0xcfced464, 0x3b3d8477, 0x38beef85,
    +    0x1c82fcdb, 0x1f019729, 0xebf2c73a, 0xe871acc8,
    +    0x0315661c, 0x00960dee, 0xf4655dfd, 0xf7e6360f,
    +    0x6293c661, 0x6110ad93, 0x95e3fd80, 0x96609672,
    +    0x7d045ca6, 0x7e873754, 0x8a746747, 0x89f70cb5,
    +    0xadcb1feb, 0xae487419, 0x5abb240a, 0x59384ff8,
    +    0xb25c852c, 0xb1dfeede, 0x452cbecd, 0x46afd53f,
    +    0x0d549871, 0x0ed7f383, 0xfa24a390, 0xf9a7c862,
    +    0x12c302b6, 0x11406944, 0xe5b33957, 0xe63052a5,
    +    0xc20c41fb, 0xc18f2a09, 0x357c7a1a, 0x36ff11e8,
    +    0xdd9bdb3c, 0xde18b0ce, 0x2aebe0dd, 0x29688b2f,
    +    0x783bf682, 0x7bb89d70, 0x8f4bcd63, 0x8cc8a691,
    +    0x67ac6c45, 0x642f07b7, 0x90dc57a4, 0x935f3c56,
    +    0xb7632f08, 0xb4e044fa, 0x401314e9, 0x43907f1b,
    +    0xa8f4b5cf, 0xab77de3d, 0x5f848e2e, 0x5c07e5dc,
    +    0x17fca892, 0x147fc360, 0xe08c9373, 0xe30ff881,
    +    0x086b3255, 0x0be859a7, 0xff1b09b4, 0xfc986246,
    +    0xd8a47118, 0xdb271aea, 0x2fd44af9, 0x2c57210b,
    +    0xc733ebdf, 0xc4b0802d, 0x3043d03e, 0x33c0bbcc,
    +    0xa6b54ba2, 0xa5362050, 0x51c57043, 0x52461bb1,
    +    0xb922d165, 0xbaa1ba97, 0x4e52ea84, 0x4dd18176,
    +    0x69ed9228, 0x6a6ef9da, 0x9e9da9c9, 0x9d1ec23b,
    +    0x767a08ef, 0x75f9631d, 0x810a330e, 0x828958fc,
    +    0xc97215b2, 0xcaf17e40, 0x3e022e53, 0x3d8145a1,
    +    0xd6e58f75, 0xd566e487, 0x2195b494, 0x2216df66,
    +    0x062acc38, 0x05a9a7ca, 0xf15af7d9, 0xf2d99c2b,
    +    0x19bd56ff, 0x1a3e3d0d, 0xeecd6d1e, 0xed4e06ec,
    +    0xc4268dc3, 0xc7a5e631, 0x3356b622, 0x30d5ddd0,
    +    0xdbb11704, 0xd8327cf6, 0x2cc12ce5, 0x2f424717,
    +    0x0b7e5449, 0x08fd3fbb, 0xfc0e6fa8, 0xff8d045a,
    +    0x14e9ce8e, 0x176aa57c, 0xe399f56f, 0xe01a9e9d,
    +    0xabe1d3d3, 0xa862b821, 0x5c91e832, 0x5f1283c0,
    +    0xb4764914, 0xb7f522e6, 0x430672f5, 0x40851907,
    +    0x64b90a59, 0x673a61ab, 0x93c931b8, 0x904a5a4a,
    +    0x7b2e909e, 0x78adfb6c, 0x8c5eab7f, 0x8fddc08d,
    +    0x1aa830e3, 0x192b5b11, 0xedd80b02, 0xee5b60f0,
    +    0x053faa24, 0x06bcc1d6, 0xf24f91c5, 0xf1ccfa37,
    +    0xd5f0e969, 0xd673829b, 0x2280d288, 0x2103b97a,
    +    0xca6773ae, 0xc9e4185c, 0x3d17484f, 0x3e9423bd,
    +    0x756f6ef3, 0x76ec0501, 0x821f5512, 0x819c3ee0,
    +    0x6af8f434, 0x697b9fc6, 0x9d88cfd5, 0x9e0ba427,
    +    0xba37b779, 0xb9b4dc8b, 0x4d478c98, 0x4ec4e76a,
    +    0xa5a02dbe, 0xa623464c, 0x52d0165f, 0x51537dad
    +    },{
    +    0x00000000, 0x7798a213, 0xee304527, 0x99a8e734,
    +    0xdc618a4e, 0xabf9285d, 0x3251cf69, 0x45c96d7a,
    +    0xb8c3149d, 0xcf5bb68e, 0x56f351ba, 0x216bf3a9,
    +    0x64a29ed3, 0x133a3cc0, 0x8a92dbf4, 0xfd0a79e7,
    +    0x81f1c53f, 0xf669672c, 0x6fc18018, 0x1859220b,
    +    0x5d904f71, 0x2a08ed62, 0xb3a00a56, 0xc438a845,
    +    0x3932d1a2, 0x4eaa73b1, 0xd7029485, 0xa09a3696,
    +    0xe5535bec, 0x92cbf9ff, 0x0b631ecb, 0x7cfbbcd8,
    +    0x02e38b7f, 0x757b296c, 0xecd3ce58, 0x9b4b6c4b,
    +    0xde820131, 0xa91aa322, 0x30b24416, 0x472ae605,
    +    0xba209fe2, 0xcdb83df1, 0x5410dac5, 0x238878d6,
    +    0x664115ac, 0x11d9b7bf, 0x8871508b, 0xffe9f298,
    +    0x83124e40, 0xf48aec53, 0x6d220b67, 0x1abaa974,
    +    0x5f73c40e, 0x28eb661d, 0xb1438129, 0xc6db233a,
    +    0x3bd15add, 0x4c49f8ce, 0xd5e11ffa, 0xa279bde9,
    +    0xe7b0d093, 0x90287280, 0x098095b4, 0x7e1837a7,
    +    0x04c617ff, 0x735eb5ec, 0xeaf652d8, 0x9d6ef0cb,
    +    0xd8a79db1, 0xaf3f3fa2, 0x3697d896, 0x410f7a85,
    +    0xbc050362, 0xcb9da171, 0x52354645, 0x25ade456,
    +    0x6064892c, 0x17fc2b3f, 0x8e54cc0b, 0xf9cc6e18,
    +    0x8537d2c0, 0xf2af70d3, 0x6b0797e7, 0x1c9f35f4,
    +    0x5956588e, 0x2ecefa9d, 0xb7661da9, 0xc0febfba,
    +    0x3df4c65d, 0x4a6c644e, 0xd3c4837a, 0xa45c2169,
    +    0xe1954c13, 0x960dee00, 0x0fa50934, 0x783dab27,
    +    0x06259c80, 0x71bd3e93, 0xe815d9a7, 0x9f8d7bb4,
    +    0xda4416ce, 0xaddcb4dd, 0x347453e9, 0x43ecf1fa,
    +    0xbee6881d, 0xc97e2a0e, 0x50d6cd3a, 0x274e6f29,
    +    0x62870253, 0x151fa040, 0x8cb74774, 0xfb2fe567,
    +    0x87d459bf, 0xf04cfbac, 0x69e41c98, 0x1e7cbe8b,
    +    0x5bb5d3f1, 0x2c2d71e2, 0xb58596d6, 0xc21d34c5,
    +    0x3f174d22, 0x488fef31, 0xd1270805, 0xa6bfaa16,
    +    0xe376c76c, 0x94ee657f, 0x0d46824b, 0x7ade2058,
    +    0xf9fac3fb, 0x8e6261e8, 0x17ca86dc, 0x605224cf,
    +    0x259b49b5, 0x5203eba6, 0xcbab0c92, 0xbc33ae81,
    +    0x4139d766, 0x36a17575, 0xaf099241, 0xd8913052,
    +    0x9d585d28, 0xeac0ff3b, 0x7368180f, 0x04f0ba1c,
    +    0x780b06c4, 0x0f93a4d7, 0x963b43e3, 0xe1a3e1f0,
    +    0xa46a8c8a, 0xd3f22e99, 0x4a5ac9ad, 0x3dc26bbe,
    +    0xc0c81259, 0xb750b04a, 0x2ef8577e, 0x5960f56d,
    +    0x1ca99817, 0x6b313a04, 0xf299dd30, 0x85017f23,
    +    0xfb194884, 0x8c81ea97, 0x15290da3, 0x62b1afb0,
    +    0x2778c2ca, 0x50e060d9, 0xc94887ed, 0xbed025fe,
    +    0x43da5c19, 0x3442fe0a, 0xadea193e, 0xda72bb2d,
    +    0x9fbbd657, 0xe8237444, 0x718b9370, 0x06133163,
    +    0x7ae88dbb, 0x0d702fa8, 0x94d8c89c, 0xe3406a8f,
    +    0xa68907f5, 0xd111a5e6, 0x48b942d2, 0x3f21e0c1,
    +    0xc22b9926, 0xb5b33b35, 0x2c1bdc01, 0x5b837e12,
    +    0x1e4a1368, 0x69d2b17b, 0xf07a564f, 0x87e2f45c,
    +    0xfd3cd404, 0x8aa47617, 0x130c9123, 0x64943330,
    +    0x215d5e4a, 0x56c5fc59, 0xcf6d1b6d, 0xb8f5b97e,
    +    0x45ffc099, 0x3267628a, 0xabcf85be, 0xdc5727ad,
    +    0x999e4ad7, 0xee06e8c4, 0x77ae0ff0, 0x0036ade3,
    +    0x7ccd113b, 0x0b55b328, 0x92fd541c, 0xe565f60f,
    +    0xa0ac9b75, 0xd7343966, 0x4e9cde52, 0x39047c41,
    +    0xc40e05a6, 0xb396a7b5, 0x2a3e4081, 0x5da6e292,
    +    0x186f8fe8, 0x6ff72dfb, 0xf65fcacf, 0x81c768dc,
    +    0xffdf5f7b, 0x8847fd68, 0x11ef1a5c, 0x6677b84f,
    +    0x23bed535, 0x54267726, 0xcd8e9012, 0xba163201,
    +    0x471c4be6, 0x3084e9f5, 0xa92c0ec1, 0xdeb4acd2,
    +    0x9b7dc1a8, 0xece563bb, 0x754d848f, 0x02d5269c,
    +    0x7e2e9a44, 0x09b63857, 0x901edf63, 0xe7867d70,
    +    0xa24f100a, 0xd5d7b219, 0x4c7f552d, 0x3be7f73e,
    +    0xc6ed8ed9, 0xb1752cca, 0x28ddcbfe, 0x5f4569ed,
    +    0x1a8c0497, 0x6d14a684, 0xf4bc41b0, 0x8324e3a3
    +    },{
    +    0x00000000, 0x7e9241a5, 0x0d526f4f, 0x73c02eea,
    +    0x1aa4de9e, 0x64369f3b, 0x17f6b1d1, 0x6964f074,
    +    0xc53e5138, 0xbbac109d, 0xc86c3e77, 0xb6fe7fd2,
    +    0xdf9a8fa6, 0xa108ce03, 0xd2c8e0e9, 0xac5aa14c,
    +    0x8a7da270, 0xf4efe3d5, 0x872fcd3f, 0xf9bd8c9a,
    +    0x90d97cee, 0xee4b3d4b, 0x9d8b13a1, 0xe3195204,
    +    0x4f43f348, 0x31d1b2ed, 0x42119c07, 0x3c83dda2,
    +    0x55e72dd6, 0x2b756c73, 0x58b54299, 0x2627033c,
    +    0x14fb44e1, 0x6a690544, 0x19a92bae, 0x673b6a0b,
    +    0x0e5f9a7f, 0x70cddbda, 0x030df530, 0x7d9fb495,
    +    0xd1c515d9, 0xaf57547c, 0xdc977a96, 0xa2053b33,
    +    0xcb61cb47, 0xb5f38ae2, 0xc633a408, 0xb8a1e5ad,
    +    0x9e86e691, 0xe014a734, 0x93d489de, 0xed46c87b,
    +    0x8422380f, 0xfab079aa, 0x89705740, 0xf7e216e5,
    +    0x5bb8b7a9, 0x252af60c, 0x56ead8e6, 0x28789943,
    +    0x411c6937, 0x3f8e2892, 0x4c4e0678, 0x32dc47dd,
    +    0xd98065c7, 0xa7122462, 0xd4d20a88, 0xaa404b2d,
    +    0xc324bb59, 0xbdb6fafc, 0xce76d416, 0xb0e495b3,
    +    0x1cbe34ff, 0x622c755a, 0x11ec5bb0, 0x6f7e1a15,
    +    0x061aea61, 0x7888abc4, 0x0b48852e, 0x75dac48b,
    +    0x53fdc7b7, 0x2d6f8612, 0x5eafa8f8, 0x203de95d,
    +    0x49591929, 0x37cb588c, 0x440b7666, 0x3a9937c3,
    +    0x96c3968f, 0xe851d72a, 0x9b91f9c0, 0xe503b865,
    +    0x8c674811, 0xf2f509b4, 0x8135275e, 0xffa766fb,
    +    0xcd7b2126, 0xb3e96083, 0xc0294e69, 0xbebb0fcc,
    +    0xd7dfffb8, 0xa94dbe1d, 0xda8d90f7, 0xa41fd152,
    +    0x0845701e, 0x76d731bb, 0x05171f51, 0x7b855ef4,
    +    0x12e1ae80, 0x6c73ef25, 0x1fb3c1cf, 0x6121806a,
    +    0x47068356, 0x3994c2f3, 0x4a54ec19, 0x34c6adbc,
    +    0x5da25dc8, 0x23301c6d, 0x50f03287, 0x2e627322,
    +    0x8238d26e, 0xfcaa93cb, 0x8f6abd21, 0xf1f8fc84,
    +    0x989c0cf0, 0xe60e4d55, 0x95ce63bf, 0xeb5c221a,
    +    0x4377278b, 0x3de5662e, 0x4e2548c4, 0x30b70961,
    +    0x59d3f915, 0x2741b8b0, 0x5481965a, 0x2a13d7ff,
    +    0x864976b3, 0xf8db3716, 0x8b1b19fc, 0xf5895859,
    +    0x9ceda82d, 0xe27fe988, 0x91bfc762, 0xef2d86c7,
    +    0xc90a85fb, 0xb798c45e, 0xc458eab4, 0xbacaab11,
    +    0xd3ae5b65, 0xad3c1ac0, 0xdefc342a, 0xa06e758f,
    +    0x0c34d4c3, 0x72a69566, 0x0166bb8c, 0x7ff4fa29,
    +    0x16900a5d, 0x68024bf8, 0x1bc26512, 0x655024b7,
    +    0x578c636a, 0x291e22cf, 0x5ade0c25, 0x244c4d80,
    +    0x4d28bdf4, 0x33bafc51, 0x407ad2bb, 0x3ee8931e,
    +    0x92b23252, 0xec2073f7, 0x9fe05d1d, 0xe1721cb8,
    +    0x8816eccc, 0xf684ad69, 0x85448383, 0xfbd6c226,
    +    0xddf1c11a, 0xa36380bf, 0xd0a3ae55, 0xae31eff0,
    +    0xc7551f84, 0xb9c75e21, 0xca0770cb, 0xb495316e,
    +    0x18cf9022, 0x665dd187, 0x159dff6d, 0x6b0fbec8,
    +    0x026b4ebc, 0x7cf90f19, 0x0f3921f3, 0x71ab6056,
    +    0x9af7424c, 0xe46503e9, 0x97a52d03, 0xe9376ca6,
    +    0x80539cd2, 0xfec1dd77, 0x8d01f39d, 0xf393b238,
    +    0x5fc91374, 0x215b52d1, 0x529b7c3b, 0x2c093d9e,
    +    0x456dcdea, 0x3bff8c4f, 0x483fa2a5, 0x36ade300,
    +    0x108ae03c, 0x6e18a199, 0x1dd88f73, 0x634aced6,
    +    0x0a2e3ea2, 0x74bc7f07, 0x077c51ed, 0x79ee1048,
    +    0xd5b4b104, 0xab26f0a1, 0xd8e6de4b, 0xa6749fee,
    +    0xcf106f9a, 0xb1822e3f, 0xc24200d5, 0xbcd04170,
    +    0x8e0c06ad, 0xf09e4708, 0x835e69e2, 0xfdcc2847,
    +    0x94a8d833, 0xea3a9996, 0x99fab77c, 0xe768f6d9,
    +    0x4b325795, 0x35a01630, 0x466038da, 0x38f2797f,
    +    0x5196890b, 0x2f04c8ae, 0x5cc4e644, 0x2256a7e1,
    +    0x0471a4dd, 0x7ae3e578, 0x0923cb92, 0x77b18a37,
    +    0x1ed57a43, 0x60473be6, 0x1387150c, 0x6d1554a9,
    +    0xc14ff5e5, 0xbfddb440, 0xcc1d9aaa, 0xb28fdb0f,
    +    0xdbeb2b7b, 0xa5796ade, 0xd6b94434, 0xa82b0591
    +    },{
    +    0x00000000, 0xb8aa45dd, 0x812367bf, 0x39892262,
    +    0xf331227b, 0x4b9b67a6, 0x721245c4, 0xcab80019,
    +    0xe66344f6, 0x5ec9012b, 0x67402349, 0xdfea6694,
    +    0x1552668d, 0xadf82350, 0x94710132, 0x2cdb44ef,
    +    0x3db164e9, 0x851b2134, 0xbc920356, 0x0438468b,
    +    0xce804692, 0x762a034f, 0x4fa3212d, 0xf70964f0,
    +    0xdbd2201f, 0x637865c2, 0x5af147a0, 0xe25b027d,
    +    0x28e30264, 0x904947b9, 0xa9c065db, 0x116a2006,
    +    0x8b1425d7, 0x33be600a, 0x0a374268, 0xb29d07b5,
    +    0x782507ac, 0xc08f4271, 0xf9066013, 0x41ac25ce,
    +    0x6d776121, 0xd5dd24fc, 0xec54069e, 0x54fe4343,
    +    0x9e46435a, 0x26ec0687, 0x1f6524e5, 0xa7cf6138,
    +    0xb6a5413e, 0x0e0f04e3, 0x37862681, 0x8f2c635c,
    +    0x45946345, 0xfd3e2698, 0xc4b704fa, 0x7c1d4127,
    +    0x50c605c8, 0xe86c4015, 0xd1e56277, 0x694f27aa,
    +    0xa3f727b3, 0x1b5d626e, 0x22d4400c, 0x9a7e05d1,
    +    0xe75fa6ab, 0x5ff5e376, 0x667cc114, 0xded684c9,
    +    0x146e84d0, 0xacc4c10d, 0x954de36f, 0x2de7a6b2,
    +    0x013ce25d, 0xb996a780, 0x801f85e2, 0x38b5c03f,
    +    0xf20dc026, 0x4aa785fb, 0x732ea799, 0xcb84e244,
    +    0xdaeec242, 0x6244879f, 0x5bcda5fd, 0xe367e020,
    +    0x29dfe039, 0x9175a5e4, 0xa8fc8786, 0x1056c25b,
    +    0x3c8d86b4, 0x8427c369, 0xbdaee10b, 0x0504a4d6,
    +    0xcfbca4cf, 0x7716e112, 0x4e9fc370, 0xf63586ad,
    +    0x6c4b837c, 0xd4e1c6a1, 0xed68e4c3, 0x55c2a11e,
    +    0x9f7aa107, 0x27d0e4da, 0x1e59c6b8, 0xa6f38365,
    +    0x8a28c78a, 0x32828257, 0x0b0ba035, 0xb3a1e5e8,
    +    0x7919e5f1, 0xc1b3a02c, 0xf83a824e, 0x4090c793,
    +    0x51fae795, 0xe950a248, 0xd0d9802a, 0x6873c5f7,
    +    0xa2cbc5ee, 0x1a618033, 0x23e8a251, 0x9b42e78c,
    +    0xb799a363, 0x0f33e6be, 0x36bac4dc, 0x8e108101,
    +    0x44a88118, 0xfc02c4c5, 0xc58be6a7, 0x7d21a37a,
    +    0x3fc9a052, 0x8763e58f, 0xbeeac7ed, 0x06408230,
    +    0xccf88229, 0x7452c7f4, 0x4ddbe596, 0xf571a04b,
    +    0xd9aae4a4, 0x6100a179, 0x5889831b, 0xe023c6c6,
    +    0x2a9bc6df, 0x92318302, 0xabb8a160, 0x1312e4bd,
    +    0x0278c4bb, 0xbad28166, 0x835ba304, 0x3bf1e6d9,
    +    0xf149e6c0, 0x49e3a31d, 0x706a817f, 0xc8c0c4a2,
    +    0xe41b804d, 0x5cb1c590, 0x6538e7f2, 0xdd92a22f,
    +    0x172aa236, 0xaf80e7eb, 0x9609c589, 0x2ea38054,
    +    0xb4dd8585, 0x0c77c058, 0x35fee23a, 0x8d54a7e7,
    +    0x47eca7fe, 0xff46e223, 0xc6cfc041, 0x7e65859c,
    +    0x52bec173, 0xea1484ae, 0xd39da6cc, 0x6b37e311,
    +    0xa18fe308, 0x1925a6d5, 0x20ac84b7, 0x9806c16a,
    +    0x896ce16c, 0x31c6a4b1, 0x084f86d3, 0xb0e5c30e,
    +    0x7a5dc317, 0xc2f786ca, 0xfb7ea4a8, 0x43d4e175,
    +    0x6f0fa59a, 0xd7a5e047, 0xee2cc225, 0x568687f8,
    +    0x9c3e87e1, 0x2494c23c, 0x1d1de05e, 0xa5b7a583,
    +    0xd89606f9, 0x603c4324, 0x59b56146, 0xe11f249b,
    +    0x2ba72482, 0x930d615f, 0xaa84433d, 0x122e06e0,
    +    0x3ef5420f, 0x865f07d2, 0xbfd625b0, 0x077c606d,
    +    0xcdc46074, 0x756e25a9, 0x4ce707cb, 0xf44d4216,
    +    0xe5276210, 0x5d8d27cd, 0x640405af, 0xdcae4072,
    +    0x1616406b, 0xaebc05b6, 0x973527d4, 0x2f9f6209,
    +    0x034426e6, 0xbbee633b, 0x82674159, 0x3acd0484,
    +    0xf075049d, 0x48df4140, 0x71566322, 0xc9fc26ff,
    +    0x5382232e, 0xeb2866f3, 0xd2a14491, 0x6a0b014c,
    +    0xa0b30155, 0x18194488, 0x219066ea, 0x993a2337,
    +    0xb5e167d8, 0x0d4b2205, 0x34c20067, 0x8c6845ba,
    +    0x46d045a3, 0xfe7a007e, 0xc7f3221c, 0x7f5967c1,
    +    0x6e3347c7, 0xd699021a, 0xef102078, 0x57ba65a5,
    +    0x9d0265bc, 0x25a82061, 0x1c210203, 0xa48b47de,
    +    0x88500331, 0x30fa46ec, 0x0973648e, 0xb1d92153,
    +    0x7b61214a, 0xc3cb6497, 0xfa4246f5, 0x42e80328
    +    },{
    +    0x00000000, 0xac6f1138, 0x58df2270, 0xf4b03348,
    +    0xb0be45e0, 0x1cd154d8, 0xe8616790, 0x440e76a8,
    +    0x910b67c5, 0x3d6476fd, 0xc9d445b5, 0x65bb548d,
    +    0x21b52225, 0x8dda331d, 0x796a0055, 0xd505116d,
    +    0xd361228f, 0x7f0e33b7, 0x8bbe00ff, 0x27d111c7,
    +    0x63df676f, 0xcfb07657, 0x3b00451f, 0x976f5427,
    +    0x426a454a, 0xee055472, 0x1ab5673a, 0xb6da7602,
    +    0xf2d400aa, 0x5ebb1192, 0xaa0b22da, 0x066433e2,
    +    0x57b5a81b, 0xfbdab923, 0x0f6a8a6b, 0xa3059b53,
    +    0xe70bedfb, 0x4b64fcc3, 0xbfd4cf8b, 0x13bbdeb3,
    +    0xc6becfde, 0x6ad1dee6, 0x9e61edae, 0x320efc96,
    +    0x76008a3e, 0xda6f9b06, 0x2edfa84e, 0x82b0b976,
    +    0x84d48a94, 0x28bb9bac, 0xdc0ba8e4, 0x7064b9dc,
    +    0x346acf74, 0x9805de4c, 0x6cb5ed04, 0xc0dafc3c,
    +    0x15dfed51, 0xb9b0fc69, 0x4d00cf21, 0xe16fde19,
    +    0xa561a8b1, 0x090eb989, 0xfdbe8ac1, 0x51d19bf9,
    +    0xae6a5137, 0x0205400f, 0xf6b57347, 0x5ada627f,
    +    0x1ed414d7, 0xb2bb05ef, 0x460b36a7, 0xea64279f,
    +    0x3f6136f2, 0x930e27ca, 0x67be1482, 0xcbd105ba,
    +    0x8fdf7312, 0x23b0622a, 0xd7005162, 0x7b6f405a,
    +    0x7d0b73b8, 0xd1646280, 0x25d451c8, 0x89bb40f0,
    +    0xcdb53658, 0x61da2760, 0x956a1428, 0x39050510,
    +    0xec00147d, 0x406f0545, 0xb4df360d, 0x18b02735,
    +    0x5cbe519d, 0xf0d140a5, 0x046173ed, 0xa80e62d5,
    +    0xf9dff92c, 0x55b0e814, 0xa100db5c, 0x0d6fca64,
    +    0x4961bccc, 0xe50eadf4, 0x11be9ebc, 0xbdd18f84,
    +    0x68d49ee9, 0xc4bb8fd1, 0x300bbc99, 0x9c64ada1,
    +    0xd86adb09, 0x7405ca31, 0x80b5f979, 0x2cdae841,
    +    0x2abedba3, 0x86d1ca9b, 0x7261f9d3, 0xde0ee8eb,
    +    0x9a009e43, 0x366f8f7b, 0xc2dfbc33, 0x6eb0ad0b,
    +    0xbbb5bc66, 0x17daad5e, 0xe36a9e16, 0x4f058f2e,
    +    0x0b0bf986, 0xa764e8be, 0x53d4dbf6, 0xffbbcace,
    +    0x5cd5a26e, 0xf0bab356, 0x040a801e, 0xa8659126,
    +    0xec6be78e, 0x4004f6b6, 0xb4b4c5fe, 0x18dbd4c6,
    +    0xcddec5ab, 0x61b1d493, 0x9501e7db, 0x396ef6e3,
    +    0x7d60804b, 0xd10f9173, 0x25bfa23b, 0x89d0b303,
    +    0x8fb480e1, 0x23db91d9, 0xd76ba291, 0x7b04b3a9,
    +    0x3f0ac501, 0x9365d439, 0x67d5e771, 0xcbbaf649,
    +    0x1ebfe724, 0xb2d0f61c, 0x4660c554, 0xea0fd46c,
    +    0xae01a2c4, 0x026eb3fc, 0xf6de80b4, 0x5ab1918c,
    +    0x0b600a75, 0xa70f1b4d, 0x53bf2805, 0xffd0393d,
    +    0xbbde4f95, 0x17b15ead, 0xe3016de5, 0x4f6e7cdd,
    +    0x9a6b6db0, 0x36047c88, 0xc2b44fc0, 0x6edb5ef8,
    +    0x2ad52850, 0x86ba3968, 0x720a0a20, 0xde651b18,
    +    0xd80128fa, 0x746e39c2, 0x80de0a8a, 0x2cb11bb2,
    +    0x68bf6d1a, 0xc4d07c22, 0x30604f6a, 0x9c0f5e52,
    +    0x490a4f3f, 0xe5655e07, 0x11d56d4f, 0xbdba7c77,
    +    0xf9b40adf, 0x55db1be7, 0xa16b28af, 0x0d043997,
    +    0xf2bff359, 0x5ed0e261, 0xaa60d129, 0x060fc011,
    +    0x4201b6b9, 0xee6ea781, 0x1ade94c9, 0xb6b185f1,
    +    0x63b4949c, 0xcfdb85a4, 0x3b6bb6ec, 0x9704a7d4,
    +    0xd30ad17c, 0x7f65c044, 0x8bd5f30c, 0x27bae234,
    +    0x21ded1d6, 0x8db1c0ee, 0x7901f3a6, 0xd56ee29e,
    +    0x91609436, 0x3d0f850e, 0xc9bfb646, 0x65d0a77e,
    +    0xb0d5b613, 0x1cbaa72b, 0xe80a9463, 0x4465855b,
    +    0x006bf3f3, 0xac04e2cb, 0x58b4d183, 0xf4dbc0bb,
    +    0xa50a5b42, 0x09654a7a, 0xfdd57932, 0x51ba680a,
    +    0x15b41ea2, 0xb9db0f9a, 0x4d6b3cd2, 0xe1042dea,
    +    0x34013c87, 0x986e2dbf, 0x6cde1ef7, 0xc0b10fcf,
    +    0x84bf7967, 0x28d0685f, 0xdc605b17, 0x700f4a2f,
    +    0x766b79cd, 0xda0468f5, 0x2eb45bbd, 0x82db4a85,
    +    0xc6d53c2d, 0x6aba2d15, 0x9e0a1e5d, 0x32650f65,
    +    0xe7601e08, 0x4b0f0f30, 0xbfbf3c78, 0x13d02d40,
    +    0x57de5be8, 0xfbb14ad0, 0x0f017998, 0xa36e68a0
    +    },{
    +    0x00000000, 0x196b30ef, 0xc3a08cdb, 0xdacbbc34,
    +    0x7737f5b2, 0x6e5cc55d, 0xb4977969, 0xadfc4986,
    +    0x1f180660, 0x0673368f, 0xdcb88abb, 0xc5d3ba54,
    +    0x682ff3d2, 0x7144c33d, 0xab8f7f09, 0xb2e44fe6,
    +    0x3e300cc0, 0x275b3c2f, 0xfd90801b, 0xe4fbb0f4,
    +    0x4907f972, 0x506cc99d, 0x8aa775a9, 0x93cc4546,
    +    0x21280aa0, 0x38433a4f, 0xe288867b, 0xfbe3b694,
    +    0x561fff12, 0x4f74cffd, 0x95bf73c9, 0x8cd44326,
    +    0x8d16f485, 0x947dc46a, 0x4eb6785e, 0x57dd48b1,
    +    0xfa210137, 0xe34a31d8, 0x39818dec, 0x20eabd03,
    +    0x920ef2e5, 0x8b65c20a, 0x51ae7e3e, 0x48c54ed1,
    +    0xe5390757, 0xfc5237b8, 0x26998b8c, 0x3ff2bb63,
    +    0xb326f845, 0xaa4dc8aa, 0x7086749e, 0x69ed4471,
    +    0xc4110df7, 0xdd7a3d18, 0x07b1812c, 0x1edab1c3,
    +    0xac3efe25, 0xb555ceca, 0x6f9e72fe, 0x76f54211,
    +    0xdb090b97, 0xc2623b78, 0x18a9874c, 0x01c2b7a3,
    +    0xeb5b040e, 0xf23034e1, 0x28fb88d5, 0x3190b83a,
    +    0x9c6cf1bc, 0x8507c153, 0x5fcc7d67, 0x46a74d88,
    +    0xf443026e, 0xed283281, 0x37e38eb5, 0x2e88be5a,
    +    0x8374f7dc, 0x9a1fc733, 0x40d47b07, 0x59bf4be8,
    +    0xd56b08ce, 0xcc003821, 0x16cb8415, 0x0fa0b4fa,
    +    0xa25cfd7c, 0xbb37cd93, 0x61fc71a7, 0x78974148,
    +    0xca730eae, 0xd3183e41, 0x09d38275, 0x10b8b29a,
    +    0xbd44fb1c, 0xa42fcbf3, 0x7ee477c7, 0x678f4728,
    +    0x664df08b, 0x7f26c064, 0xa5ed7c50, 0xbc864cbf,
    +    0x117a0539, 0x081135d6, 0xd2da89e2, 0xcbb1b90d,
    +    0x7955f6eb, 0x603ec604, 0xbaf57a30, 0xa39e4adf,
    +    0x0e620359, 0x170933b6, 0xcdc28f82, 0xd4a9bf6d,
    +    0x587dfc4b, 0x4116cca4, 0x9bdd7090, 0x82b6407f,
    +    0x2f4a09f9, 0x36213916, 0xecea8522, 0xf581b5cd,
    +    0x4765fa2b, 0x5e0ecac4, 0x84c576f0, 0x9dae461f,
    +    0x30520f99, 0x29393f76, 0xf3f28342, 0xea99b3ad,
    +    0xd6b7081c, 0xcfdc38f3, 0x151784c7, 0x0c7cb428,
    +    0xa180fdae, 0xb8ebcd41, 0x62207175, 0x7b4b419a,
    +    0xc9af0e7c, 0xd0c43e93, 0x0a0f82a7, 0x1364b248,
    +    0xbe98fbce, 0xa7f3cb21, 0x7d387715, 0x645347fa,
    +    0xe88704dc, 0xf1ec3433, 0x2b278807, 0x324cb8e8,
    +    0x9fb0f16e, 0x86dbc181, 0x5c107db5, 0x457b4d5a,
    +    0xf79f02bc, 0xeef43253, 0x343f8e67, 0x2d54be88,
    +    0x80a8f70e, 0x99c3c7e1, 0x43087bd5, 0x5a634b3a,
    +    0x5ba1fc99, 0x42cacc76, 0x98017042, 0x816a40ad,
    +    0x2c96092b, 0x35fd39c4, 0xef3685f0, 0xf65db51f,
    +    0x44b9faf9, 0x5dd2ca16, 0x87197622, 0x9e7246cd,
    +    0x338e0f4b, 0x2ae53fa4, 0xf02e8390, 0xe945b37f,
    +    0x6591f059, 0x7cfac0b6, 0xa6317c82, 0xbf5a4c6d,
    +    0x12a605eb, 0x0bcd3504, 0xd1068930, 0xc86db9df,
    +    0x7a89f639, 0x63e2c6d6, 0xb9297ae2, 0xa0424a0d,
    +    0x0dbe038b, 0x14d53364, 0xce1e8f50, 0xd775bfbf,
    +    0x3dec0c12, 0x24873cfd, 0xfe4c80c9, 0xe727b026,
    +    0x4adbf9a0, 0x53b0c94f, 0x897b757b, 0x90104594,
    +    0x22f40a72, 0x3b9f3a9d, 0xe15486a9, 0xf83fb646,
    +    0x55c3ffc0, 0x4ca8cf2f, 0x9663731b, 0x8f0843f4,
    +    0x03dc00d2, 0x1ab7303d, 0xc07c8c09, 0xd917bce6,
    +    0x74ebf560, 0x6d80c58f, 0xb74b79bb, 0xae204954,
    +    0x1cc406b2, 0x05af365d, 0xdf648a69, 0xc60fba86,
    +    0x6bf3f300, 0x7298c3ef, 0xa8537fdb, 0xb1384f34,
    +    0xb0faf897, 0xa991c878, 0x735a744c, 0x6a3144a3,
    +    0xc7cd0d25, 0xdea63dca, 0x046d81fe, 0x1d06b111,
    +    0xafe2fef7, 0xb689ce18, 0x6c42722c, 0x752942c3,
    +    0xd8d50b45, 0xc1be3baa, 0x1b75879e, 0x021eb771,
    +    0x8ecaf457, 0x97a1c4b8, 0x4d6a788c, 0x54014863,
    +    0xf9fd01e5, 0xe096310a, 0x3a5d8d3e, 0x2336bdd1,
    +    0x91d2f237, 0x88b9c2d8, 0x52727eec, 0x4b194e03,
    +    0xe6e50785, 0xff8e376a, 0x25458b5e, 0x3c2ebbb1
    +    },{
    +    0x00000000, 0xc82c0368, 0x905906d0, 0x587505b8,
    +    0xd1c5e0a5, 0x19e9e3cd, 0x419ce675, 0x89b0e51d,
    +    0x53fd2d4e, 0x9bd12e26, 0xc3a42b9e, 0x0b8828f6,
    +    0x8238cdeb, 0x4a14ce83, 0x1261cb3b, 0xda4dc853,
    +    0xa6fa5b9c, 0x6ed658f4, 0x36a35d4c, 0xfe8f5e24,
    +    0x773fbb39, 0xbf13b851, 0xe766bde9, 0x2f4abe81,
    +    0xf50776d2, 0x3d2b75ba, 0x655e7002, 0xad72736a,
    +    0x24c29677, 0xecee951f, 0xb49b90a7, 0x7cb793cf,
    +    0xbd835b3d, 0x75af5855, 0x2dda5ded, 0xe5f65e85,
    +    0x6c46bb98, 0xa46ab8f0, 0xfc1fbd48, 0x3433be20,
    +    0xee7e7673, 0x2652751b, 0x7e2770a3, 0xb60b73cb,
    +    0x3fbb96d6, 0xf79795be, 0xafe29006, 0x67ce936e,
    +    0x1b7900a1, 0xd35503c9, 0x8b200671, 0x430c0519,
    +    0xcabce004, 0x0290e36c, 0x5ae5e6d4, 0x92c9e5bc,
    +    0x48842def, 0x80a82e87, 0xd8dd2b3f, 0x10f12857,
    +    0x9941cd4a, 0x516dce22, 0x0918cb9a, 0xc134c8f2,
    +    0x7a07b77a, 0xb22bb412, 0xea5eb1aa, 0x2272b2c2,
    +    0xabc257df, 0x63ee54b7, 0x3b9b510f, 0xf3b75267,
    +    0x29fa9a34, 0xe1d6995c, 0xb9a39ce4, 0x718f9f8c,
    +    0xf83f7a91, 0x301379f9, 0x68667c41, 0xa04a7f29,
    +    0xdcfdece6, 0x14d1ef8e, 0x4ca4ea36, 0x8488e95e,
    +    0x0d380c43, 0xc5140f2b, 0x9d610a93, 0x554d09fb,
    +    0x8f00c1a8, 0x472cc2c0, 0x1f59c778, 0xd775c410,
    +    0x5ec5210d, 0x96e92265, 0xce9c27dd, 0x06b024b5,
    +    0xc784ec47, 0x0fa8ef2f, 0x57ddea97, 0x9ff1e9ff,
    +    0x16410ce2, 0xde6d0f8a, 0x86180a32, 0x4e34095a,
    +    0x9479c109, 0x5c55c261, 0x0420c7d9, 0xcc0cc4b1,
    +    0x45bc21ac, 0x8d9022c4, 0xd5e5277c, 0x1dc92414,
    +    0x617eb7db, 0xa952b4b3, 0xf127b10b, 0x390bb263,
    +    0xb0bb577e, 0x78975416, 0x20e251ae, 0xe8ce52c6,
    +    0x32839a95, 0xfaaf99fd, 0xa2da9c45, 0x6af69f2d,
    +    0xe3467a30, 0x2b6a7958, 0x731f7ce0, 0xbb337f88,
    +    0xf40e6ef5, 0x3c226d9d, 0x64576825, 0xac7b6b4d,
    +    0x25cb8e50, 0xede78d38, 0xb5928880, 0x7dbe8be8,
    +    0xa7f343bb, 0x6fdf40d3, 0x37aa456b, 0xff864603,
    +    0x7636a31e, 0xbe1aa076, 0xe66fa5ce, 0x2e43a6a6,
    +    0x52f43569, 0x9ad83601, 0xc2ad33b9, 0x0a8130d1,
    +    0x8331d5cc, 0x4b1dd6a4, 0x1368d31c, 0xdb44d074,
    +    0x01091827, 0xc9251b4f, 0x91501ef7, 0x597c1d9f,
    +    0xd0ccf882, 0x18e0fbea, 0x4095fe52, 0x88b9fd3a,
    +    0x498d35c8, 0x81a136a0, 0xd9d43318, 0x11f83070,
    +    0x9848d56d, 0x5064d605, 0x0811d3bd, 0xc03dd0d5,
    +    0x1a701886, 0xd25c1bee, 0x8a291e56, 0x42051d3e,
    +    0xcbb5f823, 0x0399fb4b, 0x5becfef3, 0x93c0fd9b,
    +    0xef776e54, 0x275b6d3c, 0x7f2e6884, 0xb7026bec,
    +    0x3eb28ef1, 0xf69e8d99, 0xaeeb8821, 0x66c78b49,
    +    0xbc8a431a, 0x74a64072, 0x2cd345ca, 0xe4ff46a2,
    +    0x6d4fa3bf, 0xa563a0d7, 0xfd16a56f, 0x353aa607,
    +    0x8e09d98f, 0x4625dae7, 0x1e50df5f, 0xd67cdc37,
    +    0x5fcc392a, 0x97e03a42, 0xcf953ffa, 0x07b93c92,
    +    0xddf4f4c1, 0x15d8f7a9, 0x4dadf211, 0x8581f179,
    +    0x0c311464, 0xc41d170c, 0x9c6812b4, 0x544411dc,
    +    0x28f38213, 0xe0df817b, 0xb8aa84c3, 0x708687ab,
    +    0xf93662b6, 0x311a61de, 0x696f6466, 0xa143670e,
    +    0x7b0eaf5d, 0xb322ac35, 0xeb57a98d, 0x237baae5,
    +    0xaacb4ff8, 0x62e74c90, 0x3a924928, 0xf2be4a40,
    +    0x338a82b2, 0xfba681da, 0xa3d38462, 0x6bff870a,
    +    0xe24f6217, 0x2a63617f, 0x721664c7, 0xba3a67af,
    +    0x6077affc, 0xa85bac94, 0xf02ea92c, 0x3802aa44,
    +    0xb1b24f59, 0x799e4c31, 0x21eb4989, 0xe9c74ae1,
    +    0x9570d92e, 0x5d5cda46, 0x0529dffe, 0xcd05dc96,
    +    0x44b5398b, 0x8c993ae3, 0xd4ec3f5b, 0x1cc03c33,
    +    0xc68df460, 0x0ea1f708, 0x56d4f2b0, 0x9ef8f1d8,
    +    0x174814c5, 0xdf6417ad, 0x87111215, 0x4f3d117d
    +    },{
    +    0x00000000, 0x277d3c49, 0x4efa7892, 0x698744db,
    +    0x6d821d21, 0x4aff2168, 0x237865b3, 0x040559fa,
    +    0xda043b42, 0xfd79070b, 0x94fe43d0, 0xb3837f99,
    +    0xb7862663, 0x90fb1a2a, 0xf97c5ef1, 0xde0162b8,
    +    0xb4097684, 0x93744acd, 0xfaf30e16, 0xdd8e325f,
    +    0xd98b6ba5, 0xfef657ec, 0x97711337, 0xb00c2f7e,
    +    0x6e0d4dc6, 0x4970718f, 0x20f73554, 0x078a091d,
    +    0x038f50e7, 0x24f26cae, 0x4d752875, 0x6a08143c,
    +    0x9965000d, 0xbe183c44, 0xd79f789f, 0xf0e244d6,
    +    0xf4e71d2c, 0xd39a2165, 0xba1d65be, 0x9d6059f7,
    +    0x43613b4f, 0x641c0706, 0x0d9b43dd, 0x2ae67f94,
    +    0x2ee3266e, 0x099e1a27, 0x60195efc, 0x476462b5,
    +    0x2d6c7689, 0x0a114ac0, 0x63960e1b, 0x44eb3252,
    +    0x40ee6ba8, 0x679357e1, 0x0e14133a, 0x29692f73,
    +    0xf7684dcb, 0xd0157182, 0xb9923559, 0x9eef0910,
    +    0x9aea50ea, 0xbd976ca3, 0xd4102878, 0xf36d1431,
    +    0x32cb001a, 0x15b63c53, 0x7c317888, 0x5b4c44c1,
    +    0x5f491d3b, 0x78342172, 0x11b365a9, 0x36ce59e0,
    +    0xe8cf3b58, 0xcfb20711, 0xa63543ca, 0x81487f83,
    +    0x854d2679, 0xa2301a30, 0xcbb75eeb, 0xecca62a2,
    +    0x86c2769e, 0xa1bf4ad7, 0xc8380e0c, 0xef453245,
    +    0xeb406bbf, 0xcc3d57f6, 0xa5ba132d, 0x82c72f64,
    +    0x5cc64ddc, 0x7bbb7195, 0x123c354e, 0x35410907,
    +    0x314450fd, 0x16396cb4, 0x7fbe286f, 0x58c31426,
    +    0xabae0017, 0x8cd33c5e, 0xe5547885, 0xc22944cc,
    +    0xc62c1d36, 0xe151217f, 0x88d665a4, 0xafab59ed,
    +    0x71aa3b55, 0x56d7071c, 0x3f5043c7, 0x182d7f8e,
    +    0x1c282674, 0x3b551a3d, 0x52d25ee6, 0x75af62af,
    +    0x1fa77693, 0x38da4ada, 0x515d0e01, 0x76203248,
    +    0x72256bb2, 0x555857fb, 0x3cdf1320, 0x1ba22f69,
    +    0xc5a34dd1, 0xe2de7198, 0x8b593543, 0xac24090a,
    +    0xa82150f0, 0x8f5c6cb9, 0xe6db2862, 0xc1a6142b,
    +    0x64960134, 0x43eb3d7d, 0x2a6c79a6, 0x0d1145ef,
    +    0x09141c15, 0x2e69205c, 0x47ee6487, 0x609358ce,
    +    0xbe923a76, 0x99ef063f, 0xf06842e4, 0xd7157ead,
    +    0xd3102757, 0xf46d1b1e, 0x9dea5fc5, 0xba97638c,
    +    0xd09f77b0, 0xf7e24bf9, 0x9e650f22, 0xb918336b,
    +    0xbd1d6a91, 0x9a6056d8, 0xf3e71203, 0xd49a2e4a,
    +    0x0a9b4cf2, 0x2de670bb, 0x44613460, 0x631c0829,
    +    0x671951d3, 0x40646d9a, 0x29e32941, 0x0e9e1508,
    +    0xfdf30139, 0xda8e3d70, 0xb30979ab, 0x947445e2,
    +    0x90711c18, 0xb70c2051, 0xde8b648a, 0xf9f658c3,
    +    0x27f73a7b, 0x008a0632, 0x690d42e9, 0x4e707ea0,
    +    0x4a75275a, 0x6d081b13, 0x048f5fc8, 0x23f26381,
    +    0x49fa77bd, 0x6e874bf4, 0x07000f2f, 0x207d3366,
    +    0x24786a9c, 0x030556d5, 0x6a82120e, 0x4dff2e47,
    +    0x93fe4cff, 0xb48370b6, 0xdd04346d, 0xfa790824,
    +    0xfe7c51de, 0xd9016d97, 0xb086294c, 0x97fb1505,
    +    0x565d012e, 0x71203d67, 0x18a779bc, 0x3fda45f5,
    +    0x3bdf1c0f, 0x1ca22046, 0x7525649d, 0x525858d4,
    +    0x8c593a6c, 0xab240625, 0xc2a342fe, 0xe5de7eb7,
    +    0xe1db274d, 0xc6a61b04, 0xaf215fdf, 0x885c6396,
    +    0xe25477aa, 0xc5294be3, 0xacae0f38, 0x8bd33371,
    +    0x8fd66a8b, 0xa8ab56c2, 0xc12c1219, 0xe6512e50,
    +    0x38504ce8, 0x1f2d70a1, 0x76aa347a, 0x51d70833,
    +    0x55d251c9, 0x72af6d80, 0x1b28295b, 0x3c551512,
    +    0xcf380123, 0xe8453d6a, 0x81c279b1, 0xa6bf45f8,
    +    0xa2ba1c02, 0x85c7204b, 0xec406490, 0xcb3d58d9,
    +    0x153c3a61, 0x32410628, 0x5bc642f3, 0x7cbb7eba,
    +    0x78be2740, 0x5fc31b09, 0x36445fd2, 0x1139639b,
    +    0x7b3177a7, 0x5c4c4bee, 0x35cb0f35, 0x12b6337c,
    +    0x16b36a86, 0x31ce56cf, 0x58491214, 0x7f342e5d,
    +    0xa1354ce5, 0x864870ac, 0xefcf3477, 0xc8b2083e,
    +    0xccb751c4, 0xebca6d8d, 0x824d2956, 0xa530151f
    +    }
    +};
    +
    +/* Prototypes for functions in assembly files */
    +unsigned int crc32c_le_vgfm_16(uint32_t crc, unsigned char const*buf, unsigned size);
    +
     +/* Pure C implementation of CRC-32C, one byte at a time */
    +unsigned int crc32c_le(uint32_t crc, unsigned char const *buf, unsigned len){
    +	crc = htole32(crc);
    +	if(buf != 0)
    +		while (len--)
    +			crc = crc32ctable_le[0][((crc >> 24) ^ *buf++) & 0xFF] ^ (crc << 8);
    +	else
    +		while (len--)
    +			crc = crc32ctable_le[0][((crc >> 24)) & 0xFF] ^ (crc << 8);
    +	crc = le32toh(crc);
    +	return crc;
    +}
    +
    +unsigned int ceph_crc32c_s390x(uint32_t crc, unsigned char const *data, unsigned datalen)
    +{
    +	unsigned long prealign, aligned, remaining;
    +
    +	if(data == 0)
    +		return crc32c_le(crc, data, datalen);
    +
    +	if(datalen < VX_MIN_LEN + VX_ALIGN_MASK)
    +		return crc32c_le(crc, data, datalen);
    +
    +	if ((unsigned long)data & VX_ALIGN_MASK) {
    +		prealign = VX_ALIGNMENT - ((unsigned long)data & VX_ALIGN_MASK);
    +		datalen -= prealign;
    +		crc = crc32c_le(crc, data, prealign);
    +		data = data + prealign;
    +	}
    +
    +	if (datalen < VX_MIN_LEN)
    +		return crc32c_le(crc, data, datalen);
    +
    +	aligned = datalen & ~VX_ALIGN_MASK;
    +	remaining = datalen & VX_ALIGN_MASK;
    +
    +	crc = crc32c_le_vgfm_16(crc, data, aligned);
    +	data = data + aligned;
    +
    +	if (remaining)
    +		crc = crc32c_le(crc, data, remaining);
    +
    +	return crc;
    +}
    diff --git a/src/common/crc32c_s390x.h b/src/common/crc32c_s390x.h
    new file mode 100644
    index 000000000000..ac71804c0979
    --- /dev/null
    +++ b/src/common/crc32c_s390x.h
    @@ -0,0 +1,39 @@
    +/*
    + * CRC-32 algorithm implemented with the z/Architecture Vector Extension
    + * Facility.
    + *
    + * Copyright 2024 IBM Corporation
    + *
    + * Licensed under the Apache License, Version 2.0 (the "License"); you may not
    + * use this file except in compliance with the License.  You may obtain a copy
    + * of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
    + * License for the specific language governing permissions and limitations
    + * under the License.
    + */
    +
    +#ifndef CEPH_COMMON_CRC32C_S390X_H
    +#define CEPH_COMMON_CRC32C_S390X_H
    +
    +#ifdef __cplusplus
    +extern "C" {
    +#endif
     +#include <stdint.h>
     +#include <stddef.h>
    +
     +/* Portable implementation of the CRC-32C (Castagnoli) little-endian variant */
    +unsigned int crc32c_le(uint32_t, unsigned char const*, unsigned);
    +
    +/* Hardware-accelerated version of the above */
    +unsigned int ceph_crc32c_s390x(uint32_t, unsigned char const*, unsigned);
    +
    +#ifdef __cplusplus
    +}
    +#endif
    +
    +#endif
    diff --git a/src/common/crc32c_s390x_le-vx.S b/src/common/crc32c_s390x_le-vx.S
    new file mode 100644
    index 000000000000..a413f759fef2
    --- /dev/null
    +++ b/src/common/crc32c_s390x_le-vx.S
    @@ -0,0 +1,292 @@
    +/*
    + * Hardware-accelerated CRC-32 variants for Linux on z Systems
    + *
    + * Use the z/Architecture Vector Extension Facility to accelerate the computing
    + * of bit-reflected CRC-32 checksums for IEEE 802.3 Ethernet and Castagnoli.
    + *
    + * This CRC-32 implementation algorithm is bit-reflected and processes the
    + * least-significant bit first (Little-Endian).
    + *
    + * Copyright 2015 IBM Corporation
    + *
    + * Licensed under the Apache License, Version 2.0 (the "License"); you may not
    + * use this file except in compliance with the License.  You may obtain a copy
    + * of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
    + * License for the specific language governing permissions and limitations
    + * under the License.
    + *
    + * Author(s): Hendrik Brueckner 
    + */
    +
    +#include "crc32c_s390x_vx-insn.h"
    +
    +/* Vector register range containing CRC-32 constants */
    +#define CONST_PERM_LE2BE	%v9
    +#define CONST_R2R1		%v10
    +#define CONST_R4R3		%v11
    +#define CONST_R5		%v12
    +#define CONST_RU_POLY		%v13
    +#define CONST_CRC_POLY		%v14
    +
    +.data
    +.align 8
    +
    +/*
    + * The CRC-32 constant block contains reduction constants to fold and
    + * process particular chunks of the input data stream in parallel.
    + *
    + * For the CRC-32 variants, the constants are precomputed according to
    + * these definitions:
    + *
    + *	R1 = [(x4*128+32 mod P'(x) << 32)]' << 1
    + *	R2 = [(x4*128-32 mod P'(x) << 32)]' << 1
    + *	R3 = [(x128+32 mod P'(x) << 32)]'   << 1
    + *	R4 = [(x128-32 mod P'(x) << 32)]'   << 1
    + *	R5 = [(x64 mod P'(x) << 32)]'	    << 1
    + *	R6 = [(x32 mod P'(x) << 32)]'	    << 1
    + *
    + *	The bit-reflected Barret reduction constant, u', is defined as
    + *	the bit reversal of floor(x**64 / P(x)).
    + *
    + *	where P(x) is the polynomial in the normal domain and the P'(x) is the
    + *	polynomial in the reversed (bit-reflected) domain.
    + *
    + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
    + *
    + *	P(x)  = 0x04C11DB7
    + *	P'(x) = 0xEDB88320
    + *
    + * CRC-32C (Castagnoli) polynomials:
    + *
    + *	P(x)  = 0x1EDC6F41
    + *	P'(x) = 0x82F63B78
    + */
    +
    +.Lconstants_CRC_32_LE:
    +	.octa		0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
    +	.quad		0x1c6e41596, 0x154442bd4		# R2, R1
    +	.quad		0x0ccaa009e, 0x1751997d0		# R4, R3
    +	.octa		0x163cd6124				# R5
    +	.octa		0x1F7011641				# u'
    +	.octa		0x1DB710641				# P'(x) << 1
    +
    +.Lconstants_CRC_32C_LE:
    +	.octa		0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
    +	.quad		0x09e4addf8, 0x740eef02			# R2, R1
    +	.quad		0x14cd00bd6, 0xf20c0dfe			# R4, R3
    +	.octa		0x0dd45aab8				# R5
    +	.octa		0x0dea713f1				# u'
    +	.octa		0x105ec76f0				# P'(x) << 1
    +
    +.previous
    +
    +.text
    +/*
    + * The CRC-32 functions use these calling conventions:
    + *
    + * Parameters:
    + *
    + *	%r2:	Initial CRC value, typically ~0; and final CRC (return) value.
    + *	%r3:	Input buffer pointer, performance might be improved if the
    + *		buffer is on a doubleword boundary.
    + *	%r4:	Length of the buffer, must be 64 bytes or greater.
    + *
    + * Register usage:
    + *
    + *	%r5:	CRC-32 constant pool base pointer.
    + *	V0:	Initial CRC value and intermediate constants and results.
    + *	V1..V4:	Data for CRC computation.
    + *	V5..V8:	Next data chunks that are fetched from the input buffer.
    + *	V9:	Constant for BE->LE conversion and shift operations
    + *
    + *	V10..V14: CRC-32 constants.
    + */
    +
    +ENTRY(crc32_le_vgfm_16)
    +	larl	%r5,.Lconstants_CRC_32_LE
    +	j	crc32_le_vgfm_generic
    +
    +ENTRY(crc32c_le_vgfm_16)
    +	larl	%r5,.Lconstants_CRC_32C_LE
    +	j	crc32_le_vgfm_generic
    +
    +crc32_le_vgfm_generic:
    +	/* Preserve non-volatile vector registers. */
    +	stmg    %r14,%r15,112(%r15)
    +	lay     %r15,-128(%r15)
    +	VSTM    %v8,%v15,0,%r15
    +
    +	/* Load CRC-32 constants into multiple vector registers. */
    +	VLM	CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5  
    +
    +	/*
    +	 * Load the initial CRC value.
    +	 *
    +	 * The CRC value is loaded into the rightmost word of the
    +	 * vector register and is later XORed with the LSB portion
    +	 * of the loaded input data.
    +	 */
    +	VZERO	%v0			/* Clear V0 */
    +	VLVGF	%v0,%r2,3		/* Load CRC into rightmost word */
    +
    +	/* Load a 64-byte data chunk and XOR with CRC */
    +	VLM	%v1,%v4,0,%r3		/* 64-bytes into V1..V4 */
    +
    +	/* Reflect the data since the CRC operates in the bit-reflected domain. */
    +	VPERM	%v1,%v1,%v1,CONST_PERM_LE2BE
    +	VPERM	%v2,%v2,%v2,CONST_PERM_LE2BE
    +	VPERM	%v3,%v3,%v3,CONST_PERM_LE2BE
    +	VPERM	%v4,%v4,%v4,CONST_PERM_LE2BE
    +	
    +	VX	%v1,%v0,%v1		/* V1 ^= CRC */
    +	aghi	%r3,64			/* BUF = BUF + 64 */
    +	aghi	%r4,-64			/* LEN = LEN - 64 */
    +
    +	/* Check remaining buffer size and jump to proper folding method. */
    +	cghi	%r4,64
    +	jl	.Lless_than_64bytes
    +
    +.Lfold_64bytes_loop:
    +	/* Load the next 64-byte data chunk into V5 to V8 */
    +	VLM	%v5,%v8,0,%r3
    +	VPERM	%v5,%v5,%v5,CONST_PERM_LE2BE
    +	VPERM	%v6,%v6,%v6,CONST_PERM_LE2BE
    +	VPERM	%v7,%v7,%v7,CONST_PERM_LE2BE
    +	VPERM	%v8,%v8,%v8,CONST_PERM_LE2BE
    +
    +	/*
    +	 * Perform a GF(2) multiplication of the doublewords in V1 with
    +	 * the R1 and R2 reduction constants in V10. The intermediate result
    +	 * is then folded (accumulated, or XOR-ed) with the next data chunk
    +	 * in V5 and stored in V1. Repeat this step for the register contents
    +	 * in V2, V3, and V4 respectively.
    +	 */
    +	VGFMAG	%v1,CONST_R2R1,%v1,%v5
    +	VGFMAG	%v2,CONST_R2R1,%v2,%v6
    +	VGFMAG	%v3,CONST_R2R1,%v3,%v7
    +	VGFMAG	%v4,CONST_R2R1,%v4,%v8
    +
    +	/* Adjust buffer pointer and length for next loop. */
    +	aghi	%r3,64			/* BUF = BUF + 64 */
    +	aghi	%r4,-64			/* LEN = LEN - 64 */
    +
    +	cghi	%r4,64
    +	jnl	.Lfold_64bytes_loop
    +
    +.Lless_than_64bytes:
    +	/*
    +	 * Fold V1 to V4 into a single 128-bit value in V1.  Multiply V1 with R3
     +	 * and R4 and accumulate the next 128-bit chunk until a single 128-bit
    +	 * value remains.
    +	 */
    +	VGFMAG	%v1,CONST_R4R3,%v1,%v2
    +	VGFMAG	%v1,CONST_R4R3,%v1,%v3
    +	VGFMAG	%v1,CONST_R4R3,%v1,%v4
    +
    +	/* Check whether to continue with 64-bit folding. */
    +	cghi	%r4,16
    +	jl	.Lfinal_fold
    +
    +.Lfold_16bytes_loop:
    +
    +	VL	%v2,0,,%r3		/* Load next data chunk */
    +	VPERM	%v2,%v2,%v2,CONST_PERM_LE2BE
    +	VGFMAG	%v1,CONST_R4R3,%v1,%v2	/* Fold next data chunk */
    +
    +	/* Adjust buffer pointer and size for folding next data chunk. */
    +	aghi	%r3,16
    +	aghi	%r4,-16
    +
    +	/* Process remaining data chunks. */
    +	cghi	%r4,16
    +	jnl	.Lfold_16bytes_loop
    +
    +.Lfinal_fold:
    +	/*
    +	 * Set up a vector register for byte shifts.  The shift value must
    +	 * be loaded in bits 1-4 in byte element 7 of a vector register.
    +	 * Shift by 8 bytes: 0x40
    +	 * Shift by 4 bytes: 0x20
    +	 */
    +	VLEIB	%v9,0x40,7
    +
    +	/*
    +	 * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
    +	 * to move R4 into the rightmost doubleword and set the leftmost
    +	 * doubleword to 0x1.
    +	 */
    +	VSRLB	%v0,CONST_R4R3,%v9
    +	VLEIG	%v0,1,0
    +
    +	/*
    +	 * Compute GF(2) product of V1 and V0.	The rightmost doubleword
    +	 * of V1 is multiplied with R4.  The leftmost doubleword of V1 is
    +	 * multiplied by 0x1 and is then XORed with rightmost product.
    +	 * Implicitly, the intermediate leftmost product becomes padded
    +	 */
    +	VGFMG	%v1,%v0,%v1
    +
    +	/*
    +	 * Now do the final 32-bit fold by multiplying the rightmost word
    +	 * in V1 with R5 and XOR the result with the remaining bits in V1.
    +	 *
    +	 * To achieve this by a single VGFMAG, right shift V1 by a word
    +	 * and store the result in V2 which is then accumulated.  Use the
    +	 * vector unpack instruction to load the rightmost half of the
    +	 * doubleword into the rightmost doubleword element of V1; the other
    +	 * half is loaded in the leftmost doubleword.
    +	 * The vector register with CONST_R5 contains the R5 constant in the
    +	 * rightmost doubleword and the leftmost doubleword is zero to ignore
    +	 * the leftmost product of V1.
    +	 */
    +	VLEIB	%v9,0x20,7		  /* Shift by words */
    +	VSRLB	%v2,%v1,%v9		  /* Store remaining bits in V2 */
    +	VUPLLF	%v1,%v1			  /* Split rightmost doubleword */
    +	VGFMAG	%v1,CONST_R5,%v1,%v2	  /* V1 = (V1 * R5) XOR V2 */
    +
    +	/*
     +	 * Apply a Barrett reduction to compute the final 32-bit CRC value.
     +	 *
     +	 * The input values to the Barrett reduction are the degree-63 polynomial
     +	 * in V1 (R(x)), the degree-32 generator polynomial, and the reduction
     +	 * constant u.	The Barrett reduction result is the CRC value of R(x) mod
     +	 * P(x).
     +	 *
     +	 * The Barrett reduction algorithm is defined as:
    +	 *
    +	 *    1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
    +	 *    2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
    +	 *    3. C(x)  = R(x) XOR T2(x) mod x^32
    +	 *
    +	 *  Note: The leftmost doubleword of vector register containing
    +	 *  CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
    +	 *  is zero and does not contribute to the final result.
    +	 */
    +
    +	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
    +	VUPLLF	%v2,%v1
    +	VGFMG	%v2,CONST_RU_POLY,%v2
    +
    +	/*
    +	 * Compute the GF(2) product of the CRC polynomial with T1(x) in
    +	 * V2 and XOR the intermediate result, T2(x), with the value in V1.
    +	 * The final result is stored in word element 2 of V2.
    +	 */
    +	VUPLLF	%v2,%v2
    +	VGFMAG	%v2,CONST_CRC_POLY,%v2,%v1
    +
    +.Ldone:
    +	/* Move the result to R2, restore preserved registers and return. */
    +	VLGVF	%r2,%v2,2
    +	VLM     %v8,%v15,0,%r15
    +	lmg     %r14,%r15,240(%r15)
    +	br	%r14
    +
    +.previous
    +
    diff --git a/src/common/crc32c_s390x_vx-insn.h b/src/common/crc32c_s390x_vx-insn.h
    new file mode 100644
    index 000000000000..d3b7a9c800cd
    --- /dev/null
    +++ b/src/common/crc32c_s390x_vx-insn.h
    @@ -0,0 +1,494 @@
    +/*
    + * Support for Vector Instructions
    + *
    + * Assembler macros to generate .byte/.word code for particular vector
    + * instructions that are supported by recent binutils.
    + *
    + * Copyright 2015 IBM Corporation
    + *
    + * Licensed under the Apache License, Version 2.0 (the "License"); you may not
    + * use this file except in compliance with the License.  You may obtain a copy
    + * of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
    + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
    + * License for the specific language governing permissions and limitations
    + * under the License.
    + *
    + * Author(s): Hendrik Brueckner 
    + */
    +
    +#ifndef __ASM_S390_VX_INSN_H
    +#define __ASM_S390_VX_INSN_H
    +
    +/* Boilerplate for function entry points */
    +#define ENTRY(name) \
    +.globl name;        \
    +.align 4, 0x90;     \
    +name:
    +
    +/* Macros to generate vector instruction byte code */
    +
    +#define REG_NUM_INVALID	       255
    +
    +/* GR_NUM - Retrieve general-purpose register number
    + *
    + * @opd:	Operand to store register number
     + * @gr:	String designation register in the format "%rN"
    + */
    +.macro	GR_NUM	opd gr
    +    \opd = REG_NUM_INVALID
    +    .ifc \gr,%r0
    +	\opd = 0
    +    .endif
    +    .ifc \gr,%r1
    +	\opd = 1
    +    .endif
    +    .ifc \gr,%r2
    +	\opd = 2
    +    .endif
    +    .ifc \gr,%r3
    +	\opd = 3
    +    .endif
    +    .ifc \gr,%r4
    +	\opd = 4
    +    .endif
    +    .ifc \gr,%r5
    +	\opd = 5
    +    .endif
    +    .ifc \gr,%r6
    +	\opd = 6
    +    .endif
    +    .ifc \gr,%r7
    +	\opd = 7
    +    .endif
    +    .ifc \gr,%r8
    +	\opd = 8
    +    .endif
    +    .ifc \gr,%r9
    +	\opd = 9
    +    .endif
    +    .ifc \gr,%r10
    +	\opd = 10
    +    .endif
    +    .ifc \gr,%r11
    +	\opd = 11
    +    .endif
    +    .ifc \gr,%r12
    +	\opd = 12
    +    .endif
    +    .ifc \gr,%r13
    +	\opd = 13
    +    .endif
    +    .ifc \gr,%r14
    +	\opd = 14
    +    .endif
    +    .ifc \gr,%r15
    +	\opd = 15
    +    .endif
    +    .if \opd == REG_NUM_INVALID
    +	.error "Invalid general-purpose register designation: \gr"
    +    .endif
    +.endm
    +
    +/* VX_R() - Macro to encode the VX_NUM into the instruction */
    +#define VX_R(v)		(v & 0x0F)
    +
    +/* VX_NUM - Retrieve vector register number
    + *
    + * @opd:	Operand to store register number
    + * @vxr:	String designation register in the format "%vN"
    + *
     + * The vector register number is used both as an input to the
     + * instruction and to compute the RXB field of the instruction.
     + * To encode a particular vector register number into the opcode,
     + * use the VX_R(v) macro to extract its low four bits.
    + */
    +.macro	VX_NUM	opd vxr
    +    \opd = REG_NUM_INVALID
    +    .ifc \vxr,%v0
    +	\opd = 0
    +    .endif
    +    .ifc \vxr,%v1
    +	\opd = 1
    +    .endif
    +    .ifc \vxr,%v2
    +	\opd = 2
    +    .endif
    +    .ifc \vxr,%v3
    +	\opd = 3
    +    .endif
    +    .ifc \vxr,%v4
    +	\opd = 4
    +    .endif
    +    .ifc \vxr,%v5
    +	\opd = 5
    +    .endif
    +    .ifc \vxr,%v6
    +	\opd = 6
    +    .endif
    +    .ifc \vxr,%v7
    +	\opd = 7
    +    .endif
    +    .ifc \vxr,%v8
    +	\opd = 8
    +    .endif
    +    .ifc \vxr,%v9
    +	\opd = 9
    +    .endif
    +    .ifc \vxr,%v10
    +	\opd = 10
    +    .endif
    +    .ifc \vxr,%v11
    +	\opd = 11
    +    .endif
    +    .ifc \vxr,%v12
    +	\opd = 12
    +    .endif
    +    .ifc \vxr,%v13
    +	\opd = 13
    +    .endif
    +    .ifc \vxr,%v14
    +	\opd = 14
    +    .endif
    +    .ifc \vxr,%v15
    +	\opd = 15
    +    .endif
    +    .ifc \vxr,%v16
    +	\opd = 16
    +    .endif
    +    .ifc \vxr,%v17
    +	\opd = 17
    +    .endif
    +    .ifc \vxr,%v18
    +	\opd = 18
    +    .endif
    +    .ifc \vxr,%v19
    +	\opd = 19
    +    .endif
    +    .ifc \vxr,%v20
    +	\opd = 20
    +    .endif
    +    .ifc \vxr,%v21
    +	\opd = 21
    +    .endif
    +    .ifc \vxr,%v22
    +	\opd = 22
    +    .endif
    +    .ifc \vxr,%v23
    +	\opd = 23
    +    .endif
    +    .ifc \vxr,%v24
    +	\opd = 24
    +    .endif
    +    .ifc \vxr,%v25
    +	\opd = 25
    +    .endif
    +    .ifc \vxr,%v26
    +	\opd = 26
    +    .endif
    +    .ifc \vxr,%v27
    +	\opd = 27
    +    .endif
    +    .ifc \vxr,%v28
    +	\opd = 28
    +    .endif
    +    .ifc \vxr,%v29
    +	\opd = 29
    +    .endif
    +    .ifc \vxr,%v30
    +	\opd = 30
    +    .endif
    +    .ifc \vxr,%v31
    +	\opd = 31
    +    .endif
    +    .if \opd == REG_NUM_INVALID
    +	.error "Invalid vector register designation: \vxr"
    +    .endif
    +.endm
    +
     +/* RXB - Compute the RXB field from the most significant bits of the vector register operands
    + *
    + * @rxb:	Operand to store computed RXB value
    + * @v1:		First vector register designated operand
    + * @v2:		Second vector register designated operand
    + * @v3:		Third vector register designated operand
    + * @v4:		Fourth vector register designated operand
    + */
    +.macro	RXB	rxb v1 v2=0 v3=0 v4=0
    +    \rxb = 0
    +    .if \v1 & 0x10
    +	\rxb = \rxb | 0x08
    +    .endif
    +    .if \v2 & 0x10
    +	\rxb = \rxb | 0x04
    +    .endif
    +    .if \v3 & 0x10
    +	\rxb = \rxb | 0x02
    +    .endif
    +    .if \v4 & 0x10
    +	\rxb = \rxb | 0x01
    +    .endif
    +.endm
    +
    +/* MRXB - Generate Element Size Control and RXB value
    + *
    + * @m:		Element size control
    + * @v1:		First vector register designated operand (for RXB)
    + * @v2:		Second vector register designated operand (for RXB)
    + * @v3:		Third vector register designated operand (for RXB)
    + * @v4:		Fourth vector register designated operand (for RXB)
    + */
    +.macro	MRXB	m v1 v2=0 v3=0 v4=0
    +    rxb = 0
    +    RXB	rxb, \v1, \v2, \v3, \v4
    +    .byte	(\m << 4) | rxb
    +.endm
    +
    +/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
    + *
    + * @m:		Element size control
    + * @opc:	Opcode
    + * @v1:		First vector register designated operand (for RXB)
    + * @v2:		Second vector register designated operand (for RXB)
    + * @v3:		Third vector register designated operand (for RXB)
    + * @v4:		Fourth vector register designated operand (for RXB)
    + */
    +.macro	MRXBOPC	m opc v1 v2=0 v3=0 v4=0
    +    MRXB	\m, \v1, \v2, \v3, \v4
    +    .byte	\opc
    +.endm
    +
    +/* Vector support instructions */
    +
    +/* VECTOR GENERATE BYTE MASK */
    +.macro	VGBM	vr imm2
    +    VX_NUM	v1, \vr
    +    .word	(0xE700 | (VX_R(v1) << 4))
    +    .word	\imm2
    +    MRXBOPC	0, 0x44, v1
    +.endm
    +.macro	VZERO	vxr
    +    VGBM	\vxr, 0
    +.endm
    +.macro	VONE	vxr
    +    VGBM	\vxr, 0xFFFF
    +.endm
    +
    +/* VECTOR LOAD VR ELEMENT FROM GR */
    +.macro	VLVG	v, gr, disp, m
    +    VX_NUM	v1, \v
    +    GR_NUM	b2, "%r0"
    +    GR_NUM	r3, \gr
    +    .word	0xE700 | (VX_R(v1) << 4) | r3
    +    .word	(b2 << 12) | (\disp)
    +    MRXBOPC	\m, 0x22, v1
    +.endm
    +.macro	VLVGB	v, gr, index, base
    +    VLVG	\v, \gr, \index, \base, 0
    +.endm
    +.macro	VLVGH	v, gr, index
    +    VLVG	\v, \gr, \index, 1
    +.endm
    +.macro	VLVGF	v, gr, index
    +    VLVG	\v, \gr, \index, 2
    +.endm
    +.macro	VLVGG	v, gr, index
    +    VLVG	\v, \gr, \index, 3
    +.endm
    +
    +/* VECTOR LOAD */
    +.macro	VL	v, disp, index="%r0", base
    +    VX_NUM	v1, \v
    +    GR_NUM	x2, \index
    +    GR_NUM	b2, \base
    +    .word	0xE700 | (VX_R(v1) << 4) | x2
    +    .word	(b2 << 12) | (\disp)
    +    MRXBOPC 0, 0x06, v1
    +.endm
    +
    +/* VECTOR LOAD ELEMENT */
    +.macro	VLEx	vr1, disp, index="%r0", base, m3, opc
    +    VX_NUM	v1, \vr1
    +    GR_NUM	x2, \index
    +    GR_NUM	b2, \base
    +    .word	0xE700 | (VX_R(v1) << 4) | x2
    +    .word	(b2 << 12) | (\disp)
    +    MRXBOPC	\m3, \opc, v1
    +.endm
    +.macro	VLEB	vr1, disp, index="%r0", base, m3
    +    VLEx	\vr1, \disp, \index, \base, \m3, 0x00
    +.endm
    +.macro	VLEH	vr1, disp, index="%r0", base, m3
    +    VLEx	\vr1, \disp, \index, \base, \m3, 0x01
    +.endm
    +.macro	VLEF	vr1, disp, index="%r0", base, m3
    +    VLEx	\vr1, \disp, \index, \base, \m3, 0x03
    +.endm
    +.macro	VLEG	vr1, disp, index="%r0", base, m3
    +    VLEx	\vr1, \disp, \index, \base, \m3, 0x02
    +.endm
    +
    +/* VECTOR LOAD ELEMENT IMMEDIATE */
    +.macro	VLEIx	vr1, imm2, m3, opc
    +    VX_NUM	v1, \vr1
    +    .word	0xE700 | (VX_R(v1) << 4)
    +    .word	\imm2
    +    MRXBOPC	\m3, \opc, v1
    +.endm
    +.macro	VLEIB	vr1, imm2, index
    +    VLEIx	\vr1, \imm2, \index, 0x40
    +.endm
    +.macro	VLEIH	vr1, imm2, index
    +    VLEIx	\vr1, \imm2, \index, 0x41
    +.endm
    +.macro	VLEIF	vr1, imm2, index
    +    VLEIx	\vr1, \imm2, \index, 0x43
    +.endm
    +.macro	VLEIG	vr1, imm2, index
    +    VLEIx	\vr1, \imm2, \index, 0x42
    +.endm
    +
    +/* VECTOR LOAD GR FROM VR ELEMENT */
    +.macro	VLGV	gr, vr, disp, base="%r0", m
    +    GR_NUM	r1, \gr
    +    GR_NUM	b2, \base
    +    VX_NUM	v3, \vr
    +    .word	0xE700 | (r1 << 4) | VX_R(v3)
    +    .word	(b2 << 12) | (\disp)
    +    MRXBOPC	\m, 0x21, v3
    +.endm
    +.macro	VLGVB	gr, vr, disp, base="%r0"
    +    VLGV	\gr, \vr, \disp, \base, 0
    +.endm
    +.macro	VLGVH	gr, vr, disp, base="%r0"
    +    VLGV	\gr, \vr, \disp, \base, 1
    +.endm
    +.macro	VLGVF	gr, vr, disp, base="%r0"
    +    VLGV	\gr, \vr, \disp, \base, 2
    +.endm
    +.macro	VLGVG	gr, vr, disp, base="%r0"
    +    VLGV	\gr, \vr, \disp, \base, 3
    +.endm
    +
    +/* VECTOR LOAD MULTIPLE */
    +.macro	VLM	vfrom, vto, disp, base
    +    VX_NUM	v1, \vfrom
    +    VX_NUM	v3, \vto
    +    GR_NUM	b2, \base	    /* Base register */
    +    .word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
    +    .word	(b2 << 12) | (\disp)
    +    MRXBOPC	0, 0x36, v1, v3
    +.endm
    +
    +/* VECTOR STORE MULTIPLE */
    +.macro	VSTM	vfrom, vto, disp, base
    +    VX_NUM	v1, \vfrom
    +    VX_NUM	v3, \vto
    +    GR_NUM	b2, \base	    /* Base register */
    +    .word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
    +    .word	(b2 << 12) | (\disp)
    +    MRXBOPC	0, 0x3E, v1, v3
    +.endm
    +
    +/* VECTOR PERMUTE */
    +.macro	VPERM	vr1, vr2, vr3, vr4
    +    VX_NUM	v1, \vr1
    +    VX_NUM	v2, \vr2
    +    VX_NUM	v3, \vr3
    +    VX_NUM	v4, \vr4
    +    .word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
    +    .word	(VX_R(v3) << 12)
    +    MRXBOPC	VX_R(v4), 0x8C, v1, v2, v3, v4
    +.endm
    +
    +/* VECTOR UNPACK LOGICAL LOW */
    +.macro	VUPLL	vr1, vr2, m3
    +    VX_NUM	v1, \vr1
    +    VX_NUM	v2, \vr2
    +    .word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
    +    .word	0x0000
    +    MRXBOPC	\m3, 0xD4, v1, v2
    +.endm
    +.macro	VUPLLB	vr1, vr2
    +    VUPLL	\vr1, \vr2, 0
    +.endm
    +.macro	VUPLLH	vr1, vr2
    +    VUPLL	\vr1, \vr2, 1
    +.endm
    +.macro	VUPLLF	vr1, vr2
    +    VUPLL	\vr1, \vr2, 2
    +.endm
    +
    +
    +/* Vector integer instructions */
    +
    +/* VECTOR EXCLUSIVE OR */
    +.macro	VX	vr1, vr2, vr3
    +    VX_NUM	v1, \vr1
    +    VX_NUM	v2, \vr2
    +    VX_NUM	v3, \vr3
    +    .word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
    +    .word	(VX_R(v3) << 12)
    +    MRXBOPC	0, 0x6D, v1, v2, v3
    +.endm
    +
    +/* VECTOR GALOIS FIELD MULTIPLY SUM */
    +.macro	VGFM	vr1, vr2, vr3, m4
    +    VX_NUM	v1, \vr1
    +    VX_NUM	v2, \vr2
    +    VX_NUM	v3, \vr3
    +    .word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
    +    .word	(VX_R(v3) << 12)
    +    MRXBOPC	\m4, 0xB4, v1, v2, v3
    +.endm
    +.macro	VGFMB	vr1, vr2, vr3
    +    VGFM	\vr1, \vr2, \vr3, 0
    +.endm
    +.macro	VGFMH	vr1, vr2, vr3
    +    VGFM	\vr1, \vr2, \vr3, 1
    +.endm
    +.macro	VGFMF	vr1, vr2, vr3
    +    VGFM	\vr1, \vr2, \vr3, 2
    +.endm
    +.macro	VGFMG	vr1, vr2, vr3
    +    VGFM	\vr1, \vr2, \vr3, 3
    +.endm
    +
    +/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
    +.macro	VGFMA	vr1, vr2, vr3, vr4, m5
    +    VX_NUM	v1, \vr1
    +    VX_NUM	v2, \vr2
    +    VX_NUM	v3, \vr3
    +    VX_NUM	v4, \vr4
    +    .word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
    +    .word	(VX_R(v3) << 12) | (\m5 << 8)
    +    MRXBOPC	VX_R(v4), 0xBC, v1, v2, v3, v4
    +.endm
    +.macro	VGFMAB	vr1, vr2, vr3, vr4
    +    VGFMA	\vr1, \vr2, \vr3, \vr4, 0
    +.endm
    +.macro	VGFMAH	vr1, vr2, vr3, vr4
    +    VGFMA	\vr1, \vr2, \vr3, \vr4, 1
    +.endm
    +.macro	VGFMAF	vr1, vr2, vr3, vr4
    +    VGFMA	\vr1, \vr2, \vr3, \vr4, 2
    +.endm
    +.macro	VGFMAG	vr1, vr2, vr3, vr4
    +    VGFMA	\vr1, \vr2, \vr3, \vr4, 3
    +.endm
    +
    +/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
    +.macro	VSRLB	vr1, vr2, vr3
    +    VX_NUM	v1, \vr1
    +    VX_NUM	v2, \vr2
    +    VX_NUM	v3, \vr3
    +    .word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
    +    .word	(VX_R(v3) << 12)
    +    MRXBOPC	0, 0x7D, v1, v2, v3
    +.endm
    +
    +#endif	/* __ASM_S390_VX_INSN_H */
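
For readers unfamiliar with the z/Architecture vector encoding, the RXB macro above collects the high (fifth) bit of each vector-register operand into a four-bit field, because the instruction itself only has room for the low four bits per operand (hence VX_R()). A minimal stand-alone model of that computation, with illustrative names and register numbers that are not part of the patch:

    #include <cstdint>
    #include <cstdio>

    // Bit 0x08 belongs to the first operand, 0x04 to the second, and so on;
    // a bit is set whenever that operand designates one of %v16..%v31.
    static uint8_t rxb(unsigned v1, unsigned v2 = 0, unsigned v3 = 0, unsigned v4 = 0) {
      uint8_t r = 0;
      if (v1 & 0x10) r |= 0x08;
      if (v2 & 0x10) r |= 0x04;
      if (v3 & 0x10) r |= 0x02;
      if (v4 & 0x10) r |= 0x01;
      return r;
    }

    int main() {
      // An instruction naming only %v1, %v10 and %v5 needs no extension bits;
      // replacing the second operand with %v20 sets that operand's bit.
      std::printf("%x %x\n", rxb(1, 10, 5), rxb(1, 20, 5));  // prints "0 4"
      return 0;
    }
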
    diff --git a/src/common/dns_resolve.cc b/src/common/dns_resolve.cc
    index a44510d6deab..435bcc657e40 100644
    --- a/src/common/dns_resolve.cc
    +++ b/src/common/dns_resolve.cc
    @@ -56,6 +56,7 @@ DNSResolver::~DNSResolver()
     #ifdef HAVE_RES_NQUERY
       for (auto iter = states.begin(); iter != states.end(); ++iter) {
         struct __res_state *s = *iter;
    +    res_nclose(s);
         delete s;
       }
     #endif
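
The fix above pairs each per-thread resolver state with res_nclose() before it is deleted; without it, the sockets and buffers allocated by res_ninit()/res_nquery() leak. A minimal sketch of the intended lifecycle, independent of DNSResolver (the helper below is illustrative only):

    #include <resolv.h>
    #include <memory>

    void lookup_once() {
      auto state = std::make_unique<struct __res_state>();  // zero-initialized
      if (res_ninit(state.get()) != 0)
        return;                       // could not initialize resolver state
      // ... res_nquery()/res_nsearch() calls would go here ...
      res_nclose(state.get());        // release sockets/buffers held by the state
    }                                 // unique_ptr then frees the struct itself
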
    diff --git a/src/common/dout.h b/src/common/dout.h
    index 4cd60efff8fe..8d05b12fbe2b 100644
    --- a/src/common/dout.h
    +++ b/src/common/dout.h
    @@ -44,6 +44,18 @@ inline std::ostream& operator<<(std::ostream& out, _bad_endl_use_dendl_t) {
       return out;
     }
     
    +template
    +concept HasPrint = requires(T t, std::ostream& u) {
    +  { t.print(u) } -> std::same_as;
    +};
    +
    +template requires HasPrint
    +static inline std::ostream& operator<<(std::ostream& out, T&& t)
    +{
    +  t.print(out);
    +  return out;
    +}
    +
     class DoutPrefixProvider {
     public:
       virtual std::ostream& gen_prefix(std::ostream& out) const = 0;
    @@ -144,17 +156,27 @@ struct is_dynamic> : public std::true_type {};
     #else
     #define dout_impl(cct, sub, v)						\
       do {									\
    -  const bool should_gather = [&](const auto cctX) {			\
    -    if constexpr (ceph::dout::is_dynamic::value ||	\
    -		  ceph::dout::is_dynamic::value) {		\
    +  const bool should_gather = [&](const auto cctX, auto sub_, auto v_) {	\
     +    /* The check is performed on `sub_` and `v_` to leverage C++'s	\
     +     * guarantee that the not-taken branch of `if constexpr` is		\
     +     * _discarded_, which also skips the checks for ill-formed code	\
     +     * (`should_gather<>` must not be fed non-constant expressions),	\
     +     * BUT ONLY within a template (hence the generic lambda) and only	\
     +     * when the branch depends on a parameter of that template.		\
     +     * GCC prior to v14 did not enforce these restrictions. */		\
    +    if constexpr (ceph::dout::is_dynamic::value ||	\
    +		  ceph::dout::is_dynamic::value) {	\
           return cctX->_conf->subsys.should_gather(sub, v);			\
         } else {								\
    +      constexpr auto sub_helper = static_cast(sub);	\
    +      constexpr auto v_helper = static_cast(v);		\
           /* The parentheses are **essential** because commas in angle	\
            * brackets are NOT ignored on macro expansion! A language's	\
            * limitation, sorry. */						\
    -      return (cctX->_conf->subsys.template should_gather());	\
    +      return (cctX->_conf->subsys.template should_gather());	\
         }									\
    -  }(cct);								\
    +  }(cct, sub, v);							\
     									\
       if (should_gather) {							\
         ceph::logging::MutableEntry _dout_e(v, sub);                        \
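
The rewritten dout_impl forwards sub and v into a generic lambda so that the `if constexpr` condition depends on the lambda's own template parameters; otherwise GCC 14 also checks the discarded branch, where `should_gather<>` demands constant expressions. A small stand-alone illustration of that language rule (names are made up and unrelated to the subsys machinery):

    #include <type_traits>

    template <int Level> constexpr bool compile_time_gather() { return Level > 0; }

    template <typename L>
    bool gather(L level) {
      if constexpr (std::is_same_v<L, std::integral_constant<int, 5>>) {
        // Needs a constant expression; only instantiated when this branch is
        // taken, because the condition depends on the template parameter L.
        return compile_time_gather<L::value>();
      } else {
        return level > 0;            // dynamic fallback
      }
    }

    int main() {
      bool a = gather(std::integral_constant<int, 5>{});  // compile-time path
      bool b = gather(3);                                  // runtime path
      return (a && b) ? 0 : 1;
    }
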
    diff --git a/src/common/dout_fmt.h b/src/common/dout_fmt.h
    new file mode 100644
    index 000000000000..c22fdf30cfe4
    --- /dev/null
    +++ b/src/common/dout_fmt.h
    @@ -0,0 +1,56 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab ft=cpp
    +
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include 
    +#include 
    +#include 
    +#include "dout.h"
    +
    +/// \file dout_fmt.h
    +///
    +/// \brief dout macros to format log statements with libfmt
    +///
    +/// A set of dout macros taking a format string and its corresponding argument
    +/// list. Log output is written directly to the underlying std::ostream by
    +/// fmt::print() rather than exposing the stream for ostream operator
    +/// chaining.
    +
    +// work around "warning: value computed is not used" with default dout_prefix
    +inline void dout_fmt_use_prefix(std::ostream&) {}
    +
    +#define lsubdout_fmt(cct, sub, v, ...) \
    +  dout_impl(cct, ceph_subsys_##sub, v) \
    +  dout_fmt_use_prefix(dout_prefix); \
    +  fmt::print(*_dout, __VA_ARGS__); \
    +  *_dout << dendl
    +
    +#define ldout_fmt(cct, v, ...) \
    +  dout_impl(cct, dout_subsys, v) \
    +  dout_fmt_use_prefix(dout_prefix); \
    +  fmt::print(*_dout, __VA_ARGS__); \
    +  *_dout << dendl
    +
    +#define dout_fmt(v, ...) \
    +  ldout_fmt((dout_context), v, __VA_ARGS__)
    +
    +#define ldpp_dout_fmt(dpp, v, ...) \
    +  if (decltype(auto) pdpp = (dpp); pdpp) { /* workaround -Wnonnull-compare for 'this' */ \
    +    dout_impl(pdpp->get_cct(), ceph::dout::need_dynamic(pdpp->get_subsys()), v) \
    +    pdpp->gen_prefix(*_dout); \
    +    fmt::print(*_dout, __VA_ARGS__); \
    +    *_dout << dendl; \
    +  }
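
A hypothetical call site for the new macros (the subsystem, prefix, and message below are made up for illustration), showing how a format string plus arguments replaces operator<< chaining:

    #include <cstdint>
    #include "common/debug.h"
    #include "common/dout_fmt.h"

    #define dout_subsys ceph_subsys_osd
    #define dout_prefix *_dout

    void log_copy_progress(CephContext* cct, uint64_t done, uint64_t total) {
      // classic:  ldout(cct, 10) << "copied " << done << "/" << total << dendl;
      ldout_fmt(cct, 10, "copied {}/{} objects", done, total);
    }

ldpp_dout_fmt() follows the same pattern but takes a DoutPrefixProvider and emits its gen_prefix() before the formatted message.
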
    diff --git a/src/common/entity_name.cc b/src/common/entity_name.cc
    index 5357b34eacb7..a9d6fb9c8b10 100644
    --- a/src/common/entity_name.cc
    +++ b/src/common/entity_name.cc
    @@ -29,21 +29,30 @@ const std::array EntityName::STR_TO_ENTITY_
       { CEPH_ENTITY_TYPE_CLIENT, "client" },
     }};
     
    -const std::string& EntityName::
    -to_str() const
    -{
    +void EntityName::dump(ceph::Formatter *f) const {
    +  f->dump_int("type", type);
    +  f->dump_string("id", id);
    +}
    +
    +void EntityName::generate_test_instances(std::list& ls) {
    +  ls.push_back(new EntityName);
    +  ls.push_back(new EntityName);
    +  ls.back()->set_type(CEPH_ENTITY_TYPE_OSD);
    +  ls.back()->set_id("0");
    +  ls.push_back(new EntityName);
    +  ls.back()->set_type(CEPH_ENTITY_TYPE_MDS);
    +  ls.back()->set_id("a");
    +}
    +
    +const std::string& EntityName::to_str() const {
       return type_id;
     }
     
    -const char* EntityName::
    -to_cstr() const
    -{
    +const char* EntityName::to_cstr() const {
       return type_id.c_str();
     }
     
    -bool EntityName::
    -from_str(std::string_view s)
    -{
    +bool EntityName::from_str(std::string_view s) {
       size_t pos = s.find('.');
     
       if (pos == string::npos)
    @@ -56,9 +65,7 @@ from_str(std::string_view s)
       return true;
     }
     
    -void EntityName::
    -set(uint32_t type_, std::string_view id_)
    -{
    +void EntityName::set(uint32_t type_, std::string_view id_) {
       type = type_;
       id = id_;
     
    @@ -71,9 +78,7 @@ set(uint32_t type_, std::string_view id_)
       }
     }
     
    -int EntityName::
    -set(std::string_view type_, std::string_view id_)
    -{
    +int EntityName::set(std::string_view type_, std::string_view id_) {
       uint32_t t = str_to_ceph_entity_type(type_);
       if (t == CEPH_ENTITY_TYPE_ANY)
         return -EINVAL;
    @@ -81,9 +86,7 @@ set(std::string_view type_, std::string_view id_)
       return 0;
     }
     
    -void EntityName::
    -set_type(uint32_t type_)
    -{
    +void EntityName::set_type(uint32_t type_) {
       set(type_, id);
     }
     
    @@ -93,9 +96,7 @@ set_type(std::string_view type_)
       return set(type_, id);
     }
     
    -void EntityName::
    -set_id(std::string_view id_)
    -{
    +void EntityName::set_id(std::string_view id_) {
       set(type, id_);
     }
     
    @@ -106,33 +107,23 @@ void EntityName::set_name(entity_name_t n)
       set(n.type(), s);
     }
     
    -const char* EntityName::
    -get_type_str() const
    -{
    +const char* EntityName::get_type_str() const {
       return ceph_entity_type_name(type);
     }
     
    -std::string_view EntityName::
    -get_type_name() const
    -{
    +std::string_view EntityName::get_type_name() const {
       return ceph_entity_type_name(type);
     }
     
    -const std::string &EntityName::
    -get_id() const
    -{
    +const std::string &EntityName::get_id() const {
       return id;
     }
     
    -bool EntityName::
    -has_default_id() const
    -{
    +bool EntityName::has_default_id() const {
       return (id == "admin");
     }
     
    -std::string EntityName::
    -get_valid_types_as_str()
    -{
    +std::string EntityName::get_valid_types_as_str() {
       std::ostringstream out;
       size_t i;
       for (i = 0; i < STR_TO_ENTITY_TYPE.size(); ++i) {
    diff --git a/src/common/entity_name.h b/src/common/entity_name.h
    index c88ebcbbabde..53f8cd4d5d09 100644
    --- a/src/common/entity_name.h
    +++ b/src/common/entity_name.h
    @@ -41,7 +41,8 @@ struct EntityName
         decode(id_, bl);
         set(type_, id_);
       }
    -
    +  void dump(ceph::Formatter *f) const;
    +  static void generate_test_instances(std::list& ls);
       const std::string& to_str() const;
       const char *to_cstr() const;
       bool from_str(std::string_view s);
    diff --git a/src/common/error_code.cc b/src/common/error_code.cc
    index 60086c550aeb..9c981a210774 100644
    --- a/src/common/error_code.cc
    +++ b/src/common/error_code.cc
    @@ -13,10 +13,10 @@
      * COPYING.
      */
     
    -#include 
    -
     #include "common/error_code.h"
     
    +#include 
    +
     #pragma GCC diagnostic push
     #pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
     #pragma clang diagnostic push
    diff --git a/src/common/error_code.h b/src/common/error_code.h
    index 6bcd8cb1791c..93a1bf31c008 100644
    --- a/src/common/error_code.h
    +++ b/src/common/error_code.h
    @@ -16,10 +16,8 @@
     #ifndef COMMON_CEPH_ERROR_CODE
     #define COMMON_CEPH_ERROR_CODE
     
    -#include 
    -
     #include 
    -#include 
    +#include 
     
     #pragma GCC diagnostic push
     #pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
    diff --git a/src/common/fmt_common.h b/src/common/fmt_common.h
    index d68d6457dcb8..474f6fbc3247 100644
    --- a/src/common/fmt_common.h
    +++ b/src/common/fmt_common.h
    @@ -2,9 +2,12 @@
     // vim: ts=8 sw=2 smarttab
     #pragma once
     
    +#include 
    +
     /**
      * \file default fmtlib formatters for specifically-tagged types
      */
    +#include 
     #include 
     
     /**
    @@ -13,6 +16,10 @@
      * has a begin()/end() method pair. This is a problem because we have
      * such classes in Crimson.
      */
    +
    +template 
    +concept has_formatter = fmt::has_formatter::value;
    +
     /**
      * Tagging classes that provide support for default fmtlib formatting,
      * by having either
    @@ -20,6 +27,8 @@
      * *or*
      * std::string alt_fmt_print(bool short_format) const
      * as public member functions.
    + * *or*
    + * auto fmt_print_ctx(auto &ctx) -> decltype(ctx.out());
      */
     template
     concept has_fmt_print = requires(T t) {
    @@ -29,6 +38,19 @@ template
     concept has_alt_fmt_print = requires(T t) {
       { t.alt_fmt_print(bool{}) } -> std::same_as;
     };
    +#if FMT_VERSION >= 110000
     +#if FMT_VERSION >= 110000
     +template
     +concept has_fmt_print_ctx = requires(
     +  T t, fmt::buffered_context &ctx) {
     +  { t.fmt_print_ctx(ctx) } -> std::same_as;
     +};
    +#else
    +template
    +concept has_fmt_print_ctx = requires(
    +  T t, fmt::buffer_context &ctx) {
    +  { t.fmt_print_ctx(ctx) } -> std::same_as;
    +};
    +#endif
     
     namespace fmt {
     
    @@ -61,4 +83,27 @@ struct formatter {
       }
       bool verbose{true};
     };
    +
    +template 
    +struct formatter {
    +  template 
    +  constexpr auto parse(ParseContext& ctx) { return ctx.begin(); }
    +  template 
    +  auto format(const T& k, FormatContext& ctx) const {
    +    return k.fmt_print_ctx(ctx);
    +  }
    +};
    +
    +template 
    +struct formatter> {
    +  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    +  template 
    +  auto format(const std::optional &v, FormatContext& ctx) const {
    +    if (v.has_value()) {
    +      return fmt::format_to(ctx.out(), "{}", *v);
    +    }
    +    return fmt::format_to(ctx.out(), "");
    +  }
    +};
    +
     }  // namespace fmt
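
A sketch of the third tagging option introduced here (the type below is invented for illustration): a class exposing fmt_print_ctx() writes directly into the formatter's output iterator, and the has_fmt_print_ctx-constrained fmt::formatter specialization above forwards to it.

    #include <cstdint>
    #include <fmt/format.h>
    #include "common/fmt_common.h"

    struct extent_tag {
      uint64_t off, len;
      auto fmt_print_ctx(auto& ctx) const -> decltype(ctx.out()) {
        return fmt::format_to(ctx.out(), "{}~{}", off, len);
      }
    };

    // Assuming the constrained specialization is selected for extent_tag,
    // fmt::format("{}", extent_tag{4096, 512}) would produce "4096~512".
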
    diff --git a/src/common/fork_function.h b/src/common/fork_function.h
    index 3a4f2f29c08a..5c94be4dcaf7 100644
    --- a/src/common/fork_function.h
    +++ b/src/common/fork_function.h
    @@ -13,6 +13,9 @@
     #ifndef _WIN32
     #include 
     #endif
    +#ifdef __linux__
    +#include 
    +#endif
     #include 
     
     #include "include/ceph_assert.h"
    @@ -53,17 +56,23 @@ static inline int fork_function(
       // we are forker (first child)
     
       // close all fds
    -  int maxfd = sysconf(_SC_OPEN_MAX);
    -  if (maxfd == -1)
    -    maxfd = 16384;
    -  for (int fd = 0; fd <= maxfd; fd++) {
    -    if (fd == STDIN_FILENO)
    -      continue;
    -    if (fd == STDOUT_FILENO)
    -      continue;
    -    if (fd == STDERR_FILENO)
    -      continue;
    -    ::close(fd);
    +#if defined(__linux__) && defined(SYS_close_range)
    +  if (::syscall(SYS_close_range, STDERR_FILENO + 1, ~0U, 0))
    +#endif
    +  {
    +    // fall back to manually closing
    +    int maxfd = sysconf(_SC_OPEN_MAX);
    +    if (maxfd == -1)
    +      maxfd = 16384;
    +    for (int fd = 0; fd <= maxfd; fd++) {
    +      if (fd == STDIN_FILENO)
    +        continue;
    +      if (fd == STDOUT_FILENO)
    +        continue;
    +      if (fd == STDERR_FILENO)
    +        continue;
    +      ::close(fd);
    +    }
       }
     
       sigset_t mask, oldmask;
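
The change above prefers a single close_range(2) syscall over iterating up to _SC_OPEN_MAX descriptors, and falls back to the loop when the syscall is unavailable or fails. A stand-alone sketch of the same pattern (the helper name is made up):

    #include <unistd.h>
    #ifdef __linux__
    #include <sys/syscall.h>
    #endif

    static void close_fds_above_stderr() {
    #if defined(__linux__) && defined(SYS_close_range)
      if (::syscall(SYS_close_range, STDERR_FILENO + 1, ~0U, 0) == 0)
        return;                       // one syscall closed everything
    #endif
      long maxfd = sysconf(_SC_OPEN_MAX);
      if (maxfd == -1)
        maxfd = 16384;                // conservative upper bound
      for (long fd = STDERR_FILENO + 1; fd <= maxfd; ++fd)
        ::close(fd);                  // keep stdin/stdout/stderr open
    }
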
    diff --git a/src/common/hobject.cc b/src/common/hobject.cc
    index 1aee4cc42546..01a117c70849 100644
    --- a/src/common/hobject.cc
    +++ b/src/common/hobject.cc
    @@ -2,6 +2,8 @@
     // vim: ts=8 sw=2 smarttab
     
     #include 
    +#include 
    +#include 
     
     #include "hobject.h"
     #include "common/Formatter.h"
    @@ -14,23 +16,25 @@ using std::string;
     using ceph::bufferlist;
     using ceph::Formatter;
     
    -static void append_escaped(const string &in, string *out)
    +namespace {
    +void escape_special_chars(const string& in, string* out)
     {
    -  for (string::const_iterator i = in.begin(); i != in.end(); ++i) {
    -    if (*i == '%') {
    +  for (auto c : in) {
    +    if (c == '%') {
           out->push_back('%');
           out->push_back('p');
    -    } else if (*i == '.') {
    +    } else if (c == '.') {
           out->push_back('%');
           out->push_back('e');
    -    } else if (*i == '_') {
    +    } else if (c == '_') {
           out->push_back('%');
           out->push_back('u');
         } else {
    -      out->push_back(*i);
    +      out->push_back(c);
         }
       }
     }
    +}  // namespace
     
     set hobject_t::get_prefixes(
       uint32_t bits,
    @@ -80,33 +84,25 @@ set hobject_t::get_prefixes(
     
     string hobject_t::to_str() const
     {
    -  string out;
    -
    -  char snap_with_hash[1000];
    -  char *t = snap_with_hash;
    -  const char *end = t + sizeof(snap_with_hash);
    -
       uint64_t poolid(pool);
    -  t += snprintf(t, end - t, "%.*llX", 16, (long long unsigned)poolid);
    -
       uint32_t revhash(get_nibblewise_key_u32());
    -  t += snprintf(t, end - t, ".%.*X", 8, revhash);
     
    -  if (snap == CEPH_NOSNAP)
    -    t += snprintf(t, end - t, ".head");
    -  else if (snap == CEPH_SNAPDIR)
    -    t += snprintf(t, end - t, ".snapdir");
    -  else
    -    t += snprintf(t, end - t, ".%llx", (long long unsigned)snap);
    -
    -  out.append(snap_with_hash, t);
    +  string out;
    +  if (snap == CEPH_NOSNAP) {
    +    out = fmt::format(FMT_COMPILE("{:016X}.{:08X}.head."), poolid, revhash);
    +  } else if (snap == CEPH_SNAPDIR) {
    +    out = fmt::format(FMT_COMPILE("{:016X}.{:08X}.snapdir."), poolid, revhash);
    +  } else {
    +    out = fmt::format(
    +	FMT_COMPILE("{:016X}.{:08X}.{:x}."), poolid, revhash,
    +	(unsigned long long)snap);
    +  }
     
    +  escape_special_chars(oid.name, &out);
       out.push_back('.');
    -  append_escaped(oid.name, &out);
    -  out.push_back('.');
    -  append_escaped(get_key(), &out);
    +  escape_special_chars(get_key(), &out);
       out.push_back('.');
    -  append_escaped(nspace, &out);
    +  escape_special_chars(nspace, &out);
     
       return out;
     }
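
The escaping performed by escape_special_chars() is what keeps '.' usable as the field separator in to_str(): '%', '.' and '_' in a component are rewritten to "%p", "%e" and "%u" respectively. A tiny self-contained check of that expectation (the object name below is made up):

    #include <cassert>
    #include <string>

    static std::string escaped(const std::string& in) {
      std::string out;
      for (char c : in) {
        if (c == '%')      out += "%p";
        else if (c == '.') out += "%e";
        else if (c == '_') out += "%u";
        else               out.push_back(c);
      }
      return out;
    }

    int main() {
      assert(escaped("rbd_data.1234") == "rbd%udata%e1234");
      return 0;
    }
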
    diff --git a/src/common/hobject.h b/src/common/hobject.h
    index 34191ccf5ec8..2a2c82a445fc 100644
    --- a/src/common/hobject.h
    +++ b/src/common/hobject.h
    @@ -15,6 +15,9 @@
     #ifndef __CEPH_OS_HOBJECT_H
     #define __CEPH_OS_HOBJECT_H
     
    +#include 
    +#include 
    +
     #if FMT_VERSION >= 90000
     #include 
     #endif
    @@ -166,6 +169,7 @@ struct hobject_t {
         return ret;
       }
     
    +  /// @return min hobject_t ret s.t. ret.get_head() == get_head()
       hobject_t get_object_boundary() const {
         if (is_max())
           return *this;
    @@ -174,6 +178,15 @@ struct hobject_t {
         return ret;
       }
     
    +  /// @return max hobject_t ret s.t. ret.get_head() == get_head()
    +  hobject_t get_max_object_boundary() const {
    +    if (is_max())
    +      return *this;
    +    // CEPH_SNAPDIR happens to sort above HEAD and MAX_SNAP and is no longer used
    +    // for actual objects
    +    return get_snapdir();
    +  }
    +
       /// @return head version of this hobject_t
       hobject_t get_head() const {
         hobject_t ret(*this);
    @@ -300,6 +313,26 @@ struct hobject_t {
         return nspace;
       }
     
    +  /**
    +   * PG_LOCAL_NS
    +   *
    +   * Used exclusively by crimson at this time.
    +   *
     +   * Namespace for objects maintained by the local pg instantiation and updated
     +   * independently of the pg log.  librados IO to this namespace should fail.
    +   * Listing operations related to pg objects should exclude objects in this
    +   * namespace along with temp objects, ec rollback objects, and the pg
    +   * meta object. Such operations include:
    +   * - scrub
    +   * - backfill
    +   * - pgls
    +   * See crimson/osd/pg_backend PGBackend::list_objects
    +   */
    +  static constexpr std::string_view INTERNAL_PG_LOCAL_NS = ".internal_pg_local";
    +  bool is_internal_pg_local() const {
    +    return nspace == INTERNAL_PG_LOCAL_NS;
    +  }
    +
       bool parse(const std::string& s);
     
       void encode(ceph::buffer::list& bl) const;
    @@ -308,7 +341,7 @@ struct hobject_t {
       void dump(ceph::Formatter *f) const;
       static void generate_test_instances(std::list& o);
       friend int cmp(const hobject_t& l, const hobject_t& r);
    -  auto operator<=>(const hobject_t &rhs) const noexcept {
    +  constexpr auto operator<=>(const hobject_t &rhs) const noexcept {
         auto cmp = max <=> rhs.max;
         if (cmp != 0) return cmp;
         cmp = pool <=> rhs.pool;
    @@ -325,10 +358,11 @@ struct hobject_t {
         if (cmp != 0) return cmp;
         return snap <=> rhs.snap;
       }
    -  bool operator==(const hobject_t& rhs) const noexcept {
    +  constexpr bool operator==(const hobject_t& rhs) const noexcept {
         return operator<=>(rhs) == 0;
       }
       friend struct ghobject_t;
    +  friend struct test_hobject_fmt_t;
     };
     WRITE_CLASS_ENCODER(hobject_t)
     
    @@ -341,6 +375,54 @@ template<> struct hash {
     };
     } // namespace std
     
    +namespace fmt {
    +template <>
    +struct formatter {
    +
    +  template 
    +  static inline auto
    +  append_sanitized(FormatContext& ctx, const std::string& in, int sep = 0)
    +  {
    +    for (const auto i : in) {
    +      if (i == '%' || i == ':' || i == '/' || i < 32 || i >= 127) {
    +	fmt::format_to(
    +	    ctx.out(), FMT_COMPILE("%{:02x}"), static_cast(i));
    +      } else {
    +	fmt::format_to(ctx.out(), FMT_COMPILE("{:c}"), i);
    +      }
    +    }
    +    if (sep) {
    +      fmt::format_to(
    +	  ctx.out(), FMT_COMPILE("{:c}"), sep);
    +    }
    +    return ctx.out();
    +  }
    +
    +  constexpr auto parse(format_parse_context& ctx) const { return ctx.begin(); }
    +
    +  template 
    +  auto format(const hobject_t& ho, FormatContext& ctx) const
    +  {
    +    if (ho == hobject_t{}) {
    +      return fmt::format_to(ctx.out(), "MIN");
    +    }
    +
    +    if (ho.is_max()) {
    +      return fmt::format_to(ctx.out(), "MAX");
    +    }
    +
    +    fmt::format_to(
    +	ctx.out(), FMT_COMPILE("{}:{:08x}:"), static_cast(ho.pool),
    +	ho.get_bitwise_key_u32());
    +    append_sanitized(ctx, ho.nspace, ':');
    +    append_sanitized(ctx, ho.get_key(), ':');
    +    append_sanitized(ctx, ho.oid.name);
    +    return fmt::format_to(ctx.out(), FMT_COMPILE(":{}"), ho.snap);
    +  }
    +};
    +}  // namespace fmt
    +
    +
     std::ostream& operator<<(std::ostream& out, const hobject_t& o);
     
     template 
    @@ -420,6 +502,30 @@ struct ghobject_t {
         return hobj.pool >= 0 && hobj.oid.name.empty();
       }
     
    +  bool is_internal_pg_local() const {
    +    return hobj.is_internal_pg_local();
    +  }
    +
    +  /**
    +   * SNAPMAPPER_OID, make_snapmapper, is_snapmapper
    +   *
    +   * Used exclusively by crimson at this time.
    +   * 
    +   * Unlike classic, crimson uses a snap mapper object for each pg.
    +   * The snapmapper object provides an index for efficient trimming of clones as
    +   * snapshots are removed.
    +   *
    +   * As with the pgmeta object, we pin the hash to the pg hash.
    +   */
    +  static constexpr std::string_view SNAPMAPPER_OID = "snapmapper";
    +  static ghobject_t make_snapmapper(
    +    int64_t pool, uint32_t hash, shard_id_t shard) {
    +    hobject_t h(object_t(SNAPMAPPER_OID), std::string(),
    +		CEPH_NOSNAP, hash, pool,
    +		std::string(hobject_t::INTERNAL_PG_LOCAL_NS));
    +    return ghobject_t(h, NO_GEN, shard);
    +  }
    +
       bool match(uint32_t bits, uint32_t match) const {
         return hobj.match_hash(hobj.hash, bits, match);
       }
    @@ -485,7 +591,7 @@ struct ghobject_t {
       void dump(ceph::Formatter *f) const;
       static void generate_test_instances(std::list& o);
       friend int cmp(const ghobject_t& l, const ghobject_t& r);
    -  auto operator<=>(const ghobject_t&) const = default;
    +  constexpr auto operator<=>(const ghobject_t&) const = default;
       bool operator==(const ghobject_t&) const = default;
     };
     WRITE_CLASS_ENCODER(ghobject_t)
    diff --git a/src/common/hobject_fmt.h b/src/common/hobject_fmt.h
    deleted file mode 100644
    index 622611121ae6..000000000000
    --- a/src/common/hobject_fmt.h
    +++ /dev/null
    @@ -1,53 +0,0 @@
    -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    -// vim: ts=8 sw=2 smarttab
    -#pragma once
    -
    -/**
    - * \file fmtlib formatters for some hobject.h classes
    - */
    -#include 
    -#include 
    -
    -#include "common/hobject.h"
    -#include "include/object_fmt.h"
    -#include "msg/msg_fmt.h"
    -
    -// \todo reimplement
    -static inline void append_out_escaped(const std::string& in, std::string* out)
    -{
    -  for (auto i = in.cbegin(); i != in.cend(); ++i) {
    -    if (*i == '%' || *i == ':' || *i == '/' || *i < 32 || *i >= 127) {
    -      char buf[4];
    -      snprintf(buf, sizeof(buf), "%%%02x", (int)(unsigned char)*i);
    -      out->append(buf);
    -    } else {
    -      out->push_back(*i);
    -    }
    -  }
    -}
    -
    -template <> struct fmt::formatter {
    -
    -  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    -
    -  template  auto format(const hobject_t& ho, FormatContext& ctx)
    -  {
    -    if (ho == hobject_t{}) {
    -      return fmt::format_to(ctx.out(), "MIN");
    -    }
    -
    -    if (ho.is_max()) {
    -      return fmt::format_to(ctx.out(), "MAX");
    -    }
    -
    -    std::string v;
    -    append_out_escaped(ho.nspace, &v);
    -    v.push_back(':');
    -    append_out_escaped(ho.get_key(), &v);
    -    v.push_back(':');
    -    append_out_escaped(ho.oid.name, &v);
    -
    -    return fmt::format_to(ctx.out(), "{}:{:08x}:{}:{}", static_cast(ho.pool),
    -			  ho.get_bitwise_key_u32(), v, ho.snap);
    -  }
    -};
    diff --git a/src/common/intrusive_lru.h b/src/common/intrusive_lru.h
    index fc63bea2636a..3ed3625d8a0b 100644
    --- a/src/common/intrusive_lru.h
    +++ b/src/common/intrusive_lru.h
    @@ -12,13 +12,12 @@ namespace ceph::common {
     /**
      * intrusive_lru: lru implementation with embedded map and list hook
      *
    - * Elements will be stored in an intrusive set. Once an element is no longer
    - * referenced it will remain in the set. The unreferenced elements will be
    - * evicted from the set once the set size exceeds the `lru_target_size`.
    - * Referenced elements will not be evicted as this is a registery with
    - * extra caching capabilities.
     + * Elements with live references are guaranteed to remain accessible.
     + * Elements without live references may remain accessible -- the
     + * implementation releases unreferenced elements based on lru_target_size.
      *
    - * Note, this implementation currently is entirely thread-unsafe.
    + * Accesses, mutations, and references must be confined to a single thread or
    + * serialized via some other mechanism.
      */
     
     template 
    @@ -43,11 +42,36 @@ void intrusive_ptr_release(intrusive_lru_base *p);
     
     template 
     class intrusive_lru_base {
    +  /* object invariants
    +   *
    +   * intrusive_lru objects may be in one of three states:
    +   * 1. referenced
    +   *    - accessible via intrusive_lru
     +   *    - intrusive_lru_base::lru points to the parent intrusive_lru
    +   *    - present in intrusive_lru::lru_set
    +   *    - absent from intrusive_lru::unreferenced_list
    +   *    - use_count > 0
    +   *    - not eligible for eviction
    +   *    - intrusive_lru_release may be invoked externally
    +   * 2. unreferenced
    +   *    - accessible via intrusive_lru
    +   *    - intrusive_lru_base::lru is null
    +   *    - present in intrusive_lru::lru_set
    +   *    - present in intrusive_lru::unreferenced_list
    +   *    - use_count == 0
    +   *    - eligible for eviction
    +   *    - intrusive_lru_release cannot be invoked
    +   * 3. invalidated
    +   *    - inaccessible via intrusive_lru
    +   *    - intrusive_lru_base::lru is null
    +   *    - absent from intrusive_lru::lru_set
    +   *    - absent from intrusive_lru::unreferenced_list
    +   *    - use_count > 0
    +   *    - intrusive_lru_release may be invoked externally
    +   */
       unsigned use_count = 0;
     
    -  // lru points to the corresponding intrusive_lru
    -  // which will be set to null if its use_count
    -  // is zero (aka unreferenced).
    +  // See above, points at intrusive_lru iff referenced
       intrusive_lru *lru = nullptr;
     
     public:
    @@ -55,7 +79,10 @@ class intrusive_lru_base {
         return static_cast(lru);
       }
       bool is_unreferenced() const {
    -    return !is_referenced();
    +    return !is_referenced() && use_count == 0;
    +  }
    +  bool is_invalidated() const {
    +    return !is_referenced() && use_count > 0;
       }
       boost::intrusive::set_member_hook<> set_hook;
       boost::intrusive::list_member_hook<> list_hook;
    @@ -98,6 +125,7 @@ class intrusive_lru {
     
       using lru_list_t = boost::intrusive::list<
         base_t,
    +    boost::intrusive::constant_time_size,
         boost::intrusive::member_hook<
           base_t,
           boost::intrusive::list_member_hook<>,
    @@ -108,9 +136,9 @@ class intrusive_lru {
     
       // when the lru_set exceeds its target size, evict
       // only unreferenced elements from it (if any).
    -  void evict() {
    +  void evict(unsigned target_size) {
         while (!unreferenced_list.empty() &&
    -	   lru_set.size() > lru_target_size) {
    +	   lru_set.size() > target_size) {
           auto &evict_target = unreferenced_list.front();
           assert(evict_target.is_unreferenced());
           unreferenced_list.pop_front();
    @@ -136,7 +164,7 @@ class intrusive_lru {
         assert(b.is_unreferenced());
         lru_set.insert(b);
         b.lru = this;
    -    evict();
    +    evict(lru_target_size);
       }
     
       // an element in the lru_set has no users,
    @@ -145,7 +173,7 @@ class intrusive_lru {
         assert(b.is_referenced());
         unreferenced_list.push_back(b);
         b.lru = nullptr;
    -    evict();
    +    evict(lru_target_size);
       }
     
     public:
    @@ -189,6 +217,21 @@ class intrusive_lru {
           }
       }
     
    +  /// drop all elements from lru, invoke f on any with outstanding references
    +  template 
    +  void clear(F &&f) {
    +    evict(0);
    +    assert(unreferenced_list.empty());
    +    for (auto &i: lru_set) {
    +      std::invoke(f, static_cast(i));
    +      i.lru = nullptr;
    +      assert(i.is_invalidated());
    +    }
    +    lru_set.clear_and_dispose([](auto *i){
    +      assert(i->use_count > 0); /* don't delete, still has a ref count */
    +    });
    +  }
    +
       template 
       void for_each(F&& f) {
         for (auto& v : lru_set) {
    @@ -212,7 +255,7 @@ class intrusive_lru {
     
       void set_target_size(size_t target_size) {
         lru_target_size = target_size;
    -    evict();
    +    evict(lru_target_size);
       }
     
       ~intrusive_lru() {
    @@ -226,17 +269,24 @@ class intrusive_lru {
     template 
     void intrusive_ptr_add_ref(intrusive_lru_base *p) {
       assert(p);
    -  assert(p->lru);
       p->use_count++;
    +  assert(p->is_referenced() || p->is_invalidated());
     }
     
     template 
     void intrusive_ptr_release(intrusive_lru_base *p) {
    +  /* See object invariants above -- intrusive_ptr_release can only be invoked on
    +   * is_referenced() or is_invalidated() objects with live external references */
       assert(p);
       assert(p->use_count > 0);
    +  assert(p->is_referenced() || p->is_invalidated());
       --p->use_count;
       if (p->use_count == 0) {
    -    p->lru->mark_as_unreferenced(*p);
    +    if (p->lru) {
    +      p->lru->mark_as_unreferenced(*p);
    +    } else {
    +      delete p;
    +    }
       }
     }
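
A hedged sketch of the teardown contract added by clear(): unreferenced elements are evicted immediately, while elements that still have outstanding references are handed to the callback, detached from the lru (the "invalidated" state), and freed by the final intrusive_ptr release. The config type and call site below are assumptions for illustration:

    // Config stands in for whatever intrusive_lru_config the caller uses.
    template <typename Config>
    void shutdown(ceph::common::intrusive_lru<Config>& lru) {
      lru.clear([](auto& elem) {
        // elem still has outstanding intrusive_ptr references somewhere;
        // log it or tear down per-element state here.
      });
      // Surviving references keep their objects alive; each such object is
      // deleted by intrusive_ptr_release() once its use_count reaches zero,
      // because its lru pointer is now null ("invalidated").
    }
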
     
    diff --git a/src/common/intrusive_timer.h b/src/common/intrusive_timer.h
    new file mode 100644
    index 000000000000..b32286a20963
    --- /dev/null
    +++ b/src/common/intrusive_timer.h
    @@ -0,0 +1,222 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +
    +#pragma once
    +
    +#include 
    +#include 
    +
    +#include 
    +
    +#include "common/ceph_time.h"
    +
    +namespace ceph::common {
    +
    +/**
    + * intrusive_timer
    + *
     + * SafeTimer (common/Timer.h) isn't well suited to use on hot paths,
     + * for a few reasons:
     + * - Usage generally requires allocation of a fresh context for each
     + *   scheduled operation.  One could override Context::complete to avoid
     + *   destroying the instance, but actually reusing the instance is tricky
     + *   as SafeTimer doesn't guarantee cancellation if safe_callbacks is false.
     + * - SafeTimer only guarantees cancellation if safe_callbacks is true, which
     + *   it generally won't be if the user needs to call into SafeTimer while
     + *   holding locks taken by callbacks.
     + *
     + * This implementation allows the user to repeatedly schedule and cancel
     + * an object inheriting from the callback_t interface below, while
     + * guaranteeing cancellation provided that the user holds the lock
     + * associated with a particular callback while calling into intrusive_timer.
    + */
    +class intrusive_timer {
    +  using clock_t = ceph::coarse_real_clock;
    +
    +public:
    +  /**
    +   * callback_t
    +   *
    +   * Objects inheriting from callback_t can be scheduled
    +   * via intrusive_timer.
    +   */
    +  class callback_t : public boost::intrusive::set_base_hook<> {
    +    friend class intrusive_timer;
    +    clock_t::time_point schedule_point;
    +    unsigned incarnation = 0;
    +
    +  public:
    +    /**
    +     * add_ref, dec_ref
    +     *
    +     * callback_t must remain live and all methods must remain
    +     * safe to call as long as calls to add_ref() outnumber calls
    +     * to dec_ref().
    +     */
    +    virtual void add_ref() = 0;
    +    virtual void dec_ref() = 0;
    +
    +    /**
    +     * lock, unlock
    +     *
    +     * For any specific callback_t, must lock/unlock a lock held while
    +     * accessing intrusive_timer public methods for that callback_t
    +     * instance.
    +     */
    +    virtual void lock() = 0;
    +    virtual void unlock() = 0;
    +
    +    /// Invokes callback, will be called with lock held
    +    virtual void invoke() = 0;
    +
    +    /**
    +     * is_scheduled
    +     *
    +     * Return true iff callback is scheduled to be invoked.
    +     * May only be validly invoked while lock associated with
    +     * callback_t instance is held.
    +     */
    +    bool is_scheduled() const { return incarnation % 2 == 1; }
    +    virtual ~callback_t() = default;
    +
    +    /// Order callback_t by schedule_point
    +    auto operator<=>(const callback_t &rhs) const {
    +      return std::make_pair(schedule_point, this) <=>
    +	std::make_pair(rhs.schedule_point, &rhs);
    +    }
    +  };
    +
    +private:
    +  /// protects events, stopping
    +  std::mutex lock;
    +
    +  /// stopping, cv used to signal that t should halt
    +  std::condition_variable cv;
    +  bool stopping = false;
    +
    +  /// queued events ordered by callback_t::schedule_point
    +  boost::intrusive::set events;
    +
    +  /// thread responsible for calling scheduled callbacks
    +  std::thread t;
    +
    +  /// peek front of queue, null if empty
    +  callback_t *peek() {
    +    return events.empty() ? nullptr : &*(events.begin());
    +  }
    +
    +  /// entry point for t
    +  void _run() {
    +    std::unique_lock l(lock);
    +    while (true) {
    +      if (stopping) {
    +	return;
    +      }
    +    
    +      auto next = peek();
    +      if (!next) {
    +	cv.wait(l);
    +	continue;
    +      }
    +
    +      if (next->schedule_point > clock_t::now()) {
    +	cv.wait_until(l, next->schedule_point);
    +	continue;
    +      }
    +
    +      // we release the reference below
    +      events.erase(*next);
    +
    +      /* cancel() and schedule_after() both hold both intrusive_timer::lock
    +       * and the callback_t lock (precondition of both) while mutating
    +       * next->incarnation, so this read is safe.  We're relying on the
    +       * fact that only this method in this thread will access
    +       * next->incarnation under only one of the two. */
    +      auto incarnation = next->incarnation;
    +      l.unlock();
    +      {
    +	/* Note that intrusive_timer::cancel may observe that
    +	 * callback_t::is_scheduled() returns true while
    +	 * callback_t::is_linked() is false since we drop
    +	 * intrusive_timer::lock between removing next from the
    +	 * queue and incrementing callback_t::incarnation here
    +	 * under the callback_t lock.  In that case, cancel()
    +	 * increments incarnation logically canceling the callback
    +	 * but leaves the reference for us to drop.
    +	 */
    +	std::unique_lock m(*next);
    +	if (next->incarnation == incarnation) {
    +	  /* As above, cancel() and schedule_after() hold both locks so this
    +	   * mutation and read are safe. */
    +	  ++next->incarnation;
    +	  next->invoke();
    +	}
    +	/* else, next was canceled between l.unlock() and next->lock().
    +	 * Note that if incarnation does not match, we do nothing to next
    +	 * other than drop our reference -- it might well have been
    +	 * rescheduled already! */
    +      }
    +      next->dec_ref();
    +      l.lock();
    +    }
    +  }
    +
    +public:
    +  intrusive_timer() : t([this] { _run(); }) {}
    +
    +  /**
    +   * schedule_after
    +   *
    +   * Schedule cb to run after the specified period.
    +   * The lock associated with cb must be held.
    +   * cb must not already be scheduled.
    +   *
    +   * @param cb [in] callback to schedule
    +   * @param after [in] period after which to schedule cb
    +   */
    +  template 
    +  void schedule_after(callback_t &cb, T after) {
    +    ceph_assert(!cb.is_scheduled());
    +    std::unique_lock l(lock);
    +    ceph_assert(!cb.is_linked());
    +
    +    ++cb.incarnation;
    +    cb.schedule_point = clock_t::now() + after;
    +
    +    cb.add_ref();
    +    events.insert(cb);
    +
    +    cv.notify_one();
    +  }
    +
    +  /**
    +   * cancel
    +   *
    +   * Cancel already scheduled cb.
    +   * The lock associated with cb must be held.
    +   *
    +   * @param cb [in] callback to cancel
    +   */
    +  void cancel(callback_t &cb) {
    +    ceph_assert(cb.is_scheduled());
    +    std::unique_lock l(lock);
    +    ++cb.incarnation;
    +
    +    if (cb.is_linked()) {
    +      events.erase(cb);
    +      cb.dec_ref();
    +    }
    +  }
    +
    +  /// Stop intrusive_timer
    +  void stop() {
    +    {
    +      std::unique_lock l(lock);
    +      stopping = true;
    +      cv.notify_one();
    +    }
    +    t.join();
    +  }
    +};
    +
    +}
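
A sketch of how a caller is expected to plug into intrusive_timer (the callback type and its members are invented for illustration): the user supplies reference counting and the per-callback lock, and holds that lock across schedule_after()/cancel(), which is what makes cancellation reliable even while the timer thread is dispatching.

    #include <atomic>
    #include <chrono>
    #include <mutex>
    #include "common/intrusive_timer.h"

    struct flush_cb : ceph::common::intrusive_timer::callback_t {
      std::atomic<unsigned> refs{1};
      std::mutex m;
      void add_ref() override { ++refs; }
      void dec_ref() override { if (--refs == 0) delete this; }
      void lock() override { m.lock(); }
      void unlock() override { m.unlock(); }
      void invoke() override { /* runs on the timer thread with m held */ }
    };

    void example(ceph::common::intrusive_timer& timer, flush_cb& cb) {
      std::lock_guard l(cb);                  // callback_t acts as its own lock
      timer.schedule_after(cb, std::chrono::seconds(5));
      if (cb.is_scheduled())
        timer.cancel(cb);                     // safe: cb's lock is held
    }
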
    diff --git a/src/common/io_exerciser/CMakeLists.txt b/src/common/io_exerciser/CMakeLists.txt
    new file mode 100644
    index 000000000000..07091df86e10
    --- /dev/null
    +++ b/src/common/io_exerciser/CMakeLists.txt
    @@ -0,0 +1,13 @@
    +add_library(object_io_exerciser STATIC
    +  DataGenerator.cc
    +  IoOp.cc
    +  IoSequence.cc
    +  Model.cc
    +  ObjectModel.cc
    +  RadosIo.cc
    +)
    +
    +target_link_libraries(object_io_exerciser
    +  librados 
    +  global
    +)
    \ No newline at end of file
    diff --git a/src/common/io_exerciser/DataGenerator.cc b/src/common/io_exerciser/DataGenerator.cc
    new file mode 100644
    index 000000000000..9aa77eeb6e98
    --- /dev/null
    +++ b/src/common/io_exerciser/DataGenerator.cc
    @@ -0,0 +1,753 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +#include "DataGenerator.h"
    +
    +#include "ObjectModel.h"
    +
    +#include "common/debug.h"
    +#include "common/dout.h"
    +
    +#include "fmt/format.h"
    +#include "fmt/ranges.h"
    +
    +#include 
    +#include 
    +#include 
    +
    +#define dout_subsys ceph_subsys_rados
    +#define dout_context g_ceph_context
    +
    +using DataGenerator = ceph::io_exerciser::data_generation::DataGenerator;
    +using SeededRandomGenerator = ceph::io_exerciser::data_generation
    +                                ::SeededRandomGenerator;
    +using HeaderedSeededRandomGenerator = ceph::io_exerciser::data_generation
    +                                        ::HeaderedSeededRandomGenerator;
    +
    +std::unique_ptr DataGenerator::create_generator(
    +    GenerationType generationType, const ObjectModel& model)
    +{
    +  switch(generationType)
    +  {
    +    case GenerationType::SeededRandom:
    +      return std::make_unique(model);
    +    case GenerationType::HeaderedSeededRandom:
    +      return std::make_unique(model);
    +    default:
    +      throw std::invalid_argument("Not yet implemented");
    +  }
    +
    +  return nullptr;
    +}
    +
    +bufferlist DataGenerator::generate_wrong_data(uint64_t offset, uint64_t length)
    +{
    +  bufferlist retlist;
    +  uint64_t block_size = m_model.get_block_size();
    +  char buffer[block_size];
    +  for (uint64_t block_offset = offset;
    +       block_offset < offset + length;
    +       block_offset++)
    +  {
    +    std::memset(buffer, 0, block_size);
    +    retlist.append(ceph::bufferptr(buffer, block_size));
    +  }
    +  return retlist;
    +}
    +
    +bool DataGenerator::validate(bufferlist& bufferlist, uint64_t offset, uint64_t length)
    +{
    +  return bufferlist.contents_equal(generate_data(offset, length));
    +}
    +
    +ceph::bufferptr SeededRandomGenerator::generate_block(uint64_t block_offset)
    +{
    +  uint64_t block_size = m_model.get_block_size();
    +  char buffer[block_size];
    +
    +  std::mt19937_64 random_generator(m_model.get_seed(block_offset));
    +  uint64_t rand1 = random_generator();
    +  uint64_t rand2 = random_generator();
    +
    +  constexpr size_t generation_length = sizeof(uint64_t);
    +
    +  for (uint64_t i = 0; i < block_size; i+=(2*generation_length), rand1++, rand2--)
    +  {
    +    std::memcpy(buffer + i, &rand1, generation_length);
    +    std::memcpy(buffer + i + generation_length, &rand2, generation_length);
    +  }
    +
    +  size_t remainingBytes = block_size % (generation_length * 2);
    +  if (remainingBytes > generation_length)
    +  {
    +    size_t remainingBytes2 = remainingBytes - generation_length;
    +    std::memcpy(buffer + block_size - remainingBytes, &rand1, remainingBytes);
    +    std::memcpy(buffer + block_size - remainingBytes2, &rand2, remainingBytes2);
    +  }
    +  else if (remainingBytes > 0)
    +  {
    +    std::memcpy(buffer + block_size - remainingBytes, &rand1, remainingBytes);
    +  }
    +
    +  return ceph::bufferptr(buffer, block_size);
    +}
    +
    +ceph::bufferptr SeededRandomGenerator::generate_wrong_block(uint64_t block_offset)
    +{
    +  uint64_t block_size = m_model.get_block_size();
    +  char buffer[block_size];
    +
    +  std::mt19937_64 random_generator(m_model.get_seed(block_offset));
    +  uint64_t rand1 = random_generator() - 1;
    +  uint64_t rand2 = random_generator() + 1;
    +
    +  constexpr size_t generation_length = sizeof(uint64_t);
    +
    +  for (uint64_t i = 0; i < block_size; i+=(2*generation_length), rand1++, rand2--)
    +  {
    +    std::memcpy(buffer + i, &rand1, generation_length);
    +    std::memcpy(buffer + i + generation_length, &rand2, generation_length);
    +  }
    +
    +  size_t remainingBytes = block_size % (generation_length * 2);
    +  if (remainingBytes > generation_length)
    +  {
    +    size_t remainingBytes2 = remainingBytes - generation_length;
    +    std::memcpy(buffer + block_size - remainingBytes, &rand1, remainingBytes);
    +    std::memcpy(buffer + block_size - remainingBytes2, &rand2, remainingBytes2);
    +  }
    +  else if (remainingBytes > 0)
    +  {
    +    std::memcpy(buffer + block_size - remainingBytes, &rand1, remainingBytes);
    +  }
    +
    +  return ceph::bufferptr(buffer, block_size);
    +}
    +
    +bufferlist SeededRandomGenerator::generate_data(uint64_t offset, uint64_t length)
    +{
    +  bufferlist retlist;
    +
    +  for (uint64_t block_offset = offset; block_offset < offset + length; block_offset++)
    +  {
    +    retlist.append(generate_block(block_offset));
    +  }
    +
    +  return retlist;
    +}
    +
    +bufferlist SeededRandomGenerator::generate_wrong_data(uint64_t offset, uint64_t length)
    +{
    +  bufferlist retlist;
    +
    +  for (uint64_t block_offset = offset; block_offset < offset + length; block_offset++)
    +  {
    +    retlist.append(generate_wrong_block(block_offset));
    +  }
    +
    +  return retlist;
    +}
    +
+HeaderedSeededRandomGenerator
+  ::HeaderedSeededRandomGenerator(const ObjectModel& model,
+                                  std::optional<uint64_t> unique_run_id) :
+    SeededRandomGenerator(model),
+    unique_run_id(unique_run_id.value_or(generate_unique_run_id()))
    +{
    +
    +}
    +
    +uint64_t HeaderedSeededRandomGenerator::generate_unique_run_id()
    +{
+  std::mt19937_64 random_generator =
+        std::mt19937_64(duration_cast<std::chrono::milliseconds>(
+          std::chrono::system_clock::now().time_since_epoch()).count());
+
+  return random_generator();
    +}
    +
    +ceph::bufferptr HeaderedSeededRandomGenerator::generate_block(uint64_t block_offset)
    +{
    +  SeedBytes seed = m_model.get_seed(block_offset);
+  TimeBytes current_time = duration_cast<std::chrono::milliseconds>(
+      std::chrono::system_clock::now().time_since_epoch()).count();
    +
    +  ceph::bufferptr bufferptr = SeededRandomGenerator::generate_block(block_offset);
    +
    +  std::memcpy(bufferptr.c_str() + uniqueIdStart(), &unique_run_id, uniqueIdLength());
    +  std::memcpy(bufferptr.c_str() + seedStart(), &seed, seedLength());
+  std::memcpy(bufferptr.c_str() + timeStart(), &current_time, timeLength());
    +
    +  return bufferptr;
    +}
    +
    +ceph::bufferptr HeaderedSeededRandomGenerator::generate_wrong_block(uint64_t block_offset)
    +{
    +  return HeaderedSeededRandomGenerator::generate_block(block_offset % 8);
    +}
    +
    +const HeaderedSeededRandomGenerator::UniqueIdBytes
    +  HeaderedSeededRandomGenerator::readUniqueRunId(uint64_t block_offset,
    +                                                 const bufferlist& bufferlist)
    +{
    +  UniqueIdBytes read_unique_run_id = 0;
    +  std::memcpy(&read_unique_run_id,
    +              &bufferlist[(block_offset * m_model.get_block_size()) + uniqueIdStart()],
    +              uniqueIdLength());
    +  return read_unique_run_id;
    +}
    +
    +const HeaderedSeededRandomGenerator::SeedBytes
    +  HeaderedSeededRandomGenerator::readSeed(uint64_t block_offset,
    +                                          const bufferlist& bufferlist)
    +{
    +  SeedBytes read_seed = 0;
    +  std::memcpy(&read_seed,
    +              &bufferlist[(block_offset * m_model.get_block_size()) + seedStart()],
    +              seedLength());
    +  return read_seed;
    +}
    +
    +const HeaderedSeededRandomGenerator::TimeBytes
    +  HeaderedSeededRandomGenerator::readDateTime(uint64_t block_offset,
    +                                              const bufferlist& bufferlist)
    +{
    +  TimeBytes read_time = 0;
    +  std::memcpy(&read_time,
    +              &bufferlist[(block_offset * m_model.get_block_size()) + timeStart()],
    +              timeLength());
    +  return read_time;
    +}
    +
    +bool HeaderedSeededRandomGenerator::validate(bufferlist& bufferlist,
    +                                             uint64_t offset, uint64_t length)
    +{
+  std::vector<uint64_t> invalid_block_offsets;
    +
    +  for (uint64_t block_offset = offset; block_offset < offset + length; block_offset++)
    +  {
    +    bool valid_block
    +      = validate_block(block_offset,
    +                       (bufferlist.c_str() + ((block_offset - offset) *
    +                       m_model.get_block_size())));
    +    if (!valid_block)
    +    {
    +      invalid_block_offsets.push_back(block_offset);
    +    }
    +  }
    +
    +  if (!invalid_block_offsets.empty())
    +  {
    +    printDebugInformationForOffsets(offset, invalid_block_offsets, bufferlist);
    +  }
    +
    +  return invalid_block_offsets.empty();
    +}
    +
    +bool HeaderedSeededRandomGenerator::validate_block(uint64_t block_offset,
    +                                                   const char* buffer_start)
    +{
+  // We validate that the block matches what we generate byte for byte,
+  // but we ignore the time section of the header, which changes on every write
    +  ceph::bufferptr bufferptr = generate_block(block_offset);
    +  bool valid = strncmp(bufferptr.c_str(), buffer_start, timeStart()) == 0;
    +  valid = valid ? strncmp(bufferptr.c_str() + timeEnd(),
    +                          buffer_start + timeEnd(),
    +                          m_model.get_block_size() - timeEnd()) == 0 : valid;
    +  return valid;
    +}
    +
    +const HeaderedSeededRandomGenerator::ErrorType
    +  HeaderedSeededRandomGenerator::getErrorTypeForBlock(uint64_t read_offset,
    +                                                      uint64_t block_offset,
    +                                                      const bufferlist& bufferlist)
    +{
    +  try
    +  {
    +    UniqueIdBytes read_unique_run_id = readUniqueRunId(block_offset - read_offset,
    +                                                       bufferlist);
    +    if (unique_run_id != read_unique_run_id)
    +    {
    +      return ErrorType::RUN_ID_MISMATCH;
    +    }
    +
    +    SeedBytes read_seed = readSeed(block_offset - read_offset, bufferlist);
    +    if (m_model.get_seed(block_offset) != read_seed)
    +    {
    +      return ErrorType::SEED_MISMATCH;
    +    }
    +
    +    if (std::strncmp(&bufferlist[((block_offset - read_offset) *
    +                      m_model.get_block_size()) + bodyStart()],
    +                     generate_block(block_offset).c_str() + bodyStart(),
    +                     m_model.get_block_size() - bodyStart()) != 0)
    +    {
    +      return ErrorType::DATA_MISMATCH;
    +    }
    +  }
    +  catch(const std::exception& e)
    +  {
    +    return ErrorType::DATA_NOT_FOUND;
    +  }
    +
    +  return ErrorType::UNKNOWN;
    +}
    +
    +void HeaderedSeededRandomGenerator
    +  ::printDebugInformationForBlock(uint64_t read_offset, uint64_t block_offset,
    +                                  const bufferlist& bufferlist)
    +{
    +  ErrorType blockError = getErrorTypeForBlock(read_offset, block_offset, bufferlist);
    +
    +  TimeBytes read_time = 0;
    +  std::time_t ttp;
    +
    +  char read_bytes[m_model.get_block_size()];
    +  char generated_bytes[m_model.get_block_size()];
    +
    +  if (blockError == ErrorType::DATA_MISMATCH || blockError == ErrorType::UNKNOWN)
    +  {
    +    read_time = readDateTime(block_offset - read_offset, bufferlist);
    +    std::chrono::system_clock::time_point time_point{std::chrono::milliseconds{read_time}};
    +    ttp = std::chrono::system_clock::to_time_t(time_point);
    +
    +    std::memcpy(&read_bytes,
    +                &bufferlist[((block_offset - read_offset) * m_model.get_block_size())],
    +                m_model.get_block_size() - bodyStart());
    +    std::memcpy(&generated_bytes,
    +                generate_block(block_offset).c_str(),
    +                m_model.get_block_size() - bodyStart());
    +  }
    +
    +  std::string error_string;
    +  switch(blockError)
    +  {
    +    case ErrorType::RUN_ID_MISMATCH:
    +    {
    +      UniqueIdBytes read_unique_run_id = readUniqueRunId((block_offset - read_offset),
    +                                                          bufferlist);
    +      error_string = fmt::format("Header (Run ID) mismatch detected at block {} "
    +        "(byte offset {}) Header expected run id {} but found id {}. "
    +        "Block data corrupt or not written from this instance of this application.",
    +      block_offset,
    +      block_offset * m_model.get_block_size(),
    +      unique_run_id,
    +      read_unique_run_id);
    +    }
    +    break;
    +
    +    case ErrorType::SEED_MISMATCH:
    +    {
    +      SeedBytes read_seed = readSeed((block_offset - read_offset), bufferlist);
    +
    +      if (m_model.get_seed_offsets(read_seed).size() == 0)
    +      {
    +        error_string = fmt::format("Data (Seed) mismatch detected at block {}"
    +          " (byte offset {}). Header expected seed {} but found seed {}. "
    +          "Read data was not from any other recognised block in the object.",
    +            block_offset,
    +            block_offset * m_model.get_block_size(),
    +            m_model.get_seed(block_offset),
    +            read_seed);
    +      }
    +      else
    +      {
+        std::vector<int> seed_offsets = m_model.get_seed_offsets(read_seed);
    +        error_string = fmt::format("Data (Seed) mismatch detected at block {}"
    +          " (byte offset {}). Header expected seed {} but found seed {}."
    +          " Read data was from a different block(s): {}",
    +            block_offset,
    +            block_offset * m_model.get_block_size(),
    +            m_model.get_seed(block_offset),
    +            read_seed,
    +            fmt::join(seed_offsets.begin(), seed_offsets.end(), ""));
    +      }
    +    }
    +    break;
    +
    +    case ErrorType::DATA_MISMATCH:
    +    {
    +      error_string = fmt::format("Data (Body) mismatch detected at block {}"
    +        " (byte offset {}). Header data matches, data body does not."
    +        " Data written at {}\nExpected data: \n{:02x}\nRead data:{:02x}",
    +          block_offset,
    +          block_offset * m_model.get_block_size(),
    +          std::ctime(&ttp),
    +          fmt::join(generated_bytes, generated_bytes + m_model.get_block_size(), ""),
    +          fmt::join(read_bytes, read_bytes + m_model.get_block_size(), ""));
    +    }
    +    break;
    +
    +    case ErrorType::DATA_NOT_FOUND:
    +    {
    +      uint64_t bufferlist_length = bufferlist.to_str().size();
    +      error_string = fmt::format("Data (Body) could not be read at block {}"
    +        " (byte offset {}) offset in bufferlist returned from read: {}"
    +        " ({} bytes). Returned bufferlist length: {}.",
    +          block_offset,
    +          block_offset * m_model.get_block_size(),
    +          (block_offset - read_offset),
    +          (block_offset - read_offset) * m_model.get_block_size(),
    +          bufferlist_length);
    +    }
    +    break;
    +
    +    case ErrorType::UNKNOWN:
    +      [[ fallthrough ]];
    +
    +    default:
    +    {
    +      error_string = fmt::format("Data mismatch detected at block {}"
    +        " (byte offset {}).\nExpected data:\n{:02x}\nRead data:\n{:02x}",
    +          block_offset,
    +          block_offset * m_model.get_block_size(),
    +          fmt::join(generated_bytes, generated_bytes + m_model.get_block_size(), ""),
    +          fmt::join(read_bytes, read_bytes + m_model.get_block_size(), ""));
    +    }
    +    break;
    +  }
    +  dout(0) << error_string << dendl;
    +}
    +
    +void HeaderedSeededRandomGenerator
    +  ::printDebugInformationForRange(uint64_t read_offset,
    +                                  uint64_t start_block_offset,
    +                                  uint64_t range_length_in_blocks,
    +                                  ErrorType rangeError,
    +                                  const bufferlist& bufferlist)
    +{
    +  switch(rangeError)
    +  {
    +  case ErrorType::RUN_ID_MISMATCH:
    +    printDebugInformationForRunIdMismatchRange(read_offset, start_block_offset,
    +                                               range_length_in_blocks, bufferlist);
    +    break;
    +  case ErrorType::SEED_MISMATCH:
    +    printDebugInformationForSeedMismatchRange(read_offset, start_block_offset,
    +                                              range_length_in_blocks, bufferlist);
    +    break;
    +  case ErrorType::DATA_MISMATCH:
    +    printDebugInformationDataBodyMismatchRange(read_offset, start_block_offset,
    +                                               range_length_in_blocks, bufferlist);
    +    break;
    +  case ErrorType::DATA_NOT_FOUND:
    +    printDebugInformationDataNotFoundRange(read_offset, start_block_offset,
    +                                           range_length_in_blocks, bufferlist);
    +    break;
    +  case ErrorType::UNKNOWN:
    +    [[ fallthrough ]];
    +  default:
    +    printDebugInformationCorruptRange(read_offset, start_block_offset,
    +                                      range_length_in_blocks, bufferlist);
    +    break;
    +  }
    +}
    +
    +void HeaderedSeededRandomGenerator
    +  ::printDebugInformationForRunIdMismatchRange(uint64_t read_offset,
    +                                               uint64_t start_block_offset,
    +                                               uint64_t range_length_in_blocks,
    +                                               const bufferlist& bufferlist)
    +{
    +  uint64_t range_start = start_block_offset;
    +  uint64_t range_length = 0;
    +  UniqueIdBytes initial_read_unique_run_id = readUniqueRunId(start_block_offset - read_offset,
    +                                                             bufferlist);
    +  for (uint64_t i = start_block_offset;
    +       i < start_block_offset + range_length_in_blocks; i++)
    +  {
    +    ceph_assert(getErrorTypeForBlock(read_offset, i, bufferlist)
    +                == ErrorType::RUN_ID_MISMATCH);
    +
    +    UniqueIdBytes read_unique_run_id = readUniqueRunId(i - read_offset, bufferlist);
    +    if (initial_read_unique_run_id != read_unique_run_id ||
    +        i == (start_block_offset + range_length_in_blocks - 1))
    +    {
    +      if (range_length == 1)
    +      {
    +        printDebugInformationForBlock(read_offset, i, bufferlist);
    +      }
    +      else if (range_length > 1)
    +      {
    +        dout(0) << fmt::format("Data (Run ID) Mismatch detected from block {} ({} bytes)"
    +                    " and spanning a range of {} blocks ({} bytes). "
    +                    "Expected run id {} for range but found id {}"
    +                    " for all blocks in range. "
    +                    "Block data corrupt or not written from this instance of this application.",
    +                      range_start,
    +                      range_start * m_model.get_block_size(),
    +                      range_length,
    +                      range_length * m_model.get_block_size(),
    +                      unique_run_id,
    +                      initial_read_unique_run_id) << dendl;
    +      }
    +
    +      range_start = i;
    +      range_length = 1;
    +      initial_read_unique_run_id = read_unique_run_id;
    +    }
    +    else
    +    {
    +      range_length++;
    +    }
    +  }
    +
    +  if (range_length == 1)
    +  {
    +    printDebugInformationForBlock(read_offset,
    +                                  start_block_offset + range_length_in_blocks - 1,
    +                                  bufferlist);
    +  }
    +  else if (range_length > 1)
    +  {
    +    dout(0) << fmt::format("Data (Run ID) Mismatch detected from block {}"
    +                " ({} bytes) and spanning a range of {} blocks ({} bytes). "
    +                "Expected run id {} for range but found id for all blocks in range. "
    +                "Block data corrupt or not written from this instance of this application.",
    +                  range_start,
    +                  range_start * m_model.get_block_size(),
    +                  range_length,
    +                  range_length * m_model.get_block_size(),
    +                  unique_run_id,
    +                  initial_read_unique_run_id)
    +            << dendl;
    +  }
    +}
    +
    +void HeaderedSeededRandomGenerator
    +  ::printDebugInformationForSeedMismatchRange(uint64_t read_offset,
    +                                              uint64_t start_block_offset,
    +                                              uint64_t range_length_in_blocks,
    +                                              const bufferlist& bufferlist)
    +{
    +  uint64_t range_start = start_block_offset;
    +  uint64_t range_length = 0;
    +
    +  // Assert here if needed, as we can't support values
    +  // that can't be converted to a signed integer.
+  ceph_assert(m_model.get_block_size() < (std::numeric_limits<int64_t>::max() / 2));
+  std::optional<int64_t> range_offset = 0;
    +
    +  for (uint64_t i = start_block_offset;
    +       i < start_block_offset + range_length_in_blocks; i++)
    +  {
    +    ceph_assert(getErrorTypeForBlock(read_offset, i, bufferlist)
    +                == ErrorType::SEED_MISMATCH);
    +    SeedBytes read_seed = readSeed(i - read_offset, bufferlist);
    +
+    std::vector<int> seed_found_offsets = m_model.get_seed_offsets(read_seed);
    +
    +    if ((seed_found_offsets.size() == 1 &&
+        (static_cast<int64_t>(seed_found_offsets.front() - i) == range_offset)) ||
    +        range_length == 0)
    +    {
    +      if (range_length == 0)
    +      {
    +        range_start = i;
    +        if (seed_found_offsets.size() > 0)
    +        {
    +          range_offset = seed_found_offsets.front() - i;
    +        }
    +        else
    +        {
    +          range_offset = std::nullopt;
    +        }
    +      }
    +      range_length++;
    +    }
    +    else
    +    {
    +      if (range_length == 1)
    +      {
    +        printDebugInformationForBlock(read_offset, i - 1, bufferlist);
    +      }
    +      else if (range_length > 1 && range_offset.has_value())
    +      {
    +        dout(0) << fmt::format("Data (Seed) Mismatch detected from block {}"
    +                    " ({} bytes) and spanning a range of {} blocks ({} bytes). "
    +                    "Returned data located starting from block {} ({} bytes) "
    +                    "and spanning a range of {} blocks ({} bytes).",
    +                      range_start,
    +                      range_start * m_model.get_block_size(),
    +                      range_length, range_length * m_model.get_block_size(),
+                      static_cast<uint64_t>(*range_offset) + range_start,
+                      (static_cast<uint64_t>(*range_offset) + range_start)
    +                        * m_model.get_block_size(),
    +                      range_length,
    +                      range_length * m_model.get_block_size())
    +                << dendl;
    +      }
    +      else
    +      {
    +        dout(0) << fmt::format("Data (Seed) Mismatch detected from block {}"
    +                    " ({} bytes) and spanning a range of {} blocks ({} bytes). "
    +                    "Data seed mismatch spanning a range of {} blocks ({} bytes).",
    +                      range_start,
    +                      range_start * m_model.get_block_size(),
    +                      range_length, range_length * m_model.get_block_size(),
    +                      range_length,
    +                      range_length * m_model.get_block_size())
    +                << dendl;
    +      }
    +      range_length = 1;
    +      range_start = i;
    +      if (seed_found_offsets.size() > 0)
    +      {
    +        range_offset = seed_found_offsets.front() - i;
    +      }
    +      else
    +      {
    +        range_offset = std::nullopt;
    +      }
    +    }
    +  }
    +
    +  if (range_length == 1)
    +  {
    +    printDebugInformationForBlock(read_offset,
    +                                  start_block_offset + range_length_in_blocks - 1,
    +                                  bufferlist);
    +  }
    +  else if (range_length > 1 && range_offset.has_value())
    +  {
    +    dout(0) << fmt::format("Data (Seed) Mismatch detected from block {} ({} bytes) "
    +                "and spanning a range of {} blocks ({} bytes). "
    +                "Returned data located starting from block {} ({} bytes) "
    +                "and spanning a range of {} blocks ({} bytes).",
    +                  range_start,
    +                  range_start * m_model.get_block_size(),
    +                  range_length,
    +                  range_length * m_model.get_block_size(),
    +                  *range_offset + range_start,
    +                  (*range_offset + range_start) * m_model.get_block_size(),
    +                  range_length,
    +                  range_length * m_model.get_block_size())
    +            << dendl;
    +  }
    +  else
    +  {
    +    dout(0) << fmt::format("Data (Seed) Mismatch detected from block {} ({} bytes) "
    +                "and spanning a range of {} blocks ({} bytes). "
    +                "and spanning a range of {} blocks ({} bytes).",
    +                  range_start,
    +                  range_start * m_model.get_block_size(),
    +                  range_length,
    +                  range_length * m_model.get_block_size(),
    +                  range_length,
    +                  range_length * m_model.get_block_size())
    +            << dendl;
    +  }
    +}
    +
    +void HeaderedSeededRandomGenerator
    +::printDebugInformationDataBodyMismatchRange(uint64_t read_offset,
    +                                             uint64_t start_block_offset,
    +                                             uint64_t range_length_in_blocks,
    +                                             const bufferlist& bufferlist)
    +{
    +  dout(0) << fmt::format("Data Mismatch detected in blocks from {} to {}. "
    +              "Headers look as expected for range, "
    +              "but generated data body does not match. "
    +              "More information given for individual blocks below.",
    +                start_block_offset,
    +                start_block_offset + range_length_in_blocks - 1)
    +          << dendl;
    +
    +  for (uint64_t i = start_block_offset;
    +       i < start_block_offset + range_length_in_blocks; i++)
    +  {
    +    printDebugInformationForBlock(read_offset, i, bufferlist);
    +  }
    +}
    +
    +void HeaderedSeededRandomGenerator
    +  ::printDebugInformationCorruptRange(uint64_t read_offset,
    +                                      uint64_t start_block_offset,
    +                                      uint64_t range_length_in_blocks,
    +                                      const bufferlist& bufferlist)
    +{
    +  dout(0) << fmt::format("Data Mismatch detected in blocks from {} to {}. "
    +              "Headers look as expected for range, "
    +              "but generated data body does not match. "
    +              "More information given for individual blocks below.",
    +                start_block_offset,
    +                start_block_offset + range_length_in_blocks - 1)
    +          << dendl;
    +
    +  for (uint64_t i = start_block_offset;
    +       i < start_block_offset + range_length_in_blocks; i++)
    +  {
    +    printDebugInformationForBlock(read_offset, i, bufferlist);
    +  }
    +}
    +
    +void HeaderedSeededRandomGenerator
    +  ::printDebugInformationDataNotFoundRange(uint64_t read_offset,
    +                                           uint64_t start_block_offset,
    +                                           uint64_t range_length_in_blocks,
    +                                           const bufferlist& bufferlist)
    +{
    +  dout(0) << fmt::format("Data not found for blocks from {} to {}. "
    +              "More information given for individual blocks below.",
    +                start_block_offset,
    +                start_block_offset + range_length_in_blocks - 1)
    +          << dendl;
    +
    +  for (uint64_t i = start_block_offset; i < start_block_offset + range_length_in_blocks; i++)
    +  {
    +    printDebugInformationForBlock(read_offset, i, bufferlist);
    +  }
    +}
    +
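+// Walk the failed block offsets, coalescing consecutive blocks that share the
+// same ErrorType into a single range so a long run of identical corruption is
+// reported as one range rather than once per block.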
    +void HeaderedSeededRandomGenerator
    +  ::printDebugInformationForOffsets(uint64_t read_offset,
+                                    std::vector<uint64_t> offsets,
    +                                    const bufferlist& bufferlist)
    +{
    +  uint64_t range_start = 0;
    +  uint64_t range_length = 0;
    +  ErrorType rangeError = ErrorType::UNKNOWN;
    +
    +  for (const uint64_t& block_offset : offsets)
    +  {
    +    ErrorType blockError = getErrorTypeForBlock(read_offset, block_offset,
    +                                                bufferlist);
    +
    +    if (range_start == 0 && range_length == 0)
    +    {
    +      range_start = block_offset;
    +      range_length = 1;
    +      rangeError = blockError;
    +    }
    +    else if (blockError == rangeError &&
    +             range_start + range_length == block_offset)
+    {
    +      range_length++;
    +    }
    +    else
    +    {
    +      if (range_length == 1)
    +      {
    +        printDebugInformationForBlock(read_offset, range_start, bufferlist);
    +      }
    +      else if (range_length > 1)
    +      {
    +        printDebugInformationForRange(read_offset, range_start, range_length,
    +                                      rangeError, bufferlist);
    +      }
    +
    +      range_start = block_offset;
    +      range_length = 1;
    +      rangeError = blockError;
    +    }
    +  }
    +
    +  if (range_length == 1)
    +  {
    +    printDebugInformationForBlock(read_offset, range_start, bufferlist);
    +  }
    +  else if (range_length > 1)
    +  {
    +    printDebugInformationForRange(read_offset, range_start, range_length,
    +                                  rangeError, bufferlist);
    +  }
    +}
    \ No newline at end of file
    diff --git a/src/common/io_exerciser/DataGenerator.h b/src/common/io_exerciser/DataGenerator.h
    new file mode 100644
    index 000000000000..1e5784a54ccd
    --- /dev/null
    +++ b/src/common/io_exerciser/DataGenerator.h
    @@ -0,0 +1,171 @@
    +#pragma once
    +
+#include <memory>
+#include <optional>
    +
    +#include "include/buffer.h"
    +#include "ObjectModel.h"
    +
    +/* Overview
    + *
    + * class DataGenerator
    + *   Generates data buffers for write I/Os using state queried
    + *   from ObjectModel. Validates data buffers for read I/Os
    + *   against the state in the ObjectModel. If a data miscompare
+ *   is detected, it provides debug information about the state of the
    + *   object, the buffer that was read and the expected buffer.
    + *
    + *
    + * class SeededRandomGenerator
    + *   Inherits from DataGenerator. Generates entirely random patterns
    + *   based on the seed retrieved by the model.
    + *
    + *
    + * class HeaderedSeededRandomGenerator
+ *   Inherits from SeededRandomGenerator. Generates entirely random patterns
+ *   based on the seed retrieved by the model, but also writes a header
+ *   at the start of each block. This generator also provides a range of
+ *   verbose debug output to help diagnose a miscompare
    + *   whenever it detects unexpected data.
    + */
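+
+/* Example (illustrative sketch only; `model` is assumed to be an existing
+ * ObjectModel, and offsets/lengths are expressed in blocks, not bytes):
+ *
+ *   using namespace ceph::io_exerciser::data_generation;
+ *   auto gen = DataGenerator::create_generator(
+ *       GenerationType::HeaderedSeededRandom, model);
+ *   bufferlist bl = gen->generate_data(0, 4);  // 4 blocks starting at block 0
+ *   bool ok = gen->validate(bl, 0, 4);         // true when bl matches the model
+ */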
    +
    +namespace ceph {
    +  namespace io_exerciser {
    +    namespace data_generation {
    +      enum class GenerationType {
    +        SeededRandom,
    +        HeaderedSeededRandom
    +        // CompressedGenerator
    +        // MixedGenerator
    +      };
    +
    +      class DataGenerator {
    +      public:
    +        virtual ~DataGenerator() = default;
+        static std::unique_ptr<DataGenerator>
+          create_generator(GenerationType generationType,
+                           const ObjectModel& model);
+        virtual bufferlist generate_data(uint64_t offset, uint64_t length) = 0;
    +        virtual bool validate(bufferlist& bufferlist, uint64_t offset,
    +                              uint64_t length);
    +
    +        // Used for testing debug outputs from data generation
    +        virtual bufferlist generate_wrong_data(uint64_t offset, uint64_t length);
    +
    +      protected:
    +        const ObjectModel& m_model;
    +
    +        DataGenerator(const ObjectModel& model) : m_model(model) {}
    +      };
    +
    +      class SeededRandomGenerator : public DataGenerator
    +      {
    +        public:
    +          SeededRandomGenerator(const ObjectModel& model)
    +            : DataGenerator(model) {}
    +
    +          virtual bufferptr generate_block(uint64_t offset);
+          virtual bufferlist generate_data(uint64_t offset, uint64_t length) override;
    +          virtual bufferptr generate_wrong_block(uint64_t offset);
    +          virtual bufferlist generate_wrong_data(uint64_t offset, uint64_t length) override;
    +      };
    +
    +      class HeaderedSeededRandomGenerator : public SeededRandomGenerator
    +      {
    +        public:
    +          HeaderedSeededRandomGenerator(const ObjectModel& model,
+                                        std::optional<uint64_t> unique_run_id = std::nullopt);
    +
    +          bufferptr generate_block(uint64_t offset) override;
    +          bufferptr generate_wrong_block(uint64_t offset) override;
    +          bool validate(bufferlist& bufferlist, uint64_t offset,
    +                        uint64_t length) override;
    +
    +        private:
    +          using UniqueIdBytes = uint64_t;
    +          using SeedBytes = int;
    +          using TimeBytes = uint64_t;
    +
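+          // Classification of a block that fails validation:
+          //  - RUN_ID_MISMATCH: header run id differs, so the data is corrupt
+          //    or was not written by this run of the application
+          //  - SEED_MISMATCH: run id matches but the seed belongs to a
+          //    different block of the object (or to no known block)
+          //  - DATA_MISMATCH: header is correct but the random body differs
+          //  - DATA_NOT_FOUND: the read returned too little data to contain
+          //    this block
+          //  - UNKNOWN: none of the above could be established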
    +          enum class ErrorType {
    +            RUN_ID_MISMATCH,
    +            SEED_MISMATCH,
    +            DATA_MISMATCH,
    +            DATA_NOT_FOUND,
    +            UNKNOWN
    +          };
    +
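+          // Layout of the per-block header written at the start of every
+          // generated block:
+          //   [ unique_run_id (uint64_t) | seed (int) | write time in ms (uint64_t) ]
+          // The constexpr helpers below give the byte offsets of each field
+          // and of the random body that follows the header.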
    +          constexpr uint8_t headerStart() const
    +            { return 0; };
    +          constexpr uint8_t uniqueIdStart() const
    +            { return headerStart(); };
    +          constexpr uint8_t uniqueIdLength() const
    +            { return sizeof(UniqueIdBytes); };
    +          constexpr uint8_t seedStart() const
    +            { return uniqueIdStart() + uniqueIdLength(); };
    +          constexpr uint8_t seedLength() const
    +            { return sizeof(SeedBytes); };
    +          constexpr uint8_t timeStart() const
    +            { return seedStart() + seedLength(); };
    +          constexpr uint8_t timeLength() const
    +            { return sizeof(TimeBytes); };
    +          constexpr uint8_t timeEnd() const
    +            { return timeStart() + timeLength(); };
    +          constexpr uint8_t headerLength() const
    +            { return uniqueIdLength() + seedLength() + timeLength(); };
    +          constexpr uint8_t bodyStart() const
    +            { return headerStart() + headerLength(); };
    +
    +          const UniqueIdBytes readUniqueRunId(uint64_t block_offset,
    +                                              const bufferlist& bufferlist);
    +          const SeedBytes readSeed(uint64_t block_offset,
    +                                   const bufferlist& bufferlist);
    +          const TimeBytes readDateTime(uint64_t block_offset,
    +                                       const bufferlist& bufferlist);
    +
    +          const UniqueIdBytes unique_run_id;
    +
    +          uint64_t generate_unique_run_id();
    +
    +          bool validate_block(uint64_t block_offset, const char* buffer_start);
    +
    +          const ErrorType getErrorTypeForBlock(uint64_t read_offset,
    +                                               uint64_t block_offset,
    +                                               const bufferlist& bufferlist);
    +
    +          void printDebugInformationForBlock(uint64_t read_offset,
    +                                             uint64_t block_offset,
    +                                             const bufferlist& bufferlist);
    +          void printDebugInformationForRange(uint64_t read_offset,
    +                                             uint64_t start_block_offset,
    +                                             uint64_t range_length_in_blocks,
    +                                             ErrorType rangeError,
    +                                             const bufferlist& bufferlist);
    +
    +          void printDebugInformationForRunIdMismatchRange(uint64_t read_offset,
    +                                                          uint64_t start_block_offset,
    +                                                          uint64_t range_length_in_blocks,
    +                                                          const bufferlist& bufferlist);
    +          void printDebugInformationForSeedMismatchRange(uint64_t read_offset,
    +                                                         uint64_t start_block_offset,
    +                                                         uint64_t range_length_in_blocks,
    +                                                         const bufferlist& bufferlist);
    +          void printDebugInformationDataBodyMismatchRange(uint64_t read_offset,
    +                                                          uint64_t start_block_offset,
    +                                                          uint64_t range_length_in_blocks,
    +                                                          const bufferlist& bufferlist);
+          void printDebugInformationDataNotFoundRange(uint64_t read_offset,
    +                                                      uint64_t start_block_offset,
    +                                                      uint64_t range_length_in_blocks,
    +                                                      const bufferlist& bufferlist);
    +          void printDebugInformationCorruptRange(uint64_t read_offset,
    +                                                 uint64_t start_block_offset,
    +                                                 uint64_t range_length_in_blocks,
    +                                                 const bufferlist& bufferlist);
    +
    +          void printDebugInformationForOffsets(uint64_t read_offset,
+                                               std::vector<uint64_t> offsets,
    +                                               const bufferlist& bufferlist);
    +      };
    +    }
    +  }
    +}
    diff --git a/src/common/io_exerciser/IoOp.cc b/src/common/io_exerciser/IoOp.cc
    new file mode 100644
    index 000000000000..cd855ba6fff8
    --- /dev/null
    +++ b/src/common/io_exerciser/IoOp.cc
    @@ -0,0 +1,188 @@
    +#include "IoOp.h"
    +
    +using IoOp = ceph::io_exerciser::IoOp;
    +
    +IoOp::IoOp( OpType op,
    +            uint64_t offset1, uint64_t length1,
    +            uint64_t offset2, uint64_t length2,
    +            uint64_t offset3, uint64_t length3) :
    +  op(op),
    +  offset1(offset1), length1(length1),
    +  offset2(offset2), length2(length2),
    +  offset3(offset3), length3(length3)
    +{
    +
    +}
    +
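+// Render a value (already scaled to bytes by the caller) in the most compact
+// exact unit: plain bytes unless it is a whole number of KiB, and KiB unless
+// it is a whole number of MiB.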
    +std::string IoOp::value_to_string(uint64_t v) const
    +{
+  if (v < 1024 || (v % 1024) != 0) {
+    return std::to_string(v);
+  } else if (v < 1024*1024 || (v % (1024 * 1024)) != 0) {
+    return std::to_string(v / 1024) + "K";
+  } else {
+    return std::to_string(v / 1024 / 1024) + "M";
+  }
    +}
    +
+std::unique_ptr<IoOp> IoOp
+  ::generate_done() {
+
+    return std::make_unique<IoOp>(OpType::Done);
    +}
    +
+std::unique_ptr<IoOp> IoOp
+  ::generate_barrier() {
+
+  return std::make_unique<IoOp>(OpType::BARRIER);
    +}
    +
+std::unique_ptr<IoOp> IoOp
+  ::generate_create(uint64_t size) {
+
+  return std::make_unique<IoOp>(OpType::CREATE, 0, size);
    +}
    +
+std::unique_ptr<IoOp> IoOp
+  ::generate_remove() {
+
+  return std::make_unique<IoOp>(OpType::REMOVE);
    +}
    +
+std::unique_ptr<IoOp> IoOp
+  ::generate_read(uint64_t offset, uint64_t length) {
+
+  return std::make_unique<IoOp>(OpType::READ, offset, length);
    +}
    +
+std::unique_ptr<IoOp> IoOp
    +  ::generate_read2(uint64_t offset1, uint64_t length1,
    +                   uint64_t offset2, uint64_t length2) {
    +
    +  if (offset1 < offset2) {
    +    ceph_assert( offset1 + length1 <= offset2 );
    +  } else {
    +    ceph_assert( offset2 + length2 <= offset1 );
    +  }
    +
+  return std::make_unique<IoOp>(OpType::READ2,
    +                                offset1, length1,
    +                                offset2, length2);
    +}
    +
+std::unique_ptr<IoOp> IoOp
    +  ::generate_read3(uint64_t offset1, uint64_t length1,
    +                   uint64_t offset2, uint64_t length2,
    +                   uint64_t offset3, uint64_t length3) {
    +
    +  if (offset1 < offset2) {
    +    ceph_assert( offset1 + length1 <= offset2 );
    +  } else {
    +    ceph_assert( offset2 + length2 <= offset1 );
    +  }
    +  if (offset1 < offset3) {
    +    ceph_assert( offset1 + length1 <= offset3 );
    +  } else {
    +    ceph_assert( offset3 + length3 <= offset1 );
    +  }
    +  if (offset2 < offset3) {
    +    ceph_assert( offset2 + length2 <= offset3 );
    +  } else {
    +    ceph_assert( offset3 + length3 <= offset2 );
    +  }
+  return std::make_unique<IoOp>(OpType::READ3,
    +                                offset1, length1,
    +                                offset2, length2,
    +                                offset3, length3);
    +}
    +
+std::unique_ptr<IoOp> IoOp::generate_write(uint64_t offset, uint64_t length) {
+  return std::make_unique<IoOp>(OpType::WRITE, offset, length);
    +}
    +
+std::unique_ptr<IoOp> IoOp::generate_write2(uint64_t offset1, uint64_t length1,
+                                            uint64_t offset2, uint64_t length2) {
    +  if (offset1 < offset2) {
    +    ceph_assert( offset1 + length1 <= offset2 );
    +  } else {
    +    ceph_assert( offset2 + length2 <= offset1 );
    +  }
+  return std::make_unique<IoOp>(OpType::WRITE2,
    +                                offset1, length1,
    +                                offset2, length2);
    +}
    +
+std::unique_ptr<IoOp> IoOp::generate_write3(uint64_t offset1, uint64_t length1,
+                                            uint64_t offset2, uint64_t length2,
+                                            uint64_t offset3, uint64_t length3) {
    +  if (offset1 < offset2) {
    +    ceph_assert( offset1 + length1 <= offset2 );
    +  } else {
    +    ceph_assert( offset2 + length2 <= offset1 );
    +  }
    +  if (offset1 < offset3) {
    +    ceph_assert( offset1 + length1 <= offset3 );
    +  } else {
    +    ceph_assert( offset3 + length3 <= offset1 );
    +  }
    +  if (offset2 < offset3) {
    +    ceph_assert( offset2 + length2 <= offset3 );
    +  } else {
    +    ceph_assert( offset3 + length3 <= offset2 );
    +  }
+  return std::make_unique<IoOp>(OpType::WRITE3,
    +                                offset1, length1,
    +                                offset2, length2,
    +                                offset3, length3);
    +}
    +
    +bool IoOp::done() {
    +  return (op == OpType::Done);
    +}
    +
    +std::string IoOp::to_string(uint64_t block_size) const
    +{
    +  switch (op) {
    +  case OpType::Done:
    +    return "Done";
    +  case OpType::BARRIER:
    +    return "Barrier";
    +  case OpType::CREATE:
    +    return "Create (size=" + value_to_string(length1 * block_size) + ")";
    +  case OpType::REMOVE:
    +    return "Remove";
    +  case OpType::READ:
    +    return "Read (offset=" + value_to_string(offset1 * block_size) +
    +           ",length=" + value_to_string(length1 * block_size) + ")";
    +  case OpType::READ2:
    +    return "Read2 (offset1=" + value_to_string(offset1 * block_size) +
    +           ",length1=" + value_to_string(length1 * block_size) +
    +           ",offset2=" + value_to_string(offset2 * block_size) +
    +           ",length2=" + value_to_string(length2 * block_size) + ")";
    +  case OpType::READ3:
    +    return "Read3 (offset1=" + value_to_string(offset1 * block_size) +
    +           ",length1=" + value_to_string(length1 * block_size) +
    +           ",offset2=" + value_to_string(offset2 * block_size) +
    +           ",length2=" + value_to_string(length2 * block_size) +
    +           ",offset3=" + value_to_string(offset3 * block_size) +
    +           ",length3=" + value_to_string(length3 * block_size) + ")";
    +  case OpType::WRITE:
    +    return "Write (offset=" + value_to_string(offset1 * block_size) +
    +           ",length=" + value_to_string(length1 * block_size) + ")";
    +  case OpType::WRITE2:
    +    return "Write2 (offset1=" + value_to_string(offset1 * block_size) +
    +           ",length1=" + value_to_string(length1 * block_size) +
    +           ",offset2=" + value_to_string(offset2 * block_size) +
    +           ",length2=" + value_to_string(length2 * block_size) + ")";
    +  case OpType::WRITE3:
    +    return "Write3 (offset1=" + value_to_string(offset1 * block_size) +
    +           ",length1=" + value_to_string(length1 * block_size) +
    +           ",offset2=" + value_to_string(offset2 * block_size) +
    +           ",length2=" + value_to_string(length2 * block_size) +
    +           ",offset3=" + value_to_string(offset3 * block_size) +
    +           ",length3=" + value_to_string(length3 * block_size) + ")";
    +  default:
    +    break;
    +  }
    +  return "Unknown";
    +}
    \ No newline at end of file
    diff --git a/src/common/io_exerciser/IoOp.h b/src/common/io_exerciser/IoOp.h
    new file mode 100644
    index 000000000000..60c02a93d4e2
    --- /dev/null
    +++ b/src/common/io_exerciser/IoOp.h
    @@ -0,0 +1,94 @@
    +#pragma once
    +
+#include <memory>
+#include <string>
    +#include "include/ceph_assert.h"
    +
    +/* Overview
    + *
    + * enum OpType
    + *   Enumeration of different types of I/O operation
    + *
    + * class IoOp
+ *   Stores details for an I/O operation. Generated by an IoSequence
+ *   and applied by an IoExerciser.
    + */
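+
+/* Example (illustrative sketch only; offsets and lengths are in blocks, and
+ * block_size is only used to render byte values in to_string()):
+ *
+ *   using ceph::io_exerciser::IoOp;
+ *   std::unique_ptr<IoOp> op = IoOp::generate_write(2, 3);
+ *   // With a 4K block size this renders as "Write (offset=8K,length=12K)"
+ *   std::cout << op->to_string(4096) << std::endl;
+ */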
    +
    +namespace ceph {
    +  namespace io_exerciser {
    +
    +    enum class OpType {
    +      Done,       // End of I/O sequence
    +      BARRIER,    // Barrier - all prior I/Os must complete
    +      CREATE,     // Create object and pattern with data
    +      REMOVE,     // Remove object
    +      READ,       // Read
    +      READ2,      // 2 Reads in one op
    +      READ3,      // 3 Reads in one op
    +      WRITE,      // Write
    +      WRITE2,     // 2 Writes in one op
    +      WRITE3      // 3 Writes in one op
    +    };
    +
    +    class IoOp {
    +    protected:
    +      std::string value_to_string(uint64_t v) const;
    +
    +    public:
    +      OpType op;
    +      uint64_t offset1;
    +      uint64_t length1;
    +      uint64_t offset2;
    +      uint64_t length2;
    +      uint64_t offset3;
    +      uint64_t length3;
    +
    +      IoOp( OpType op,
    +            uint64_t offset1 = 0, uint64_t length1 = 0,
    +            uint64_t offset2 = 0, uint64_t length2 = 0,
    +            uint64_t offset3 = 0, uint64_t length3 = 0 );
    +
+      static std::unique_ptr<IoOp> generate_done();
+
+      static std::unique_ptr<IoOp> generate_barrier();
+
+      static std::unique_ptr<IoOp> generate_create(uint64_t size);
+
+      static std::unique_ptr<IoOp> generate_remove();
+
+      static std::unique_ptr<IoOp> generate_read(uint64_t offset,
+                                                 uint64_t length);
+
+      static std::unique_ptr<IoOp> generate_read2(uint64_t offset1,
+                                                  uint64_t length1,
+                                                  uint64_t offset2,
+                                                  uint64_t length2);
+
+      static std::unique_ptr<IoOp> generate_read3(uint64_t offset1,
+                                                  uint64_t length1,
+                                                  uint64_t offset2,
+                                                  uint64_t length2,
+                                                  uint64_t offset3,
+                                                  uint64_t length3);
+
+      static std::unique_ptr<IoOp> generate_write(uint64_t offset,
+                                                  uint64_t length);
+
+      static std::unique_ptr<IoOp> generate_write2(uint64_t offset1,
+                                                   uint64_t length1,
+                                                   uint64_t offset2,
+                                                   uint64_t length2);
+
+      static std::unique_ptr<IoOp> generate_write3(uint64_t offset1,
+                                                   uint64_t length1,
+                                                   uint64_t offset2,
+                                                   uint64_t length2,
+                                                   uint64_t offset3,
+                                                   uint64_t length3);
    +
    +      bool done();
    +
    +      std::string to_string(uint64_t block_size) const;
    +    };
    +  }
    +}
    \ No newline at end of file
    diff --git a/src/common/io_exerciser/IoSequence.cc b/src/common/io_exerciser/IoSequence.cc
    new file mode 100644
    index 000000000000..4a7ca0593d1d
    --- /dev/null
    +++ b/src/common/io_exerciser/IoSequence.cc
    @@ -0,0 +1,500 @@
    +#include "IoSequence.h"
    +
    +using Sequence = ceph::io_exerciser::Sequence;
    +using IoSequence = ceph::io_exerciser::IoSequence;
    +
    +std::ostream& ceph::io_exerciser::operator<<(std::ostream& os, const Sequence& seq)
    +{
    +  switch (seq)
    +  {
    +    case Sequence::SEQUENCE_SEQ0:
    +      os << "SEQUENCE_SEQ0";
    +      break;
    +    case Sequence::SEQUENCE_SEQ1:
    +      os << "SEQUENCE_SEQ1";
    +      break;
    +    case Sequence::SEQUENCE_SEQ2:
    +      os << "SEQUENCE_SEQ2";
    +      break;
    +    case Sequence::SEQUENCE_SEQ3:
    +      os << "SEQUENCE_SEQ3";
    +      break;
    +    case Sequence::SEQUENCE_SEQ4:
    +      os << "SEQUENCE_SEQ4";
    +      break;
    +    case Sequence::SEQUENCE_SEQ5:
    +      os << "SEQUENCE_SEQ5";
    +      break;
    +    case Sequence::SEQUENCE_SEQ6:
    +      os << "SEQUENCE_SEQ6";
    +      break;
    +    case Sequence::SEQUENCE_SEQ7:
    +      os << "SEQUENCE_SEQ7";
    +      break;
    +    case Sequence::SEQUENCE_SEQ8:
    +      os << "SEQUENCE_SEQ8";
    +      break;
    +    case Sequence::SEQUENCE_SEQ9:
    +      os << "SEQUENCE_SEQ9";
    +      break;
    +    case Sequence::SEQUENCE_END:
    +      os << "SEQUENCE_END";
    +      break;
    +  }
    +  return os;
    +}
    +
+IoSequence::IoSequence(std::pair<int,int> obj_size_range,
+                       int seed) :
    +        min_obj_size(obj_size_range.first), max_obj_size(obj_size_range.second),
    +        create(true), barrier(false), done(false), remove(false),
    +        obj_size(min_obj_size), step(-1), seed(seed)
    +{
    +  rng.seed(seed);
    +}
    +
+std::unique_ptr<IoSequence> IoSequence::generate_sequence(Sequence s,
+                                                          std::pair<int,int> obj_size_range,
+                                                          int seed)
    +{
    +  switch (s) {
+    case Sequence::SEQUENCE_SEQ0:
+      return std::make_unique<Seq0>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ1:
+      return std::make_unique<Seq1>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ2:
+      return std::make_unique<Seq2>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ3:
+      return std::make_unique<Seq3>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ4:
+      return std::make_unique<Seq4>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ5:
+      return std::make_unique<Seq5>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ6:
+      return std::make_unique<Seq6>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ7:
+      return std::make_unique<Seq7>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ8:
+      return std::make_unique<Seq8>(obj_size_range, seed);
+    case Sequence::SEQUENCE_SEQ9:
+      return std::make_unique<Seq9>(obj_size_range, seed);
    +    default:
    +      break;
    +  }
    +  return nullptr;
    +}
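+
+// Example (illustrative sketch only; the {1, 32} object size range and seed 0
+// are arbitrary): drain a sequence by applying ops until Done is returned.
+//
+//   auto seq = IoSequence::generate_sequence(Sequence::SEQUENCE_SEQ0,
+//                                            {1, 32}, 0);
+//   for (auto op = seq->next(); !op->done(); op = seq->next()) {
+//     // apply *op to the object under test
+//   }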
    +
    +int IoSequence::get_step() const
    +{
    +  return step;
    +}
    +
    +int IoSequence::get_seed() const
    +{
    +  return seed;
    +}
    +
    +void IoSequence::set_min_object_size(uint64_t size)
    +{
    +  min_obj_size = size;
    +  if (obj_size < size) {
    +    obj_size = size;
    +    if (obj_size > max_obj_size) {
    +      done = true;
    +    }
    +  }
    +}
    +
    +void IoSequence::set_max_object_size(uint64_t size)
    +{
    +  max_obj_size = size;
    +  if (obj_size > size) {
    +    done = true;
    +  }
    +}
    +
    +void IoSequence::select_random_object_size()
    +{
    +  if (max_obj_size != min_obj_size) {
    +    obj_size = min_obj_size + rng(max_obj_size - min_obj_size);
    +  }
    +}
    +
+std::unique_ptr<IoOp> IoSequence::increment_object_size()
    +{
    +  obj_size++;
    +  if (obj_size > max_obj_size) {
    +    done = true;
    +  }
    +  create = true;
    +  barrier = true;
    +  remove = true;
    +  return IoOp::generate_barrier();
    +}
    +
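+// next() works through pending state in a fixed order before asking the
+// subclass for real I/O: an outstanding remove is issued first, then any
+// barrier, then Done once the sequence has finished, then a create for the
+// (possibly resized) object; only then does _next() generate the next op.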
+std::unique_ptr<IoOp> IoSequence::next()
    +{
    +  step++;
    +  if (remove) {
    +    remove = false;
    +    return IoOp::generate_remove();
    +  }
    +  if (barrier) {
    +    barrier = false;
    +    return IoOp::generate_barrier();
    +  }
    +  if (done) {
    +    return IoOp::generate_done();
    +  }
    +  if (create) {
    +    create = false;
    +    barrier = true;
    +    return IoOp::generate_create(obj_size);
    +  }
    +  return _next();
    +}
    +
    +
    +
+ceph::io_exerciser::Seq0::Seq0(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed), offset(0)
    +{
    +  select_random_object_size();
    +  length = 1 + rng(obj_size - 1);
    +}
    +
    +std::string ceph::io_exerciser::Seq0::get_name() const
    +{
    +  return "Sequential reads of length " + std::to_string(length) +
    +    " with queue depth 1 (seqseed " + std::to_string(get_seed()) + ")";
    +}
    +
+std::unique_ptr<IoOp> ceph::io_exerciser::Seq0::_next()
+{
+  std::unique_ptr<IoOp> r;
    +  if (offset >= obj_size) {
    +    done = true;
    +    barrier = true;
    +    remove = true;
    +    return IoOp::generate_barrier();
    +  }
    +  if (offset + length > obj_size) {
    +    r = IoOp::generate_read(offset, obj_size - offset);
    +  } else {
    +    r = IoOp::generate_read(offset, length);
    +  }
    +  offset += length;
    +  return r;
    +}
    +
    +
    +
+ceph::io_exerciser::Seq1::Seq1(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed)
    +{
    +  select_random_object_size();
    +  count = 3 * obj_size;
    +}
    +
    +std::string ceph::io_exerciser::Seq1::get_name() const
    +{
    +  return "Random offset, random length read/write I/O with queue depth 1 (seqseed "
    +    + std::to_string(get_seed()) + ")";
    +}
    +
+std::unique_ptr<IoOp> ceph::io_exerciser::Seq1::_next()
    +{
    +  barrier = true;
    +  if (count-- == 0) {
    +    done = true;
    +    remove = true;
    +    return IoOp::generate_barrier();
    +  }
    +
    +  uint64_t offset = rng(obj_size - 1);
    +  uint64_t length = 1 + rng(obj_size - 1 - offset);
    +  return (rng(2) != 0) ? IoOp::generate_write(offset, length) :
    +    IoOp::generate_read(offset, length);
    +}
    +
    +
    +
+ceph::io_exerciser::Seq2::Seq2(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed), offset(0), length(0) {}
    +
    +std::string ceph::io_exerciser::Seq2::get_name() const
    +{
    +  return "Permutations of offset and length read I/O";
    +}
    +
+std::unique_ptr<IoOp> ceph::io_exerciser::Seq2::_next()
    +{
    +  length++;
    +  if (length > obj_size - offset) {
    +    length = 1;
    +    offset++;
    +    if (offset >= obj_size) {
    +      offset = 0;
    +      length = 0;
    +      return increment_object_size();
    +    }
    +  }
    +  return IoOp::generate_read(offset, length);
    +}
    +
    +
    +
+ceph::io_exerciser::Seq3::Seq3(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed), offset1(0), offset2(0)
    +{
    +  set_min_object_size(2);
    +}
    +
    +std::string ceph::io_exerciser::Seq3::get_name() const
    +{
    +  return "Permutations of offset 2-region 1-block read I/O";
    +}
    +
+std::unique_ptr<IoOp> ceph::io_exerciser::Seq3::_next()
    +{
    +  offset2++;
    +  if (offset2 >= obj_size - offset1) {
    +    offset2 = 1;
    +    offset1++;
    +    if (offset1 + 1 >= obj_size) {
    +      offset1 = 0;
    +      offset2 = 0;
    +      return increment_object_size();
    +    }
    +  }
    +  return IoOp::generate_read2(offset1, 1, offset1 + offset2, 1);
    +}
    +
    +
    +
+ceph::io_exerciser::Seq4::Seq4(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed), offset1(0), offset2(1)
    +{
    +  set_min_object_size(3);
    +}
    +
    +std::string ceph::io_exerciser::Seq4::get_name() const
    +{
    +  return "Permutations of offset 3-region 1-block read I/O";
    +}
    +
+std::unique_ptr<IoOp> ceph::io_exerciser::Seq4::_next()
    +{
    +  offset2++;
    +  if (offset2 >= obj_size - offset1) {
    +    offset2 = 2;
    +    offset1++;
    +    if (offset1 + 2 >= obj_size) {
    +      offset1 = 0;
    +      offset2 = 1;
    +      return increment_object_size();
    +    }
    +  }
    +  return IoOp::generate_read3(offset1, 1,
    +                              offset1 + offset2, 1,
    +                              (offset1 * 2 + offset2)/2, 1);
    +}
    +
    +
    +
+ceph::io_exerciser::Seq5::Seq5(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed), offset(0), length(1),
    +  doneread(false), donebarrier(false) {}
    +
    +std::string ceph::io_exerciser::Seq5::get_name() const
    +{
    +  return "Permutation of length sequential writes";
    +}
    +
+std::unique_ptr<IoOp> ceph::io_exerciser::Seq5::_next()
    +{
    +  if (offset >= obj_size) {
    +    if (!doneread) {
    +      if (!donebarrier) {
    +        donebarrier = true;
    +        return IoOp::generate_barrier();
    +      }
    +      doneread = true;
    +      barrier = true;
    +      return IoOp::generate_read(0, obj_size);
    +    }
    +    doneread = false;
    +    donebarrier = false;
    +    offset = 0;
    +    length++;
    +    if (length > obj_size) {
    +      length = 1;
    +      return increment_object_size();
    +    }
    +  }
    +  uint64_t io_len = (offset + length > obj_size) ? (obj_size - offset) : length;
    +  std::unique_ptr<IoOp> r = IoOp::generate_write(offset, io_len);
    +  offset += io_len;
    +  return r;
    +}
    +
    +
    +
    +ceph::io_exerciser::Seq6::Seq6(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed), offset(0), length(1),
    +  doneread(false), donebarrier(false) {}
    +
    +std::string ceph::io_exerciser::Seq6::get_name() const
    +{
    +  return "Permutation of length sequential writes, different alignment";
    +}
    +
    +std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq6::_next()
    +{
    +  if (offset >= obj_size) {
    +    if (!doneread) {
    +      if (!donebarrier) {
    +        donebarrier = true;
    +        return IoOp::generate_barrier();
    +      }
    +      doneread = true;
    +      barrier = true;
    +      return IoOp::generate_read(0, obj_size);
    +    }
    +    doneread = false;
    +    donebarrier = false;
    +    offset = 0;
    +    length++;
    +    if (length > obj_size) {
    +      length = 1;
    +      return increment_object_size();
    +    }
    +  }
    +  uint64_t io_len = (offset == 0) ? (obj_size % length) : length;
    +  if (io_len == 0) {
    +    io_len = length;
    +  }
    +  std::unique_ptr<IoOp> r = IoOp::generate_write(offset, io_len);
    +  offset += io_len;
    +  return r;
    +}
    +
    +
    +
    +ceph::io_exerciser::Seq7::Seq7(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed)
    +{
    +  set_min_object_size(2);
    +  offset = obj_size;
    +}
    +
    +std::string ceph::io_exerciser::Seq7::get_name() const
    +{
    +  return "Permutations of offset 2-region 1-block writes";
    +}
    +
    +std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq7::_next()
    +{
    +  if (!doneread) {
    +    if (!donebarrier) {
    +      donebarrier = true;
    +      return IoOp::generate_barrier();
    +    }
    +    doneread = true;
    +    barrier = true;
    +    return IoOp::generate_read(0, obj_size);
    +  }
    +  if (offset == 0) {
    +    doneread = false;
    +    donebarrier = false;
    +    offset = obj_size+1;
    +    return increment_object_size();
    +  }
    +  offset--;
    +  if (offset == obj_size/2) {
    +    return _next();
    +  }
    +  doneread = false;
    +  donebarrier = false;
    +  return IoOp::generate_write2(offset, 1, obj_size/2, 1);
    +}
    +
    +
    +
    +ceph::io_exerciser::Seq8::Seq8(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed), offset1(0), offset2(1)
    +{
    +  set_min_object_size(3);
    +}
    +
    +std::string ceph::io_exerciser::Seq8::get_name() const
    +{
    +  return "Permutations of offset 3-region 1-block write I/O";
    +}
    +
    +std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq8::_next()
    +{
    +  if (!doneread) {
    +    if (!donebarrier) {
    +      donebarrier = true;
    +      return IoOp::generate_barrier();
    +    }
    +    doneread = true;
    +    barrier = true;
    +    return IoOp::generate_read(0, obj_size);
    +  }
    +  offset2++;
    +  if (offset2 >= obj_size - offset1) {
    +    offset2 = 2;
    +    offset1++;
    +    if (offset1 + 2 >= obj_size) {
    +      offset1 = 0;
    +      offset2 = 1;
    +      return increment_object_size();
    +    }
    +  }
    +  doneread = false;
    +  donebarrier = false;
    +  return IoOp::generate_write3(offset1, 1,
    +                              offset1 + offset2, 1,
    +                              (offset1 * 2 + offset2)/2, 1);
    +}
    +
    +
    +
    +ceph::io_exerciser::Seq9::Seq9(std::pair<int,int> obj_size_range, int seed) :
    +  IoSequence(obj_size_range, seed), offset(0), length(0)
    +{
    +  
    +}
    +
    +std::string ceph::io_exerciser::Seq9::get_name() const
    +{
    +  return "Permutations of offset and length write I/O";
    +}
    +
    +std::unique_ptr<ceph::io_exerciser::IoOp> ceph::io_exerciser::Seq9::_next()
    +{
    +  if (!doneread) {
    +    if (!donebarrier) {
    +      donebarrier = true;
    +      return IoOp::generate_barrier();
    +    }
    +    doneread = true;
    +    barrier = true;
    +    return IoOp::generate_read(0, obj_size);
    +  }
    +  length++;
    +  if (length > obj_size - offset) {
    +    length = 1;
    +    offset++;
    +    if (offset >= obj_size) {
    +      offset = 0;
    +      length = 0;
    +      return increment_object_size();
    +    }
    +  }
    +  doneread = false;
    +  donebarrier = false;
    +  return IoOp::generate_write(offset, length);
    +}
    \ No newline at end of file
    diff --git a/src/common/io_exerciser/IoSequence.h b/src/common/io_exerciser/IoSequence.h
    new file mode 100644
    index 000000000000..114ff76303f4
    --- /dev/null
    +++ b/src/common/io_exerciser/IoSequence.h
    @@ -0,0 +1,223 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +#pragma once
    +
    +#include "IoOp.h"
    +
    +#include "include/random.h"
    +
    +/* Overview
    + *
    + * enum Sequence
    + *   Enumeration of the different sequences
    + *
    + * class IoSequence
    + *   Virtual class. IoSequences generate a stream of IoOps.
    + *   Sequences typically exhaustively test permutations of
    + *   offset and length to allow validation of code such as
    + *   Erasure Coding. An IoSequence does not determine
    + *   whether I/Os are issued sequentially or in parallel;
    + *   it must generate barrier I/Os where operations must
    + *   be serialized.
    + *
    + * class Seq*
    + *   Implementations of IoSequence. Each class generates
    + *   a different sequence of I/O.
    + *
    + * generate_sequence
    + *   Create an IoSequence
    + */
    +
    +namespace ceph {
    +  namespace io_exerciser {
    +
    +    enum class Sequence {
    +      SEQUENCE_SEQ0,
    +      SEQUENCE_SEQ1,
    +      SEQUENCE_SEQ2,
    +      SEQUENCE_SEQ3,
    +      SEQUENCE_SEQ4,
    +      SEQUENCE_SEQ5,
    +      SEQUENCE_SEQ6,
    +      SEQUENCE_SEQ7,
    +      SEQUENCE_SEQ8,
    +      SEQUENCE_SEQ9,
    +      //
    +      SEQUENCE_END,
    +      SEQUENCE_BEGIN = SEQUENCE_SEQ0
    +    };
    +
    +    inline Sequence operator++( Sequence& s )
    +    {
    +      return s = (Sequence)(((int)(s) + 1));
    +    }
    +
    +    std::ostream& operator<<(std::ostream& os, const Sequence& seq);
    +
    +    /* I/O Sequences */
    +
    +    class IoSequence {
    +    public:
    +      virtual ~IoSequence() = default;
    +
    +      virtual std::string get_name() const = 0;
    +      int get_step() const;
    +      int get_seed() const;
    +
    +      std::unique_ptr<IoOp> next();
    +
    +      static std::unique_ptr<IoSequence>
    +        generate_sequence(Sequence s, std::pair<int,int> obj_size_range, int seed );
    +
    +    protected:
    +      uint64_t min_obj_size;
    +      uint64_t max_obj_size;
    +      bool create;
    +      bool barrier;
    +      bool done;
    +      bool remove;
    +      uint64_t obj_size;
    +      int step;
    +      int seed;
    +      ceph::util::random_number_generator<int> rng =
    +        ceph::util::random_number_generator<int>();
    +
    +      IoSequence(std::pair<int,int> obj_size_range, int seed);
    +
    +      virtual std::unique_ptr<IoOp> _next() = 0;
    +
    +      void set_min_object_size(uint64_t size);
    +      void set_max_object_size(uint64_t size);
    +      void select_random_object_size();
    +      std::unique_ptr<IoOp> increment_object_size();
    +
    +    };
    +
    +    class Seq0: public IoSequence {
    +    public:
    +      Seq0(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next() override;
    +
    +    private:
    +      uint64_t offset;
    +      uint64_t length;
    +    };
    +
    +    class Seq1: public IoSequence {  
    +    public:
    +      Seq1(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next();
    +
    +    private:
    +      int count;
    +    };
    +      
    +    class Seq2: public IoSequence {
    +    public:
    +      Seq2(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next() override;
    +    
    +    private:
    +      uint64_t offset;
    +      uint64_t length;
    +    };
    +
    +    class Seq3: public IoSequence {
    +    public:
    +      Seq3(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next() override;
    +    private:
    +      uint64_t offset1;
    +      uint64_t offset2;
    +    };
    +
    +    class Seq4: public IoSequence {
    +    public:
    +      Seq4(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next() override;
    +
    +    private:
    +      uint64_t offset1;
    +      uint64_t offset2;
    +    };
    +
    +    class Seq5: public IoSequence {
    +    public:
    +      Seq5(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next() override;
    +
    +    private:
    +      uint64_t offset;
    +      uint64_t length;
    +      bool doneread;
    +      bool donebarrier;
    +    };
    +
    +    class Seq6: public IoSequence {
    +    public:
    +      Seq6(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next() override;
    +
    +    private:
    +      uint64_t offset;
    +      uint64_t length;
    +      bool doneread;
    +      bool donebarrier;
    +    };
    +
    +    class Seq7: public IoSequence {
    +    public:
    +      Seq7(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next() override;
    +
    +    private:
    +      uint64_t offset;
    +      bool doneread = true;
    +      bool donebarrier = false;
    +    };
    +
    +    class Seq8: public IoSequence {
    +    public:
    +      Seq8(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +      std::unique_ptr<IoOp> _next() override;
    +    private:
    +      uint64_t offset1;
    +      uint64_t offset2;
    +      bool doneread = true;
    +      bool donebarrier = false;
    +    };
    +
    +    class Seq9: public IoSequence {
    +    private:
    +      uint64_t offset;
    +      uint64_t length;
    +      bool doneread = true;
    +      bool donebarrier = false;
    +
    +    public:
    +      Seq9(std::pair<int,int> obj_size_range, int seed);
    +
    +      std::string get_name() const override;
    +
    +      std::unique_ptr<IoOp> _next() override;
    +    };
    +  }
    +}
    \ No newline at end of file
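Illustrative usage sketch (not part of this change): pumping one of the sequences above into an ObjectModel. The object-size range, block size, object name and seed are arbitrary values, and OpType::Done is assumed to be the terminating op, as handled by RadosIo further below.

```cpp
#include "common/io_exerciser/IoSequence.h"
#include "common/io_exerciser/ObjectModel.h"

#include <iostream>

using namespace ceph::io_exerciser;

void run_one_sequence(Sequence s, int seed)
{
  // Object sizes of 1..32 blocks; the sequence picks sizes within this range.
  std::unique_ptr<IoSequence> seq = IoSequence::generate_sequence(s, {1, 32}, seed);
  ObjectModel model("test_oid", 4096 /* block size */, seed);

  std::cout << seq->get_name() << std::endl;
  while (true) {
    std::unique_ptr<IoOp> op = seq->next();
    if (op->op == OpType::Done) {
      break;  // sequence exhausted
    }
    model.applyIoOp(*op);  // the model tracks contents and polices the op
  }
}
```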
    diff --git a/src/common/io_exerciser/Model.cc b/src/common/io_exerciser/Model.cc
    new file mode 100644
    index 000000000000..50812ecbb155
    --- /dev/null
    +++ b/src/common/io_exerciser/Model.cc
    @@ -0,0 +1,28 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +#include "Model.h"
    +
    +using Model = ceph::io_exerciser::Model;
    +
    +Model::Model(const std::string& oid, uint64_t block_size) : 
    +num_io(0),
    +oid(oid),
    +block_size(block_size)
    +{
    +
    +}
    +
    +const uint64_t Model::get_block_size() const
    +{
    +  return block_size;
    +}
    +
    +const std::string Model::get_oid() const
    +{
    +  return oid;
    +}
    +
    +int Model::get_num_io() const
    +{
    +  return num_io;
    +}
    \ No newline at end of file
    diff --git a/src/common/io_exerciser/Model.h b/src/common/io_exerciser/Model.h
    new file mode 100644
    index 000000000000..58d107409a65
    --- /dev/null
    +++ b/src/common/io_exerciser/Model.h
    @@ -0,0 +1,49 @@
    +#pragma once
    +
    +#include "IoOp.h"
    +
    +#include <boost/asio/io_context.hpp>
    +
    +#include "librados/librados_asio.h"
    +
    +#include "include/interval_set.h"
    +#include "global/global_init.h"
    +#include "global/global_context.h"
    +#include "common/Thread.h"
    +
    +/* Overview
    + *
    + * class Model
    + *   Virtual class. Models apply IoOps generated by an
    + *   IoSequence; they can choose how many I/Os to execute in
    + *   parallel and scale up the size of I/Os by the block size.
    + *
    + */
    +
    +namespace ceph {
    +  namespace io_exerciser {
    +
    +    class Model
    +    {
    +    protected:
    +      int num_io{0};
    +      std::string oid;
    +      uint64_t block_size;
    +
    +    public:
    +      Model(const std::string& oid, uint64_t block_size);
    +      virtual ~Model() = default;
    +
    +      virtual bool readyForIoOp(IoOp& op) = 0;
    +      virtual void applyIoOp(IoOp& op) = 0;
    +      
    +      const std::string get_oid() const;
    +      const uint64_t get_block_size() const;
    +      int get_num_io() const;
    +    };
    +
    +    /* Simple RADOS I/O generator */
    +
    +    
    +  }
    +}
    \ No newline at end of file
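For context, a hypothetical Model subclass (not in this change) that only counts operations; readyForIoOp() and applyIoOp() are the two hooks every model must implement, and num_io is the protected counter declared above.

```cpp
#include "common/io_exerciser/Model.h"

namespace ceph {
  namespace io_exerciser {

    // Minimal model: accepts every op immediately and only counts it.
    class CountingModel : public Model {
    public:
      CountingModel(const std::string& oid, uint64_t block_size) :
        Model(oid, block_size) {}

      bool readyForIoOp(IoOp& op) override {
        return true;  // never throttles; a real model may bound in-flight I/O
      }

      void applyIoOp(IoOp& op) override {
        num_io++;  // protected counter inherited from Model
      }
    };
  }
}
```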
    diff --git a/src/common/io_exerciser/ObjectModel.cc b/src/common/io_exerciser/ObjectModel.cc
    new file mode 100644
    index 000000000000..589f6434282b
    --- /dev/null
    +++ b/src/common/io_exerciser/ObjectModel.cc
    @@ -0,0 +1,174 @@
    +#include "ObjectModel.h"
    +
    +#include <algorithm>
    +#include <execution>
    +#include <iterator>
    +
    +using ObjectModel = ceph::io_exerciser::ObjectModel;
    +
    +ObjectModel::ObjectModel(const std::string& oid, uint64_t block_size, int seed) :
    +  Model(oid, block_size), created(false)
    +{
    +  rng.seed(seed);
    +}
    +
    +int ObjectModel::get_seed(uint64_t offset) const
    +{
    +  ceph_assert(offset < contents.size());
    +  return contents[offset];
    +}
    +
    +std::vector<uint64_t> ObjectModel::get_seed_offsets(int seed) const
    +{
    +  std::vector<uint64_t> offsets;
    +  for (size_t i = 0; i < contents.size(); i++)
    +  {
    +    if (contents[i] == seed)
    +    {
    +      offsets.push_back(i);
    +    }
    +  }
    +
    +  return offsets;
    +}
    +
    +std::string ObjectModel::to_string(int mask) const
    +{
    +  if (!created) {
    +    return "Object does not exist";
    +  }
    +  std::string result = "{";
    +  for (uint64_t i = 0; i < contents.size(); i++) {
    +    if (i != 0) {
    +      result += ",";
    +    }
    +    result += std::to_string(contents[i] & mask);
    +  }
    +  result += "}";
    +  return result;
    +}
    +
    +bool ObjectModel::readyForIoOp(IoOp& op)
    +{
    +  return true;
    +}
    +
    +void ObjectModel::applyIoOp(IoOp& op)
    +{
    +  auto generate_random = [&rng = rng]() {
    +    return rng();
    +  };
    +
    +  switch (op.op) {
    +  case OpType::BARRIER:
    +    reads.clear();
    +    writes.clear();
    +    break;
    +
    +  case OpType::CREATE:
    +    ceph_assert(!created);
    +    ceph_assert(reads.empty());
    +    ceph_assert(writes.empty());
    +    created = true;
    +    contents.resize(op.length1);
    +    std::generate(std::execution::seq, contents.begin(), contents.end(),
    +                  generate_random);
    +    break;
    +
    +  case OpType::REMOVE:
    +    ceph_assert(created);
    +    ceph_assert(reads.empty());
    +    ceph_assert(writes.empty());
    +    created = false;
    +    contents.resize(0);
    +    break;
    +
    +  case OpType::READ3:
    +    ceph_assert(created);
    +    ceph_assert(op.offset3 + op.length3 <= contents.size());
    +    // Not allowed: read overlapping with parallel write
    +    ceph_assert(!writes.intersects(op.offset3, op.length3));
    +    reads.union_insert(op.offset3, op.length3);
    +    [[fallthrough]];
    +
    +  case OpType::READ2:
    +    ceph_assert(created);
    +    ceph_assert(op.offset2 + op.length2 <= contents.size());
    +    // Not allowed: read overlapping with parallel write
    +    ceph_assert(!writes.intersects(op.offset2, op.length2));
    +    reads.union_insert(op.offset2, op.length2);
    +    [[fallthrough]];
    +
    +  case OpType::READ:
    +    ceph_assert(created);
    +    ceph_assert(op.offset1 + op.length1 <= contents.size());
    +    // Not allowed: read overlapping with parallel write
    +    ceph_assert(!writes.intersects(op.offset1, op.length1));
    +    reads.union_insert(op.offset1, op.length1);
    +    num_io++;
    +    break;
    +
    +  case OpType::WRITE3:
    +    ceph_assert(created);
    +    // Not allowed: write overlapping with parallel read or write
    +    ceph_assert(!reads.intersects(op.offset3, op.length3));
    +    ceph_assert(!writes.intersects(op.offset3, op.length3));
    +    writes.union_insert(op.offset3, op.length3);
    +    ceph_assert(op.offset3 + op.length3 <= contents.size());
    +    std::generate(std::execution::seq,
    +                  std::next(contents.begin(), op.offset3),
    +                  std::next(contents.begin(), op.offset3 + op.length3),
    +                  generate_random);
    +    [[fallthrough]];
    +
    +  case OpType::WRITE2:
    +    ceph_assert(created);
    +    // Not allowed: write overlapping with parallel read or write
    +    ceph_assert(!reads.intersects(op.offset2, op.length2));
    +    ceph_assert(!writes.intersects(op.offset2, op.length2));
    +    writes.union_insert(op.offset2, op.length2);
    +    ceph_assert(op.offset2 + op.length2 <= contents.size());
    +    std::generate(std::execution::seq,
    +                  std::next(contents.begin(), op.offset2),
    +                  std::next(contents.begin(), op.offset2 + op.length2),
    +                  generate_random);
    +    [[fallthrough]];
    +
    +  case OpType::WRITE:
    +    ceph_assert(created);
    +    // Not allowed: write overlapping with parallel read or write
    +    ceph_assert(!reads.intersects(op.offset1, op.length1));
    +    ceph_assert(!writes.intersects(op.offset1, op.length1));
    +    writes.union_insert(op.offset1, op.length1);
    +    ceph_assert(op.offset1 + op.length1 <= contents.size());
    +    std::generate(std::execution::seq,
    +                  std::next(contents.begin(), op.offset1),
    +                  std::next(contents.begin(), op.offset1 + op.length1),
    +                  generate_random);
    +    num_io++;
    +    break;
    +  default:
    +    break;
    +  }
    +}
    +
    +void ObjectModel::encode(ceph::buffer::list& bl) const {
    +  ENCODE_START(1, 1, bl);
    +  encode(created, bl);
    +  if (created) {
    +    encode(contents, bl);
    +  }
    +  ENCODE_FINISH(bl);
    +}
    +
    +void ObjectModel::decode(ceph::buffer::list::const_iterator& bl) {
    +  DECODE_START(1, bl);
    +  DECODE_OLDEST(1);
    +  decode(created, bl);
    +  if (created) {
    +    decode(contents, bl);
    +  } else {
    +    contents.resize(0);
    +  }
    +  DECODE_FINISH(bl);
    +}
    diff --git a/src/common/io_exerciser/ObjectModel.h b/src/common/io_exerciser/ObjectModel.h
    new file mode 100644
    index 000000000000..93c70f414297
    --- /dev/null
    +++ b/src/common/io_exerciser/ObjectModel.h
    @@ -0,0 +1,53 @@
    +#pragma once
    +
    +#include "Model.h"
    +
    +/* Overview
    + *
    + * class ObjectModel
    + *   An IoExerciser. Tracks the data stored in an object and applies
    + *   IoOps to update the model. Polices I/Os that are permitted to
    + *   run in parallel to ensure they do not break the rules. Provides
    + *   an interface to query the state of the object. State can be
    + *   encoded and decoded.
    + *
    + */
    +
    +namespace ceph {
    +  namespace io_exerciser {
    +    /* Model of an object to track its data contents */
    +
    +    class ObjectModel : public Model {
    +    private:
    +      bool created;
    +      std::vector<int> contents;
    +      ceph::util::random_number_generator<int> rng =
    +        ceph::util::random_number_generator<int>();
    +
    +      // Track read and write I/Os that can be submitted in
    +      // parallel to detect violations:
    +      //
    +      // * Read may not overlap with a parallel write
    +      // * Write may not overlap with a parallel read or write
    +      // * Create / remove may not be in parallel with read or write
    +      //
    +      // Fix broken test cases by adding barrier ops to restrict
    +      // I/O exercisers from issuing conflicting ops in parallel
    +      interval_set<uint64_t> reads;
    +      interval_set<uint64_t> writes;
    +    public:
    +      ObjectModel(const std::string& oid, uint64_t block_size, int seed);
    +      
    +      int get_seed(uint64_t offset) const;
    +      std::vector<uint64_t> get_seed_offsets(int seed) const;
    +
    +      std::string to_string(int mask = -1) const;
    +
    +      bool readyForIoOp(IoOp& op);
    +      void applyIoOp(IoOp& op);
    +      
    +      void encode(ceph::buffer::list& bl) const;
    +      void decode(ceph::buffer::list::const_iterator& bl);
    +    };
    +  }
    +}
    \ No newline at end of file
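A sketch of the rules ObjectModel polices between barriers; it assumes an IoOp::generate_create() factory symmetrical to the generate_read/write helpers used by IoSequence, and the block counts are arbitrary.

```cpp
#include "common/io_exerciser/ObjectModel.h"

using namespace ceph::io_exerciser;

void object_model_example(int seed)
{
  ObjectModel model("test_oid", 4096 /* block size */, seed);

  model.applyIoOp(*IoOp::generate_create(8));    // assumed factory: 8-block object
  model.applyIoOp(*IoOp::generate_write(0, 2));  // blocks 0-1
  model.applyIoOp(*IoOp::generate_write(4, 2));  // blocks 4-5: no overlap, allowed
  // A write of blocks 1-2 here would trip a ceph_assert: it overlaps blocks 0-1.
  model.applyIoOp(*IoOp::generate_barrier());    // clears the tracked reads/writes
  model.applyIoOp(*IoOp::generate_read(0, 8));   // full-object read now permitted
}
```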
    diff --git a/src/common/io_exerciser/RadosIo.cc b/src/common/io_exerciser/RadosIo.cc
    new file mode 100644
    index 000000000000..44b82260263a
    --- /dev/null
    +++ b/src/common/io_exerciser/RadosIo.cc
    @@ -0,0 +1,300 @@
    +#include "RadosIo.h"
    +
    +#include "DataGenerator.h"
    +
    +using RadosIo = ceph::io_exerciser::RadosIo;
    +
    +RadosIo::RadosIo(librados::Rados& rados,
    +        boost::asio::io_context& asio,
    +        const std::string& pool,
    +        const std::string& oid,
    +        uint64_t block_size,
    +        int seed,
    +	int threads,
    +        ceph::mutex& lock,
    +        ceph::condition_variable& cond) :
    +  Model(oid, block_size),
    +  rados(rados),
    +  asio(asio),
    +  om(std::make_unique<ObjectModel>(oid, block_size, seed)),
    +  db(data_generation::DataGenerator::create_generator(
    +      data_generation::GenerationType::HeaderedSeededRandom, *om)),
    +  pool(pool),
    +  threads(threads),
    +  lock(lock),
    +  cond(cond),
    +  outstanding_io(0)
    +{
    +  int rc;
    +  rc = rados.ioctx_create(pool.c_str(), io);
    +  ceph_assert(rc == 0);
    +  allow_ec_overwrites(true);
    +}
    +
    +RadosIo::~RadosIo()
    +{
    +}
    +
    +void RadosIo::start_io()
    +{
    +  std::lock_guard l(lock);
    +  outstanding_io++;
    +}
    +
    +void RadosIo::finish_io()
    +{
    +  std::lock_guard l(lock);
    +  ceph_assert(outstanding_io > 0);
    +  outstanding_io--;
    +  cond.notify_all();
    +}
    +
    +void RadosIo::wait_for_io(int count)
    +{
    +  std::unique_lock l(lock);
    +  while (outstanding_io > count) {
    +    cond.wait(l);
    +  }
    +}
    +
    +void RadosIo::allow_ec_overwrites(bool allow)
    +{
    +  int rc;
    +  bufferlist inbl, outbl;
    +  std::string cmdstr =
    +    "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool + "\", \
    +      \"var\": \"allow_ec_overwrites\", \"val\": \"" +
    +    (allow ? "true" : "false") + "\"}";
    +  rc = rados.mon_command(cmdstr, inbl, &outbl, nullptr);
    +  ceph_assert(rc == 0);
    +}
    +
    +RadosIo::AsyncOpInfo::AsyncOpInfo(uint64_t offset1, uint64_t length1,
    +                                  uint64_t offset2, uint64_t length2,
    +                                  uint64_t offset3, uint64_t length3 ) :
    +  offset1(offset1), length1(length1),
    +  offset2(offset2), length2(length2),
    +  offset3(offset3), length3(length3)
    +{
    +
    +}
    +
    +bool RadosIo::readyForIoOp(IoOp &op)
    +{
    +  ceph_assert(ceph_mutex_is_locked_by_me(lock)); //Must be called with lock held
    +  if (!om->readyForIoOp(op)) {
    +    return false;
    +  }
    +  switch (op.op) {
    +  case OpType::Done:
    +  case OpType::BARRIER:
    +    return outstanding_io == 0;
    +  default:
    +    return outstanding_io < threads;
    +  }
    +}
    +
    +void RadosIo::applyIoOp(IoOp &op)
    +{
    +  std::shared_ptr<AsyncOpInfo> op_info;
    +
    +  om->applyIoOp(op);
    +
    +  // If there are already 'threads' concurrent I/Os in flight then wait
    +  // for at least one I/O to complete
    +  wait_for_io(threads-1);
    +  
    +  switch (op.op) {
    +  case OpType::Done:
    +  [[ fallthrough ]];
    +  case OpType::BARRIER:
    +    // Wait for all outstanding I/O to complete
    +    wait_for_io(0);
    +    break;    
    +
    +  case OpType::CREATE:
    +    {
    +      start_io();
    +      op_info = std::make_shared<AsyncOpInfo>(0, op.length1);
    +      op_info->bl1 = db->generate_data(0, op.length1);
    +      op_info->wop.write_full(op_info->bl1);
    +      auto create_cb = [this] (boost::system::error_code ec,
    +                               version_t ver) {
    +        ceph_assert(ec == boost::system::errc::success);
    +        finish_io();
    +      };
    +      librados::async_operate(asio, io, oid,
    +                              &op_info->wop, 0, nullptr, create_cb);
    +    }
    +    break;
    +
    +  case OpType::REMOVE:
    +    {
    +      start_io();
    +      op_info = std::make_shared<AsyncOpInfo>();
    +      op_info->wop.remove();
    +      auto remove_cb = [this] (boost::system::error_code ec,
    +                               version_t ver) {
    +        ceph_assert(ec == boost::system::errc::success);
    +        finish_io();
    +      };
    +      librados::async_operate(asio, io, oid,
    +                              &op_info->wop, 0, nullptr, remove_cb);
    +    }
    +    break;
    +
    +  case OpType::READ:
    +    {
    +      start_io();
    +      op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1);
    +      op_info->rop.read(op.offset1 * block_size,
    +                        op.length1 * block_size,
    +                        &op_info->bl1, nullptr);
    +      auto read_cb = [this, op_info] (boost::system::error_code ec,
    +                                      version_t ver,
    +                                      bufferlist bl) {
    +        ceph_assert(ec == boost::system::errc::success);
    +        ceph_assert(db->validate(op_info->bl1,
    +                                 op_info->offset1,
    +                                 op_info->length1));
    +        finish_io();
    +      };
    +      librados::async_operate(asio, io, oid,
    +                              &op_info->rop, 0, nullptr, read_cb);
    +      num_io++;
    +    }
    +    break;
    +
    +  case OpType::READ2:
    +    {
    +      start_io();
    +      op_info = std::make_shared<AsyncOpInfo>(op.offset1,
    +                                              op.length1,
    +                                              op.offset2,
    +                                              op.length2);
    +
    +      op_info->rop.read(op.offset1 * block_size,
    +                        op.length1 * block_size,
    +                        &op_info->bl1, nullptr);
    +      op_info->rop.read(op.offset2 * block_size,
    +                    op.length2 * block_size,
    +                    &op_info->bl2, nullptr);
    +      auto read2_cb = [this, op_info] (boost::system::error_code ec,
    +                                       version_t ver,
    +                                       bufferlist bl) {
    +        ceph_assert(ec == boost::system::errc::success);
    +        ceph_assert(db->validate(op_info->bl1,
    +                                 op_info->offset1,
    +                                 op_info->length1));
    +        ceph_assert(db->validate(op_info->bl2,
    +                                 op_info->offset2,
    +                                 op_info->length2));
    +        finish_io();
    +      };
    +      librados::async_operate(asio, io, oid,
    +                              &op_info->rop, 0, nullptr, read2_cb);
    +      num_io++;
    +    }
    +    break;
    +
    +  case OpType::READ3:
    +    {
    +      start_io();
    +      op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1,
    +                                              op.offset2, op.length2,
    +                                              op.offset3, op.length3);
    +      op_info->rop.read(op.offset1 * block_size,
    +                    op.length1 * block_size,
    +                    &op_info->bl1, nullptr);
    +      op_info->rop.read(op.offset2 * block_size,
    +                    op.length2 * block_size,
    +                    &op_info->bl2, nullptr);
    +      op_info->rop.read(op.offset3 * block_size,
    +                    op.length3 * block_size,
    +                    &op_info->bl3, nullptr);
    +      auto read3_cb = [this, op_info] (boost::system::error_code ec,
    +                                       version_t ver,
    +                                       bufferlist bl) {
    +        ceph_assert(ec == boost::system::errc::success);
    +        ceph_assert(db->validate(op_info->bl1,
    +                                 op_info->offset1,
    +                                 op_info->length1));
    +        ceph_assert(db->validate(op_info->bl2,
    +                                 op_info->offset2,
    +                                 op_info->length2));
    +        ceph_assert(db->validate(op_info->bl3,
    +                                 op_info->offset3,
    +                                 op_info->length3));
    +        finish_io();
    +      };
    +      librados::async_operate(asio, io, oid,
    +                              &op_info->rop, 0, nullptr, read3_cb);
    +      num_io++;
    +    }
    +    break;
    +
    +  case OpType::WRITE:
    +    {
    +      start_io();
    +      op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1);
    +      op_info->bl1 = db->generate_data(op.offset1, op.length1);
    +
    +      op_info->wop.write(op.offset1 * block_size, op_info->bl1);
    +      auto write_cb = [this] (boost::system::error_code ec,
    +                              version_t ver) {
    +        ceph_assert(ec == boost::system::errc::success);
    +        finish_io();
    +      };
    +      librados::async_operate(asio, io, oid,
    +                              &op_info->wop, 0, nullptr, write_cb);
    +      num_io++;
    +    }
    +    break;
    +
    +  case OpType::WRITE2:
    +    {
    +      start_io();
    +      op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1,
    +                                              op.offset2, op.length2);
    +      op_info->bl1 = db->generate_data(op.offset1, op.length1);
    +      op_info->bl2 = db->generate_data(op.offset2, op.length2);
    +      op_info->wop.write(op.offset1 * block_size, op_info->bl1);
    +      op_info->wop.write(op.offset2 * block_size, op_info->bl2);
    +      auto write2_cb = [this] (boost::system::error_code ec,
    +                               version_t ver) {
    +        ceph_assert(ec == boost::system::errc::success);
    +        finish_io();
    +      };
    +      librados::async_operate(asio, io, oid,
    +                              &op_info->wop, 0, nullptr, write2_cb);
    +      num_io++;
    +    }
    +    break;
    +
    +  case OpType::WRITE3:
    +    {
    +      start_io();
    +      op_info = std::make_shared<AsyncOpInfo>(op.offset1, op.length1,
    +                                              op.offset2, op.length2,
    +                                              op.offset3, op.length3);
    +      op_info->bl1 = db->generate_data(op.offset1, op.length1);
    +      op_info->bl2 = db->generate_data(op.offset2, op.length2);
    +      op_info->bl3 = db->generate_data(op.offset3, op.length3);
    +      op_info->wop.write(op.offset1 * block_size, op_info->bl1);
    +      op_info->wop.write(op.offset2 * block_size, op_info->bl2);
    +      op_info->wop.write(op.offset3 * block_size, op_info->bl3);
    +      auto write3_cb = [this] (boost::system::error_code ec,
    +                               version_t ver) {
    +        ceph_assert(ec == boost::system::errc::success);
    +        finish_io();
    +      };
    +      librados::async_operate(asio, io, oid,
    +                              &op_info->wop, 0, nullptr, write3_cb);
    +      num_io++;
    +    }
    +    break;
    +
    +  default:
    +    break;
    +  }
    +}
    diff --git a/src/common/io_exerciser/RadosIo.h b/src/common/io_exerciser/RadosIo.h
    new file mode 100644
    index 000000000000..179c5bba3aea
    --- /dev/null
    +++ b/src/common/io_exerciser/RadosIo.h
    @@ -0,0 +1,80 @@
    +#pragma once
    +
    +#include "ObjectModel.h"
    +
    +/* Overview
    + *
    + * class RadosIo
    + *   An IoExerciser. A simple RADOS client that generates I/Os
    + *   from IoOps. Uses an ObjectModel to track the data stored
    + *   in the object. Uses a DataGenerator to create and validate
    + *   data buffers. When there are no barrier I/Os this may
    + *   issue multiple async I/Os in parallel.
    + * 
    + */
    +
    +namespace ceph {
    +  namespace io_exerciser {
    +    namespace data_generation {
    +      class DataGenerator;
    +    }
    +    
    +    class RadosIo: public Model {
    +    protected:
    +      librados::Rados& rados;
    +      boost::asio::io_context& asio;
    +      std::unique_ptr<ObjectModel> om;
    +      std::unique_ptr<data_generation::DataGenerator> db;
    +      std::string pool;
    +      int threads;
    +      ceph::mutex& lock;
    +      ceph::condition_variable& cond;
    +      librados::IoCtx io;
    +      int outstanding_io;
    +
    +      void start_io();
    +      void finish_io();
    +      void wait_for_io(int count);
    +      
    +    public:
    +      RadosIo(librados::Rados& rados,
    +              boost::asio::io_context& asio,
    +              const std::string& pool,
    +              const std::string& oid,
    +              uint64_t block_size,
    +              int seed,
    +              int threads,
    +              ceph::mutex& lock,
    +              ceph::condition_variable& cond);
    +
    +      ~RadosIo();
    +
    +      void allow_ec_overwrites(bool allow);
    +
    +      class AsyncOpInfo {
    +      public:
    +        librados::ObjectReadOperation rop;
    +        librados::ObjectWriteOperation wop;
    +        ceph::buffer::list bl1;
    +        ceph::buffer::list bl2;
    +        ceph::buffer::list bl3;
    +        uint64_t offset1;
    +        uint64_t length1;
    +        uint64_t offset2;
    +        uint64_t length2;
    +        uint64_t offset3;
    +        uint64_t length3;
    +
    +        AsyncOpInfo(uint64_t offset1 = 0, uint64_t length1 = 0,
    +                uint64_t offset2 = 0, uint64_t length2 = 0,
    +                uint64_t offset3 = 0, uint64_t length3 = 0 );
    +        ~AsyncOpInfo() = default;
    +      };
    +
    +      // Must be called with lock held
    +      bool readyForIoOp(IoOp& op);
    +      
    +      void applyIoOp(IoOp& op);
    +    };
    +  }
    +}
    \ No newline at end of file
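Hypothetical wiring of RadosIo against an already-connected cluster; pool name, block size, seed and thread count are illustration values. readyForIoOp() must be called with the lock held, while applyIoOp() acquires the lock internally, so the sketch drops the lock before applying each op.

```cpp
#include "common/io_exerciser/RadosIo.h"
#include "common/io_exerciser/IoSequence.h"
#include "common/ceph_mutex.h"

#include <boost/asio/io_context.hpp>

void exercise(librados::Rados& rados, boost::asio::io_context& asio)
{
  ceph::mutex lock = ceph::make_mutex("RadosIo::lock");
  ceph::condition_variable cond;

  ceph::io_exerciser::RadosIo model(rados, asio, "test_pool", "test_oid",
                                    2048 /* block size */, 1234 /* seed */,
                                    4 /* parallel I/Os */, lock, cond);

  auto seq = ceph::io_exerciser::IoSequence::generate_sequence(
      ceph::io_exerciser::Sequence::SEQUENCE_SEQ0, {1, 32}, 1234);

  bool done = false;
  while (!done) {
    auto op = seq->next();
    {
      std::unique_lock l(lock);
      cond.wait(l, [&]() { return model.readyForIoOp(*op); });
    }
    model.applyIoOp(*op);  // may block until an I/O slot is free
    done = (op->op == ceph::io_exerciser::OpType::Done);
  }
}
```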
    diff --git a/src/common/mClockPriorityQueue.h b/src/common/mClockPriorityQueue.h
    deleted file mode 100644
    index c1f9f3c2517d..000000000000
    --- a/src/common/mClockPriorityQueue.h
    +++ /dev/null
    @@ -1,369 +0,0 @@
    -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    -// vim: ts=8 sw=2 smarttab
    -/*
    - * Ceph - scalable distributed file system
    - *
    - * Copyright (C) 2016 Red Hat Inc.
    - *
    - * This is free software; you can redistribute it and/or
    - * modify it under the terms of the GNU Lesser General Public
    - * License version 2.1, as published by the Free Software
    - * Foundation.  See file COPYING.
    - *
    - */
    -
    -#pragma once
    -
    -
    -#include 
    -#include 
    -#include 
    -#include 
    -
    -#include "common/Formatter.h"
    -#include "common/OpQueue.h"
    -
    -#include "dmclock/src/dmclock_server.h"
    -
    -// the following is done to unclobber _ASSERT_H so it returns to the
    -// way ceph likes it
    -#include "include/ceph_assert.h"
    -
    -
    -namespace ceph {
    -
    -  namespace dmc = crimson::dmclock;
    -
    -  template <typename T, typename K>
    -  class mClockQueue : public OpQueue <T, K> {
    -
    -    using priority_t = unsigned;
    -    using cost_t = unsigned;
    -
    -    typedef std::list<std::pair<cost_t, T> > ListPairs;
    -
    -    static void filter_list_pairs(ListPairs *l,
    -                                  std::function<bool (T&&)> f) {
    -      for (typename ListPairs::iterator i = l->end();
    -	   i != l->begin();
    -	   /* no inc */
    -	) {
    -	auto next = i;
    -	--next;
    -	if (f(std::move(next->second))) {
    -	  l->erase(next);
    -	} else {
    -	  i = next;
    -	}
    -      }
    -    }
    -
    -    struct SubQueue {
    -    private:
    -      typedef std::map<K, ListPairs> Classes;
    -      // client-class to ordered queue
    -      Classes q;
    -
    -      unsigned tokens, max_tokens;
    -
    -      typename Classes::iterator cur;
    -
    -    public:
    -
    -      SubQueue(const SubQueue &other)
    -	: q(other.q),
    -	  tokens(other.tokens),
    -	  max_tokens(other.max_tokens),
    -	  cur(q.begin()) {}
    -
    -      SubQueue()
    -	: tokens(0),
    -	  max_tokens(0),
    -	  cur(q.begin()) {}
    -
    -      void set_max_tokens(unsigned mt) {
    -	max_tokens = mt;
    -      }
    -
    -      unsigned get_max_tokens() const {
    -	return max_tokens;
    -      }
    -
    -      unsigned num_tokens() const {
    -	return tokens;
    -      }
    -
    -      void put_tokens(unsigned t) {
    -	tokens += t;
    -	if (tokens > max_tokens) {
    -	  tokens = max_tokens;
    -	}
    -      }
    -
    -      void take_tokens(unsigned t) {
    -	if (tokens > t) {
    -	  tokens -= t;
    -	} else {
    -	  tokens = 0;
    -	}
    -      }
    -
    -      void enqueue(K cl, cost_t cost, T&& item) {
    -	q[cl].emplace_back(cost, std::move(item));
    -	if (cur == q.end())
    -	  cur = q.begin();
    -      }
    -
    -      void enqueue_front(K cl, cost_t cost, T&& item) {
    -	q[cl].emplace_front(cost, std::move(item));
    -	if (cur == q.end())
    -	  cur = q.begin();
    -      }
    -
    -      const std::pair<cost_t, T>& front() const {
    -	ceph_assert(!(q.empty()));
    -	ceph_assert(cur != q.end());
    -	return cur->second.front();
    -      }
    -
    -      std::pair<cost_t, T>& front() {
    -	ceph_assert(!(q.empty()));
    -	ceph_assert(cur != q.end());
    -	return cur->second.front();
    -      }
    -
    -      void pop_front() {
    -	ceph_assert(!(q.empty()));
    -	ceph_assert(cur != q.end());
    -	cur->second.pop_front();
    -	if (cur->second.empty()) {
    -	  auto i = cur;
    -	  ++cur;
    -	  q.erase(i);
    -	} else {
    -	  ++cur;
    -	}
    -	if (cur == q.end()) {
    -	  cur = q.begin();
    -	}
    -      }
    -
    -      unsigned get_size_slow() const {
    -	unsigned count = 0;
    -	for (const auto& cls : q) {
    -	  count += cls.second.size();
    -	}
    -	return count;
    -      }
    -
    -      bool empty() const {
    -	return q.empty();
    -      }
    -
    -      void remove_by_filter(std::function<bool (T&&)> f) {
    -	for (typename Classes::iterator i = q.begin();
    -	     i != q.end();
    -	     /* no-inc */) {
    -	  filter_list_pairs(&(i->second), f);
    -	  if (i->second.empty()) {
    -	    if (cur == i) {
    -	      ++cur;
    -	    }
    -	    i = q.erase(i);
    -	  } else {
    -	    ++i;
    -	  }
    -	}
    -	if (cur == q.end()) cur = q.begin();
    -      }
    -
    -      void remove_by_class(K k, std::list<T> *out) {
    -	typename Classes::iterator i = q.find(k);
    -	if (i == q.end()) {
    -	  return;
    -	}
    -	if (i == cur) {
    -	  ++cur;
    -	}
    -	if (out) {
    -	  for (auto j = i->second.rbegin(); j != i->second.rend(); ++j) {
    -	    out->push_front(std::move(j->second));
    -	  }
    -	}
    -	q.erase(i);
    -	if (cur == q.end()) cur = q.begin();
    -      }
    -
    -      void dump(ceph::Formatter *f) const {
    -	f->dump_int("size", get_size_slow());
    -	f->dump_int("num_keys", q.size());
    -      }
    -    };
    -
    -    using SubQueues = std::map<priority_t, SubQueue>;
    -
    -    SubQueues high_queue;
    -
    -    using Queue = dmc::PullPriorityQueue;
    -    Queue queue;
    -
    -    // when enqueue_front is called, rather than try to re-calc tags
    -    // to put in mClock priority queue, we'll just keep a separate
    -    // list from which we dequeue items first, and only when it's
    -    // empty do we use queue.
    -    std::list<std::pair<K, T>> queue_front;
    -
    -  public:
    -
    -    mClockQueue(
    -      const typename Queue::ClientInfoFunc& info_func,
    -      double anticipation_timeout = 0.0) :
    -      queue(info_func, dmc::AtLimit::Allow, anticipation_timeout)
    -    {
    -      // empty
    -    }
    -
    -    unsigned get_size_slow() const {
    -      unsigned total = 0;
    -      total += queue_front.size();
    -      total += queue.request_count();
    -      for (auto i = high_queue.cbegin(); i != high_queue.cend(); ++i) {
    -	ceph_assert(i->second.get_size_slow());
    -	total += i->second.get_size_slow();
    -      }
    -      return total;
    -    }
    -
    -    // be sure to do things in reverse priority order and push_front
    -    // to the list so items end up on list in front-to-back priority
    -    // order
    -    void remove_by_filter(std::function<bool (T&&)> filter_accum) {
    -      queue.remove_by_req_filter([&] (std::unique_ptr<T>&& r) {
    -          return filter_accum(std::move(*r));
    -        }, true);
    -
    -      for (auto i = queue_front.rbegin(); i != queue_front.rend(); /* no-inc */) {
    -	if (filter_accum(std::move(i->second))) {
    -	  i = decltype(i){ queue_front.erase(std::next(i).base()) };
    -	} else {
    -	  ++i;
    -	}
    -      }
    -
    -      for (typename SubQueues::iterator i = high_queue.begin();
    -	   i != high_queue.end();
    -	   /* no-inc */ ) {
    -	i->second.remove_by_filter(filter_accum);
    -	if (i->second.empty()) {
    -	  i = high_queue.erase(i);
    -	} else {
    -	  ++i;
    -	}
    -      }
    -    }
    -
    -    void remove_by_class(K k, std::list<T> *out = nullptr) override final {
    -      if (out) {
    -	queue.remove_by_client(k,
    -			       true,
    -                   [&out] (std::unique_ptr<T>&& t) {
    -				 out->push_front(std::move(*t));
    -			       });
    -      } else {
    -	queue.remove_by_client(k, true);
    -      }
    -
    -      for (auto i = queue_front.rbegin(); i != queue_front.rend(); /* no-inc */) {
    -	if (k == i->first) {
    -	  if (nullptr != out) out->push_front(std::move(i->second));
    -	  i = decltype(i){ queue_front.erase(std::next(i).base()) };
    -	} else {
    -	  ++i;
    -	}
    -      }
    -
    -      for (auto i = high_queue.begin(); i != high_queue.end(); /* no-inc */) {
    -	i->second.remove_by_class(k, out);
    -	if (i->second.empty()) {
    -	  i = high_queue.erase(i);
    -	} else {
    -	  ++i;
    -	}
    -      }
    -    }
    -
    -    void enqueue_strict(K cl, unsigned priority, T&& item) override final {
    -      high_queue[priority].enqueue(cl, 1, std::move(item));
    -    }
    -
    -    void enqueue_strict_front(K cl, unsigned priority, T&& item) override final {
    -      high_queue[priority].enqueue_front(cl, 1, std::move(item));
    -    }
    -
    -    void enqueue(K cl, unsigned priority, unsigned cost, T&& item) override final {
    -      // priority is ignored
    -      queue.add_request(std::move(item), cl, cost);
    -    }
    -
    -    void enqueue_front(K cl,
    -		       unsigned priority,
    -		       unsigned cost,
    -		       T&& item) override final {
    -      queue_front.emplace_front(std::pair<K,T>(cl, std::move(item)));
    -    }
    -
    -    bool empty() const override final {
    -      return queue.empty() && high_queue.empty() && queue_front.empty();
    -    }
    -
    -    T dequeue() override final {
    -      ceph_assert(!empty());
    -
    -      if (!high_queue.empty()) {
    -	T ret = std::move(high_queue.rbegin()->second.front().second);
    -	high_queue.rbegin()->second.pop_front();
    -	if (high_queue.rbegin()->second.empty()) {
    -	  high_queue.erase(high_queue.rbegin()->first);
    -	}
    -	return ret;
    -      }
    -
    -      if (!queue_front.empty()) {
    -	T ret = std::move(queue_front.front().second);
    -	queue_front.pop_front();
    -	return ret;
    -      }
    -
    -      auto pr = queue.pull_request();
    -      ceph_assert(pr.is_retn());
    -      auto& retn = pr.get_retn();
    -      return std::move(*(retn.request));
    -    }
    -
    -    void dump(ceph::Formatter *f) const override final {
    -      f->open_array_section("high_queues");
    -      for (typename SubQueues::const_iterator p = high_queue.begin();
    -	   p != high_queue.end();
    -	   ++p) {
    -	f->open_object_section("subqueue");
    -	f->dump_int("priority", p->first);
    -	p->second.dump(f);
    -	f->close_section();
    -      }
    -      f->close_section();
    -
    -      f->open_object_section("queue_front");
    -      f->dump_int("size", queue_front.size());
    -      f->close_section();
    -
    -      f->open_object_section("queue");
    -      f->dump_int("size", queue.request_count());
    -      f->close_section();
    -    } // dump
    -
    -    void print(std::ostream &os) const final {
    -      os << "mClockPriorityQueue";
    -    }
    -  };
    -
    -} // namespace ceph
    diff --git a/src/common/map_cacher.hpp b/src/common/map_cacher.hpp
    index a83f924b622c..95353425de9e 100644
    --- a/src/common/map_cacher.hpp
    +++ b/src/common/map_cacher.hpp
    @@ -16,6 +16,7 @@
     #define MAPCACHER_H
     
     #include "include/Context.h"
    +#include "include/expected.hpp"
     #include "common/sharedptr_registry.hpp"
     
     namespace MapCacher {
    @@ -85,6 +86,10 @@ class MapCacher {
     public:
       MapCacher(StoreDriver<K, V> *driver) : driver(driver) {}
     
    +  void reset() {
    +    in_progress.reset();
    +  }
    +
       /// Fetch first key/value std::pair after specified key
       int get_next(
         K key,               ///< [in] key after which to get next
    @@ -126,6 +131,50 @@ class MapCacher {
         return -EINVAL;
       } ///< @return error value, 0 on success, -ENOENT if no more entries
     
    +  /// Fetch first key/value std::pair after specified key
    +  struct PosAndData {
    +    K last_key;
    +    V data;
    +  };
    +  using MaybePosAndData = tl::expected<PosAndData, int>;
    +
    +  MaybePosAndData get_1st_after_key(
    +      K key  ///< [in] key after which to get next
    +  )
    +  {
    +    ceph_assert(driver);
    +    while (true) {
    +      std::pair<K, boost::optional<V>> cached;
    +      bool got_cached = in_progress.get_next(key, &cached);
    +
    +      ///\todo a driver->get_next() that returns an expected would be nice
    +      bool got_store{false};
    +      std::pair<K, V> store;
    +      int r = driver->get_next(key, &store);
    +      if (r < 0 && r != -ENOENT) {
    +        return tl::unexpected(r);
    +      } else if (r == 0) {
    +	got_store = true;
    +      }
    +
    +      if (!got_cached && !got_store) {
    +        return tl::unexpected(-ENOENT);
    +      } else if (got_cached && (!got_store || store.first >= cached.first)) {
    +	if (cached.second) {
    +	  return PosAndData{cached.first, *cached.second};
    +	} else {
    +	  key = cached.first;
    +	  continue;  // value was cached as removed, recurse
    +	}
    +      } else {
    +	return PosAndData{store.first, store.second};
    +      }
    +    }
    +    ceph_abort();  // not reachable
    +    return tl::unexpected(-EINVAL);
    +  }
    +
    +
       /// Adds operation setting keys to Transaction
       void set_keys(
         const std::map<K, V> &keys,  ///< [in] keys/values to std::set
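Caller-side sketch of the new expected-based accessor (the key/value types are only an example); the result either carries the next position and value, or the error code from the driver, with -ENOENT marking the end of iteration.

```cpp
#include "common/map_cacher.hpp"
#include "include/buffer.h"
#include "include/ceph_assert.h"

// Walk every entry in key order, starting from before the first key.
void dump_all(MapCacher::MapCacher<std::string, ceph::buffer::list>& cache)
{
  std::string key;
  while (true) {
    auto next = cache.get_1st_after_key(key);
    if (!next.has_value()) {
      ceph_assert(next.error() == -ENOENT);  // anything else is a real error
      break;
    }
    key = next->last_key;
    // ... consume next->data ...
  }
}
```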
    diff --git a/src/common/mempool.cc b/src/common/mempool.cc
    index 79354f708216..4ecfaf81fc66 100644
    --- a/src/common/mempool.cc
    +++ b/src/common/mempool.cc
    @@ -15,9 +15,12 @@
     #include "include/mempool.h"
     #include "include/demangle.h"
     
    +#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
    +#else
     // Thread local variables should save index, not &shard[index],
     // because shard[] is defined in the class
     static thread_local size_t thread_shard_index = mempool::num_shards;
    +#endif
     
     // default to debug_mode off
     bool mempool::debug_mode = false;
    @@ -95,9 +98,21 @@ size_t mempool::pool_t::allocated_items() const
     
     void mempool::pool_t::adjust_count(ssize_t items, ssize_t bytes)
     {
    -  thread_shard_index = (thread_shard_index == num_shards) ? pick_a_shard_int() : thread_shard_index;
    -  shard[thread_shard_index].items += items;
    -  shard[thread_shard_index].bytes += bytes;
    +#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
    +  // the expected path: we always pick the shard for the CPU core
    +  // the thread is executing on.
    +  const size_t shard_index = pick_a_shard_int();
    +#else
    +  // fallback for lack of sched_getcpu()
    +  const size_t shard_index = []() {
    +    if (thread_shard_index == num_shards) {
    +      thread_shard_index = pick_a_shard_int();
    +    }
    +    return thread_shard_index;
    +  }();
    +#endif
    +  shard[shard_index].items += items;
    +  shard[shard_index].bytes += bytes;
     }
     
     void mempool::pool_t::get_stats(
    @@ -113,8 +128,17 @@ void mempool::pool_t::get_stats(
         for (auto &p : type_map) {
           std::string n = ceph_demangle(p.second.type_name);
           stats_t &s = (*by_type)[n];
    +#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
    +      s.bytes = 0;
    +      s.items = 0;
    +      for (size_t i = 0 ; i < num_shards; ++i) {
    +        s.bytes += p.second.shards[i].items * p.second.item_size;
    +        s.items += p.second.shards[i].items;
    +      }
    +#else
           s.bytes = p.second.items * p.second.item_size;
           s.items = p.second.items;
    +#endif
         }
       }
     }
    diff --git a/src/common/mutex_debug.h b/src/common/mutex_debug.h
    index c1a4ff2a4350..d56d0ebee998 100644
    --- a/src/common/mutex_debug.h
    +++ b/src/common/mutex_debug.h
    @@ -169,20 +169,16 @@ class mutex_debug_impl : public mutex_debugging_base
       }
     
       bool try_lock(bool no_lockdep = false) {
    -    bool locked = try_lock_impl();
    -    if (locked) {
    -      if (enable_lockdep(no_lockdep))
    -	_locked();
    -      _post_lock();
    -    }
    -    return locked;
    +    ceph_assert(recursive || !is_locked_by_me());
    +    return _try_lock(no_lockdep);
       }
     
       void lock(bool no_lockdep = false) {
    +    ceph_assert(recursive || !is_locked_by_me());
         if (enable_lockdep(no_lockdep))
           _will_lock(recursive);
     
    -    if (try_lock(no_lockdep))
    +    if (_try_lock(no_lockdep))
           return;
     
         lock_impl();
    @@ -198,6 +194,16 @@ class mutex_debug_impl : public mutex_debugging_base
         unlock_impl();
       }
     
    +private:
    +  bool _try_lock(bool no_lockdep) {
    +    bool locked = try_lock_impl();
    +    if (locked) {
    +      if (enable_lockdep(no_lockdep))
    +	_locked();
    +      _post_lock();
    +    }
    +    return locked;
    +  }
     };
     
     
    diff --git a/src/common/not_before_queue.h b/src/common/not_before_queue.h
    new file mode 100644
    index 000000000000..2bae3fe026c2
    --- /dev/null
    +++ b/src/common/not_before_queue.h
    @@ -0,0 +1,368 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +
    +#pragma once
    +
    +#include <boost/intrusive/set.hpp>
    +
    +#include "include/utime.h"
    +
    +/**
    + * not_before_queue_t
    + *
    + * Implements a generic priority queue with two additional properties:
    + * - Items are not eligible to be dequeued until their not_before value
    + *   is after the current time (see project_not_before and advance_time)
    + * - Items can be dequeued efficiently by removal_class (see
    + *   project_removal_class and remove_by_class)
    + *
    + * User must define the following free functions:
    + *  - bool operator<(const V &lhs, const V &rhs)
    + *  - const T &project_not_before(const V&)
    + *  - const K &project_removal_class(const V&)
    + *
    + * operator< above should be defined such that if lhs is more urgent than
    + * rhs, lhs < rhs evaluates to true.
    + *
    + * project_removal_class returns a reference to a type K used in
    + * remove_by_class.
    + *
    + * project_not_before returns a time value comparable to the time type T.
    + *
    + * V must also have a copy constructor.
    + *
    + * The purpose of this queue implementation is to add a not_before concept
    + * to allow specifying a point in time before which the item will not be
    + * eligible for dequeueing orthogonal to the main priority.  Once that point
    + * is passed, ordering is determined by priority as defined by the operator<
    + * definition.
    + */
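An illustrative (non-source) instantiation showing the three required free functions, assuming the time type defaults to utime_t:

```cpp
#include "common/not_before_queue.h"

// Hypothetical queued item: an eligibility time, an urgency and an owner id.
struct job_t {
  utime_t not_before;
  unsigned priority;  // lower value == more urgent
  int owner;          // used as the removal class
};

inline bool operator<(const job_t& lhs, const job_t& rhs) {
  return lhs.priority < rhs.priority;
}
inline const utime_t& project_not_before(const job_t& j) { return j.not_before; }
inline const int& project_removal_class(const job_t& j) { return j.owner; }

inline void not_before_example() {
  not_before_queue_t<job_t> q;
  q.enqueue(job_t{utime_t(10, 0), 1, 42});  // urgent, but not before t=10
  q.enqueue(job_t{utime_t(0, 0), 2, 42});   // less urgent, eligible immediately

  q.advance_time(utime_t(5, 0));
  auto first = q.dequeue();   // the t=0 job: the t=10 job is still ineligible
  q.advance_time(utime_t(20, 0));
  auto second = q.dequeue();  // now the t=10 job is eligible
}
```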
    +template <typename V, typename T = utime_t>
    +class not_before_queue_t {
    +
    +  enum class status_t {
    +    INVALID,  // Not queued, only possible during construction and destruction
    +    INELIGIBLE,	 // Queued in ineligible_queue
    +    ELIGIBLE	 // Queued in eligible_queue
    +  };
    +
    +  /**
    +   * container_t
    +   *
    +   * Each item has a single container_t.  Every container_t is linked
    +   * into and owned by removal_registry_t.  Additionally, every element
    +   * will be linked into exactly one of ineligible_queue and eligible_queue.
    +   */
    +  struct container_t : boost::intrusive::set_base_hook<> // see removal_registry
    +  {
    +    // see ineligible_queue and eligible_queue
    +    using queue_hook_t = boost::intrusive::set_member_hook<>;
    +    queue_hook_t queue_hook;
    +
    +    status_t status = status_t::INVALID;
    +
    +    const V v;
    +
    +    template <typename... Args>
    +    container_t(Args&&... args) : v(std::forward<Args>(args)...) {}
    +    ~container_t() {
    +      assert(status == status_t::INVALID);
    +    }
    +  };
    +
    +  using queue_hook_option_t = boost::intrusive::member_hook<
    +    container_t,
    +    typename container_t::queue_hook_t,
    +    &container_t::queue_hook>;
    +
    +  /**
    +   * ineligible_queue
    +   *
    +   * - Contained items have project_not_before(v) > current_time.
    +   * - Contained elements have status set to INELIGIBLE.
    +   * - Contained elements are contained and owned by removal_registry_t
    +   * - Uses same hook as and is mutually exclusive with eligible_queue.
    +   */
    +  struct compare_by_nb_t {
    +    bool operator()(const container_t &lhs, const container_t &rhs) const {
    +      return project_not_before(lhs.v) < project_not_before(rhs.v);
    +    }
    +  };
    +  using ineligible_queue_t = boost::intrusive::multiset<
    +    container_t,
    +    queue_hook_option_t,
    +    boost::intrusive::compare<compare_by_nb_t>>;
    +  ineligible_queue_t ineligible_queue;
    +
    +  /**
    +   * eligible_queue
    +   *
    +   * - Contains items where project_not_before(v) <= current_time.
    +   * - Contained elements have status set to ELIGIBLE.
    +   * - Contained elements are contained and owned by removal_registry_t
    +   * - Uses same hook as and is mutually exclusive with ineligible_queue.
    +   */
    +  struct compare_by_user_order_t {
    +    bool operator()(const container_t &lhs, const container_t &rhs) const {
    +      return lhs.v < rhs.v;
    +    }
    +  };
    +  using eligible_queue_t = boost::intrusive::multiset<
    +    container_t,
    +    queue_hook_option_t,
    +    boost::intrusive::compare<compare_by_user_order_t>>;
    +  eligible_queue_t eligible_queue;
    +
    +  /**
    +   * removal_registry_t
    +   *
    +   * - Used to efficiently remove items by removal_class.
    +   * - Contains an entry for every item in not_before_queue_t
    +   *   (ELIGIBLE or INELIGIBLE)
    +   * - Owns every contained item.
    +   */
    +  struct compare_by_removal_class_t {
    +    bool operator()(const container_t &lhs, const container_t &rhs) const {
    +      return project_removal_class(lhs.v) < project_removal_class(rhs.v);
    +    }
    +
     +    template <typename U>
    +    bool operator()(const U &lhs, const container_t &rhs) const {
    +      return lhs < project_removal_class(rhs.v);
    +    }
    +
     +    template <typename U>
    +    bool operator()(const container_t &lhs, const U &rhs) const {
    +      return project_removal_class(lhs.v) < rhs;
    +    }
    +  };
    +  struct removal_registry_disposer_t {
    +    void operator()(container_t *p) { delete p; }
    +  };
    +  using removal_registry_t = boost::intrusive::multiset<
    +    container_t,
     +    boost::intrusive::compare<compare_by_removal_class_t>>;
    +  removal_registry_t removal_registry;
    +
    +  /// current time, see advance_time
    +  T current_time;
    +public:
     +  /// Enqueue an item constructed from args...
     +  template <typename... Args>
     +  void enqueue(Args&&... args) {
     +    auto *item = new container_t(std::forward<Args>(args)...);
    +    removal_registry.insert(*item);
    +
    +    if (project_not_before(item->v) > current_time) {
    +      item->status = status_t::INELIGIBLE;
    +      ineligible_queue.insert(*item);
    +    } else {
    +      item->status = status_t::ELIGIBLE;
    +      eligible_queue.insert(*item);
    +    }
    +  }
    +
     +  /// Dequeue the next item; return std::nullopt if there are no eligible items
     +  std::optional<V> dequeue() {
    +    if (eligible_queue.empty()) {
    +      return std::nullopt;
    +    }
    +
    +    auto iter = eligible_queue.begin();
    +    assert(iter->status == status_t::ELIGIBLE);
    +
    +    eligible_queue.erase(
    +      typename eligible_queue_t::const_iterator(iter));
    +    iter->status = status_t::INVALID;
    +
     +    std::optional<V> ret(iter->v);
    +    removal_registry.erase_and_dispose(
    +      removal_registry_t::s_iterator_to(std::as_const(*iter)),
    +      removal_registry_disposer_t{});
    +    return ret;
    +  }
    +
    +  /// Dequeue 1st eligible item that satisfies pred, std::nullopt if none
     +  template <typename PRED>
     +  std::optional<V> dequeue_by_pred(const PRED& pred) {
    +    auto iter = std::find_if(
    +	eligible_queue.begin(), eligible_queue.end(),
    +	[&pred](const auto &i) { return pred(i.v); });
    +
    +    if (iter == eligible_queue.end()) {
    +      return std::nullopt;
    +    }
    +
    +    assert(iter->status == status_t::ELIGIBLE);
    +    eligible_queue.erase(typename eligible_queue_t::const_iterator(iter));
    +    iter->status = status_t::INVALID;
    +
     +    std::optional<V> ret(iter->v);
    +    removal_registry.erase_and_dispose(
    +	removal_registry_t::s_iterator_to(std::as_const(*iter)),
    +	removal_registry_disposer_t{});
    +    return ret;
    +  }
    +
    +  /**
    +   * advance_time
    +   *
    +   * Advances the eligibility cutoff, argument must be non-decreasing in
    +   * successive calls.
    +   */
    +  void advance_time(T next_time) {
    +    assert(next_time >= current_time);
    +    current_time = next_time;
    +    while (true) {
    +      if (ineligible_queue.empty()) {
    +	break;
    +      }
    +
    +      auto iter = ineligible_queue.begin();
    +      auto &item = *iter;
    +      assert(item.status == status_t::INELIGIBLE);
    +
    +      if (project_not_before(item.v) > current_time) {
    +	break;
    +      }
    +
    +      item.status = status_t::ELIGIBLE;
    +      ineligible_queue.erase(typename ineligible_queue_t::const_iterator(iter));
    +      eligible_queue.insert(item);
    +    }
    +  }
    +
    +  /**
    +   * remove_by_class
    +   *
    +   * Remove all items such that project_removal_class(item) == k
    +   */
     +  template <typename K>
    +  void remove_by_class(const K &k) {
    +    for (auto iter = removal_registry.lower_bound(
    +	   k, compare_by_removal_class_t{});
    +	 iter != removal_registry.upper_bound(
    +	   k, compare_by_removal_class_t{}); ) {
    +      if (iter->status == status_t::INELIGIBLE) {
    +	ineligible_queue.erase(
    +	  ineligible_queue_t::s_iterator_to(std::as_const(*iter)));
    +      } else if (iter->status == status_t::ELIGIBLE) {
    +	eligible_queue.erase(
    +	  eligible_queue_t::s_iterator_to(std::as_const(*iter)));
    +      } else {
    +	assert(0 == "impossible status");
    +      }
    +      iter->status = status_t::INVALID;
    +      removal_registry.erase_and_dispose(
    +	typename removal_registry_t::const_iterator(iter++),
    +	removal_registry_disposer_t{});
    +    }
    +  }
    +
    +  /**
    +   * remove_if_by_class
    +   *
    +   * Remove up to 'max_removed' items for which project_removal_class(item) == k
    +   * AND PRED(item) == true
    +   *
    +   * Returns the number of items removed
    +   */
     +  template <typename K, typename PRED>
    +  int remove_if_by_class(
    +      const K& k,
    +      PRED&& pred,
     +      std::optional<int> max_removed = std::nullopt) {
    +    int removed = 0;
    +    for (auto iter =
    +	     removal_registry.lower_bound(k, compare_by_removal_class_t{});
    +	 iter !=
    +	 removal_registry.upper_bound(k, compare_by_removal_class_t{});) {
    +
    +      if (!pred(iter->v)) {
    +	++iter;
    +	continue;
    +      }
    +
    +      if (iter->status == not_before_queue_t::status_t::INELIGIBLE) {
    +	ineligible_queue.erase(
    +	    ineligible_queue_t::s_iterator_to(std::as_const(*iter)));
    +      } else if (iter->status == not_before_queue_t::status_t::ELIGIBLE) {
    +	eligible_queue.erase(
    +	    eligible_queue_t::s_iterator_to(std::as_const(*iter)));
    +      } else {
    +	assert(0 == "impossible status");
    +      }
    +      iter->status = not_before_queue_t::status_t::INVALID;
    +      removal_registry.erase_and_dispose(
    +	typename removal_registry_t::const_iterator(iter++),
    +	removal_registry_disposer_t{});
    +      removed++;
    +      if (max_removed && removed >= *max_removed) {
    +	break;
    +      }
    +    }
    +    return removed;
    +  }
    +
    +  /**
    +   * accumulate
    +   *
    +   * (mimics std::accumulate() for a binary operator)
    +   * Accumulate (performing a 'left fold') over all entries.  Invokes passed
    +   * function with three params:
    +   * f(acc, v, eligible_for_dequeue);
    +   */
     +  template <typename ACC, typename BOP>
    +  ACC accumulate(BOP&& op) const {
    +    ACC acc;
    +    acc = std::accumulate(
    +	eligible_queue.begin(), eligible_queue.end(), std::move(acc),
    +	[op](ACC&& acc, const auto& i) {
    +	  return op(std::move(acc), i.v, true);
    +	});
    +    acc = std::accumulate(
    +	ineligible_queue.begin(), ineligible_queue.end(), std::move(acc),
    +	[op](ACC&& acc, const auto& i) {
    +	  return op(std::move(acc), i.v, false);
    +	});
    +    return acc;
    +  }
    +
    +  /**
    +   * for_each
    +   *
    +   * Traverse contents of queue.  Invokes passed function with two params:
    +   * f(val, eligible_for_dequeue);
    +   */
     +  template <typename F>
    +  void for_each(F&& f) const {
    +    for (auto&& i : eligible_queue) {
    +      std::invoke(f, i.v, true);
    +    }
    +    for (auto&& i : ineligible_queue) {
    +      std::invoke(f, i.v, false);
    +    }
    +  }
    +
     +  template <typename F>
    +  void for_each_n(F&& f, int up_to) const {
    +    for (auto&& i : eligible_queue) {
    +      if (up_to-- <= 0) {
    +	return;
    +      }
    +      std::invoke(f, i.v, true);
    +    }
    +    for (auto&& i : ineligible_queue) {
    +      if (up_to-- <= 0) {
    +	return;
    +      }
    +      std::invoke(f, i.v, false);
    +    }
    +  }
    +
    +  int total_count() const {
    +    return ineligible_queue.size() + eligible_queue.size();
    +  }
    +
    +  int eligible_count() const { return eligible_queue.size(); }
    +};
    diff --git a/src/common/obj_bencher.cc b/src/common/obj_bencher.cc
    index 32ecc9586188..f5e744e23391 100644
    --- a/src/common/obj_bencher.cc
    +++ b/src/common/obj_bencher.cc
    @@ -99,6 +99,7 @@ ostream& ObjBencher::out(ostream& os)
     }
     
     void *ObjBencher::status_printer(void *_bencher) {
    +  ceph_pthread_setname("OB::stat_print");
        ObjBencher *bencher = static_cast<ObjBencher*>(_bencher);
       bench_data& data = bencher->data;
       Formatter *formatter = bencher->formatter;
    @@ -453,7 +454,6 @@ int ObjBencher::write_bench(int secondsToRun,
       pthread_t print_thread;
     
       pthread_create(&print_thread, NULL, ObjBencher::status_printer, (void *)this);
    -  ceph_pthread_setname(print_thread, "write_stat");
       std::unique_lock locker{lock};
       data.finished = 0;
       data.start_time = mono_clock::now();
    @@ -691,7 +691,6 @@ int ObjBencher::seq_read_bench(
     
       pthread_t print_thread;
       pthread_create(&print_thread, NULL, status_printer, (void *)this);
    -  ceph_pthread_setname(print_thread, "seq_read_stat");
     
       mono_time finish_time = data.start_time + time_to_run;
       //start initial reads
    @@ -903,7 +902,6 @@ int ObjBencher::rand_read_bench(
     
       pthread_t print_thread;
       pthread_create(&print_thread, NULL, status_printer, (void *)this);
    -  ceph_pthread_setname(print_thread, "rand_read_stat");
     
       mono_time finish_time = data.start_time + time_to_run;
       //start initial reads
    diff --git a/src/common/options.cc b/src/common/options.cc
    index a68e2474a3dc..3f6894b01c16 100644
    --- a/src/common/options.cc
    +++ b/src/common/options.cc
    @@ -5,6 +5,7 @@
     #include "options.h"
     #include "common/Formatter.h"
     #include "common/options/build_options.h"
    +#include "common/strtol.h" // for strict_si_cast()
     
     // Helpers for validators
     #include "include/stringify.h"
    diff --git a/src/common/options.h b/src/common/options.h
    index e1d4ec16ed70..ec6db7770c32 100644
    --- a/src/common/options.h
    +++ b/src/common/options.h
    @@ -4,6 +4,7 @@
     #pragma once
     
     #include 
     +#include <iostream> // for std::cerr
     #include 
     #include 
     #include 
    @@ -116,6 +117,18 @@ struct Option {
         }
       }
     
    +  static level_t str_to_level(std::string_view s) {
    +    if (s == "basic") {
    +      return LEVEL_BASIC;
    +    } else if (s == "advanced") {
    +      return LEVEL_ADVANCED;
    +    } else if (s == "dev") {
    +      return LEVEL_DEV;
    +    } else {
    +      return LEVEL_UNKNOWN;
    +    }
    +  }
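     +  // e.g. str_to_level("advanced") returns LEVEL_ADVANCED; any unrecognized
     +  // string maps to LEVEL_UNKNOWN.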
    +
       enum flag_t {
         FLAG_RUNTIME = 0x1,         ///< option can be changed at runtime
         FLAG_NO_MON_UPDATE = 0x2,   ///< option cannot be changed via mon config
    @@ -195,8 +208,8 @@ struct Option {
       typedef std::function validator_fn_t;
       validator_fn_t validator;
     
    -  Option(std::string const &name, type_t t, level_t l)
    -    : name(name), type(t), level(l)
    +  Option(std::string &&name, type_t t, level_t l)
    +    : name(std::move(name)), type(t), level(l)
       {
         // While value_t is nullable (via std::monostate), we don't ever
         // want it set that way in an Option instance: within an instance,
    diff --git a/src/common/options/CMakeLists.txt b/src/common/options/CMakeLists.txt
    index f12a5513a635..fcec49e549ad 100644
    --- a/src/common/options/CMakeLists.txt
    +++ b/src/common/options/CMakeLists.txt
    @@ -104,8 +104,10 @@ add_options(rgw)
     
     add_library(common-options-objs OBJECT
       ${common_options_srcs})
    -add_custom_target(legacy-option-headers
    -  DEPENDS ${legacy_options_headers})
    +add_library(legacy-option-headers INTERFACE)
    +target_sources(legacy-option-headers
    +  PRIVATE
    +    ${legacy_options_headers})
     
     include(AddCephTest)
     add_ceph_test(validate-options
    diff --git a/src/common/options/ceph-exporter.yaml.in b/src/common/options/ceph-exporter.yaml.in
    index 798a185e96bc..c4b24ee43d4b 100644
    --- a/src/common/options/ceph-exporter.yaml.in
    +++ b/src/common/options/ceph-exporter.yaml.in
    @@ -25,6 +25,20 @@ options:
       default: 9926
       services:
       - ceph-exporter
    +- name: exporter_cert_file
    +  type: str
    +  level: advanced
    +  desc: Certificate file for TLS.
    +  default:
    +  services:
    +  - ceph-exporter
    +- name: exporter_key_file
    +  type: str
    +  level: advanced
     +  desc: Private key file for TLS.
    +  default:
    +  services:
    +  - ceph-exporter
     - name: exporter_prio_limit
       type: int
       level: advanced
    diff --git a/src/common/options/cephfs-mirror.yaml.in b/src/common/options/cephfs-mirror.yaml.in
    index 78f86dfb1a76..f826161872b8 100644
    --- a/src/common/options/cephfs-mirror.yaml.in
    +++ b/src/common/options/cephfs-mirror.yaml.in
    @@ -91,4 +91,15 @@ options:
       default: 10
       services:
       - cephfs-mirror
    -  min: 0
    \ No newline at end of file
    +  min: 0
    +- name: cephfs_mirror_perf_stats_prio
    +  type: int
    +  level: advanced
    +  desc: Priority level for mirror daemon replication perf counters
    +  long_desc: The daemon will send perf counter data to the manager daemon if the priority
    +    is not lower than mgr_stats_threshold.
    +  default: 5
    +  services:
    +  - cephfs-mirror
    +  min: 0
    +  max: 11
    diff --git a/src/common/options/crimson.yaml.in b/src/common/options/crimson.yaml.in
    index 1007998fade9..36b7f8bc1e33 100644
    --- a/src/common/options/crimson.yaml.in
    +++ b/src/common/options/crimson.yaml.in
    @@ -6,7 +6,7 @@ options:
       type: uint
       level: advanced
       desc: Number of obcs to cache
    -  default: 10
    +  default: 512
     - name: crimson_osd_scheduler_concurrency
       type: uint
       level: advanced
    @@ -19,17 +19,32 @@ options:
       default: 6
       flags:
       - startup
    -- name: crimson_seastar_smp
    -  type: uint
    +- name: crimson_seastar_cpu_cores
    +  type: str
       level: advanced
    -  desc: Number of seastar reactor threads to use for the osd
    -  default: 1
     +  desc: CPU cores on which seastar reactor threads will run, in cpuset(7) format; smp::count is deduced from this option
       flags:
       - startup
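     +# Editorial note (illustrative): crimson_seastar_cpu_cores takes cpuset(7) list
     +# syntax, e.g. "0-3" or "0-3,7"; the number of reactors (smp::count) is deduced
     +# from the number of cores listed.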
     - name: crimson_alien_thread_cpu_cores
       type: str
       level: advanced
       desc: CPU cores on which alienstore threads will run in cpuset(7) format
    +  flags:
    +  - startup
    +- name: crimson_seastar_num_threads
    +  type: uint
    +  level: advanced
    +  default: 0
    +  desc: The number of threads for serving seastar reactors without CPU pinning, overridden if crimson_seastar_cpu_cores is set
    +  flags:
    +  - startup
    +  min: 0
    +  max: 32
    +- name: crimson_osd_stat_interval
    +  type: int
    +  level: advanced
    +  default: 0
    +  desc: Report OSD status periodically in seconds, 0 to disable
     - name: seastore_segment_size
       type: size
       desc: Segment size to use for SegmentManager
    @@ -77,6 +92,21 @@ options:
       level: dev
       desc: default logical address space reservation for seastore objects' metadata
       default: 16777216
    +# TODO: implement sub-extent checksum and deprecate this configuration.
    +- name: seastore_full_integrity_check
    +  type: bool
    +  level: dev
     +  desc: Whether seastore needs to fully check the integrity of each extent;
     +        a non-full integrity check means the check might be skipped during
     +        extent remapping for better performance. Disable with caution.
    +  default: false
    +# TODO: seastore_max_data_allocation_size should be dropped once the sub-extent
    +#       read/checksum is implemented.
    +- name: seastore_max_data_allocation_size
    +  type: size
    +  level: advanced
    +  desc: Max size in bytes that an extent can be
    +  default: 32_K
     - name: seastore_cache_lru_size
       type: size
       level: advanced
    @@ -87,6 +117,8 @@ options:
       level: advanced
       desc: split extent if ratio of total extent size to write size exceeds this value
       default: 1.25
    +# TODO: seastore_obj_data_write_amplification is no longer correct if 
    +#       seastore_data_delta_based_overwrite is enabled. So, this should be reconsidered.
     - name: seastore_max_concurrent_transactions
       type: uint
       level: advanced
    @@ -117,3 +149,15 @@ options:
       level: advanced
       desc: Begin fast eviction when the used ratio of the main tier reaches this value.
       default: 0.7
    +- name: seastore_data_delta_based_overwrite
    +  type: size
    +  level: dev
     +  desc: Overwrite the existing data block based on a delta if the overwrite size is equal to or less than this value; otherwise overwrite based on remapping. Set to 0 to enforce remap-based overwrite.
    +  default: 0
    +- name: seastore_disable_end_to_end_data_protection 
    +  type: bool
    +  level: dev
     +  desc: When false, upon mkfs, try to discover whether the NVMe device supports
     +        an internal checksum feature without using server CPU, and enable it if
     +        available; set to true to disable unconditionally.
    +  default: true
    diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in
    index 48c6788a88b3..b331601baf6b 100644
    --- a/src/common/options/global.yaml.in
    +++ b/src/common/options/global.yaml.in
    @@ -104,8 +104,8 @@ options:
     - name: public_network_interface
       type: str
       level: advanced
    -  desc: Interface name(s) from which to choose an address from a public_network to
    -    bind to; public_network must also be specified.
    +  desc: Interface name(s) from which to choose an address from a ``public_network`` to
    +    bind to; ``public_network`` must also be specified.
       tags:
       - network
       services:
    @@ -135,8 +135,8 @@ options:
     - name: cluster_network_interface
       type: str
       level: advanced
    -  desc: Interface name(s) from which to choose an address from a cluster_network to
    -    bind to; cluster_network must also be specified.
    +  desc: Interface name(s) from which to choose an address from a ``cluster_network`` to
    +    bind to; ``cluster_network`` must also be specified.
       tags:
       - network
       services:
    @@ -250,6 +250,29 @@ options:
       flags:
       - startup
       with_legacy: true
    +- name: tmp_dir
    +  type: str
    +  level: advanced
    +  desc: path for the 'tmp' directory
    +  default: /tmp
    +  services:
    +  - common
    +  see_also:
    +  - admin_socket
    +  flags:
    +  - runtime
    +- name: tmp_file_template
    +  type: str
    +  level: advanced
    +  desc: Template for temporary files created by daemons for ceph tell commands
    +  long_desc: The template file name prefix for temporary files. For example, temporary files may be created by `ceph tell` commands using the --daemon-output-file switch.
    +  daemon_default: $tmp_dir/$cluster-$name.XXXXXX
    +  services:
    +  - osd
    +  - mds
    +  - mon
    +  flags:
    +  - runtime
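     +# Editorial example (illustrative): with the defaults above and the conventional
     +# cluster name "ceph", tmp_file_template for osd.3 expands to
     +# /tmp/ceph-osd.3.XXXXXX, with the XXXXXX suffix filled in mkstemp-style.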
     - name: admin_socket
       type: str
       level: advanced
    @@ -767,6 +790,24 @@ options:
       level: advanced
       desc: Set the maximum number of session within Qatzip when using QAT compressor
       default: 256
    +- name: qat_compressor_busy_polling
    +  type: bool
    +  level: advanced
     +  desc: Set QAT busy polling to reduce latency at the cost of potentially increasing CPU usage
    +  default: false
    +- name: uadk_compressor_enabled
    +  type: bool
    +  level: advanced
    +  desc: Enable UADK acceleration support for compression if available
    +  default: false
    +  with_legacy: true
    +- name: uadk_wd_sync_ctx_num
    +  type: int
    +  level: advanced
    +  desc: Set the number of instances in the queue
    +  default: 2
    +  min: 2
    +  max: 1024
     - name: plugin_crypto_accelerator
       type: str
       level: advanced
    @@ -1276,6 +1317,23 @@ options:
       desc: Inject a network congestions that stuck with N times operations
       default: 0
       with_legacy: true
    +- name: ms_time_events_min_wait_interval
    +  type: uint
    +  level: dev
    +  desc: In microseconds, msgr-worker's time_events min wait time for epoll_wait timeout
    +  default: 1000
    +  min: 0
    +  max: 60000000
    +  with_legacy: true
    +- name: ms_client_throttle_retry_time_interval
    +  type: uint
    +  level: dev
     +  desc: In microseconds, the time interval a user client waits before the next
     +        retry when the throttle's get_or_fail fails.
    +  default: 5000
    +  min: 1000
    +  max: 60000000
    +  with_legacy: true
     - name: ms_blackhole_osd
       type: bool
       level: dev
    @@ -1715,6 +1773,13 @@ options:
       default: 500
       services:
       - mon
    +- name: mon_max_nvmeof_epochs
    +  type: int
    +  level: advanced
    +  desc: max number of nvmeof gateway maps to store
    +  default: 500
    +  services:
    +  - mon
     - name: mon_max_osd
       type: int
       level: advanced
    @@ -1961,19 +2026,19 @@ options:
       see_also:
       - mon_debug_dump_transactions
       with_legacy: true
    -- name: mon_debug_no_require_quincy
    +- name: mon_debug_no_require_reef
       type: bool
       level: dev
    -  desc: do not set quincy feature for new mon clusters
    +  desc: do not set reef feature for new mon clusters
       default: false
       services:
       - mon
       flags:
       - cluster_create
    -- name: mon_debug_no_require_reef
    +- name: mon_debug_no_require_squid
       type: bool
       level: dev
    -  desc: do not set reef feature for new mon clusters
    +  desc: do not set squid feature for new mon clusters
       default: false
       services:
       - mon
    @@ -2550,6 +2615,18 @@ options:
       - mon
       flags:
       - runtime
    +- name: osd_pool_default_read_ratio
    +  type: uint
    +  level: advanced
    +  desc: Default read ratio (the percent of read IOs out of all IOs) for a pool.
    +  long_desc: Default read ratio (the percent of read IOs out of all IOs) for a pool.
     +    Applicable to replicated pools only. This value is used to improve read balancing
    +    when OSDs have different weights.
    +  default: 70
    +  services:
    +  - mon
    +  flags:
    +  - runtime
     - name: osd_erasure_code_plugins
       type: str
       level: advanced
    @@ -2903,11 +2980,19 @@ options:
       default: 5_min
       with_legacy: true
     # report pg stats for any given pg at least this often
    -- name: osd_pg_stat_report_interval_max
    +- name: osd_pg_stat_report_interval_max_seconds
    +  type: int
    +  level: advanced
     +  desc: How often (in seconds) PG stats should be collected.
    +  with_legacy: false
    +  default: 5
    +- name: osd_pg_stat_report_interval_max_epochs
       type: int
       level: advanced
    +  desc: The maximum number of epochs allowed to pass before PG stats
    +        are collected.
       default: 500
    -  with_legacy: true
    +  with_legacy: false
     # Max number of snap intervals to report to mgr in pg_stat_t
     - name: osd_max_snap_prune_intervals_per_epoch
       type: uint
    @@ -3228,6 +3313,12 @@ options:
       level: dev
       default: false
       with_legacy: true
    +- name: osd_skip_check_past_interval_bounds
    +  type: bool
    +  level: dev
    +  desc: See https://tracker.ceph.com/issues/64002
    +  default: false
    +  with_legacy: true
     - name: osd_debug_pretend_recovery_active
       type: bool
       level: dev
    @@ -3613,12 +3704,9 @@ options:
     - name: osd_requested_scrub_priority
       type: uint
       level: advanced
    -  default: 120
    -  fmt_desc: The priority set for user requested scrub on the work queue.  If
    -    this value were to be smaller than ``osd_client_op_priority`` it
    -    can be boosted to the value of ``osd_client_op_priority`` when
    -    scrub is blocking client operations.
    -  with_legacy: true
    +  default: 5
    +  fmt_desc: deprecated.  Use ``osd_scrub_priority`` instead.
    +  with_legacy: false
     - name: osd_recovery_priority
       type: uint
       level: advanced
    @@ -3718,6 +3806,22 @@ options:
       flags:
       - create
       with_legacy: true
    +- name: osd_objectstore_ideal_list_max
    +  type: uint
    +  level: advanced
    +  desc: The max number of results of ObjectStore::collection_list()
    +  long_desc: This value caps the maximal number of entries a single
    +    call to collection_list() can return. The configurable controls
    +    this aspect of PG deletion and OSD::clear_temp_objects().
     +    Increasing it trades off less aggressive chunking (and thus less
     +    CPU consumption overall) for higher memory pressure.
     +    Please note that in the case of PG deletion the chunking is
     +    steered by std::min of this value and the value of
    +    osd_target_transaction_size.
    +  default: 64
    +  see_also:
    +  - osd_memory_target
    +  with_legacy: true
     # true if LTTng-UST tracepoints should be enabled
     - name: osd_objectstore_tracing
       type: bool
    @@ -3990,15 +4094,26 @@ options:
       default: false
       with_legacy: true
     - name: bdev_enable_discard
    +  desc: send discards to the block device
       type: bool
       level: advanced
       default: false
       with_legacy: true
    -- name: bdev_async_discard
    -  type: bool
    +  flags:
    +  - runtime
    +  see_also:
    +  - bdev_async_discard_threads
    +- name: bdev_async_discard_threads
    +  desc: number of discard threads used to issue discards to the device
    +  type: uint
       level: advanced
    -  default: false
    -  with_legacy: true
    +  default: 0
    +  min: 0
    +  with_legacy: false
    +  flags:
    +  - runtime
    +  see_also:
    +  - bdev_enable_discard
     - name: bdev_flock_retry_interval
       type: float
       level: advanced
    @@ -4106,7 +4221,9 @@ options:
       - bitmap
       - stupid
       - avl
    +  - btree
       - hybrid
    +  - hybrid_btree2
       with_legacy: true
     - name: bluefs_log_replay_check_allocations
       type: bool
    @@ -4305,6 +4422,40 @@ options:
       flags:
       - create
       with_legacy: true
    +- name: bluestore_bdev_label_multi
    +  type: bool
    +  level: advanced
    +  desc: Keep multiple copies of block device label.
    +  long_desc: Having multiple labels is only useful in error conditions.
    +    The label located at offset 0 has been known to be sometimes overwritten by unknown causes,
    +    but without it OSD cannot run.
    +  default: true
    +  flags:
    +  - create
    +  with_legacy: false
    +- name: bluestore_bdev_label_require_all
    +  type: bool
    +  level: advanced
    +  desc: Require all copies to match.
    +  long_desc: Under normal conditions, all copies should be the same.
     +    Clearing this flag allows the OSD to run if at least one of the labels
     +    can be properly read.
    +  default: true
    +  see_also:
    +  - bluestore_bdev_label_multi
    +  flags:
    +  - runtime
    +  with_legacy: false
    +- name: bluestore_bdev_label_multi_upgrade
    +  type: bool
    +  level: advanced
    +  desc: Let repair upgrade to multi label.
     +  long_desc: By default a single label is preserved.
     +    Setting this variable before running fsck repair upgrades a single label into multiple labels.
    +  default: false
    +  flags:
    +  - startup
    +  with_legacy: false
     # whether preallocate space if block/db_path/wal_path is file rather that block device.
     - name: bluestore_block_preallocate_file
       type: bool
    @@ -4386,6 +4537,21 @@ options:
       flags:
       - create
       with_legacy: true
    +- name: bluestore_debug_enforce_min_alloc_size
    +  type: uint
    +  level: dev
    +  desc: Enforces specific min_alloc size usages
    +  long_desc: This overrides actual min_alloc_size value persisted on mkfs
     +    (and originally obtained from bluestore_min_alloc_size) and permits using
     +    an arbitrary value instead. Intended primarily for dev/debug
    +    purposes and should be used with care and deep understanding of potential
    +    consequences, e.g. data corruption.
    +  default: 0
    +  see_also:
    +  - bluestore_min_alloc_size
    +  flags:
    +  - startup
    +  with_legacy: true
     - name: bluestore_use_optimal_io_size_for_min_alloc_size 
       type: bool
       level: advanced
    @@ -4886,6 +5052,29 @@ options:
       flags:
       - create
       with_legacy: false
    +- name: bluestore_write_v2
    +  type: bool
    +  level: advanced
    +  desc: Use faster write path
     +  long_desc: The original write path was developed over a long time by constantly adding features.
     +    The price was layered inefficiencies accumulated along the way.
     +    The write path reworked from scratch clears these and optimizes for typical cases.
     +    Write_v2 is necessary for the recompression feature.
    +  default: false
    +  flags:
    +  - startup
    +  with_legacy: false
    +- name: bluestore_write_v2_random
    +  type: bool
    +  level: advanced
    +  desc: Random selection of write path mode
    +  long_desc: For testing purposes. If true, value of bluestore_write_v2 is randomly selected.
    +  default: false
    +  see_also:
    +  - bluestore_write_v2
    +  flags:
    +  - startup
    +  with_legacy: false
     - name: bluestore_allocator
       type: str
       level: advanced
    @@ -4896,8 +5085,9 @@ options:
       - bitmap
       - stupid
       - avl
    +  - btree
       - hybrid
    -  - zoned
    +  - hybrid_btree2
       with_legacy: true
     - name: bluestore_freelist_blocks_per_key
       type: size
    @@ -4975,10 +5165,17 @@ options:
         [hash_begin..hash_end) defines characters to use for hash calculation. Recommended
         hash ranges: O(0-13) P(0-8) m(0-16). Sharding of S,T,C,M,B prefixes is inadvised'
       fmt_desc: Definition of BlueStore's RocksDB sharding.
    -    The optimal value depends on multiple factors, and modification is invadvisable.
    +    The optimal value depends on multiple factors, and modification is inadvisable.
         This setting is used only when OSD is doing ``--mkfs``.
         Next runs of OSD retrieve sharding from disk.
       default: m(3) p(3,0-12) O(3,0-13)=block_cache={type=binned_lru} L=min_write_buffer_number_to_merge=32 P=min_write_buffer_number_to_merge=32
    +- name: bluestore_async_db_compaction
    +  type: bool
    +  level: dev
    +  desc: Perform DB compaction requests asynchronously
     +  long_desc: 'How to perform DB compactions triggered either through the admin socket or
     +    by the OSD initialization procedure on start.'
    +  default: true
     - name: bluestore_qfsck_on_mount
       type: bool
       level: dev
    @@ -5205,12 +5402,6 @@ options:
       level: dev
       default: false
       with_legacy: true
    -- name: bluestore_debug_prefill
    -  type: float
    -  level: dev
    -  desc: simulate fragmentation
    -  default: 0
    -  with_legacy: true
     - name: bluestore_debug_prefragment_max
       type: size
       level: dev
    @@ -5291,6 +5482,18 @@ options:
       desc: Enable health indication when spurious read errors are observed by OSD
       default: true
       with_legacy: true
    +- name: bluestore_slow_ops_warn_lifetime
    +  type: uint
    +  level: advanced
     +  desc: Duration in seconds over which slow-op occurrences are counted; a warning is raised if the count passes `bluestore_slow_ops_warn_threshold` within this window
    +  default: 86400
    +  with_legacy: true
    +- name: bluestore_slow_ops_warn_threshold
    +  type: uint
    +  level: advanced
     +  desc: Number of slow-op occurrences within `bluestore_slow_ops_warn_lifetime` seconds after which a warning is raised
    +  default: 1
    +  with_legacy: true
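     +# Editorial example (illustrative): bluestore_slow_ops_warn_threshold: 5 with
     +# bluestore_slow_ops_warn_lifetime: 600 raises the warning once 5 or more slow
     +# ops are observed within a 10-minute window.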
     - name: bluestore_fsck_error_on_no_per_pool_omap
       type: bool
       level: advanced
    @@ -5393,6 +5596,11 @@ options:
       level: dev
       desc: Maximum RAM hybrid allocator should use before enabling bitmap supplement
       default: 64_M
    +- name: bluestore_btree2_alloc_weight_factor
    +  type: float
    +  level: dev
    +  desc: Large continuous extents weight factor
    +  default: 2
     - name: bluestore_volume_selection_policy
       type: str
       level: dev
    @@ -5654,12 +5862,6 @@ options:
       level: dev
       default: false
       with_legacy: true
    -- name: filestore_debug_random_read_err
    -  type: float
    -  level: dev
    -  default: 0
    -  with_legacy: true
    -# Expensive debugging check on sync
     - name: filestore_debug_omap_check
       type: bool
       level: dev
    @@ -6186,6 +6388,54 @@ options:
       level: dev
       desc: Time to wait during shutdown to deregister service with mgr
       default: 1
    +- name: mgr_enable_op_tracker
    +  type: bool
    +  level: advanced
    +  desc: Enable / disable MGR Op Tracker
    +  default: true
    +  with_legacy: true
    +- name: mgr_num_op_tracker_shard
    +  type: uint
    +  level: advanced
    +  desc: The number of shards for holding the ops
    +  default: 32
    +  with_legacy: true
    +- name: mgr_op_complaint_time
    +  type: float
    +  level: advanced
    +  default: 30
     +  desc: An operation becomes complaint-worthy after the specified number of seconds has elapsed.
    +  with_legacy: true
    +- name: mgr_op_log_threshold
    +  type: int
    +  level: advanced
    +  default: 5
     +  fmt_desc: How many operation logs to display at once.
    +  with_legacy: true
    +- name: mgr_op_history_size
    +  type: uint
    +  level: advanced
    +  default: 20
    +  fmt_desc: The maximum number of completed operations to track.
    +  with_legacy: true
    +- name: mgr_op_history_duration
    +  type: uint
    +  level: advanced
    +  default: 600
    +  desc: The oldest completed operation to track.
    +  with_legacy: true
    +- name: mgr_op_history_slow_op_size
    +  type: uint
    +  level: advanced
    +  default: 20
    +  desc: Max number of slow ops to track
    +  with_legacy: true
    +- name: mgr_op_history_slow_op_threshold
    +  type: float
    +  level: advanced
    +  default: 10
    +  desc: Duration of an op to be considered as a historical slow op
    +  with_legacy: true
     - name: throttler_perf_counter
       type: bool
       level: advanced
    @@ -6326,7 +6576,18 @@ options:
       - aio
       - spdk
       - pmem
    -  - hm_smr
    +- name: bdev_stalled_read_warn_lifetime
    +  type: uint
    +  level: advanced
     +  desc: Duration in seconds over which stalled-read occurrences are counted; a warning is raised if the count passes `bdev_stalled_read_warn_threshold` within this window
    +  default: 86400
    +  with_legacy: true
    +- name: bdev_stalled_read_warn_threshold
    +  type: uint
    +  level: advanced
     +  desc: Number of stalled-read occurrences within `bdev_stalled_read_warn_lifetime` seconds after which a warning is raised
    +  default: 1
    +  with_legacy: true
     - name: bluestore_cleaner_sleep_interval
       type: float
       level: advanced
    @@ -6357,3 +6618,9 @@ options:
       default: 0
       services:
       - mgr
    +- name: objectstore_debug_throw_on_failed_txc
    +  type: bool
    +  level: dev
    +  desc: Enables exception throwing instead of process abort on transaction submission error.
    +  default: false
    +  with_legacy: false
    diff --git a/src/common/options/mds-client.yaml.in b/src/common/options/mds-client.yaml.in
    index 1f7600dee510..28912cdb129c 100644
    --- a/src/common/options/mds-client.yaml.in
    +++ b/src/common/options/mds-client.yaml.in
    @@ -251,6 +251,14 @@ options:
       default: 0
       services:
       - mds_client
    +- name: client_debug_inject_features
    +  type: str
    +  level: dev
    +  services:
    +  - mds_client
    +  flags:
    +  - startup
    +  with_legacy: true
     - name: client_max_inline_size
       type: size
       level: dev
    diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
    index 2599b6532b5d..94824faef6bc 100644
    --- a/src/common/options/mds.yaml.in
    +++ b/src/common/options/mds.yaml.in
    @@ -74,6 +74,24 @@ options:
       - mds
       flags:
       - runtime
    +- name: mds_cache_quiesce_delay
    +  type: millisecs
    +  level: dev
    +  desc: delay before starting recursive quiesce inode operations
    +  default: 0
    +  services:
    +  - mds
    +  flags:
    +  - runtime
    +- name: mds_cache_quiesce_splitauth
    +  type: bool
    +  level: advanced
    +  desc: allow recursive quiesce across auth boundaries
    +  default: true
    +  services:
    +  - mds
    +  flags:
    +  - runtime
     - name: mds_cache_release_free_interval
       type: secs
       level: dev
    @@ -145,6 +163,33 @@ options:
       - mds
       flags:
       - runtime
    +- name: mds_cache_quiesce_decay_rate
    +  type: float
    +  level: advanced
    +  desc: decay rate for quiescing inodes throttle
    +  default: 1
    +  services:
    +  - mds
    +  flags:
    +  - runtime
    +- name: mds_cache_quiesce_threshold
    +  type: size
    +  level: advanced
    +  desc: threshold for number of inodes that can be quiesced
    +  default: 512_K
    +  services:
    +  - mds
    +  flags:
    +  - runtime
    +- name: mds_cache_quiesce_sleep
    +  type: millisecs
    +  level: advanced
    +  desc: sleep time for request after passing quiesce threshold
    +  default: 200
    +  services:
    +  - mds
    +  flags:
    +  - runtime
     - name: mds_max_file_recover
       type: uint
       level: advanced
    @@ -541,16 +586,6 @@ options:
       min: 1
       services:
       - mds
    -- name: mds_log_major_segment_event_ratio
    -  type: uint
    -  level: advanced
    -  desc: multiple of mds_log_events_per_segment between major segments
    -  default: 12
    -  services:
    -  - mds
    -  min: 1
    -  see_also:
    -  - mds_log_events_per_segment
     # segment size for mds log, default to default file_layout_t
     - name: mds_log_segment_size
       type: size
    @@ -588,7 +623,8 @@ options:
       default: true
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_export_ephemeral_random
       type: bool
       level: advanced
    @@ -645,7 +681,8 @@ options:
       default: 3
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_replicate_threshold
       type: float
       level: advanced
    @@ -655,7 +692,8 @@ options:
       default: 8000
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_unreplicate_threshold
       type: float
       level: advanced
    @@ -665,7 +703,8 @@ options:
       default: 0
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_split_size
       type: int
       level: advanced
    @@ -675,7 +714,8 @@ options:
       default: 10000
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_split_rd
       type: float
       level: advanced
    @@ -685,7 +725,8 @@ options:
       default: 25000
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_split_wr
       type: float
       level: advanced
    @@ -695,7 +736,8 @@ options:
       default: 10000
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_split_bits
       type: int
       level: advanced
    @@ -704,9 +746,10 @@ options:
       default: 3
       services:
       - mds
    +  flags:
    +  - runtime
       min: 1
       max: 24
    -  with_legacy: true
     - name: mds_bal_merge_size
       type: int
       level: advanced
    @@ -716,7 +759,8 @@ options:
       default: 50
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_interval
       type: int
       level: advanced
    @@ -725,6 +769,8 @@ options:
       default: 10
       services:
       - mds
    +  flags:
    +  - runtime
     - name: mds_bal_fragment_interval
       type: int
       level: advanced
    @@ -734,6 +780,8 @@ options:
       default: 5
       services:
       - mds
    +  flags:
    +  - runtime
     # order of magnitude higher than split size
     - name: mds_bal_fragment_size_max
       type: int
    @@ -755,7 +803,8 @@ options:
       default: 1.5
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_fragment_dirs
       type: bool
       level: advanced
    @@ -768,6 +817,8 @@ options:
       default: true
       services:
       - mds
    +  flags:
    +  - runtime
     - name: mds_bal_idle_threshold
       type: float
       level: advanced
    @@ -777,7 +828,8 @@ options:
       default: 0
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_max
       type: int
       level: dev
    @@ -786,7 +838,8 @@ options:
       - mds
       fmt_desc: The number of iterations to run balancer before Ceph stops.
         (used for testing purposes only)
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_max_until
       type: int
       level: dev
    @@ -795,7 +848,8 @@ options:
       - mds
       fmt_desc: The number of seconds to run balancer before Ceph stops.
         (used for testing purposes only)
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_mode
       type: int
       level: dev
    @@ -808,7 +862,8 @@ options:
           - ``0`` = Hybrid.
           - ``1`` = Request rate and latency.
           - ``2`` = CPU load.
    -  with_legacy: true
    +  flags:
    +  - runtime
     # must be this much above average before we export anything
     - name: mds_bal_min_rebalance
       type: float
    @@ -818,7 +873,8 @@ options:
       default: 0.1
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     # must be overloaded for more than these epochs before we export anything
     - name: mds_bal_overload_epochs
       type: int
    @@ -837,7 +893,8 @@ options:
       services:
       - mds
       fmt_desc: The minimum subtree temperature before Ceph searches a subtree.
    -  with_legacy: true
    +  flags:
    +  - runtime
     # take within this range of what we need
     - name: mds_bal_need_min
       type: float
    @@ -846,7 +903,8 @@ options:
       services:
       - mds
       fmt_desc: The minimum fraction of target subtree size to accept.
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_bal_need_max
       type: float
       level: dev
    @@ -854,7 +912,8 @@ options:
       services:
       - mds
       fmt_desc: The maximum fraction of target subtree size to accept.
    -  with_legacy: true
    +  flags:
    +  - runtime
     # any sub bigger than this taken in full
     - name: mds_bal_midchunk
       type: float
    @@ -864,7 +923,8 @@ options:
       - mds
       fmt_desc: Ceph will migrate any subtree that is larger than this fraction
         of the target subtree size.
    -  with_legacy: true
    +  flags:
    +  - runtime
     # never take anything smaller than this
     - name: mds_bal_minchunk
       type: float
    @@ -874,7 +934,8 @@ options:
       - mds
       fmt_desc: Ceph will ignore any subtree that is smaller than this fraction
         of the target subtree size.
    -  with_legacy: true
    +  flags:
    +  - runtime
     # target decay half-life in MDSMap (2x larger is approx. 2x slower)
     - name: mds_bal_target_decay
       type: float
    @@ -883,7 +944,8 @@ options:
       default: 10
       services:
       - mds
    -  with_legacy: true
    +  flags:
    +  - runtime
     - name: mds_oft_prefetch_dirfrags
       type: bool
       level: advanced
    @@ -1061,6 +1123,14 @@ options:
       fmt_desc: Ceph will inject MDS failure in the subtree import code
         (for developers only).
       with_legacy: true
    +- name: mds_kill_dirfrag_at
    +  type: int
    +  level: dev
    +  default: 0
    +  services:
    +  - mds
    +  flags:
    +  - runtime
     - name: mds_kill_link_at
       type: int
       level: dev
    @@ -1127,14 +1197,14 @@ options:
       default: false
       services:
       - mds
    -- name: mds_kill_skip_replaying_inotable
    +- name: mds_kill_after_journal_logs_flushed
       type: bool
       level: dev
       default: false
       services:
       - mds
    -  fmt_desc: Ceph will skip replaying the inotable when replaying the journal, and
    -    the premary MDS will crash, while the replacing MDS won't.
    +  fmt_desc: The primary MDS will crash just after the mknod/openc journal logs
    +    are flushed to the pool.
         (for testing only).
       with_legacy: true
     - name: mds_inject_skip_replaying_inotable
    @@ -1143,8 +1213,7 @@ options:
       default: false
       services:
       - mds
    -  fmt_desc: Ceph will skip replaying the inotable when replaying the journal, and
    -    the premary MDS will crash, while the replacing MDS won't.
    +  fmt_desc: MDS will skip replaying the inotable when replaying the journal logs.
         (for testing only).
       with_legacy: true
     #  percentage of MDS modify replies to skip sending the client a trace on [0-1]
    @@ -1582,11 +1651,22 @@ options:
       long_desc: Laggy OSD(s) can make clients laggy or unresponsive, this can
         lead to their eviction, this option once enabled can help defer client
         eviction.
    -  default: true
    +  default: false
       services:
       - mds
       flags:
       - runtime
    +- name: mds_scrub_stats_review_period
    +  type: uint
    +  level: advanced
    +  desc: Period for which scrub stats will be available for review.
     +  long_desc: Number of days for which scrub stats will be available for review since
     +    the start of the scrub operation. After this period, the stats are automatically purged.
     +    These stats are not saved to disk, so any restart or failover of the MDS
     +    will cause them to be lost.
    +  default: 1
    +  min: 1
    +  max: 60
     - name: mds_session_metadata_threshold
       type: size
       level: advanced
    @@ -1597,3 +1677,66 @@ options:
       - mds
       flags:
       - runtime
    +- name: mds_log_trim_threshold
    +  type: size
    +  level: advanced
    +  desc: MDS log trim threshold
     +  long_desc: The threshold for the number of log segments that can be trimmed.
    +  default: 128
    +  min: 1
    +  services:
    +  - mds
    +  see_also:
    +  - mds_log_max_events
    +  - mds_log_max_segments
    +  flags:
    +  - runtime
    +- name: mds_log_trim_decay_rate
    +  type: float
    +  level: advanced
    +  desc: MDS log trim decay rate
    +  long_desc: The decay rate for trimming the MDS log. Increasing this value leads to the MDS spending less time in trimming the log.
    +  default: 1.0
    +  min: 0.01
    +  services:
    +  - mds
    +  see_also:
    +  - mds_log_max_events
    +  - mds_log_max_segments
    +  flags:
    +  - runtime
    +- name: mds_log_trim_upkeep_interval
    +  type: millisecs
    +  level: advanced
    +  desc: MDS log trimming interval
    +  long_desc: Interval in milliseconds to trim MDS logs.
    +  default: 1000
    +  services:
    +  - mds
    +  flags:
    +  - runtime
    +- name: mds_server_dispatch_killpoint_random
    +  type: float
    +  level: dev
    +  default: 0.0
    +  services:
    +  - mds
    +  flags:
    +  - runtime
    +- name: mds_server_dispatch_client_request_delay
    +  type: millisecs
    +  level: dev
    +  default: 0
    +  services:
    +  - mds
    +  flags:
    +  - runtime
    +- name: mds_log_minor_segments_per_major_segment
    +  type: uint
    +  level: advanced
    +  desc: number of minor segments per major segment.
     +  long_desc: The number of minor mds log segments since the last major segment after which a major segment is started/logged.
    +  default: 16
    +  services:
    +  - mds
    +  min: 8
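     +# Editorial note (illustrative): with the default of 16, a new major segment is
     +# started after every 16 minor segments are logged.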
    diff --git a/src/common/options/mgr.yaml.in b/src/common/options/mgr.yaml.in
    index 7d7b68035b7d..5095710afdff 100644
    --- a/src/common/options/mgr.yaml.in
    +++ b/src/common/options/mgr.yaml.in
    @@ -103,6 +103,13 @@ options:
       services:
       - mgr
       with_legacy: true
    +- name: mgr_max_pg_creating
    +  type: uint
    +  level: advanced
    +  desc: bound on max creating pgs when acting to create more pgs
    +  default: 1024
    +  services:
    +  - mgr
     - name: mgr_module_path
       type: str
       level: advanced
    @@ -145,7 +152,7 @@ options:
         first started after installation, to populate the list of enabled manager modules.  Subsequent
         updates are done using the 'mgr module [enable|disable]' commands.  List may be
         comma or space separated.
    -  default: restful iostat nfs
    +  default: iostat nfs
       services:
       - mon
       - common
    @@ -285,6 +292,15 @@ options:
       default: true
       services:
       - mgr
    +- name: mon_warn_on_pool_no_app_grace
    +  type: secs
    +  level: dev
    +  desc: time after which POOL_APP_NOT_ENABLED health warning is issued
    +  default: 5_min
    +  services:
    +  - mgr
    +  see_also:
    +  - mon_warn_on_pool_no_app
     - name: mon_warn_on_too_few_osds
       type: bool
       level: advanced
    diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in
    index ff8813c982f9..ab1634bc154b 100644
    --- a/src/common/options/mon.yaml.in
    +++ b/src/common/options/mon.yaml.in
    @@ -55,6 +55,15 @@ options:
       default: 1_min
       services:
       - mon
    +- name: mon_down_uptime_grace
    +  type: secs
    +  level: advanced
    +  desc: Period in seconds that the cluster may have a mon down after this (leader) monitor comes up.
    +  default: 1_min
    +  services:
    +  - mon
    +  flags:
    +  - runtime
     - name: mon_mgr_beacon_grace
       type: secs
       level: advanced
    @@ -63,6 +72,25 @@ options:
       default: 30
       services:
       - mon
    +- name: mon_nvmeofgw_beacon_grace
    +  type: secs
    +  level: advanced
     +  desc: Period in seconds from the last beacon until the monitor marks an NVMeoF gateway as
    +    failed
    +  default: 10
    +  services:
    +  - mon
    +- name: mon_nvmeofgw_set_group_id_retry
    +  type: uint
    +  level: advanced
     +  desc: Retry wait time in microseconds for setting the group ID between the monitor client
     +    and the gateway
    +  long_desc: The monitor server determines the gateway's group ID. If the monitor client
    +    receives a monitor group ID assignment before the gateway is fully up during
    +    initialization, a retry is required.
    +  default: 1000
    +  services:
    +  - mon
     - name: mon_mgr_inactive_grace
       type: int
       level: advanced
    @@ -112,18 +140,6 @@ options:
       flags:
       - runtime
       with_legacy: true
    -- name: mon_cluster_log_to_syslog_level
    -  type: str
    -  level: advanced
    -  desc: Syslog level for cluster log messages
    -  default: info
    -  services:
    -  - mon
    -  see_also:
    -  - mon_cluster_log_to_syslog
    -  flags:
    -  - runtime
    -  with_legacy: true
     - name: mon_cluster_log_to_syslog_facility
       type: str
       level: advanced
    @@ -172,10 +188,12 @@ options:
       flags:
       - runtime
       with_legacy: true
    -- name: mon_cluster_log_file_level
    +- name: mon_cluster_log_level
       type: str
       level: advanced
    -  desc: Lowest level to include is cluster log file
    +  desc: Lowest level to include in cluster log file and/or in external log server
    +  long_desc: Log level to control the cluster log message verbosity for the cluster
    +    log file as well as for all external entities.
       default: debug
       services:
       - mon
    @@ -779,6 +797,18 @@ options:
       services:
       - mon
       with_legacy: true
    +- name: mon_fsmap_prune_threshold
    +  type: secs
    +  level: advanced
    +  desc: prune fsmap older than this threshold in seconds
     +  fmt_desc: The monitors keep historical fsmaps in memory to optimize queries about
     +    when an MDS daemon was last seen in the FSMap. This option controls
    +    how far back in time the monitors will look.
    +  default: 300
    +  flags:
    +  - runtime
    +  services:
    +  - mon
     - name: mds_beacon_mon_down_grace
       type: secs
       level: advanced
    @@ -1249,14 +1279,6 @@ options:
       services:
       - mon
       with_legacy: true
    -- name: mon_osd_max_creating_pgs
    -  type: int
    -  level: advanced
    -  desc: maximum number of PGs the mon will create at once
    -  default: 1024
    -  services:
    -  - mon
    -  with_legacy: true
     - name: mon_osd_max_initial_pgs
       type: int
       level: advanced
    @@ -1338,3 +1360,18 @@ options:
       with_legacy: true
       see_also:
       - osd_heartbeat_use_min_delay_socket
    +- name: nvmeof_mon_client_disconnect_panic
    +  type: secs
    +  level: advanced
    +  desc: The duration, expressed in seconds, after which the nvmeof gateway
    +    should trigger a panic if it loses connection to the monitor
    +  default: 100
    +  services:
    +  - mon
    +- name: nvmeof_mon_client_tick_period
    +  type: secs
    +  level: advanced
    +  desc: Period in seconds of nvmeof gateway beacon messages to monitor
    +  default: 2
    +  services:
    +  - mon
    diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in
    index 5d8d40cf12d1..49099f42b716 100644
    --- a/src/common/options/osd.yaml.in
    +++ b/src/common/options/osd.yaml.in
    @@ -58,7 +58,10 @@ options:
         in recovery and 1 shard of another recovering PG.
       fmt_desc: The maximum number of backfills allowed to or from a single OSD.
         Note that this is applied separately for read and write operations.
    +    This setting is automatically reset when the mClock scheduler is used.
       default: 1
    +  see_also:
    +  - osd_mclock_override_recovery_settings
       flags:
       - runtime
       with_legacy: true
    @@ -95,6 +98,7 @@ options:
       fmt_desc: Time in seconds to sleep before the next recovery or backfill op.
         Increasing this value will slow down recovery operation while
         client operations will be less impacted.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0
       flags:
       - runtime
    @@ -105,6 +109,7 @@ options:
       desc: Time in seconds to sleep before next recovery or backfill op for HDDs
       fmt_desc: Time in seconds to sleep before next recovery or backfill op
         for HDDs.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0.1
       flags:
       - runtime
    @@ -115,6 +120,7 @@ options:
       desc: Time in seconds to sleep before next recovery or backfill op for SSDs
       fmt_desc: Time in seconds to sleep before the next recovery or backfill op
         for SSDs.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0
       see_also:
       - osd_recovery_sleep
    @@ -128,6 +134,7 @@ options:
         on HDD and journal is on SSD
       fmt_desc: Time in seconds to sleep before the next recovery or backfill op
         when OSD data is on HDD and OSD journal / WAL+DB is on SSD.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0.025
       see_also:
       - osd_recovery_sleep
    @@ -141,6 +148,7 @@ options:
       fmt_desc: Time in seconds to sleep before next snap trim op.
         Increasing this value will slow down snap trimming.
         This option overrides backend specific variants.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0
       flags:
       - runtime
    @@ -149,6 +157,7 @@ options:
       type: float
       level: advanced
       desc: Time in seconds to sleep before next snap trim for HDDs
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 5
       flags:
       - runtime
    @@ -158,6 +167,7 @@ options:
       desc: Time in seconds to sleep before next snap trim for SSDs
       fmt_desc: Time in seconds to sleep before next snap trim op
         for SSD OSDs (including NVMe).
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0
       flags:
       - runtime
    @@ -168,6 +178,7 @@ options:
         is on SSD
       fmt_desc: Time in seconds to sleep before next snap trim op
         when OSD data is on an HDD and the OSD journal or WAL+DB is on an SSD.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 2
       flags:
       - runtime
    @@ -182,6 +193,7 @@ options:
       desc: Maximum concurrent scrubs on a single OSD
       fmt_desc: The maximum number of simultaneous scrub operations for
         a Ceph OSD Daemon.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 3
       with_legacy: true
     - name: osd_scrub_during_recovery
    @@ -194,6 +206,11 @@ options:
         load on busy clusters.
       default: false
       with_legacy: true
    +- name: osd_debug_trim_objects
    +  type: bool
    +  level: advanced
    +  desc: Asserts that no clone-objects were added to a snap after we start trimming it
    +  default: false
     - name: osd_repair_during_recovery
       type: bool
       level: advanced
    @@ -207,11 +224,8 @@ options:
       long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
       fmt_desc: This restricts scrubbing to this hour of the day or later.
         Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0``
    -    to allow scrubbing the entire day.  Along with ``osd_scrub_end_hour``, they define a time
    -    window, in which the scrubs can happen.
    -    But a scrub will be performed
    -    no matter whether the time window allows or not, as long as the placement
    -    group's scrub interval exceeds ``osd_scrub_max_interval``.
     +    to allow scrubbing the entire day.  Along with ``osd_scrub_end_hour``, they define a time
     +    window, and periodic scrubs will be initiated only within this window.
       default: 0
       see_also:
       - osd_scrub_end_hour
    @@ -223,12 +237,10 @@ options:
       level: advanced
       desc: Restrict scrubbing to hours of the day earlier than this
       long_desc: Use osd_scrub_begin_hour=0 and osd_scrub_end_hour=0 for the entire day.
    -  fmt_desc: This restricts scrubbing to the hour earlier than this.
    +  fmt_desc: This restricts scrubbing to the hours earlier than this.
         Use ``osd_scrub_begin_hour = 0`` and ``osd_scrub_end_hour = 0`` to allow scrubbing
         for the entire day.  Along with ``osd_scrub_begin_hour``, they define a time
    -    window, in which the scrubs can happen. But a scrub will be performed
    -    no matter whether the time window allows or not, as long as the placement
    -    group's scrub interval exceeds ``osd_scrub_max_interval``.
     +    window, and periodic scrubs can be automatically initiated only within this window.
       default: 0
       see_also:
       - osd_scrub_begin_hour
    @@ -245,9 +257,7 @@ options:
         0  = Sunday, 1 = Monday, etc. Use ``osd_scrub_begin_week_day = 0``
         and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
         Along with ``osd_scrub_end_week_day``, they define a time window in which
    -    scrubs can happen. But a scrub will be performed
    -    no matter whether the time window allows or not, when the PG's
    -    scrub interval exceeds ``osd_scrub_max_interval``.
    +    periodic scrubs can be automatically initiated.
       default: 0
       see_also:
       - osd_scrub_end_week_day
    @@ -264,9 +274,7 @@ options:
         0 = Sunday, 1 = Monday, etc.  Use ``osd_scrub_begin_week_day = 0``
         and ``osd_scrub_end_week_day = 0`` to allow scrubbing for the entire week.
         Along with ``osd_scrub_begin_week_day``, they define a time
    -    window, in which the scrubs can happen. But a scrub will be performed
    -    no matter whether the time window allows or not, as long as the placement
    -    group's scrub interval exceeds ``osd_scrub_max_interval``.
    +    window, in which periodic scrubs can be automatically initiated.
       default: 0
       see_also:
       - osd_scrub_begin_week_day
    @@ -277,8 +285,9 @@ options:
       type: float
       level: advanced
       desc: Allow scrubbing when system load divided by number of CPUs is below this value
    -  fmt_desc: The normalized maximum load. Ceph will not scrub when the system load
    -    (as defined by ``getloadavg() / number of online CPUs``) is higher than this number.
    +  fmt_desc: The normalized maximum load. Ceph will not initiate periodic (regular)
    +    scrubs when the system load (as defined by ``getloadavg() / number of online CPUs``)
    +    is higher than this number.
         Default is ``0.5``.
       default: 0.5
       with_legacy: true
    @@ -287,8 +296,7 @@ options:
       type: float
       level: advanced
       desc: The desired interval between scrubs of a specific PG.
    -  fmt_desc: The desired interval in seconds between scrubs of a specific PG
    -    when the Ceph Storage Cluster load is low.
    +  fmt_desc: The desired interval in seconds between scrubs of a specific PG.
       default: 1_day
       see_also:
       - osd_scrub_max_interval
    @@ -298,8 +306,7 @@ options:
       type: float
       level: advanced
       desc: Scrub each PG no less often than this interval
    -  fmt_desc: The maximum interval in seconds for scrubbing the Ceph OSD Daemon
    -    irrespective of cluster load.
    +  fmt_desc: The maximum interval in seconds for scrubbing each PG.
       default: 7_day
       see_also:
       - osd_scrub_min_interval
    @@ -310,7 +317,7 @@ options:
       level: advanced
       desc: Ratio of scrub interval to randomly vary
       long_desc: This prevents a scrub 'stampede' by randomly varying the scrub intervals
    -    so that they are soon uniformly distributed over the week
    +    so that they are uniformly distributed over time.
       fmt_desc: Add a random delay to ``osd_scrub_min_interval`` when scheduling
         the next scrub job for a PG. The delay is a random
         value less than ``osd_scrub_min_interval`` \*
    @@ -339,16 +346,18 @@ options:
       default: 5
       see_also:
       - osd_scrub_chunk_max
    -  with_legacy: true
    +  with_legacy: false
     - name: osd_scrub_chunk_max
       type: int
       level: advanced
       desc: Maximum number of objects to deep-scrub in a single chunk
    -  fmt_desc: The maximum number of object store chunks to scrub during single operation.
    -  default: 25
    +  fmt_desc: The maximum number of objects to deep-scrub during single internal
    +    scrub operation. Large values would improve scrubbing performance but
    +    may adversely affect client operations' latency.
    +  default: 15
       see_also:
       - osd_scrub_chunk_min
    -  with_legacy: true
    +  with_legacy: false
     - name: osd_shallow_scrub_chunk_min
       type: int
       level: advanced
    @@ -360,7 +369,7 @@ options:
       see_also:
       - osd_shallow_scrub_chunk_max
       - osd_scrub_chunk_min
    -  with_legacy: true
    +  with_legacy: false
     - name: osd_shallow_scrub_chunk_max
       type: int
       level: advanced
    @@ -371,14 +380,16 @@ options:
       see_also:
       - osd_shallow_scrub_chunk_min
       - osd_scrub_chunk_max
    -  with_legacy: true
    +  with_legacy: false
     # sleep between [deep]scrub ops
     - name: osd_scrub_sleep
       type: float
       level: advanced
    -  desc: Duration to inject a delay during scrubbing
    -  fmt_desc: Time to sleep before scrubbing the next group of chunks (seconds). Increasing this value will slow
    -    down the overall rate of scrubbing so that client operations will be less impacted.
    +  desc: Duration (in seconds) of delay injected between chunks when scrubbing
    +  fmt_desc: Sleep time in seconds before scrubbing the next group of objects (the next chunk).
    +    Increasing this value will slow down the overall rate of scrubbing, reducing scrub
    +    impact on client operations.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0
       flags:
       - runtime
    @@ -387,7 +398,13 @@ options:
     - name: osd_scrub_extended_sleep
       type: float
       level: advanced
    -  desc: Duration to inject a delay during scrubbing out of scrubbing hours (seconds)
    +  desc: Duration (in seconds) of delay injected between chunks when scrubbing out
    +    of scrubbing hours
    +  fmt_desc: Sleep time in seconds before scrubbing the next group of objects (the next chunk).
    +    This configuration value is used for scrubbing out of scrubbing hours.
    +    Increasing this value will slow down the overall rate of scrubbing, reducing scrub
    +    impact on client operations.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0
       see_also:
       - osd_scrub_begin_hour
    @@ -427,17 +444,32 @@ options:
       type: float
       level: advanced
       desc: Deep scrub each PG (i.e., verify data checksums) at least this often
    -  fmt_desc: The interval for "deep" scrubbing (fully reading all data). The
    -    ``osd_scrub_load_threshold`` does not affect this setting.
    +  fmt_desc: The interval for "deep" scrubbing (fully reading all data).
       default: 7_day
       with_legacy: true
    +- name: osd_deep_scrub_interval_cv
    +  type: float
    +  level: advanced
     +  desc: determines the amount of variation in the deep scrub interval
    +  long_desc: deep scrub intervals are varied by a random amount to prevent
    +    stampedes. This parameter determines the amount of variation.
    +    Technically - osd_deep_scrub_interval_cv is the coefficient of variation for
    +    the deep scrub interval.
    +  fmt_desc: The coefficient of variation for the deep scrub interval, specified as a
    +    ratio. On average, the next deep scrub for a PG is scheduled osd_deep_scrub_interval
     +    after the last deep scrub. The actual time is randomized to a normal distribution
    +    with a standard deviation of osd_deep_scrub_interval * osd_deep_scrub_interval_cv
    +    (clamped to within 2 standard deviations).
    +    The default value guarantees that 95% of the deep scrubs will be scheduled in the range
    +    [0.8 * osd_deep_scrub_interval, 1.2 * osd_deep_scrub_interval].
    +  min: 0
    +  max: 0.4
    +  default: 0.2
    +  with_legacy: false
     - name: osd_deep_scrub_randomize_ratio
       type: float
       level: advanced
    -  desc: Scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs
    -    are deep)
    -  long_desc: This prevents a deep scrub 'stampede' by spreading deep scrubs so they
    -    are uniformly distributed over the week
    +  desc: deprecated. Has no effect.
       default: 0.15
       with_legacy: true
     - name: osd_deep_scrub_stride
    @@ -507,28 +539,79 @@ options:
         stats (inc. scrub/block duration) every this many seconds.
       default: 120
       with_legacy: false
    -- name: osd_scrub_slow_reservation_response
    -  type: millisecs
    -  level: advanced
    -  desc: Maximum wait (milliseconds) for scrub reservations before issuing a cluster-log warning
    -  long_desc: Waiting too long for a replica to respond to scrub resource reservation request
    -   (after at least half of the replicas have responded). Disable by setting to a very large value.
    -  default: 2200
    -  min: 500
    +- name: osd_scrub_retry_delay
    +  type: int
    +  level: advanced
    +  desc: Period (in seconds) before retrying a PG that has failed a prior scrub.
    +  long_desc: Minimum delay after a failed attempt to scrub a PG. The delay is
    +    either applied to one of the scheduled scrubs for the PG (the next shallow
    +    scrub or the next deep scrub), or to both.
    +    This is a default value, used when the cause of the delay does not have an
    +    associated configuration option. See the 'see also' for the configuration
    +    options for some delay reasons that have their own configuration.
    +  default: 30
    +  min: 1
    +  see_also:
    +  - osd_scrub_retry_pg_state
    +  - osd_scrub_retry_after_noscrub
    +  - osd_scrub_retry_new_interval
    +  - osd_scrub_retry_trimming
    +  with_legacy: false
    +- name: osd_scrub_retry_after_noscrub
    +  type: int
    +  level: advanced
    +  desc: Period (in seconds) before retrying to scrub a PG at a specific level
    +    after detecting a no-scrub or no-deep-scrub flag
    +  long_desc: Minimum delay after a failed attempt to scrub a PG at a level
    +    (shallow or deep) that is disabled by cluster or pool no-scrub or no-deep-scrub
    +    flags.
    +  default: 60
    +  min: 1
    +  see_also:
    +  - osd_scrub_retry_delay
    +  with_legacy: false
    +- name: osd_scrub_retry_pg_state
    +  type: int
    +  level: advanced
    +  desc: Period (in seconds) before retrying to scrub a previously inactive/not-clean PG
    +  long_desc: Minimum delay after a failed attempt to scrub a PG that is not
    +    active and clean.
    +  default: 60
    +  min: 1
       see_also:
    -  - osd_scrub_reservation_timeout
    +  - osd_scrub_retry_delay
       with_legacy: false
    -- name: osd_scrub_reservation_timeout
    -  type: millisecs
    -  level: advanced
    -  desc: Maximum wait (milliseconds) for replicas' response to scrub reservation requests
    -  long_desc: Maximum wait (milliseconds) for all replicas to respond to
    -    scrub reservation requests, before the scrub session is aborted. Disable by setting
    -    to a very large value.
    -  default: 5000
    -  min: 2000
    +- name: osd_scrub_retry_trimming
    +  type: int
    +  level: advanced
    +  desc: Period (in seconds) before retrying to scrub a previously snap-trimming PG
    +  long_desc: Minimum delay after a failed attempt to scrub a PG that was performing
    +    snap trimming and not available for scrubbing.
    +  default: 10
    +  min: 1
       see_also:
    -  - osd_scrub_slow_reservation_response
    +  - osd_scrub_retry_delay
    +  with_legacy: false
    +- name: osd_scrub_retry_new_interval
    +  type: int
    +  level: advanced
    +  desc: Period (in seconds) before retrying a scrub aborted on a new interval
    +  long_desc: Minimum delay before retrying, after a scrub was aborted as the
    +    PG interval changed.
    +  default: 10
    +  min: 1
    +  see_also:
    +  - osd_scrub_retry_delay
    +  with_legacy: false
    +- name: osd_scrub_disable_reservation_queuing
    +  type: bool
    +  level: advanced
    +  desc: Disable queuing of scrub reservations
     +  long_desc: When set, scrub replica reservations are responded to immediately, with
    +    either success or failure (the pre-Squid version behaviour). This configuration
    +    option is introduced to support mixed-version clusters and debugging, and will
    +    be removed in the next release.
    +  default: false
       with_legacy: false
     # where rados plugins are stored
     - name: osd_class_dir
    @@ -834,6 +917,9 @@ options:
     - name: osd_op_num_threads_per_shard
       type: int
       level: advanced
    +  fmt_desc: The number of worker threads spawned per OSD shard for a given OSD.
    +    Each worker thread when operational processes items in the shard queue.
    +    This setting overrides _ssd and _hdd if non-zero.
       default: 0
       flags:
       - startup
    @@ -841,7 +927,9 @@ options:
     - name: osd_op_num_threads_per_shard_hdd
       type: int
       level: advanced
    -  default: 1
    +  fmt_desc: The number of worker threads spawned per OSD shard for a given OSD
    +    (for rotational media).
    +  default: 5
       see_also:
       - osd_op_num_threads_per_shard
       flags:
    @@ -850,6 +938,8 @@ options:
     - name: osd_op_num_threads_per_shard_ssd
       type: int
       level: advanced
    +  fmt_desc: The number of worker threads spawned per OSD shard for a given OSD
    +    (for solid state media).
       default: 2
       see_also:
       - osd_op_num_threads_per_shard
    @@ -870,7 +960,7 @@ options:
       type: int
       level: advanced
       fmt_desc: the number of shards allocated for a given OSD (for rotational media).
    -  default: 5
    +  default: 1
       see_also:
       - osd_op_num_shards
       flags:
    @@ -892,13 +982,13 @@ options:
       desc: Do not store full-object checksums if the backend (bluestore) does its own
         checksums.  Only usable with all BlueStore OSDs.
       default: false
    -# PrioritzedQueue (prio), Weighted Priority Queue (wpq ; default),
    -# mclock_opclass, mclock_client, or debug_random. "mclock_opclass"
    -# and "mclock_client" are based on the mClock/dmClock algorithm
    -# (Gulati, et al. 2010). "mclock_opclass" prioritizes based on the
    -# class the operation belongs to. "mclock_client" does the same but
    -# also works to ienforce fairness between clients. "debug_random"
    -# chooses among all four with equal probability.
    +# Weighted Priority Queue (wpq), mClock Scheduler (mclock_scheduler: default)
    +# or debug_random. "mclock_scheduler" is based on the mClock/dmClock
    +# algorithm (Gulati, et al. 2010). "mclock_scheduler" prioritizes based on
    +# the class the operation belongs to. "wpq" dequeues ops based on their
    +# priorities. "debug_random" chooses among the two with equal probability.
     +# Note: PrioritizedQueue (prio) implementation is not used for scheduling ops
    +# within OSDs and is therefore not listed.
     - name: osd_op_queue
       type: str
       level: advanced
    @@ -1205,12 +1295,33 @@ options:
       level: basic
       desc: The threshold IOPs capacity (at 4KiB block size) beyond which to ignore
         the OSD bench results for an OSD (for rotational media)
    -  long_desc: This option specifies the threshold IOPS capacity for an OSD under
    -    which the OSD bench results can be considered for QoS calculations. Only
    -    considered for osd_op_queue = mclock_scheduler
    +  long_desc: This option specifies the high threshold IOPS capacity for an OSD
    +    below which the OSD bench results can be considered for QoS calculations.
    +    Only considered when osd_op_queue = mclock_scheduler
       fmt_desc: The threshold IOPS capacity (at 4KiB block size) beyond which to
    -    ignore OSD bench results for an OSD (for rotational media)
    +    ignore OSD bench results for an OSD (for rotational media) and fall back to
    +    the last valid or default IOPS capacity defined by
    +    ``osd_mclock_max_capacity_iops_hdd``.
       default: 500
    +  see_also:
    +  - osd_mclock_max_capacity_iops_hdd
    +  flags:
    +  - runtime
    +- name: osd_mclock_iops_capacity_low_threshold_hdd
    +  type: float
    +  level: basic
    +  desc: The threshold IOPs capacity (at 4KiB block size) below which to ignore
    +    the OSD bench results for an OSD (for rotational media)
    +  long_desc: This option specifies the low threshold IOPS capacity of an OSD
    +    above which the OSD bench results can be considered for QoS calculations.
    +    Only considered when osd_op_queue = mclock_scheduler
    +  fmt_desc: The threshold IOPS capacity (at 4KiB block size) below which to
    +    ignore OSD bench results for an OSD (for rotational media) and fall back to
    +    the last valid or default IOPS capacity defined by
    +    ``osd_mclock_max_capacity_iops_hdd``.
    +  default: 50
    +  see_also:
    +  - osd_mclock_max_capacity_iops_hdd
       flags:
       - runtime
     - name: osd_mclock_iops_capacity_threshold_ssd
    @@ -1218,12 +1329,33 @@ options:
       level: basic
       desc: The threshold IOPs capacity (at 4KiB block size) beyond which to ignore
         the OSD bench results for an OSD (for solid state media)
    -  long_desc: This option specifies the threshold IOPS capacity for an OSD under
    -    which the OSD bench results can be considered for QoS calculations. Only
    -    considered for osd_op_queue = mclock_scheduler
    +  long_desc: This option specifies the high threshold IOPS capacity for an OSD
    +    below which the OSD bench results can be considered for QoS calculations.
    +    Only considered when osd_op_queue = mclock_scheduler
       fmt_desc: The threshold IOPS capacity (at 4KiB block size) beyond which to
    -    ignore OSD bench results for an OSD (for solid state media)
    +    ignore OSD bench results for an OSD (for solid state media) and fall back to
    +    the last valid or default IOPS capacity defined by
    +    ``osd_mclock_max_capacity_iops_ssd``.
       default: 80000
    +  see_also:
    +  - osd_mclock_max_capacity_iops_ssd
    +  flags:
    +  - runtime
    +- name: osd_mclock_iops_capacity_low_threshold_ssd
    +  type: float
    +  level: basic
    +  desc: The threshold IOPs capacity (at 4KiB block size) below which to ignore
    +    the OSD bench results for an OSD (for solid state media)
    +  long_desc: This option specifies the low threshold IOPS capacity for an OSD
    +    above which the OSD bench results can be considered for QoS calculations.
    +    Only considered when osd_op_queue = mclock_scheduler
    +  fmt_desc: The threshold IOPS capacity (at 4KiB block size) below which to
    +    ignore OSD bench results for an OSD (for solid state media) and fall back to
    +    the last valid or default IOPS capacity defined by
    +    ``osd_mclock_max_capacity_iops_ssd``.
    +  default: 1000
    +  see_also:
    +  - osd_mclock_max_capacity_iops_ssd
       flags:
       - runtime
     # Set to true for testing.  Users should NOT set this.
    @@ -1234,6 +1366,11 @@ options:
       level: advanced
       default: false
       with_legacy: true
    +- name: osd_ec_partial_reads
    +  type: bool
    +  level: advanced
    +  default: true
    +  with_legacy: true
     - name: osd_recovery_delay_start
       type: float
       level: advanced
    @@ -1253,10 +1390,12 @@ options:
         is ``0``, which means that the ``hdd`` or ``ssd`` values
         (below) are used, depending on the type of the primary
         device backing the OSD.
    +    This setting is automatically reset when the mClock scheduler is used.
       default: 0
       see_also:
       - osd_recovery_max_active_hdd
       - osd_recovery_max_active_ssd
    +  - osd_mclock_override_recovery_settings
       flags:
       - runtime
       with_legacy: true
    @@ -1267,10 +1406,12 @@ options:
         devices)
       fmt_desc: The number of active recovery requests per OSD at one time, if the
         primary device is rotational.
    +  note: This setting is automatically reset when the mClock scheduler is used.
       default: 3
       see_also:
       - osd_recovery_max_active
       - osd_recovery_max_active_ssd
    +  - osd_mclock_override_recovery_settings
       flags:
       - runtime
       with_legacy: true
    @@ -1281,10 +1422,12 @@ options:
         solid state devices)
       fmt_desc: The number of active recovery requests per OSD at one time, if the
         primary device is non-rotational (i.e., an SSD).
    +  note: This setting is automatically reset when the mClock scheduler is used.
       default: 10
       see_also:
       - osd_recovery_max_active
       - osd_recovery_max_active_hdd
    +  - osd_mclock_override_recovery_settings
       flags:
       - runtime
       with_legacy: true
    @@ -1379,13 +1522,15 @@ options:
         overrides _ssd, _hdd, and _hybrid if non-zero.
       fmt_desc: Time in seconds to sleep before the next removal transaction. This
         throttles the PG deletion process.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 0
       flags:
       - runtime
     - name: osd_delete_sleep_hdd
       type: float
       level: advanced
    -  desc: Time in seconds to sleep before next removal transaction for HDDs
    +  desc: Time in seconds to sleep before next removal transaction for HDDs.
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 5
       flags:
       - runtime
    @@ -1393,6 +1538,7 @@ options:
       type: float
       level: advanced
       desc: Time in seconds to sleep before next removal transaction for SSDs
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 1
       flags:
       - runtime
    @@ -1401,6 +1547,7 @@ options:
       level: advanced
       desc: Time in seconds to sleep before next removal transaction when OSD data is on HDD
         and OSD journal or WAL+DB is on SSD
    +  note: This setting is ignored when the mClock scheduler is used.
       default: 1
       flags:
       - runtime
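The new ``osd_deep_scrub_interval_cv`` option above describes a clamped normal distribution: the next deep scrub is centred on ``osd_deep_scrub_interval`` with a standard deviation of ``interval * cv``, clamped to two standard deviations. A small standalone sketch of that calculation (an illustration of the stated formula, not the OSD scrub scheduler):

```cpp
// Standalone illustration of the osd_deep_scrub_interval_cv formula described
// above; not the actual OSD scrub scheduling code.
#include <algorithm>
#include <iostream>
#include <random>

int main() {
  const double interval = 7 * 24 * 3600; // osd_deep_scrub_interval (7_day), in seconds
  const double cv = 0.2;                 // osd_deep_scrub_interval_cv
  const double stddev = interval * cv;

  std::mt19937 gen{std::random_device{}()};
  std::normal_distribution<double> dist(interval, stddev);

  for (int i = 0; i < 5; ++i) {
    // clamp to within 2 standard deviations of the mean
    double next = std::clamp(dist(gen), interval - 2 * stddev,
                             interval + 2 * stddev);
    std::cout << "next deep scrub in " << next / 86400.0 << " days\n";
  }
}
```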
    diff --git a/src/common/options/rbd.yaml.in b/src/common/options/rbd.yaml.in
    index c2da27aaaaf7..a86a2e973046 100644
    --- a/src/common/options/rbd.yaml.in
    +++ b/src/common/options/rbd.yaml.in
    @@ -294,6 +294,8 @@ options:
       default: 0
       services:
       - rbd
    +  see_also:
    +  - mon_osd_blocklist_default_expire
     - name: rbd_request_timed_out_seconds
       type: uint
       level: advanced
    @@ -394,7 +396,7 @@ options:
       - rbd
     - name: rbd_validate_pool
       type: bool
    -  level: advanced
    +  level: dev
       desc: validate empty pools for RBD compatibility
       default: true
       services:
    diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in
    index 3971929e412d..0ce5bc332fd6 100644
    --- a/src/common/options/rgw.yaml.in
    +++ b/src/common/options/rgw.yaml.in
    @@ -51,6 +51,22 @@ options:
       services:
       - rgw
       with_legacy: true
    +- name: rgw_disable_s3select
    +  type: bool
    +  level: advanced
    +  desc: disable the s3select operation; RGW will report an error and will return ERR_INVALID_REQUEST.
    +  default: false
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_parquet_buffer_size
    +  type: size
    +  level: advanced
     +  desc: the maximum parquet buffer size; a limit on memory consumption for parquet reading operations.
    +  default: 16_M
    +  services:
    +  - rgw
    +  with_legacy: true
     - name: rgw_rados_tracing
       type: bool
       level: advanced
    @@ -233,7 +249,7 @@ options:
       long_desc: The lifecycle maintenance thread is responsible for lifecycle related
         maintenance work. The thread itself can be disabled, but in order for lifecycle
         to work correctly, at least one RGW in each zone needs to have this thread running.
    -    Havingthe thread enabled on multiple RGW processes within the same zone can spread
    +    Having the thread enabled on multiple RGW processes within the same zone can spread
         some of the maintenance work between them.
       default: true
       services:
    @@ -290,7 +306,7 @@ options:
       desc: Max number of items in RGW metadata cache.
       long_desc: When full, the RGW metadata cache evicts least recently used entries.
       fmt_desc: The number of entries in the Ceph Object Gateway cache.
    -  default: 10000
    +  default: 25000
       services:
       - rgw
       see_also:
    @@ -359,7 +375,11 @@ options:
       type: str
       level: advanced
       desc: Lifecycle allowed work time
    -  long_desc: Local time window in which the lifecycle maintenance thread can work.
    +  long_desc: Local time window in which the lifecycle maintenance thread can work. It expects
    +    24-hour time notation. For example, "00:00-23:59" means starting at midnight lifecycle
    +    is allowed to run for the whole day (24 hours). When lifecycle completes, it waits for the
    +    next maintenance window. In this example, if it completes at 01:00, it will resume processing
    +    23 hours later at the following midnight.
       default: 00:00-06:00
       services:
       - rgw
    @@ -436,6 +456,19 @@ options:
       services:
       - rgw
       with_legacy: true
    +- name: rgw_restore_debug_interval
    +  type: int
    +  level: dev
    +  desc: The number of seconds that simulate one "day" in order to debug RGW CloudRestore.
    +    Do *not* modify for a production cluster.
    +  long_desc: For debugging RGW Cloud Restore, the number of seconds that are equivalent to
    +    one simulated "day". Values less than 1 are ignored and do not change Restore behavior.
    +    For example, during debugging if one wanted every 10 minutes to be equivalent to one day,
    +    then this would be set to 600, the number of seconds in 10 minutes.
    +  default: -1
    +  services:
    +  - rgw
    +  with_legacy: true
     - name: rgw_mp_lock_max_time
       type: int
       level: advanced
    @@ -771,16 +804,6 @@ options:
       services:
       - rgw
       with_legacy: true
    -- name: rgw_keystone_api_version
    -  type: int
    -  level: advanced
    -  desc: Version of Keystone API to use (2 or 3).
    -  fmt_desc: The version (2 or 3) of OpenStack Identity API that should be
    -    used for communication with the Keystone server.
    -  default: 2
    -  services:
    -  - rgw
    -  with_legacy: true
     - name: rgw_keystone_accepted_roles
       type: str
       level: advanced
    @@ -888,6 +911,15 @@ options:
       services:
       - rgw
       with_legacy: true
    +- name: rgw_s3_auth_disable_signature_url
    +  type: bool
    +  level: advanced
    +  desc: Should authentication with presigned URLs be disabled
    +  long_desc: 'If enabled, any request that is presigned with either V2 or V4 signature will be denied'
    +  default: false
    +  services:
    +  - rgw
    +  with_legacy: true
     - name: rgw_barbican_url
       type: str
       level: advanced
    @@ -900,8 +932,8 @@ options:
     - name: rgw_ldap_uri
       type: str
       level: advanced
    -  desc: Space-separated list of LDAP servers in URI format.
    -  default: ldaps://
    +  desc: Space-separated list of LDAP servers in URI format, e.g., "ldaps://".
    +  default:
       services:
       - rgw
       with_legacy: true
    @@ -1447,7 +1479,7 @@ options:
       desc: Ops log object name format
       long_desc: Defines the format of the RADOS objects names that ops log uses to store
         ops log data
    -  fmt_desc: The logging format for an object name. See ma npage
    +  fmt_desc: The logging format for an object name. See man page
         :manpage:`date` for details about format specifiers.
       default: '%Y-%m-%d-%H-%i-%n'
       services:
    @@ -1854,6 +1886,18 @@ options:
       services:
       - rgw
       with_legacy: true
    +- name: rgw_graceful_stop
    +  type: bool
    +  level: advanced
    +  desc: Delay the shutdown until all outstanding requests have completed
    +  long_desc: Wait for up to `rgw_exit_timeout_secs` for all outstanding requests to complete
    +    before exiting unconditionally. (new HTTP requests will not be accepted during this time.)
    +  default: false
    +  services:
    +  - rgw
    +  see_also:
    +  - rgw_exit_timeout_secs
    +  with_legacy: true
     - name: rgw_get_obj_window_size
       type: size
       level: advanced
    @@ -2055,14 +2099,6 @@ options:
       services:
       - rgw
       with_legacy: true
    -- name: rgw_data_log_obj_prefix
    -  type: str
    -  level: dev
    -  default: data_log
    -  fmt_desc: The object name prefix for the data log.
    -  services:
    -  - rgw
    -  with_legacy: true
     - name: rgw_data_sync_poll_interval
       type: int
       level: dev
    @@ -2217,6 +2253,14 @@ options:
       services:
       - rgw
       with_legacy: true
    +- name: rgw_asio_assert_yielding
    +  type: bool
    +  level: dev
    +  desc: Trigger an assertion failure if an operation would block an asio thread
    +  default: false
    +  services:
    +  - rgw
    +  with_legacy: true
     - name: rgw_user_quota_bucket_sync_interval
       type: int
       level: advanced
    @@ -2287,6 +2331,31 @@ options:
       services:
       - rgw
       with_legacy: true
    +- name: rgw_account_default_quota_max_objects
    +  type: int
    +  level: basic
    +  desc: Account quota max objects
    +  long_desc: The default quota configuration for total number of objects for a single
    +    account. A negative number means 'unlimited'.
     +  fmt_desc: Default max number of objects for an account. This includes all
    +    objects in all buckets owned by the account. Set on new accounts
    +    if no other quota is specified. Has no effect on existing accounts.
    +  default: -1
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_account_default_quota_max_size
    +  type: int
    +  level: basic
    +  desc: Account quota max size
    +  long_desc: The default quota configuration for total size of objects for a single
    +    account. A negative number means 'unlimited'.
    +  fmt_desc: The value for account max size quota in bytes set on new accounts,
    +    if no other quota is specified.  Has no effect on existing accounts.
    +  default: -1
    +  services:
    +  - rgw
    +  with_legacy: true
     - name: rgw_multipart_min_part_size
       type: size
       level: advanced
    @@ -2371,6 +2440,15 @@ options:
       services:
       - rgw
       with_legacy: true
    +- name: rgw_enable_mdsearch
    +  type: bool
    +  level: basic
    +  desc: Enable elastic metadata search APIs
    +  long_desc: This configurable controls whether RGW handles the elastic metadata search APIs.
    +  default: true
    +  services:
    +  - rgw
    +  with_legacy: true
     - name: rgw_user_unique_email
       type: bool
       level: basic
    @@ -2638,6 +2716,46 @@ options:
       - rgw
       - rgw
       min: 30
    +- name: rgw_debug_inject_latency_bi_unlink
    +  type: uint
    +  level: dev
    +  desc: Latency (in seconds) injected before rgw bucket index unlink op calls to simulate
     +    queueing latency and validate behavior of simultaneous delete requests which
    +    target the same object.
    +  default: 0
    +  with_legacy: true
    +  services:
    +  - rgw
    +- name: rgw_reshard_progress_judge_interval
    +  type: uint
    +  level: dev
     +  desc: interval (in seconds) for judging whether a bucket reshard has failed while in the blocking state
    +  default: 120
    +  services:
    +  - rgw
    +- name: rgw_reshard_progress_judge_ratio
    +  type: float
    +  level: dev
    +  desc: ratio of reshard progress judge interval to randomly vary
    +  long_desc: Add a random delay to rgw_reshard_progress_judge_interval for deciding when
     +    to judge the reshard process. The default setting spreads the judging time over a
     +    window of [1, 1.5] * rgw_reshard_progress_judge_interval.
    +  default: 0.5
    +  services:
    +  - rgw
    +  see_also:
    +  - rgw_reshard_progress_judge_interval
    +- name: rgw_reshardlog_threshold
    +  type: uint
    +  level: dev
    +  desc: threshold for a shard to record log before blocking writes
    +  default: 30000
    +  with_legacy: true
    +  services:
    +  - rgw
    +  - osd
    +  see_also:
    +  - rgw_reshard_progress_judge_interval
     - name: rgw_debug_inject_set_olh_err
       type: uint
       level: dev
    @@ -3179,6 +3297,36 @@ options:
       see_also:
       - rgw_max_objs_per_shard
       - rgw_max_dynamic_shards
    +- name: rgw_dynamic_resharding_may_reduce
    +  type: bool
    +  level: advanced
    +  desc: Whether dynamic resharding can reduce the number of shards
    +  long_desc: If true, RGW's dynamic resharding ability is allowed to
    +    reduce the number of shards if it appears there are too many.
    +  default: true
    +  services:
    +  - rgw
    +  see_also:
    +  - rgw_dynamic_resharding
    +- name: rgw_dynamic_resharding_reduction_wait
    +  type: uint
    +  level: advanced
    +  desc: Number of hours to delay bucket index shard reduction.
    +  long_desc: >-
    +    In order to avoid resharding buckets with object
     +    counts that fluctuate up and down regularly, we implement a delay
    +    between noting a shard reduction might be appropriate and when it's
    +    actually done. This allows us to cancel the reshard operation if the
     +    number of objects significantly increases during this delay.
    +    WARNING: Setting this value too low could result in significantly reduced
    +    cluster performance.
    +  default: 120
    +  min: 0
    +  services:
    +  - rgw
    +  see_also:
    +  - rgw_dynamic_resharding
    +  - rgw_dynamic_resharding_may_reduce
     - name: rgw_max_objs_per_shard
       type: uint
       level: basic
    @@ -3214,6 +3362,23 @@ options:
       services:
       - rgw
       min: 10
    +- name: rgw_reshard_debug_interval
    +  type: int
    +  level: dev
    +  desc: The number of seconds that simulate one "day" in order to debug RGW dynamic resharding.
    +    Do *not* modify for a production cluster.
    +  long_desc: For debugging RGW dynamic resharding, the number of seconds that are equivalent to
    +    one simulated "day". Values less than 1 are ignored and do not change dynamic resharding behavior.
    +    For example, during debugging if one wanted every 10 minutes to be equivalent to one day,
    +    then this would be set to 600, the number of seconds in 10 minutes.
    +  default: -1
    +  services:
    +  - rgw
    +  with_legacy: true
    +  see_also:
    +  - rgw_dynamic_resharding
    +  - rgw_reshard_thread_interval
    +  - rgw_dynamic_resharding_reduction_wait
     - name: rgw_cache_expiry_interval
       type: uint
       level: advanced
    @@ -3295,8 +3460,11 @@ options:
       type: uint
       level: advanced
       desc: Session token max duration
    -  long_desc: Max duration in seconds for which the session token is valid.
    +  long_desc: This option can be used to configure the upper limit of the
    +    durationSeconds of temporary credentials returned by 'GetSessionToken'.
       default: 43200
    +  see_also:
    +  - rgw_sts_min_session_duration
       services:
       - rgw
       with_legacy: true
    @@ -3304,18 +3472,22 @@ options:
       type: uint
       level: advanced
       desc: Minimum allowed duration of a session
    +  long_desc: This option can be used to configure the lower limit of
    +    durationSeconds of temporary credentials returned by 'AssumeRole*' calls.
       default: 900
       services:
       - rgw
       with_legacy: true
    +  see_also:
    +  - rgw_sts_max_session_duration
     - name: rgw_max_listing_results
       type: uint
       level: advanced
    -  desc: Upper bound on results in listing operations, ListBucket max-keys
    +  desc: Upper bound on results in listing operations, ListObjects max-keys
       long_desc: This caps the maximum permitted value for listing-like operations in
    -    RGW S3. Affects ListBucket(max-keys), ListBucketVersions(max-keys), ListBucketMultipartUploads(max-uploads),
    -    ListMultipartUploadParts(max-parts)
    -  default: 1000
    +    RGW S3. Affects ListObjects(max-keys), ListObjectsVersions(max-keys),
    +    ListMultipartUploads(max-uploads), ListParts(max-parts)
    +  default: 5000
       services:
       - rgw
       - rgw
    @@ -3582,6 +3754,89 @@ options:
       see_also:
       - rgw_thread_pool_size
       with_legacy: true
    +- name: rgw_d4n_l1_datacache_persistent_path
    +  type: str
    +  level: advanced
    +  desc: path used for storing locally cached object data
    +  long_desc: One cache backend option for D4N is the local SSD, which uses this path to
    +    write and read object data. This is the default cache backend chosen by the D4N filter.
    +    Only the SSD cache backend uses this path for object data storage since the RedisDriver
    +    uses a Redis server instead and there are no additional cache backend implementations
    +    available at the moment. 
    +  default: /tmp/rgw_d4n_datacache/
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_d4n_l1_datacache_size
    +  type: size
    +  level: advanced
    +  desc: maximum size on disk for datacache
    +  long_desc: The local SSD cache uses this option to configure its size in bytes. This 
    +    option is not used by the Redis cache backend. 
    +  default: 1_G
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_d4n_l1_evict_cache_on_start
    +  type: bool
    +  level: advanced
    +  desc: clear the contents of the persistent datacache on start
    +  long_desc: The local SSD cache uses this option to clear the contents of the path supplied
    +    by the rgw_d4n_l1_datacache_persistent_path config option on start. If false, the path's 
    +    contents will be retained. 
    +  default: true
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_d4n_l1_fadvise
    +  type: int
    +  level: advanced
    +  desc: posix_fadvise() flag for access pattern of cache files
    +  long_desc: For example, to bypass the page-cache -
    +    POSIX_FADV_DONTNEED=4
    +  default: 4
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_d4n_libaio_aio_threads
    +  type: int
    +  level: advanced
    +  desc: specifies the maximum number of worker threads that may be used by libaio
    +  long_desc: This option is used by the SSD cache backend during initialization to set the maximum
    +    number of worker threads libaio may use. It does not apply to the Redis cache backend. 
    +  default: 20
    +  services:
    +  - rgw
    +  see_also:
    +  - rgw_thread_pool_size
    +  with_legacy: true
    +- name: rgw_d4n_libaio_aio_num
    +  type: int
    +  level: advanced
    +  desc: specifies the maximum number of simultaneous I/O requests that libaio expects to enqueue
    +  long_desc: This option is used by the SSD cache backend during initialization to set the maximum
    +    number of simultaneous I/O requests that libaio can expect to enqueue. It
    +    does not apply to the Redis cache backend. 
    +  default: 64
    +  services:
    +  - rgw
    +  see_also:
    +  - rgw_thread_pool_size
    +  with_legacy: true
    +- name: rgw_lfuda_sync_frequency
    +  type: int
    +  level: advanced
    +  desc: LFUDA variables' sync frequency in seconds 
    +  long_desc: By default, the D4N cache uses the Least Frequently Used with Dynamic Aging (LFUDA) 
    +    cache replacement policy. This class globally stores values that are used by the policy's 
    +    algorithm. However, strong consistency for these values is not necessary and adds additional
    +    overhead to support. As a result, a thread periodically retrieves these global values and posts
    +    updates when certain conditions are satisfied. This Redis thread completes this logic in a loop
    +    that is called once every interval, with the interval being set by this option.  
    +  default: 60
    +  services:
    +  - rgw
    +  with_legacy: true
     - name: rgw_backend_store
       type: str
       level: advanced
    @@ -3831,21 +4086,15 @@ options:
       default: true
       services:
       - rgw
    -- name: rgw_d4n_host
    +- name: rgw_d4n_address
       type: str
       level: advanced
    -  desc: The rgw directory host
    -  default: 127.0.0.1
    -  services: 
    -  - rgw
    -  flags:
    -  - startup
    -  with_legacy: true
    -- name: rgw_d4n_port
    -  type: int
    -  level: advanced
    -  desc: The rgw directory port
    -  default: 6379
    +  desc: address for the D4N Redis connection
    +  long_desc: The current D4N implementation supports one Redis node
    +    which the D4N directory, policy, and overall filter communicate
    +    with. This default value is also the address that a Redis server 
    +    with no additional configuration will use.
    +  default: 127.0.0.1:6379
       services: 
       - rgw
       flags:
    @@ -3896,3 +4145,108 @@ options:
       services:
       - rgw
       with_legacy: true
    +- name: rgw_topic_require_publish_policy
    +  type: bool
    +  level: basic
    +  desc: Whether to validate user permissions to publish notifications to topics.
     +  long_desc: If true, all users (other than the owner of the topic) will need
     +    to have a policy to publish notifications to topics.
     +    The topic policy can be set by the owner via CreateTopic() or SetTopicAttributes().
     +    The following permissions can be granted via policy: "sns:Publish", "sns:GetTopicAttributes",
     +    "sns:SetTopicAttributes", "sns:DeleteTopic" and "sns:CreateTopic".
     +    Note that even if this is set to "false", topics will still honor any policies set on them.
    +  default: false
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_user_counters_cache
    +  type: bool
    +  level: dev
    +  default: false
    +  desc: enable a rgw perf counters cache for counters with user label
     +  long_desc: If set to true, rgw creates perf counters with a label for the user and stores them
    +    in a perf counters cache. This perf counters cache contains only perf counters labeled by user.
    +  see_also:
    +  - rgw_user_counters_cache_size
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_user_counters_cache_size
    +  type: uint
    +  level: advanced
    +  desc: Number of labeled perf counters the user perf counters cache can store
    +  default: 10000
    +  services:
    +  - rgw
    +  see_also:
    +  - rgw_user_counters_cache
    +  with_legacy: true
    +- name: rgw_bucket_counters_cache
    +  type: bool
    +  level: dev
    +  default: false
    +  desc: enable a rgw perf counters cache for counters with bucket label
     +  long_desc: If set to true, rgw creates perf counters with a label for the bucket and stores them
    +    in a perf counters cache. This perf counters cache contains only perf counters labeled by bucket.
    +  see_also:
    +  - rgw_bucket_counters_cache_size
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_bucket_counters_cache_size
    +  type: uint
    +  level: advanced
    +  desc: Number of labeled perf counters the bucket perf counters cache can store
    +  default: 10000
    +  services:
    +  - rgw
    +  see_also:
    +  - rgw_bucket_counters_cache
    +  with_legacy: true
    +- name: rgw_kafka_connection_idle
    +  type: uint 
    +  level: advanced
    +  desc: Time in seconds to delete idle kafka connections
     +  long_desc: A connection will be considered "idle" if no messages
    +    are sent to it for more than the time defined.
    +    Note that the connection will not be considered idle, even if it is down,
    +    as long as there are attempts to send messages to it.
    +  default: 300
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_kafka_sleep_timeout
    +  type: uint 
    +  level: advanced
    +  desc: Time in milliseconds to sleep while polling for kafka replies
     +  long_desc: This will be used to prevent busy waiting for the kafka replies,
     +    as well as for the cases where the broker is down and we try to reconnect.
     +    Three times this value will be used to sleep if there were no messages
     +    sent or received across all kafka connections.
    +  default: 10
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_kafka_message_timeout
    +  type: uint 
    +  level: advanced
    +  desc: This is the maximum time in milliseconds to deliver a message (including retries)
    +  long_desc: Delivery error occurs when the message timeout is exceeded.
     +    Value must be greater than zero; if set to zero, a value of 1 millisecond will be used.
    +  default: 5000
    +  services:
    +  - rgw
    +  with_legacy: true
    +- name: rgw_d4n_l1_datacache_address
    +  type: str
    +  level: advanced
    +  desc: local Redis cache address 
    +  long_desc: This is the address used to configure the Redis cache backend connection. The default
    +    value is the same address used by Redis without any additional configuration. The SSD cache 
    +    does not use this option.
    +  default: 127.0.0.1:6379
    +  services:
    +  - rgw
    +  flags:
    +  - startup
    +  with_legacy: true
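Among the options above, ``rgw_reshard_progress_judge_ratio`` spreads the judgement time over ``[1, 1 + ratio] * rgw_reshard_progress_judge_interval``. A small standalone sketch of that jitter calculation (an illustration, not the RGW reshard code):

```cpp
// Illustration of how rgw_reshard_progress_judge_ratio spreads the judge time
// over [1, 1 + ratio] * rgw_reshard_progress_judge_interval; not RGW code.
#include <iostream>
#include <random>

int main() {
  const unsigned interval = 120; // rgw_reshard_progress_judge_interval (seconds)
  const double ratio = 0.5;      // rgw_reshard_progress_judge_ratio

  std::mt19937 gen{std::random_device{}()};
  std::uniform_real_distribution<double> jitter(0.0, ratio);

  for (int i = 0; i < 5; ++i) {
    double next_judge = interval * (1.0 + jitter(gen)); // seconds from now
    std::cout << "judge reshard progress in " << next_judge << " s\n";
  }
}
```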
    diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc
    index b5e361b505cd..2eeaa80aae8e 100644
    --- a/src/common/perf_counters.cc
    +++ b/src/common/perf_counters.cc
    @@ -18,6 +18,7 @@
     #include "common/dout.h"
     #include "common/valgrind.h"
     #include "include/common_fwd.h"
    +#include "include/utime.h"
     
     using std::ostringstream;
     using std::make_pair;
    diff --git a/src/common/perf_counters.h b/src/common/perf_counters.h
    index 942edf6d7e54..0d0fe86a0920 100644
    --- a/src/common/perf_counters.h
    +++ b/src/common/perf_counters.h
    @@ -17,6 +17,8 @@
     #ifndef CEPH_COMMON_PERF_COUNTERS_H
     #define CEPH_COMMON_PERF_COUNTERS_H
     
    +#include 
    +#include 
     #include 
     #include 
     #include 
    @@ -24,11 +26,12 @@
     #include 
     
     #include "common/perf_histogram.h"
    -#include "include/utime.h"
     #include "include/common_fwd.h"
     #include "common/ceph_mutex.h"
     #include "common/ceph_time.h"
     
    +class utime_t;
    +
     namespace TOPNSPC::common {
       class CephContext;
       class PerfCountersBuilder;
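The perf_counters.h hunk above replaces the ``include/utime.h`` include with a forward declaration, which is sufficient because the header only mentions ``utime_t`` in function declarations. A generic sketch of that pattern (not the Ceph headers themselves):

```cpp
// foo.h -- the header only declares functions taking or returning utime_t,
// so a forward declaration suffices and the heavy header stays out.
class utime_t;

struct Timer {
  void tset(int idx, utime_t amt);  // declaration only: incomplete type is fine
  utime_t tget(int idx) const;
};

// foo.cc -- only the translation unit that defines (or calls) these functions
// needs the complete type:
//   #include "include/utime.h"
//   void Timer::tset(int idx, utime_t amt) { /* ... */ }
```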
    diff --git a/src/common/perf_counters_cache.cc b/src/common/perf_counters_cache.cc
    new file mode 100644
    index 000000000000..fb63b7acfed4
    --- /dev/null
    +++ b/src/common/perf_counters_cache.cc
    @@ -0,0 +1,115 @@
    +#include "common/perf_counters_cache.h"
    +#include "common/perf_counters_key.h"
    +
    +namespace ceph::perf_counters {
    +
    +void PerfCountersCache::check_key(const std::string &key) {
    +  [[maybe_unused]] std::string_view key_name = ceph::perf_counters::key_name(key);
    +  // don't accept an empty key name
    +  assert(key_name != "");
    +
    +  // if there are no labels, key name is not valid
    +  auto key_labels = ceph::perf_counters::key_labels(key);
    +  assert(key_labels.begin() != key_labels.end());
    +
    +  // don't accept keys where any labels in the key have an empty key name
    +  for ([[maybe_unused]] auto key_label : key_labels) {
    +    assert(key_label.first != "");
    +  }
    +}
    +
     +std::shared_ptr<PerfCounters> PerfCountersCache::add(const std::string &key) {
    +  check_key(key);
    +
    +  auto [ref, key_existed] = cache.get_or_create(key);
    +  if (!key_existed) {
    +    ref->counters = create_counters(key, cct);
    +    assert(ref->counters);
    +    ref->cct = cct;
    +  }
    +  return ref->counters;
    +}
    +
    +
     +std::shared_ptr<PerfCounters> PerfCountersCache::get(const std::string &key) {
    +  std::lock_guard lock(m_lock);
    +  return add(key);
    +}
    +
    +void PerfCountersCache::inc(const std::string &key, int indx, uint64_t v) {
    +  std::lock_guard lock(m_lock);
    +  auto counters = add(key);
    +  if (counters) {
    +    counters->inc(indx, v);
    +  }
    +}
    +
    +void PerfCountersCache::dec(const std::string &key, int indx, uint64_t v) {
    +  std::lock_guard lock(m_lock);
    +  auto counters = add(key);
    +  if (counters) {
    +    counters->dec(indx, v);
    +  }
    +}
    +
    +void PerfCountersCache::tinc(const std::string &key, int indx, utime_t amt) {
    +  std::lock_guard lock(m_lock);
    +  auto counters = add(key);
    +  if (counters) {
    +    counters->tinc(indx, amt);
    +  }
    +}
    +
    +void PerfCountersCache::tinc(const std::string &key, int indx, ceph::timespan amt) {
    +  std::lock_guard lock(m_lock);
    +  auto counters = add(key);
    +  if (counters) {
    +    counters->tinc(indx, amt);
    +  }
    +}
    +
    +void PerfCountersCache::set_counter(const std::string &key, int indx, uint64_t val) {
    +  std::lock_guard lock(m_lock);
    +  auto counters = add(key);
    +  if (counters) {
    +    counters->set(indx, val);
    +  }
    +}
    +
    +uint64_t PerfCountersCache::get_counter(const std::string &key, int indx) {
    +  std::lock_guard lock(m_lock);
    +  auto counters = add(key);
    +  uint64_t val = 0;
    +  if (counters) {
    +    val = counters->get(indx);
    +  }
    +  return val;
    +}
    +
    +utime_t PerfCountersCache::tget(const std::string &key, int indx) {
    +  std::lock_guard lock(m_lock);
    +  auto counters = add(key);
    +  utime_t val;
    +  if (counters) {
    +    val = counters->tget(indx);
    +    return val;
    +  } else {
    +    return utime_t();
    +  }
    +}
    +
    +void PerfCountersCache::tset(const std::string &key, int indx, utime_t amt) {
    +  std::lock_guard lock(m_lock);
    +  auto counters = add(key);
    +  if (counters) {
    +    counters->tset(indx, amt);
    +  }
    +}
    +
    +PerfCountersCache::PerfCountersCache(CephContext *_cct, size_t _target_size,
     +      std::function<std::shared_ptr<PerfCounters>(const std::string&, CephContext*)> _create_counters)
    +      : cct(_cct), create_counters(_create_counters), m_lock(ceph::make_mutex("PerfCountersCache")) { cache.set_target_size(_target_size); }
    +
    +PerfCountersCache::~PerfCountersCache() { cache.set_target_size(0); }
    +
    +} // namespace ceph::perf_counters
    diff --git a/src/common/perf_counters_cache.h b/src/common/perf_counters_cache.h
    new file mode 100644
    index 000000000000..aa786fc5bf09
    --- /dev/null
    +++ b/src/common/perf_counters_cache.h
    @@ -0,0 +1,84 @@
    +#pragma once
    +
    +#include "common/perf_counters.h"
    +#include "common/ceph_context.h"
    +#include "common/intrusive_lru.h"
    +#include "include/utime.h"
    +
    +namespace ceph::perf_counters {
    +
    +struct perf_counters_cache_item_to_key;
    +
    +struct PerfCountersCacheEntry : public ceph::common::intrusive_lru_base<
    +                                       ceph::common::intrusive_lru_config<
    +                                       std::string, PerfCountersCacheEntry, perf_counters_cache_item_to_key>> {
    +  std::string key;
     +  std::shared_ptr<PerfCounters> counters;
    +  CephContext *cct;
    +
    +  PerfCountersCacheEntry(const std::string &_key) : key(_key) {}
    +
    +  ~PerfCountersCacheEntry() {
    +    if (counters) {
    +      cct->get_perfcounters_collection()->remove(counters.get());
    +    }
    +  }
    +};
    +
    +struct perf_counters_cache_item_to_key {
    +  using type = std::string;
    +  const type &operator()(const PerfCountersCacheEntry &entry) {
    +    return entry.key;
    +  }
    +};
    +
    +class PerfCountersCache {
    +private:
    +  CephContext *cct;
     +  std::function<std::shared_ptr<PerfCounters>(const std::string&, CephContext*)> create_counters;
    +  PerfCountersCacheEntry::lru_t cache;
    +  mutable ceph::mutex m_lock;
    +
     +  /* check that the key name is non-empty and that labels are non-empty
     +   *
     +   * A valid key has the form
     +   * key\0label1\0val1\0label2\0val2 ... labelN\0valN
     +   * The following 3 properties are checked for in this function
    +   * 1. A non-empty key
    +   * 2. At least 1 set of labels
    +   * 3. Each label has a non-empty key and value
    +   *
    +   * See perf_counters_key.h
    +   */
    +  void check_key(const std::string &key);
    +
    +  // adds a new entry to the cache and returns its respective PerfCounter*
    +  // or returns the PerfCounter* of an existing entry in the cache
     +  std::shared_ptr<PerfCounters> add(const std::string &key);
    +
    +public:
    +
    +  // get() and its associated shared_ptr reference counting should be avoided 
    +  // unless the caller intends to modify multiple counter values at the same time.
    +  // If multiple counter values will not be modified at the same time, inc/dec/etc. 
    +  // are recommended.
     +  std::shared_ptr<PerfCounters> get(const std::string &key);
    +
    +  void inc(const std::string &key, int indx, uint64_t v);
    +  void dec(const std::string &key, int indx, uint64_t v);
    +  void tinc(const std::string &key, int indx, utime_t amt);
    +  void tinc(const std::string &key, int indx, ceph::timespan amt);
    +  void set_counter(const std::string &key, int indx, uint64_t val);
    +  uint64_t get_counter(const std::string &key, int indx);
    +  utime_t tget(const std::string &key, int indx);
    +  void tset(const std::string &key, int indx, utime_t amt);
    +
    +  // _create_counters should be a function that returns a valid, newly created perf counters instance
    +  // Ceph components utilizing the PerfCountersCache are encouraged to pass in a factory function that would
    +  // create and initialize different kinds of counters based on the name returned from ceph::perfcounters::key_name(key)
    +  PerfCountersCache(CephContext *_cct, size_t _target_size,
     +                    std::function<std::shared_ptr<PerfCounters>(const std::string&, CephContext*)> _create_counters);
    +  ~PerfCountersCache();
    +};
    +
    +} // namespace ceph::perf_counters
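A hedged usage sketch of the new cache follows; the counter enum, the ``create_example_counters`` factory, and the labels below are invented for illustration, while ``PerfCountersBuilder`` and the ``perf_counters_key`` helpers are the existing APIs the header comments refer to:

```cpp
// Hedged usage sketch of PerfCountersCache; the enum, factory and labels are
// hypothetical examples, not code from this patch.
#include "common/perf_counters_cache.h"
#include "common/perf_counters_key.h"

enum {
  l_example_first = 100000,   // hypothetical counter index range
  l_example_ops,
  l_example_last,
};

// Factory handed to the cache: builds and registers a labeled counter set for
// a given key (the cache entry's destructor removes it from the collection).
static std::shared_ptr<PerfCounters> create_example_counters(
    const std::string& key, CephContext* cct) {
  PerfCountersBuilder pcb(cct, key, l_example_first, l_example_last);
  pcb.add_u64_counter(l_example_ops, "ops", "Operations observed");
  auto counters = std::shared_ptr<PerfCounters>(pcb.create_perf_counters());
  cct->get_perfcounters_collection()->add(counters.get());
  return counters;
}

void example(CephContext* cct) {
  ceph::perf_counters::PerfCountersCache cache(cct, 100, create_example_counters);
  // Keys are built with the perf_counters_key helpers: a name plus labels.
  std::string key =
      ceph::perf_counters::key_create("example", {{"user", "alice"}});
  cache.inc(key, l_example_ops, 1);  // creates the labeled entry on first use
}
```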
    diff --git a/src/common/pick_address.cc b/src/common/pick_address.cc
    index 2fd076808ac7..d125d7171e00 100644
    --- a/src/common/pick_address.cc
    +++ b/src/common/pick_address.cc
    @@ -15,8 +15,15 @@
     #include "common/pick_address.h"
     
     #include 
    +#include <ifaddrs.h> // for struct ifaddrs
     #include 
     #include 
    +#ifdef _WIN32
    +#include <ws2tcpip.h>
    +#else
    +#include <arpa/inet.h> // inet_pton()
    +#include <net/if.h> // IFF_UP
    +#endif
     #include 
     #include 
     #include 
    @@ -34,6 +41,7 @@
     #include "common/debug.h"
     #include "common/errno.h"
     #include "common/numa.h"
    +#include "common/safe_io.h"
     
     #ifndef HAVE_IN_ADDR_T
     typedef uint32_t in_addr_t;
    @@ -638,15 +646,24 @@ bool is_addr_in_subnet(
     {
       const auto nets = get_str_list(networks);
       ceph_assert(!nets.empty());
    -  const auto &net = nets.front();
    -  struct ifaddrs ifa;
    +
       unsigned ipv = CEPH_PICK_ADDRESS_IPV4;
       struct sockaddr_in public_addr;
    -
    -  ifa.ifa_next = nullptr;
    -  ifa.ifa_addr = (struct sockaddr*)&public_addr;
       public_addr.sin_family = AF_INET;
    -  inet_pton(AF_INET, addr.c_str(), &public_addr.sin_addr);
     
    -  return matches_with_net(cct, ifa, net, ipv);
    +  if(inet_pton(AF_INET, addr.c_str(), &public_addr.sin_addr) != 1) {
    +    lderr(cct) << "unable to convert chosen address to string: " << addr << dendl;
    +    return false;
    +  }
    +
    +  for (const auto &net : nets) {
    +    struct ifaddrs ifa;
    +    memset(&ifa, 0, sizeof(ifa));
    +    ifa.ifa_next = nullptr;
    +    ifa.ifa_addr = (struct sockaddr*)&public_addr;
    +    if(matches_with_net(cct, ifa, net, ipv)) {
    +      return true;
    +    }
    +  }
    +  return false;
     }
    diff --git a/src/common/random_string.cc b/src/common/random_string.cc
    index c728956182a4..9ce8ded18a3e 100644
    --- a/src/common/random_string.cc
    +++ b/src/common/random_string.cc
    @@ -125,3 +125,19 @@ std::string gen_rand_alphanumeric_plain(CephContext *cct, size_t size)
       str.pop_back(); // pop the extra \0
       return str;
     }
    +
    +void gen_rand_numeric(CephContext *cct, char *dest, size_t size) /* size should be the required string size + 1 */
    +{
    +  static constexpr char table[] = "0123456789";
    +  choose_from(cct->random(), table, dest, size-1);
    +  dest[size-1] = 0;
    +}
    +
    +std::string gen_rand_numeric(CephContext *cct, size_t size)
    +{
    +  std::string str;
    +  str.resize(size + 1);
    +  gen_rand_numeric(cct, str.data(), str.size());
    +  str.pop_back(); // pop the extra \0
    +  return str;
    +}
    diff --git a/src/common/random_string.h b/src/common/random_string.h
    index b5dd9825ebf4..2516425a6b99 100644
    --- a/src/common/random_string.h
    +++ b/src/common/random_string.h
    @@ -26,6 +26,7 @@ void gen_rand_alphanumeric_lower(CephContext *cct, char *dest, size_t size);
     void gen_rand_alphanumeric_upper(CephContext *cct, char *dest, size_t size);
     void gen_rand_alphanumeric_no_underscore(CephContext *cct, char *dest, size_t size);
     void gen_rand_alphanumeric_plain(CephContext *cct, char *dest, size_t size);
    +void gen_rand_numeric(CephContext *cct, char *dest, size_t size);
     
     // returns a std::string with 'size' random characters
     std::string gen_rand_alphanumeric(CephContext *cct, size_t size);
    @@ -33,3 +34,4 @@ std::string gen_rand_alphanumeric_lower(CephContext *cct, size_t size);
     std::string gen_rand_alphanumeric_upper(CephContext *cct, size_t size);
     std::string gen_rand_alphanumeric_no_underscore(CephContext *cct, size_t size);
     std::string gen_rand_alphanumeric_plain(CephContext *cct, size_t size);
    +std::string gen_rand_numeric(CephContext *cct, size_t size);
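    A quick usage sketch of the new helpers (illustrative only); as noted in random_string.cc, the char-buffer variant expects the buffer size including the trailing NUL:

      char pin[7];
      gen_rand_numeric(cct, pin, sizeof(pin));       // fills 6 random digits plus '\0'
      std::string token = gen_rand_numeric(cct, 6);  // returns a 6-digit std::string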
    diff --git a/src/common/scrub_types.cc b/src/common/scrub_types.cc
    index 9168ee0a2793..4b4d191e09c3 100644
    --- a/src/common/scrub_types.cc
    +++ b/src/common/scrub_types.cc
    @@ -55,10 +55,12 @@ static void encode(const osd_shard_t& shard, bufferlist& bl) {
     
     void shard_info_wrapper::set_object(const ScrubMap::object& object)
     {
    -  for (auto attr : object.attrs) {
    -    bufferlist bl;
    -    bl.push_back(attr.second);
    -    attrs.insert(std::make_pair(attr.first, std::move(bl)));
    +  // logically a no-op, this only changes the map's comparator from std::less<>
    +  // while avoiding `reinterpret_cast<const std::map<std::string, ceph::buffer::list>&>(object.attrs)`
    +  attrs.clear();
    +  for (const auto& kv : object.attrs) {
    +    attrs.insert(kv);
       }
       size = object.size;
       if (object.omap_digest_present) {
    @@ -159,6 +161,13 @@ void inconsistent_obj_wrapper::encode(bufferlist& bl) const
       ENCODE_FINISH(bl);
     }
     
    +bufferlist inconsistent_obj_wrapper::encode() const
    +{
    +  bufferlist bl;
    +  encode(bl);
    +  return bl;
    +}
    +
     void inconsistent_obj_wrapper::decode(bufferlist::const_iterator& bp)
     {
       DECODE_START(2, bp);
    @@ -238,6 +247,13 @@ void inconsistent_snapset_wrapper::encode(bufferlist& bl) const
       ENCODE_FINISH(bl);
     }
     
    +bufferlist inconsistent_snapset_wrapper::encode() const
    +{
    +  bufferlist bl;
    +  encode(bl);
    +  return bl;
    +}
    +
     void inconsistent_snapset_wrapper::decode(bufferlist::const_iterator& bp)
     {
       DECODE_START(2, bp);
    diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h
    index 0394eddd7e6b..d86fc12b6c8c 100644
    --- a/src/common/scrub_types.h
    +++ b/src/common/scrub_types.h
    @@ -4,6 +4,8 @@
     #ifndef CEPH_SCRUB_TYPES_H
     #define CEPH_SCRUB_TYPES_H
     
    +#include <fmt/ranges.h>
    +
     #include "osd/osd_types.h"
     
     // wrappers around scrub types to offer the necessary bits other than
    @@ -111,6 +113,10 @@ namespace librados {
     struct inconsistent_obj_wrapper : librados::inconsistent_obj_t {
       explicit inconsistent_obj_wrapper(const hobject_t& hoid);
     
    +  void merge(obj_err_t other) {
    +    errors |= other.errors;
    +  }
    +
       void set_object_info_inconsistency() {
         errors |= obj_err_t::OBJECT_INFO_INCONSISTENCY;
       }
    @@ -146,6 +152,7 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t {
     			const pg_shard_t &primary);
       void set_version(uint64_t ver) { version = ver; }
       void encode(ceph::buffer::list& bl) const;
    +  ceph::buffer::list encode() const;
       void decode(ceph::buffer::list::const_iterator& bp);
     };
     
    @@ -175,6 +182,7 @@ struct inconsistent_snapset_wrapper : public librados::inconsistent_snapset_t {
       void set_size_mismatch();
     
       void encode(ceph::buffer::list& bl) const;
    +  ceph::buffer::list encode() const;
       void decode(ceph::buffer::list::const_iterator& bp);
     };
     
    @@ -207,4 +215,197 @@ struct scrub_ls_result_t {
     
     WRITE_CLASS_ENCODER(scrub_ls_result_t);
     
    +template <>
    +struct fmt::formatter<librados::object_id_t> {
    +  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    +
    +  template <typename FormatContext>
    +  auto format(const auto &oid, FormatContext& ctx) const
    +  {
    +    return fmt::format_to(ctx.out(), "{}/{}/{}", oid.locator, oid.nspace, oid.name);
    +  }
    +};
    +
    +template <>
    +struct fmt::formatter<librados::err_t> {
    +  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    +
    +  template <typename FormatContext>
    +  auto format(const auto &err, FormatContext& ctx) const
    +  {
    +    bool first = true;
    +#define F(FLAG_NAME)					\
    +    if (err.errors & librados::err_t::FLAG_NAME) {	\
    +      if (!first) {					\
    +	fmt::format_to(ctx.out(), "|");			\
    +      } else {						\
    +	first = false;					\
    +      }							\
    +      fmt::format_to(ctx.out(), #FLAG_NAME);		\
    +    }
    +    F(SHARD_MISSING);
    +    F(SHARD_STAT_ERR);
    +    F(SHARD_READ_ERR);
    +    F(DATA_DIGEST_MISMATCH_INFO);
    +    F(OMAP_DIGEST_MISMATCH_INFO);
    +    F(SIZE_MISMATCH_INFO);
    +    F(SHARD_EC_HASH_MISMATCH);
    +    F(SHARD_EC_SIZE_MISMATCH);
    +    F(INFO_MISSING);
    +    F(INFO_CORRUPTED);
    +    F(SNAPSET_MISSING);
    +    F(SNAPSET_CORRUPTED);
    +    F(OBJ_SIZE_INFO_MISMATCH);
    +    F(HINFO_MISSING);
    +    F(HINFO_CORRUPTED);
    +#undef F
    +    return ctx.out();
    +  }
    +};
    +
    +template <>
    +struct fmt::formatter<librados::shard_info_t> {
    +  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    +
    +  template <typename FormatContext>
    +  auto format(const auto &err, FormatContext& ctx) const
    +  {
    +    return fmt::format_to(
    +      ctx.out(),
    +      "shard_info_t(error: {}, "
    +      "size: {}, "
    +      "omap_digest_present: {}, "
    +      "omap_digest: {}, "
    +      "data_digest_present: {}, "
    +      "data_digest: {}, "
    +      "selected_io: {}, "
    +      "primary: {})",
    +      static_cast<const librados::err_t&>(err),
    +      err.size,
    +      err.omap_digest_present,
    +      err.omap_digest,
    +      err.data_digest_present,
    +      err.data_digest,
    +      err.selected_oi,
    +      err.primary);
    +  }
    +};
    +
    +template <>
    +struct fmt::formatter<shard_info_wrapper> :
    +  fmt::formatter<librados::shard_info_t> {};
    +
    +template <>
    +struct fmt::formatter<librados::obj_err_t> {
    +  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    +
    +  template <typename FormatContext>
    +  auto format(const auto &err, FormatContext& ctx) const
    +  {
    +    bool first = true;
    +#define F(FLAG_NAME)					\
    +    if (err.errors & librados::obj_err_t::FLAG_NAME) {	\
    +      if (!first) {					\
    +	fmt::format_to(ctx.out(), "|");			\
    +      } else {						\
    +	first = false;					\
    +      }							\
    +      fmt::format_to(ctx.out(), #FLAG_NAME);		\
    +    }
    +    F(OBJECT_INFO_INCONSISTENCY);
    +    F(DATA_DIGEST_MISMATCH);
    +    F(OMAP_DIGEST_MISMATCH);
    +    F(SIZE_MISMATCH);
    +    F(ATTR_VALUE_MISMATCH);
    +    F(ATTR_NAME_MISMATCH);
    +    F(SNAPSET_INCONSISTENCY);
    +    F(HINFO_INCONSISTENCY);
    +    F(SIZE_TOO_LARGE);
    +#undef F
    +    return ctx.out();
    +  }
    +};
    +
    +template <>
    +struct fmt::formatter<librados::osd_shard_t> {
    +  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    +
    +  template <typename FormatContext>
    +  auto format(const auto &shard, FormatContext& ctx) const
    +  {
    +    return fmt::format_to(
    +      ctx.out(),
    +      "osd_shard_t(osd: {}, shard: {})",
    +      shard.osd, shard.shard);
    +  }
    +};
    +
    +template <>
    +struct fmt::formatter<librados::inconsistent_obj_t> {
    +  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    +
    +  template <typename FormatContext>
    +  auto format(const auto &err, FormatContext& ctx) const
    +  {
    +    return fmt::format_to(
    +      ctx.out(),
    +      "inconsistent_obj_t(error: {}, "
    +      "object: {}, "
    +      "version: {}, "
    +      "shards: {}, "
    +      "union_shards: {})",
    +      static_cast<const librados::obj_err_t&>(err),
    +      err.object,
    +      err.version,
    +      err.shards,
    +      err.union_shards);
    +  }
    +};
    +
    +template <>
    +struct fmt::formatter<inconsistent_obj_wrapper> :
    +  fmt::formatter<librados::inconsistent_obj_t> {};
    +
    +template <>
    +struct fmt::formatter<librados::inconsistent_snapset_t> {
    +  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
    +
    +  template <typename FormatContext>
    +  auto format(const auto &err, FormatContext& ctx) const
    +  {
    +    fmt::format_to(ctx.out(), "inconsistent_snapset_t(errors: ");
    +    bool first = true;
    +#define F(FLAG_NAME)							\
    +    if (err.errors & librados::inconsistent_snapset_t::FLAG_NAME) {	\
    +      if (!first) {							\
    +	fmt::format_to(ctx.out(), "|");					\
    +      } else {								\
    +	first = false;							\
    +      }									\
    +      fmt::format_to(ctx.out(), #FLAG_NAME);				\
    +    }
    +    F(SNAPSET_MISSING);
    +    F(SNAPSET_CORRUPTED);
    +    F(CLONE_MISSING);
    +    F(SNAP_ERROR);
    +    F(HEAD_MISMATCH);
    +    F(HEADLESS_CLONE);
    +    F(SIZE_MISMATCH);
    +    F(OI_MISSING);
    +    F(INFO_MISSING);
    +    F(OI_CORRUPTED);
    +    F(INFO_CORRUPTED);
    +    F(EXTRA_CLONES);
    +#undef F
    +    return fmt::format_to(
    +      ctx.out(),
    +      ", object: {}, clones: {}, missing: {}",
    +      err.object, err.clones, err.missing);
    +  }
    +};
    +
    +template <>
    +struct fmt::formatter<inconsistent_snapset_wrapper> :
    +  fmt::formatter<librados::inconsistent_snapset_t> {};
    +
     #endif
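    Illustrative only: with the formatter specializations above, the scrub inconsistency types can be passed directly to fmt-based logging, e.g.:

      librados::obj_err_t err;
      err.errors = librados::obj_err_t::DATA_DIGEST_MISMATCH |
                   librados::obj_err_t::SIZE_MISMATCH;
      // renders the set flags joined by '|', e.g. "DATA_DIGEST_MISMATCH|SIZE_MISMATCH"
      std::string s = fmt::format("{}", err);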
    diff --git a/src/common/sharedptr_registry.hpp b/src/common/sharedptr_registry.hpp
    index 3b3cf01bb28a..8c0db6c24a70 100644
    --- a/src/common/sharedptr_registry.hpp
    +++ b/src/common/sharedptr_registry.hpp
    @@ -18,6 +18,7 @@
     #include 
     #include 
     #include "common/ceph_mutex.h"
    +#include "include/ceph_assert.h"
     
     /**
      * Provides a registry of shared_ptr indexed by K while
    @@ -61,6 +62,11 @@ class SharedPtrRegistry {
         waiting(0)
       {}
     
    +  void reset() {
    +    ceph_assert(!waiting);
    +    contents.clear();
    +  }
    +
       bool empty() {
         std::lock_guard l(lock);
         return contents.empty();
    diff --git a/src/common/strtol.cc b/src/common/strtol.cc
    index c9e982b63962..c97942adec53 100644
    --- a/src/common/strtol.cc
    +++ b/src/common/strtol.cc
    @@ -146,43 +146,54 @@ T strict_iec_cast(std::string_view str, std::string *err)
       if (u != std::string_view::npos) {
         n = str.substr(0, u);
         unit = str.substr(u, str.length() - u);
    +    // handling cases when prefixes entered as KB, MB, ...
    +    // and KiB, MiB, ....
    +    if (unit.length() > 1 && unit.back() == 'B') {
    +      unit = unit.substr(0, unit.length() - 1);
    +    }
         // we accept both old si prefixes as well as the proper iec prefixes
         // i.e. K, M, ... and Ki, Mi, ...
    -    if (unit.back() == 'i') {
    -      if (unit.front() == 'B') {
    -        *err = "strict_iecstrtoll: illegal prefix \"Bi\"";
    -        return 0;
    -      }
    -    }
         if (unit.length() > 2) {
           *err = "strict_iecstrtoll: illegal prefix (length > 2)";
           return 0;
         }
    -    switch(unit.front()) {
    -      case 'K':
    -        m = 10;
    -        break;
    -      case 'M':
    -        m = 20;
    -        break;
    -      case 'G':
    -        m = 30;
    -        break;
    -      case 'T':
    -        m = 40;
    -        break;
    -      case 'P':
    -        m = 50;
    -        break;
    -      case 'E':
    -        m = 60;
    -        break;
    -      case 'B':
    -        break;
    -      default:
    -        *err = "strict_iecstrtoll: unit prefix not recognized";
    -        return 0;
    +    if ((unit.back() == 'i') || (unit.length() == 1)) {
    +      if (unit.back() == 'i') {
    +        if (unit.front() == 'B') {
    +          *err = "strict_iecstrtoll: illegal prefix \"Bi\"";
    +          return 0;
    +        }
    +      }
    +      switch(unit.front()) {
    +        case 'K':
    +          m = 10;
    +          break;
    +        case 'M':
    +          m = 20;
    +          break;
    +        case 'G':
    +          m = 30;
    +          break;
    +        case 'T':
    +          m = 40;
    +          break;
    +        case 'P':
    +          m = 50;
    +          break;
    +        case 'E':
    +          m = 60;
    +          break;
    +        case 'B':
    +          break;
    +        default:
    +          *err = ("strict_iecstrtoll: unit prefix not recognized '" + std::string{unit} + "' ");
    +          return 0;
    +      }
         }
    +    else {
    +      *err = ("strict_iecstrtoll: illegal prefix '" + std::string{unit} + "' ");
    +      return 0;
    +    }   
       }
     
       long long ll = strict_strtoll(n, 10, err);
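    For reference, the intent of this change is that SI and IEC prefixes now parse identically with or without a trailing 'B'; a small sketch against the existing strict_iecstrtoll() wrapper (illustrative, not part of the patch):

      std::string err;
      strict_iecstrtoll("4K", &err);    // 4096
      strict_iecstrtoll("4Ki", &err);   // 4096
      strict_iecstrtoll("4KB", &err);   // 4096
      strict_iecstrtoll("4KiB", &err);  // 4096
      strict_iecstrtoll("4KX", &err);   // rejected, err reports an illegal prefix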
    diff --git a/src/common/subsys.h b/src/common/subsys.h
    index e798f987aa0d..67bee2a8b5ac 100644
    --- a/src/common/subsys.h
    +++ b/src/common/subsys.h
    @@ -28,6 +28,7 @@ SUBSYS(mds_locker, 1, 5)
     SUBSYS(mds_log, 1, 5)
     SUBSYS(mds_log_expire, 1, 5)
     SUBSYS(mds_migrator, 1, 5)
    +SUBSYS(mds_quiesce, 3, 5)
     SUBSYS(buffer, 0, 1)
     SUBSYS(timer, 0, 1)
     SUBSYS(filer, 0, 1)
    @@ -64,6 +65,8 @@ SUBSYS(rgw_datacache, 1, 5)
     SUBSYS(rgw_access, 1, 5)
     SUBSYS(rgw_dbstore, 1, 5)
     SUBSYS(rgw_flight, 1, 5)
    +SUBSYS(rgw_lifecycle, 1, 5)
    +SUBSYS(rgw_notification, 1, 5)
     SUBSYS(javaclient, 1, 5)
     SUBSYS(asok, 1, 5)
     SUBSYS(throttle, 1, 1)
    @@ -83,6 +86,7 @@ SUBSYS(prioritycache, 1, 5)
     SUBSYS(test, 0, 5)
     SUBSYS(cephfs_mirror, 0, 5)
     SUBSYS(cephsqlite, 0, 5)
    +SUBSYS(crimson_interrupt, 0, 5)
     SUBSYS(seastore, 0, 5)       // logs above seastore tm
     SUBSYS(seastore_onode, 0, 5)
     SUBSYS(seastore_odata, 0, 5)
    @@ -103,6 +107,7 @@ SUBSYS(cyanstore, 0, 5)
     SUBSYS(ceph_exporter, 1, 5)
     SUBSYS(memstore, 1, 5)
     SUBSYS(trace, 1, 5)
    +SUBSYS(ceph_dedup, 0, 5)
     // *********************************************************************
     // Developers should update /doc/rados/troubleshooting/log-and-debug.rst
     // when adding or removing a subsystem accordingly.
    diff --git a/src/common/sync_filesystem.h b/src/common/sync_filesystem.h
    deleted file mode 100644
    index f457f655df59..000000000000
    --- a/src/common/sync_filesystem.h
    +++ /dev/null
    @@ -1,56 +0,0 @@
    -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    -// vim: ts=8 sw=2 smarttab
    -/*
    - * Ceph - scalable distributed file system
    - *
    - * Copyright (C) 2011 New Dream Network
    - *
    - * This is free software; you can redistribute it and/or
    - * modify it under the terms of the GNU Lesser General Public
    - * License version 2.1, as published by the Free Software
    - * Foundation.  See file COPYING.
    - *
    - */
    -
    -#ifndef CEPH_SYNC_FILESYSTEM_H
    -#define CEPH_SYNC_FILESYSTEM_H
    -
    -#include 
    -
    -#if defined(__linux__)
    -#include 
    -#include 
    -#include "os/fs/btrfs_ioctl.h"
    -#endif
    -
    -inline int sync_filesystem(int fd)
    -{
    -  /* On Linux, newer versions of glibc have a function called syncfs that
    -   * performs a sync on only one filesystem. If we don't have this call, we
    -   * have to fall back on sync(), which synchronizes every filesystem on the
    -   * computer. */
    -#ifdef HAVE_SYS_SYNCFS
    -  if (syncfs(fd) == 0)
    -    return 0;
    -#elif defined(SYS_syncfs)
    -  if (syscall(SYS_syncfs, fd) == 0)
    -    return 0;
    -#elif defined(__NR_syncfs)
    -  if (syscall(__NR_syncfs, fd) == 0)
    -    return 0;
    -#endif
    -
    -#if defined(HAVE_SYS_SYNCFS) || defined(SYS_syncfs) || defined(__NR_syncfs)
    -  else if (errno == ENOSYS) {
    -    sync();
    -    return 0;
    -  } else {
    -    return -errno;
    -  }
    -#else
    -  sync();
    -  return 0;
    -#endif
    -}
    -
    -#endif
    diff --git a/src/common/tracer.cc b/src/common/tracer.cc
    index 1146da319500..6a84480d60b5 100644
    --- a/src/common/tracer.cc
    +++ b/src/common/tracer.cc
    @@ -17,7 +17,7 @@
     namespace tracing {
     
     const opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer> Tracer::noop_tracer = opentelemetry::trace::Provider::GetTracerProvider()->GetTracer("no-op", OPENTELEMETRY_SDK_VERSION);
    -const jspan Tracer::noop_span = noop_tracer->StartSpan("noop");
    +const jspan_ptr Tracer::noop_span = noop_tracer->StartSpan("noop");
     
     using bufferlist = ceph::buffer::list;
     
    @@ -38,7 +38,7 @@ void Tracer::init(CephContext* _cct, opentelemetry::nostd::string_view service_n
       }
     }
     
    -jspan Tracer::start_trace(opentelemetry::nostd::string_view trace_name) {
    +jspan_ptr Tracer::start_trace(opentelemetry::nostd::string_view trace_name) {
       ceph_assert(cct);
       if (is_enabled()) {
         ceph_assert(tracer);
    @@ -48,7 +48,7 @@ jspan Tracer::start_trace(opentelemetry::nostd::string_view trace_name) {
       return noop_span;
     }
     
    -jspan Tracer::start_trace(opentelemetry::nostd::string_view trace_name, bool trace_is_enabled) {
    +jspan_ptr Tracer::start_trace(opentelemetry::nostd::string_view trace_name, bool trace_is_enabled) {
       ceph_assert(cct);
       ldout(cct, 20) << "start trace enabled " << trace_is_enabled << " " << dendl;
       if (trace_is_enabled) {
    @@ -59,9 +59,8 @@ jspan Tracer::start_trace(opentelemetry::nostd::string_view trace_name, bool tra
       return noop_tracer->StartSpan(trace_name);
     }
     
    -jspan Tracer::add_span(opentelemetry::nostd::string_view span_name, const jspan& parent_span) {
    -  if (parent_span && parent_span->IsRecording()) {
    -    ceph_assert(tracer);
    +jspan_ptr Tracer::add_span(opentelemetry::nostd::string_view span_name, const jspan_ptr& parent_span) {
    +  if (is_enabled() && parent_span && parent_span->IsRecording()) {
         opentelemetry::trace::StartSpanOptions span_opts;
         span_opts.parent = parent_span->GetContext();
         ldout(cct, 20) << "adding span " << span_name << " " << dendl;
    @@ -70,7 +69,7 @@ jspan Tracer::add_span(opentelemetry::nostd::string_view span_name, const jspan&
       return noop_span;
     }
     
    -jspan Tracer::add_span(opentelemetry::nostd::string_view span_name, const jspan_context& parent_ctx) {
    +jspan_ptr Tracer::add_span(opentelemetry::nostd::string_view span_name, const jspan_context& parent_ctx) {
       if (parent_ctx.IsValid()) {
         ceph_assert(tracer);
         opentelemetry::trace::StartSpanOptions span_opts;
    @@ -85,41 +84,6 @@ bool Tracer::is_enabled() const {
       return cct->_conf->jaeger_tracing_enable;
     }
     
    -void encode(const jspan_context& span_ctx, bufferlist& bl, uint64_t f) {
    -  ENCODE_START(1, 1, bl);
    -  using namespace opentelemetry;
    -  using namespace trace;
    -  auto is_valid = span_ctx.IsValid();
    -  encode(is_valid, bl);
    -  if (is_valid) {
    -    encode_nohead(std::string_view(reinterpret_cast<const char*>(span_ctx.trace_id().Id().data()), TraceId::kSize), bl);
    -    encode_nohead(std::string_view(reinterpret_cast<const char*>(span_ctx.span_id().Id().data()), SpanId::kSize), bl);
    -    encode(span_ctx.trace_flags().flags(), bl);
    -  }
    -  ENCODE_FINISH(bl);
    -}
    -
    -void decode(jspan_context& span_ctx, bufferlist::const_iterator& bl) {
    -  using namespace opentelemetry;
    -  using namespace trace;
    -  DECODE_START(1, bl);
    -  bool is_valid;
    -  decode(is_valid, bl);
    -  if (is_valid) {
    -    std::array<uint8_t, TraceId::kSize> trace_id;
    -    std::array<uint8_t, SpanId::kSize> span_id;
    -    uint8_t flags;
    -    decode(trace_id, bl);
    -    decode(span_id, bl);
    -    decode(flags, bl);
    -    span_ctx = SpanContext(
    -      TraceId(nostd::span<uint8_t, TraceId::kSize>(trace_id)),
    -      SpanId(nostd::span<uint8_t, SpanId::kSize>(span_id)),
    -      TraceFlags(flags),
    -      true);
    -  }
    -  DECODE_FINISH(bl);
    -}
     } // namespace tracing
     
     #endif // HAVE_JAEGER
    diff --git a/src/common/tracer.h b/src/common/tracer.h
    index 8a19db39021a..ee0b74407545 100644
    --- a/src/common/tracer.h
    +++ b/src/common/tracer.h
    @@ -4,25 +4,32 @@
     #pragma once
     
     #include "acconfig.h"
    -#include "include/buffer.h"
    +#include "include/encoding.h"
     
     #ifdef HAVE_JAEGER
     #include "opentelemetry/trace/provider.h"
     
    -using jspan = opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>;
    +using jspan = opentelemetry::trace::Span;
    +using jspan_ptr = opentelemetry::nostd::shared_ptr;
     using jspan_context = opentelemetry::trace::SpanContext;
     using jspan_attribute = opentelemetry::common::AttributeValue;
     
     namespace tracing {
     
    +static constexpr int TraceIdkSize = 16;
    +static constexpr int SpanIdkSize = 8;
    +static_assert(TraceIdkSize == opentelemetry::trace::TraceId::kSize);
    +static_assert(SpanIdkSize == opentelemetry::trace::SpanId::kSize);
    +
     class Tracer {
      private:
       const static opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer> noop_tracer;
    -  const static jspan noop_span;
    +  const static jspan_ptr noop_span;
       CephContext* cct = nullptr;;
       opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer> tracer;
     
      public:
    +
       Tracer() = default;
     
       void init(CephContext* _cct, opentelemetry::nostd::string_view service_name);
    @@ -30,23 +37,56 @@ class Tracer {
       bool is_enabled() const;
       // creates and returns a new span with `trace_name`
       // this span represents a trace, since it has no parent.
    -  jspan start_trace(opentelemetry::nostd::string_view trace_name);
    +  jspan_ptr start_trace(opentelemetry::nostd::string_view trace_name);
     
       // creates and returns a new span with `trace_name`
       // if false is given to `trace_is_enabled` param, noop span will be returned
    -  jspan start_trace(opentelemetry::nostd::string_view trace_name, bool trace_is_enabled);
    +  jspan_ptr start_trace(opentelemetry::nostd::string_view trace_name, bool trace_is_enabled);
     
       // creates and returns a new span with `span_name` which parent span is `parent_span'
    -  jspan add_span(opentelemetry::nostd::string_view span_name, const jspan& parent_span);
    +  jspan_ptr add_span(opentelemetry::nostd::string_view span_name, const jspan_ptr& parent_span);
       // creates and return a new span with `span_name`
       // the span is added to the trace which it's context is `parent_ctx`.
       // parent_ctx contains the required information of the trace.
    -  jspan add_span(opentelemetry::nostd::string_view span_name, const jspan_context& parent_ctx);
    +  jspan_ptr add_span(opentelemetry::nostd::string_view span_name, const jspan_context& parent_ctx);
     
     };
     
    -void encode(const jspan_context& span, ceph::buffer::list& bl, uint64_t f = 0);
    -void decode(jspan_context& span_ctx, ceph::buffer::list::const_iterator& bl);
    +inline void encode(const jspan_context& span_ctx, bufferlist& bl, uint64_t f = 0) {
    +  ENCODE_START(1, 1, bl);
    +  using namespace opentelemetry;
    +  using namespace trace;
    +  auto is_valid = span_ctx.IsValid();
    +  encode(is_valid, bl);
    +  if (is_valid) {
    +    encode_nohead(std::string_view(reinterpret_cast<const char*>(span_ctx.trace_id().Id().data()), TraceIdkSize), bl);
    +    encode_nohead(std::string_view(reinterpret_cast<const char*>(span_ctx.span_id().Id().data()), SpanIdkSize), bl);
    +    encode(span_ctx.trace_flags().flags(), bl);
    +  }
    +  ENCODE_FINISH(bl);
    +}
    +
    +inline void decode(jspan_context& span_ctx, bufferlist::const_iterator& bl) {
    +  using namespace opentelemetry;
    +  using namespace trace;
    +  DECODE_START(1, bl);
    +  bool is_valid;
    +  decode(is_valid, bl);
    +  if (is_valid) {
    +    std::array<uint8_t, TraceIdkSize> trace_id;
    +    std::array<uint8_t, SpanIdkSize> span_id;
    +    uint8_t flags;
    +    decode(trace_id, bl);
    +    decode(span_id, bl);
    +    decode(flags, bl);
    +    span_ctx = SpanContext(
    +      TraceId(nostd::span<uint8_t, TraceIdkSize>(trace_id)),
    +      SpanId(nostd::span<uint8_t, SpanIdkSize>(span_id)),
    +      TraceFlags(flags),
    +      true);
    +  }
    +  DECODE_FINISH(bl);
    +}
     
     } // namespace tracing
     
    @@ -62,33 +102,44 @@ class Value {
     
     using jspan_attribute = Value;
     
    -struct jspan_context {
    -  jspan_context() {}
    -  jspan_context(bool sampled_flag, bool is_remote) {}
    +namespace opentelemetry {
    +inline namespace v1 {
    +namespace trace {
    +class SpanContext {
    +public:
    +  SpanContext() = default;
    +  SpanContext(bool sampled_flag, bool is_remote) {}
    +  bool IsValid() const { return false;}
     };
    +} // namespace trace
    +} // namespace v1
    +} // namespace opentelemetry
    +
    +using jspan_context = opentelemetry::v1::trace::SpanContext;
     
    -struct span_stub {
    +class jspan {
       jspan_context _ctx;
    +public:
       template <typename T>
       void SetAttribute(std::string_view key, const T& value) const noexcept {}
       void AddEvent(std::string_view) {}
       void AddEvent(std::string_view, std::initializer_list<std::pair<std::string_view, jspan_attribute>> fields) {}
       template <typename T> void AddEvent(std::string_view name, const T& fields = {}) {}
    -  const jspan_context& GetContext() { return _ctx; }
    +  jspan_context GetContext() const { return _ctx; }
       void UpdateName(std::string_view) {}
       bool IsRecording() { return false; }
     };
     
    -class jspan {
    -  span_stub span;
    - public:
    -  span_stub& operator*() { return span; }
    -  const span_stub& operator*() const { return span; }
    -
    -  span_stub* operator->() { return &span; }
    -  const span_stub* operator->() const { return &span; }
    -
    +class jspan_ptr {
    +  jspan span;
    +public:
    +  jspan& operator*() { return span; }
    +  const jspan& operator*() const { return span; }
    +  jspan* operator->() { return &span; }
    +  const jspan* operator->() const { return &span; }
       operator bool() const { return false; }
    +  jspan* get() { return &span; }
    +  const jspan* get() const { return &span; }
     };
     
     namespace tracing {
    @@ -96,14 +147,25 @@ namespace tracing {
     struct Tracer {
       void init(CephContext* _cct, std::string_view service_name) {}
       bool is_enabled() const { return false; }
    -  jspan start_trace(std::string_view, bool enabled = true) { return {}; }
    -  jspan add_span(std::string_view, const jspan&) { return {}; }
    -  jspan add_span(std::string_view span_name, const jspan_context& parent_ctx) { return {}; }
    +  jspan_ptr start_trace(std::string_view, bool enabled = true) { return {}; }
    +  jspan_ptr add_span(std::string_view, const jspan_ptr&) { return {}; }
    +  jspan_ptr add_span(std::string_view span_name, const jspan_context& parent_ctx) { return {}; }
     };
     
    -inline void encode(const jspan_context& span, bufferlist& bl, uint64_t f=0) {}
    -inline void decode(jspan_context& span_ctx, ceph::buffer::list::const_iterator& bl) {}
    +inline void encode(const jspan_context& span_ctx, bufferlist& bl, uint64_t f = 0) {
    +  ENCODE_START(1, 1, bl);
    +  // jaeger is missing, set "is_valid" to false.
    +  bool is_valid = false;
    +  encode(is_valid, bl);
    +  ENCODE_FINISH(bl);
    +}
     
    +inline void decode(jspan_context& span_ctx, bufferlist::const_iterator& bl) {
    +  DECODE_START(254, bl);
    +  // jaeger is missing, consume the buffer but do not decode it.
    +  DECODE_FINISH(bl);
     }
     
    +} // namespace tracing
    +
     #endif // !HAVE_JAEGER
    diff --git a/src/common/version.cc b/src/common/version.cc
    index 96f17863e18f..1f25f629ef3a 100644
    --- a/src/common/version.cc
    +++ b/src/common/version.cc
    @@ -49,7 +49,11 @@ std::string const pretty_version_to_str(void)
       oss << "ceph version " << CEPH_GIT_NICE_VER
           << " (" << STRINGIFY(CEPH_GIT_VER) << ") "
           << ceph_release_name(CEPH_RELEASE)
    -      << " (" << CEPH_RELEASE_TYPE << ")";
    +      << " (" << CEPH_RELEASE_TYPE << ")"
    +#ifdef WITH_SEASTAR
    +      << " (crimson)"
    +#endif
    +      ;
       return oss.str();
     }
     
    diff --git a/src/common/versioned_variant.h b/src/common/versioned_variant.h
    new file mode 100644
    index 000000000000..124c58839169
    --- /dev/null
    +++ b/src/common/versioned_variant.h
    @@ -0,0 +1,234 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#pragma once
    +
    +#include <concepts>
    +#include <limits>
    +#include <utility>
    +#include <variant>
    +
    +#include <boost/mp11/algorithm.hpp> // for mp_with_index
    +#include "include/encoding.h"
    +
    +/// \file
    +/// \brief Contains binary encoding strategies for std::variant.
    +
    +namespace ceph {
    +
    +// null encoding for std::monostate
    +inline void encode(const std::monostate&, bufferlist& bl) {}
    +inline void decode(std::monostate&, bufferlist::const_iterator& p) {}
    +
    +// largest value that can be represented by `__u8 struct_v`
    +inline constexpr size_t max_version = std::numeric_limits<__u8>::max();
    +
    +/// \namespace versioned_variant
    +/// \brief A backward-compatible binary encoding for std::variant.
    +///
    +/// The variant index is encoded in struct_v so the correct decoder can be
    +/// selected. This means that existing variant types cannot be changed or
    +/// removed without breaking the decode of earlier ceph versions. New types
    +/// can only be added to the end of the variant.
    +///
    +/// In addition to struct_v, the variant index is also encoded in compatv. As
    +/// the variant is extended, this means that existing decoders can continue to
    +/// decode the types they recognize, but reject the encodings of new types they
    +/// don't.
    +///
    +/// The variant types themselves are free to change their encodings, provided
    +/// they manage their own versioning. The types must be default-constructible
    +/// so they can be constructed before decode.
    +///
    +/// The contained encode/decode functions won't be found by argument-dependent
    +/// lookup, so you must either qualify the calls with `versioned_variant::` or
    +/// add `using namespace versioned_variant` to the calling scope.
    +namespace versioned_variant {
    +
    +// Requirements for the list of types for versioned std::variant encoding.
    +template <typename ...Ts>
    +concept valid_types = requires {
    +    sizeof...(Ts) > 0; // variant cannot be empty
    +    sizeof...(Ts) <= max_version; // index must fit in u8
    +    requires (std::default_initializable<Ts> && ...); // default-constructible
    +  };
    +
    +/// \brief A versioned_variant encoder.
    +///
    +/// Example:
    +/// \code
    +/// struct example {
    +///   std::variant<int, std::string> value;
    +///
    +///   void encode(bufferlist& bl) const {
    +///     ENCODE_START(0, 0, bl);
    +///     ceph::versioned_variant::encode(value, bl);
    +///     ...
    +/// \endcode
    +template <typename ...Ts> requires valid_types<Ts...>
    +void encode(const std::variant<Ts...>& v, bufferlist& bl, uint64_t features=0)
    +{
    +  // encode the variant index in struct_v and compatv
    +  const uint8_t ver = static_cast<uint8_t>(v.index());
    +  ENCODE_START(ver, ver, bl);
    +  // use the variant type's encoder
    +  std::visit([&bl] (const auto& value) mutable {
    +      encode(value, bl);
    +    }, v);
    +  ENCODE_FINISH(bl);
    +}
    +
    +/// \brief A versioned_variant decoder.
    +///
    +/// Example:
    +/// \code
    +/// struct example {
    +///   std::variant<int, std::string> value;
    +///
    +///   void decode(bufferlist::const_iterator& bl) const {
    +///     DECODE_START(0, bl);
    +///     ceph::versioned_variant::decode(value, bl);
    +///     ...
    +/// \endcode
    +template <typename ...Ts> requires valid_types<Ts...>
    +void decode(std::variant<Ts...>& v, bufferlist::const_iterator& p)
    +{
    +  constexpr uint8_t max_version = sizeof...(Ts) - 1;
    +  DECODE_START(max_version, p);
    +  // use struct_v as an index into the variant after converting it into a
    +  // compile-time index I
    +  const uint8_t index = struct_v;
    +  boost::mp11::mp_with_index<sizeof...(Ts)>(index, [&v, &p] (auto I) {
    +      // default-construct the type at index I and call its decoder
    +      decode(v.template emplace<I>(), p);
    +    });
    +  DECODE_FINISH(p);
    +}
    +
    +} // namespace versioned_variant
    +
    +
    +/// \namespace converted_variant
    +/// \brief A std::variant encoding that is backward-compatible with T.
    +///
    +/// The encoding works the same as versioned_variant, except that a block of
    +/// version numbers are reserved for the first type T to allow its encoding
    +/// to continue evolving. T must itself use versioned encoding (ie
    +/// ENCODE_START/FINISH).
    +///
    +/// This encoding strategy allows a serialized type T to be transparently
    +/// converted into a variant that can represent other types too.
    +namespace converted_variant {
    +
    +// For converted variants, reserve the first 128 versions for the original
    +// type. Variant types after the first use the version numbers above this.
    +inline constexpr uint8_t converted_max_version = 128;
    +
    +// Requirements for the list of types for converted std::variant encoding.
    +template <typename ...Ts>
    +concept valid_types = requires {
    +    sizeof...(Ts) > 0; // variant cannot be empty
    +    sizeof...(Ts) <= (max_version - converted_max_version); // index must fit in u8
    +    requires (std::default_initializable<Ts> && ...); // default-constructible
    +  };
    +
    +/// \brief A converted_variant encoder.
    +///
    +/// Example:
    +/// \code
    +/// struct example {
    +///   std::variant<int, std::string> value; // replaced `int value`
    +///
    +///   void encode(bufferlist& bl) const {
    +///     ENCODE_START(1, 0, bl);
    +///     ceph::converted_variant::encode(value, bl);
    +///     ...
    +/// \endcode
    +template <typename ...Ts> requires valid_types<Ts...>
    +void encode(const std::variant<Ts...>& v, bufferlist& bl, uint64_t features=0)
    +{
    +  const uint8_t index = static_cast<uint8_t>(v.index());
    +  if (index == 0) {
    +    // encode the first type with its own versioning scheme
    +    encode(std::get<0>(v), bl);
    +    return;
    +  }
    +
    +  // encode the variant index in struct_v and compatv
    +  const uint8_t ver = converted_max_version + index;
    +  ENCODE_START(ver, ver, bl);
    +  // use the variant type's encoder
    +  std::visit([&bl] (const auto& value) mutable {
    +      encode(value, bl);
    +    }, v);
    +  ENCODE_FINISH(bl);
    +}
    +
    +/// \brief A converted_variant decoder.
    +///
    +/// Example:
    +/// \code
    +/// struct example {
    +///   std::variant<int, std::string> value; // replaced `int value`
    +///
    +///   void decode(bufferlist::const_iterator& bl) {
    +///     DECODE_START(1, bl);
    +///     ceph::converted_variant::decode(value, bl);
    +///     ...
    +/// \endcode
    +template <typename ...Ts> requires valid_types<Ts...>
    +void decode(std::variant<Ts...>& v, bufferlist::const_iterator& p)
    +{
    +  // save the iterator position so the first type can restart decode
    +  const bufferlist::const_iterator prev = p;
    +
    +  constexpr uint8_t max_version = converted_max_version + sizeof...(Ts) - 1;
    +  DECODE_START(max_version, p);
    +  if (struct_v <= converted_max_version) {
    +    p = prev; // rewind and use type 0's DECODE_START/FINISH
    +    decode(v.template emplace<0>(), p);
    +    return;
    +  }
    +
    +  // use struct_v as an index into the variant after converting it into a
    +  // compile-time index I
    +  const uint8_t index = struct_v - converted_max_version;
    +  boost::mp11::mp_with_index<sizeof...(Ts)>(index, [&v, &p] (auto I) {
    +      // default-construct the type at index I and call its decoder
    +      decode(v.template emplace<I>(), p);
    +    });
    +  DECODE_FINISH(p);
    +}
    +
    +} // namespace converted_variant
    +
    +
    +/// \brief Generate a list with a default-constructed variant of each type.
    +///
    +/// This can be used in generate_test_instances() for types that contain
    +/// variants to ensure that an encoding of each type is present in the
    +/// ceph-object-corpus. This allows the ceph-dencoder tests to catch any
    +/// breaking changes to the variant types that are present in encodings.
    +template <typename ...Ts>
    +void generate_test_instances(std::list<std::variant<Ts...>>& instances)
    +{
    +  // use an immediately-invoked lambda to get a parameter pack of variant indices
    +  [&instances] <std::size_t ...I> (std::index_sequence<I...>) {
    +    // use a fold expression to call emplace_back() for each index in the pack
    +    // use in_place_index to default-construct a variant of the type at index I
    +    (instances.emplace_back(std::in_place_index<I>), ...);
    +  } (std::make_index_sequence<sizeof...(Ts)>{});
    +}
    +
    +} // namespace ceph
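    A usage sketch (the variant alternatives are arbitrary) combining the versioned encoder with the generate_test_instances() helper above:

      struct example {
        std::variant<int, std::string> value;

        void encode(ceph::buffer::list& bl) const {
          ENCODE_START(0, 0, bl);
          ceph::versioned_variant::encode(value, bl);
          ENCODE_FINISH(bl);
        }
        void decode(ceph::buffer::list::const_iterator& p) {
          DECODE_START(0, p);
          ceph::versioned_variant::decode(value, p);
          DECODE_FINISH(p);
        }
        static void generate_test_instances(std::list<example>& o) {
          // one entry per alternative so the corpus covers every variant index
          std::list<std::variant<int, std::string>> variants;
          ceph::generate_test_instances(variants);
          for (auto& v : variants) {
            o.push_back(example{std::move(v)});
          }
        }
      };
      WRITE_CLASS_ENCODER(example)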
    diff --git a/src/common/weighted_shuffle.h b/src/common/weighted_shuffle.h
    index 10def0a011a4..dd8f22da014d 100644
    --- a/src/common/weighted_shuffle.h
    +++ b/src/common/weighted_shuffle.h
    @@ -14,6 +14,8 @@ void weighted_shuffle(RandomIt first, RandomIt last,
     {
       if (first == last) {
         return;
    +  } else if (std::accumulate(weight_first, weight_last, 0) == 0) {
    +    return;
       } else {
         std::discrete_distribution d{weight_first, weight_last};
         if (auto n = d(g); n > 0) {
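    Illustrative only: with the added guard, an all-zero weight sequence is no longer handed to std::discrete_distribution; the call simply returns and leaves the range untouched, e.g.:

      std::vector<int> items = {1, 2, 3};
      std::vector<int> weights = {0, 0, 0};
      std::mt19937 g{std::random_device{}()};
      // all weights are zero: weighted_shuffle() now returns early, items are unchanged
      weighted_shuffle(items.begin(), items.end(), weights.begin(), weights.end(), g);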
    diff --git a/src/common/win32/SubProcess.cc b/src/common/win32/SubProcess.cc
    index 3ed3b4f54c71..59975b1e6d31 100644
    --- a/src/common/win32/SubProcess.cc
    +++ b/src/common/win32/SubProcess.cc
    @@ -18,6 +18,7 @@
     
     #include "common/SubProcess.h"
     #include "common/errno.h"
    +#include "common/win32/wstring.h"
     #include "include/ceph_assert.h"
     #include "include/compat.h"
     
    @@ -174,8 +175,9 @@ int SubProcess::spawn() {
       for (auto& arg : cmd_args) {
         cmdline << " " << std::quoted(arg);
       }
    +  std::wstring cmdline_w = to_wstring(cmdline.str());
     
    -  STARTUPINFO si = {0};
    +  STARTUPINFOW si = {0};
       PROCESS_INFORMATION pi = {0};
       SECURITY_ATTRIBUTES sa = {0};
     
    @@ -224,8 +226,8 @@ int SubProcess::spawn() {
       // We've transfered ownership from those handles.
       stdin_w = stdout_r = stderr_r = INVALID_HANDLE_VALUE;
     
    -  if (!CreateProcess(
    -      NULL, const_cast<char*>(cmdline.str().c_str()),
    +  if (!CreateProcessW(
    +      NULL, const_cast<wchar_t*>(cmdline_w.c_str()),
           NULL, NULL, /* No special security attributes */
           1, /* Inherit handles marked as inheritable */
           0, /* No special flags */
    diff --git a/src/common/win32/code_page.manifest b/src/common/win32/code_page.manifest
    new file mode 100644
    index 000000000000..dab929e1515a
    --- /dev/null
    +++ b/src/common/win32/code_page.manifest
    @@ -0,0 +1,8 @@
    +<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
    +<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
    +  <application>
    +    <windowsSettings>
    +      <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
    +    </windowsSettings>
    +  </application>
    +</assembly>
    diff --git a/src/common/win32/code_page.rc b/src/common/win32/code_page.rc
    new file mode 100644
    index 000000000000..12258c4bd615
    --- /dev/null
    +++ b/src/common/win32/code_page.rc
    @@ -0,0 +1,2 @@
    +#include <windows.h>
    +CREATEPROCESS_MANIFEST_RESOURCE_ID RT_MANIFEST "code_page.manifest"
    diff --git a/src/common/win32/service.cc b/src/common/win32/service.cc
    index 7cf7620bf87b..5e86f1af90da 100644
    --- a/src/common/win32/service.cc
    +++ b/src/common/win32/service.cc
    @@ -86,6 +86,8 @@ void ServiceBase::shutdown(bool ignore_errors)
       DWORD original_state = status.dwCurrentState;
       set_status(SERVICE_STOP_PENDING);
     
    +  dout(0) << "Shutdown requested." << dendl;
    +
       int err = shutdown_hook();
       if (err) {
         derr << "Shutdown service hook failed. Error code: " << err << dendl;
    @@ -108,6 +110,8 @@ void ServiceBase::stop()
       DWORD original_state = status.dwCurrentState;
       set_status(SERVICE_STOP_PENDING);
     
    +  dout(0) << "Service stop requested." << dendl;
    +
       int err = stop_hook();
       if (err) {
         derr << "Service stop hook failed. Error code: " << err << dendl;
    diff --git a/src/compressor/CMakeLists.txt b/src/compressor/CMakeLists.txt
    index 3e99f8b73875..5ebb5d4afb2c 100644
    --- a/src/compressor/CMakeLists.txt
    +++ b/src/compressor/CMakeLists.txt
    @@ -1,19 +1,25 @@
    -
    -set(compressor_srcs
    -  Compressor.cc)
    -if (HAVE_QATZIP)
    -  list(APPEND compressor_srcs QatAccel.cc)
    -endif()
    -add_library(compressor_objs OBJECT ${compressor_srcs})
    +add_library(compressor_objs OBJECT Compressor.cc)
     add_dependencies(compressor_objs common-objs)
    -if(HAVE_QATZIP AND HAVE_QATDRV)
    -  target_link_libraries(compressor_objs PRIVATE
    -                        QatDrv::qat_s
    -                        QatDrv::usdm_drv_s
    -                        qatzip::qatzip
    +target_link_libraries(compressor_objs legacy-option-headers)
    +
    +if(HAVE_QATZIP AND HAVE_QAT)
    +  add_library(qat_compressor OBJECT QatAccel.cc)
    +  target_link_libraries(qat_compressor PUBLIC
    +                        QAT::qat
    +                        QAT::usdm
    +                        QAT::zip
    +                        legacy-option-headers
                            )
     endif()
    -add_dependencies(compressor_objs legacy-option-headers)
    +
    +if (HAVE_UADK)
    +  add_library(uadk_compressor OBJECT UadkAccel.cc)
    +  target_link_libraries(uadk_compressor PUBLIC
    +	                uadk::uadk
    +			uadk::uadkwd
    +			uadk::uadkzip
    +			numa)
    +endif()
     
     ## compressor plugins
     
    @@ -31,8 +37,8 @@ if(HAVE_BROTLI)
       add_subdirectory(brotli)
     endif()
     
    -add_library(compressor STATIC $<TARGET_OBJECTS:compressor_objs>)
    -target_link_libraries(compressor PRIVATE compressor_objs)
    +add_library(compressor STATIC)
    +target_link_libraries(compressor PUBLIC compressor_objs)
     
     set(ceph_compressor_libs
         ceph_snappy
    diff --git a/src/compressor/Compressor.cc b/src/compressor/Compressor.cc
    index 43d34c8eb01e..a13dfb30ddc7 100644
    --- a/src/compressor/Compressor.cc
    +++ b/src/compressor/Compressor.cc
    @@ -26,10 +26,6 @@
     
     namespace TOPNSPC {
     
    -#ifdef HAVE_QATZIP
    -  QatAccel Compressor::qat_accel;
    -#endif
    -
     const char* Compressor::get_comp_alg_name(int a) {
     
       auto p = std::find_if(std::cbegin(compression_algorithms), std::cend(compression_algorithms),
    diff --git a/src/compressor/Compressor.h b/src/compressor/Compressor.h
    index 276cd875a9a8..11f020a0dd24 100644
    --- a/src/compressor/Compressor.h
    +++ b/src/compressor/Compressor.h
    @@ -23,9 +23,6 @@
     #include "include/common_fwd.h"
     #include "include/buffer.h"
     #include "include/int_types.h"
    -#ifdef HAVE_QATZIP
    -  #include "QatAccel.h"
    -#endif
     
     namespace TOPNSPC {
     
    @@ -70,11 +67,6 @@ class Compressor {
         COMP_FORCE                  ///< compress always
       };
     
    -#ifdef HAVE_QATZIP
    -  bool qat_enabled;
    -  static QatAccel qat_accel;
    -#endif
    -
       static const char* get_comp_alg_name(int a);
       static std::optional<int> get_comp_alg_type(std::string_view s);
     
    diff --git a/src/compressor/QatAccel.cc b/src/compressor/QatAccel.cc
    index de19ccfa358e..0c378729bba3 100644
    --- a/src/compressor/QatAccel.cc
    +++ b/src/compressor/QatAccel.cc
    @@ -19,6 +19,7 @@
     #include "common/dout.h"
     #include "common/errno.h"
     #include "QatAccel.h"
    +#include "zlib.h"
     
     // -----------------------------------------------------------------------------
     #define dout_context g_ceph_context
    @@ -33,6 +34,7 @@ static std::ostream& _prefix(std::ostream* _dout)
     // -----------------------------------------------------------------------------
     // default window size for Zlib 1.2.8, negated for raw deflate
     #define ZLIB_DEFAULT_WIN_SIZE -15
    +#define GZIP_WRAPPER 16
     
     /* Estimate data expansion after decompression */
     static const unsigned int expansion_ratio[] = {5, 20, 50, 100, 200, 1000, 10000};
    @@ -42,6 +44,10 @@ void QzSessionDeleter::operator() (struct QzSession_S *session) {
       delete session;
     }
     
    +QzPollingMode_T busy_polling(bool isSet) {
    +  return isSet ? QZ_BUSY_POLLING : QZ_PERIODICAL_POLLING;
    +}
    +
     static bool setup_session(const std::string &alg, QatAccel::session_ptr &session) {
       int rc;
       rc = qzInit(session.get(), QZ_SW_BACKUP_DEFAULT);
    @@ -52,10 +58,12 @@ static bool setup_session(const std::string &alg, QatAccel::session_ptr &session
         rc = qzGetDefaultsDeflate(&params);
         if (rc != QZ_OK)
           return false;
    -    params.data_fmt = QZ_DEFLATE_RAW;
    +
    +    params.data_fmt = QZ_DEFLATE_GZIP_EXT;
         params.common_params.comp_algorithm = QZ_DEFLATE;
         params.common_params.comp_lvl = g_ceph_context->_conf->compressor_zlib_level;
         params.common_params.direction = QZ_DIR_BOTH;
    +    params.common_params.polling_mode = busy_polling(g_ceph_context->_conf.get_val("qat_compressor_busy_polling"));
         rc = qzSetupSessionDeflate(session.get(), ¶ms);
         if (rc != QZ_OK)
           return false;
    @@ -136,16 +144,20 @@ bool QatAccel::init(const std::string &alg) {
       }
     
       alg_name = alg;
    +  windowBits = GZIP_WRAPPER + MAX_WBITS;
    +
       return true;
     }
     
     int QatAccel::compress(const bufferlist &in, bufferlist &out, std::optional<int32_t> &compressor_message) {
    +  dout(20) << "QAT compress" << dendl;
       auto s = get_session(); // get a session from the pool
       if (!s) {
         return -1; // session initialization failed
       }
       auto session = cached_session_t{this, std::move(s)}; // returns to the session pool on destruction
    -  compressor_message = ZLIB_DEFAULT_WIN_SIZE;
    +  compressor_message = windowBits;
    +
       int begin = 1;
       for (auto &i : in.buffers()) {
         const unsigned char* c_in = (unsigned char*) i.c_str();
    @@ -154,7 +166,15 @@ int QatAccel::compress(const bufferlist &in, bufferlist &out, std::optional<int32_t> &compressor_message) {
    +    QzSession_T *sess = session.get();
    +    if(sess->hw_session_stat != QZ_OK) {
    +      if(sess->hw_session_stat == QZ_NO_HW) {
    +        dout(1) << "QAT compressor NOT OK - Using SW: No QAT HW detected" << dendl;
    +      } else {
    +        dout(1) << "QAT compressor NOT OK - session state=" << sess->hw_session_stat << dendl;
    +      }
    +    }
         if (rc != QZ_OK)
           return -1;
         if (begin) {
    @@ -179,6 +199,7 @@ int QatAccel::decompress(bufferlist::const_iterator &p,
     		 size_t compressed_len,
     		 bufferlist &dst,
     		 std::optional<int32_t> compressor_message) {
    +  dout(20) << "QAT decompress" << dendl;
       auto s = get_session(); // get a session from the pool
       if (!s) {
         return -1; // session initialization failed
    @@ -188,28 +209,39 @@ int QatAccel::decompress(bufferlist::const_iterator &p,
     
       int rc = 0;
       bufferlist tmp;
    -  size_t remaining = std::min(p.get_remaining(), compressed_len);
    -
    -  while (remaining) {
    -    unsigned int ratio_idx = 0;
    -    const char* c_in = nullptr;
    -    unsigned int len = p.get_ptr_and_advance(remaining, &c_in);
    -    remaining -= len;
    -    len -= begin;
    -    c_in += begin;
    -    begin = 0;
    -    unsigned int out_len = QZ_HW_BUFF_SZ;
    +  unsigned int ratio_idx = 0;
    +  const char* c_in = nullptr;
    +  p.copy_all(tmp);
    +  c_in = tmp.c_str();
    +  unsigned int len = std::min(tmp.length(), compressed_len);
    +
    +  len -= begin;
    +  c_in += begin;
    +  begin = 0;
     
    -    bufferptr ptr;
    +  bufferptr ptr;
    +  do {
    +    unsigned int out_len = QZ_HW_BUFF_SZ;
    +    unsigned int len_current = len;
         do {
    -      while (out_len <= len * expansion_ratio[ratio_idx]) {
    +      while (out_len <= len_current * expansion_ratio[ratio_idx]) {
             out_len *= 2;
           }
     
           ptr = buffer::create_small_page_aligned(out_len);
    -      rc = qzDecompress(session.get(), (const unsigned char*)c_in, &len, (unsigned char*)ptr.c_str(), &out_len);
    +      QzSession_T *sess = session.get();
    +      rc = qzDecompress(sess, (const unsigned char*)c_in, &len_current, (unsigned char*)ptr.c_str(), &out_len);
    +      if(sess->hw_session_stat != QZ_OK) {
    +        if(sess->hw_session_stat == QZ_NO_HW) {
    +          dout(1) << "QAT decompress NOT OK - Using SW: No QAT HW detected" << dendl;
    +        } else {
    +          dout(1) << "QAT decompress NOT OK - session state=" << sess->hw_session_stat << dendl;
    +        }
    +      }
           ratio_idx++;
         } while (rc == QZ_BUF_ERROR && ratio_idx < std::size(expansion_ratio));
    +    c_in += len_current;
    +    len -= len_current;
     
         if (rc == QZ_OK) {
           dst.append(ptr, 0, out_len);
    @@ -223,7 +255,7 @@ int QatAccel::decompress(bufferlist::const_iterator &p,
           dout(1) << "QAT compressor NOT OK" << dendl;
           return -1;
         }
    -  }
     
    +  } while (len != 0);
       return 0;
     }
    diff --git a/src/compressor/QatAccel.h b/src/compressor/QatAccel.h
    index 3533eff9b8fd..3735fa4616e1 100644
    --- a/src/compressor/QatAccel.h
    +++ b/src/compressor/QatAccel.h
    @@ -49,6 +49,7 @@ class QatAccel {
       std::vector<session_ptr> sessions;
       std::mutex mutex;
       std::string alg_name;
    +  int windowBits;
     };
     
     #endif
    diff --git a/src/compressor/UadkAccel.cc b/src/compressor/UadkAccel.cc
    new file mode 100644
    index 000000000000..dcfa0aa7bb95
    --- /dev/null
    +++ b/src/compressor/UadkAccel.cc
    @@ -0,0 +1,415 @@
    +/* 
    + * Ceph - scalable distributed file system
    + *
    + * Copyright (c) 2024 Huawei Technologies Co., Ltd All rights reserved.
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#include <uadk/wd_comp.h>
    +#include <uadk/wd_sched.h>
    +#include "unistd.h"
    +#include "common/debug.h"
    +#include "UadkAccel.h"
    +
    +using std::ostream;
    +using std::string;
    +
    +#define dout_context g_ceph_context
    +#define dout_subsys ceph_subsys_compressor
    +#undef dout_prefix
    +#define dout_prefix _prefix(_dout)
    +
    +#define NEED_MORE_OUT_BUFFER  5
    +#define PROCESS_NOT_FINISH    6
    +#define UADK_MIN_BUFFER       (32*1024)
    +#define UADK_MAX_BUFFER       (8*1024*1024)
    +
    +static ostream&
    +_prefix(std::ostream* _dout)
    +{
    +  return *_dout << "UadkAccel: ";
    +}
    +
    +static std::atomic<bool> init_called = false;
    +static std::atomic<int> uadk_compressor_thread_num = 0;
    +static std::mutex uadk_lock;
    +
    +struct UadkEngine {
    +  struct wd_ctx_config ctx_cfg;
    +  struct wd_sched *sched;
    +  int numa_id;
    +} engine;
    +
    +// helper function, kept so a custom scheduling policy can hook in; a negative poll result is propagated, anything else is mapped to 0.
    +static int lib_poll_func(__u32 pos, __u32 expect, __u32 *count)
    +{
    +  int ret = wd_comp_poll_ctx(pos, expect, count);
    +  if (ret < 0)
    +    return ret;
    +  return 0;
    +}
    +
    +static int uadk_init()
    +{
    +  dout(10) << __func__ << ": uadk_init()." << dendl;
    +  if (init_called) {
    +    dout(10) << __func__ << ": UADK already init." << dendl;
    +    return 0;
    +  }
    +
    +  int ret = 0;
    +  engine.sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 2, 4, lib_poll_func);
    +
    +  if (engine.sched == nullptr) {
    +    derr << __func__ << ": wd_sched_rr_alloc fail" << dendl;
    +    return -ENOMEM;
    +  }
    +  engine.sched->name = "sched_rr";
    +
    +  struct uacce_dev *uadk_dev = wd_get_accel_dev("zlib");
    +  if (uadk_dev == nullptr) {
    +    derr << __func__ << ": cannot get uadk device " << dendl;
    +    wd_sched_rr_release(engine.sched);
    +    engine.sched = nullptr;
    +    return -ECANCELED;
    +  }
    +  engine.numa_id = uadk_dev->numa_id;
    +  uint64_t cmprs_ctx_num = g_ceph_context->_conf.get_val("uadk_wd_sync_ctx_num");
    +  engine.ctx_cfg.ctx_num = cmprs_ctx_num;
    +  engine.ctx_cfg.ctxs = new wd_ctx[cmprs_ctx_num];
    +
    +  unsigned int i;
    +
    +  /******** request ctxs (compress ctx num + decompress ctx num) ********/
    +  for (i = 0; i != cmprs_ctx_num; ++i) {
    +    engine.ctx_cfg.ctxs[i].ctx = wd_request_ctx(uadk_dev);
    +    if (!engine.ctx_cfg.ctxs[i].ctx) {
    +      derr << __func__ << ": UADK ctx ERROR !" << dendl;
    +      ret = -ECANCELED;
    +      goto out_fill;
    +    }
    +  }
    +
    +  struct sched_params param;
    +  /******** create sched instance for compress ctx ********/
    +  for(unsigned int m = 0; m != cmprs_ctx_num / 2; ++m) {
    +    engine.ctx_cfg.ctxs[m].op_type = WD_DIR_COMPRESS;
    +    engine.ctx_cfg.ctxs[m].ctx_mode = CTX_MODE_SYNC;
    +  }
    +  param.numa_id = engine.numa_id;
    +  param.type = WD_DIR_COMPRESS;
    +  param.mode = CTX_MODE_SYNC;
    +  param.begin = 0;
    +  param.end = cmprs_ctx_num / 2 - 1;
    +
    +  ret = wd_sched_rr_instance((const struct wd_sched *)engine.sched, &param);
    +  if (ret < 0) {
    +    derr << __func__ << ": Fail to fill compress sched region."
    +	 << "(" << ret << ")" << dendl;
    +    goto out_fill;
    +  }
    +
    +  /******** create sched instance for decompress ctx ********/
    +  for(unsigned int m = cmprs_ctx_num / 2; m != cmprs_ctx_num; ++m) {
    +    engine.ctx_cfg.ctxs[m].op_type = WD_DIR_DECOMPRESS;
    +    engine.ctx_cfg.ctxs[m].ctx_mode = CTX_MODE_SYNC;
    +  }
    +  param.type = WD_DIR_DECOMPRESS;
    +  param.mode = CTX_MODE_SYNC;
    +  param.begin = cmprs_ctx_num / 2;
    +  param.end = cmprs_ctx_num - 1;
    +  ret = wd_sched_rr_instance((const struct wd_sched *)engine.sched, &param);
    +  if (ret < 0) {
    +    derr << __func__ << ": Fail to fill decompress sched region."
    +	 << "(" << ret << ")" << dendl;
    +    goto out_fill;
    +  }
    +
    +  ret = wd_comp_init(&engine.ctx_cfg, engine.sched);
    +  if (ret != 0) {
    +    derr << __func__ << ": fail to init UADK !"
    +	 << "(" << ret << ")" << dendl;
    +    goto out_fill;
    +  }
    +
    +  free(uadk_dev);
    +  uadk_dev = nullptr;
    +  init_called = true;
    +  return 0;
    +
    +out_fill:
    +  for (unsigned int j = 0; j != i; ++j)
    +    wd_release_ctx(engine.ctx_cfg.ctxs[j].ctx);
    +
    +  delete[] engine.ctx_cfg.ctxs;
    +  wd_sched_rr_release(engine.sched);
    +  engine.sched = nullptr;
    +  free(uadk_dev);
    +  uadk_dev = nullptr;
    +  return ret;
    +}
    +
    +bool UadkAccel::init()
    +{
    +  dout(10) << __func__ << ": UadkAccel::init" << dendl;
    +  ++uadk_compressor_thread_num;
    +
    +  if (init_called) {
    +    dout(10) << __func__ << ": UADK already init." << dendl;
    +    return true;
    +  }
    +
    +  uadk_lock.lock();
    +  int ret = uadk_init();
    +  uadk_lock.unlock();
    +
    +  if (ret != 0) {
    +    derr << __func__ << ": fail to init uadk.(ret=" << ret << ")" << dendl;
    +    --uadk_compressor_thread_num;
    +    return false;
    +  }
    +
    +  return true;
    +}
    +
    +handle_t UadkAccel::create_comp_session()
    +{
    +  struct wd_comp_sess_setup setup;
    +  struct sched_params ss_param = {0};
    +
    +  setup.op_type = WD_DIR_COMPRESS;
    +  setup.alg_type = WD_ZLIB;
    +  setup.comp_lv = WD_COMP_L1;
    +  setup.win_sz = WD_COMP_WS_8K;
    +
    +  ss_param.type = setup.op_type;
    +  ss_param.numa_id = engine.numa_id;
    +  setup.sched_param = &ss_param;
    +  handle_t h_comp_sess = wd_comp_alloc_sess(&setup);
    +  return h_comp_sess;
    +}
    +
    +void UadkAccel::free_session(handle_t h_comp_sess)
    +{
    +  if (h_comp_sess) {
    +    wd_comp_free_sess(h_comp_sess);
    +    h_comp_sess = 0;
    +  }
    +}
    +
    +handle_t UadkAccel::create_decomp_session()
    +{
    +  struct wd_comp_sess_setup de_setup;
    +  struct sched_params ss_de_param = {0};
    +
    +  de_setup.op_type = WD_DIR_DECOMPRESS;
    +  de_setup.alg_type = WD_ZLIB;
    +  de_setup.comp_lv = WD_COMP_L1;
    +  de_setup.win_sz = WD_COMP_WS_32K;
    +
    +  ss_de_param.type = de_setup.op_type;
    +  ss_de_param.numa_id = engine.numa_id;
    +  de_setup.sched_param = &ss_de_param;
    +  handle_t h_decomp_sess = wd_comp_alloc_sess(&de_setup);
    +  return h_decomp_sess;
    +}
    +
    +int UadkAccel::uadk_do_compress(handle_t h_sess, const unsigned char* in, unsigned int &inlen,
    +		                           unsigned char *out, unsigned int &outlen, bool last_packet)
    +{
    +  struct wd_comp_req req;
    +
    +  req.op_type = WD_DIR_COMPRESS;
    +  req.src = const_cast<unsigned char *>(in);
    +  req.src_len = inlen;
    +  req.dst = out;
    +  req.dst_len = outlen;
    +  req.data_fmt = WD_FLAT_BUF;
    +  req.cb = nullptr;
    +  req.last = last_packet;
    +  int ret = wd_do_comp_strm(h_sess, &req);
    +  if (ret == 0) {
    +    if (inlen > req.src_len) {
    +      inlen = req.src_len;
    +      outlen = req.dst_len;
    +      return NEED_MORE_OUT_BUFFER;
    +    } else {
    +      outlen = req.dst_len;
    +      return ret;
    +    }
    +  }
    +
    +  return ret;
    +}
    +
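    +// compress() walks the source bufferlist one buffer at a time and feeds it to
    +// the hardware in chunks of at most UADK_MAX_BUFFER bytes.  Each chunk gets an
    +// output buffer of roughly twice its size, clamped to [UADK_MIN_BUFFER,
    +// UADK_MAX_BUFFER]; the very first output byte is reserved as a compressor
    +// variation mark.  If the device consumes the input only partially
    +// (NEED_MORE_OUT_BUFFER), the loop retries with the remaining bytes.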
    +int UadkAccel::compress(const bufferlist &in, bufferlist &out)
    +{
    +  handle_t h_comp_sess = create_comp_session();
    +  unsigned int begin = 1;
    +  unsigned int out_len = 0;
    +  for (ceph::bufferlist::buffers_t::const_iterator i = in.buffers().begin(); i != in.buffers().end();) {
    +    const unsigned char* c_in = (unsigned char*) (*i).c_str();
    +    unsigned int len = (*i).length();
    +    unsigned int in_len = len;
    +    int ret = 0;
    +    ++i;
    +
    +    bool last_ptr = (i == in.buffers().end());
    +
    +    do {
    +      if (len * 2 < UADK_MIN_BUFFER) {
    +        out_len = UADK_MIN_BUFFER;
    +      } else {
    +        out_len = std::min(UADK_MAX_BUFFER, len * 2);
    +      }
    +      bufferptr ptr = buffer::create_small_page_aligned(out_len);
    +      unsigned char* c_out = (unsigned char*)ptr.c_str() + begin;
    +      in_len = std::min(UADK_MAX_BUFFER, in_len);
    +      if (begin) {
    +        // reserve the first byte as a compressor variation mark at the front of the compressed stream (not interpreted at the moment)
    +        ptr.c_str()[0] = 0;
    +	out_len -= begin;
    +      }
    +
    +      bool last_packet = last_ptr && (in_len == len);
    +      memset(c_out, 0, out_len);
    +      ret = uadk_do_compress(h_comp_sess, c_in, in_len, c_out, out_len, last_packet);
    +      if (ret < 0) {
    +        derr << __func__ << ": UADK deflation failed."
    +	     << "(" << ret << ")" << dendl;
    +	free_session(h_comp_sess);
    +	return ret;
    +      }
    +
    +      c_in += in_len;
    +      in_len = len - in_len;
    +      len = in_len;
    +
    +      out.append(ptr, 0, out_len + begin);
    +      begin = 0;
    +    } while (ret == NEED_MORE_OUT_BUFFER || len > 0);
    +  }
    +
    +  free_session(h_comp_sess);
    +  return 0;
    +}
    +
    +int UadkAccel::uadk_do_decompress(handle_t h_sess, const unsigned char *in, unsigned int &inlen,
    +		                             unsigned char *out, unsigned int &outlen)
    +{
    +  struct wd_comp_req req;
    +
    +  req.op_type = WD_DIR_DECOMPRESS;
    +  req.data_fmt = WD_FLAT_BUF;
    +  req.cb = nullptr;
    +
    +  req.src = const_cast<unsigned char *>(in);
    +  req.src_len = inlen;
    +  req.dst = out;
    +  req.dst_len = outlen;
    +
    +  int ret = wd_do_comp_strm(h_sess, &req);
    +
    +  if (ret == 0) {
    +    if (inlen > req.src_len) {
    +      inlen = req.src_len;
    +      outlen = req.dst_len;
    +      return NEED_MORE_OUT_BUFFER;
    +    } else if (req.status != WD_STREAM_END) {
    +      inlen = req.src_len;
    +      outlen = req.dst_len;
    +      return PROCESS_NOT_FINISH;
    +    } else {
    +      outlen = req.dst_len;
    +      return ret;
    +    }
    +  }
    +
    +  return ret;
    +}
    +
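    +// Returns roughly floor(log2(m / n)) + 2; decompress() uses it as a
    +// conservative estimate (with headroom) of how much the next input chunk will
    +// expand when sizing its output buffer (len << probe_ratio).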
    +unsigned int cal_approx_ratio(unsigned int n, unsigned m)
    +{
    +  unsigned int x = 0;
    +  m /= n;
    +  while (m != 0) {
    +    m >>= 1;
    +    ++x;
    +  }
    +  return x + 1;
    +}
    +
    +int UadkAccel::decompress(bufferlist::const_iterator &p, size_t compressed_len, bufferlist &dst)
    +{
    +  handle_t h_decomp_sess = create_decomp_session();
    +  unsigned int begin = 1;
    +  unsigned int out_len = 0;
    +  unsigned int probe_ratio = 2;
    +  bufferptr ptr;
    +  size_t remaining = std::min(p.get_remaining(), compressed_len);
    +
    +  while (remaining) {
    +    const char *c_in;
    +    unsigned int len = p.get_ptr_and_advance(remaining, &c_in) - begin;
    +    unsigned int in_len = len;
    +    unsigned char *in = (unsigned char *)c_in + begin;
    +    int ret = 0;
    +
    +    remaining -= (in_len + begin);
    +    begin = 0;
    +
    +    do {
    +      if ((len << probe_ratio) < UADK_MIN_BUFFER) {
    +        out_len = UADK_MIN_BUFFER;
    +      } else {
    +        out_len = std::min(UADK_MAX_BUFFER, (len << probe_ratio));
    +      }
    +      ptr = buffer::create_small_page_aligned(out_len);
    +      unsigned char* out = (unsigned char*)ptr.c_str();
    +      in_len = std::min(UADK_MAX_BUFFER, in_len);
    +      memset(out, 0, out_len);
    +      ret = uadk_do_decompress(h_decomp_sess, in, in_len, out, out_len);
    +      if (ret < 0) {
    +        derr << __func__ << ": UADK inflation failed.(ret=" << ret << ")" << dendl;
    +	free_session(h_decomp_sess);
    +	return ret;
    +      }
    +
    +      probe_ratio = cal_approx_ratio(in_len, out_len);
    +      in += in_len;
    +      in_len = len - in_len;
    +      len = in_len;
    +      dst.append(ptr, 0, out_len);
    +    } while (ret == NEED_MORE_OUT_BUFFER || (ret == PROCESS_NOT_FINISH && remaining == 0) || len > 0);
    +  }
    +
    +  free_session(h_decomp_sess);
    +  return 0;
    +}
    +
    +void UadkAccel::destroy()
    +{
    +  if (!init_called) {
    +    return;
    +  }
    +
    +  if (--uadk_compressor_thread_num != 0) {
    +    dout(10) << __func__ << ": " << uadk_compressor_thread_num << " threads need uadk zip" << dendl;
    +    return;
    +  }
    +
    +  wd_comp_uninit();
    +
    +  for (unsigned int i = 0; i < engine.ctx_cfg.ctx_num; i++) {
    +    wd_release_ctx(engine.ctx_cfg.ctxs[i].ctx);
    +  }
    +  delete[] engine.ctx_cfg.ctxs;
    +  wd_sched_rr_release(engine.sched);
    +  engine.sched = nullptr;
    +  init_called = false;
    +}
    diff --git a/src/compressor/UadkAccel.h b/src/compressor/UadkAccel.h
    new file mode 100644
    index 000000000000..a6268411547b
    --- /dev/null
    +++ b/src/compressor/UadkAccel.h
    @@ -0,0 +1,42 @@
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright (c) 2024 Huawei Technologies Co., Ltd All rights reserved.
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#ifndef CEPH_UadkAccel_H
    +#define CEPH_UadkAccel_H
    +
    +#include "include/buffer.h"
    +
    +extern "C" {
    +#include 
    +#include 
    +#include 
    +}
    +
    +class UadkAccel {
    +  public:
    +      UadkAccel() {  }
    +      ~UadkAccel() { destroy(); }
    +
    +      bool init();
    +      void destroy();
    +
    +      int compress(const bufferlist &in, bufferlist &out);
    +      int decompress(bufferlist::const_iterator &p, size_t compressed_len, bufferlist &dst);
    +  private:
    +      int uadk_do_compress(handle_t h_sess, const unsigned char *in, unsigned int &inlen, unsigned char *out, unsigned int &outlen, bool last_packet);
    +      int uadk_do_decompress(handle_t h_sess, const unsigned char *in, unsigned int &inlen, unsigned char *out, unsigned int &outlen);
    +      handle_t create_comp_session();
    +      handle_t create_decomp_session();
    +      void free_session(handle_t h_sess);
    +};
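    +
    +// Illustrative usage only (not part of the interface contract): a caller such
    +// as ZlibCompressor is expected to drive the accelerator roughly like this,
    +// assuming uadk_compressor_enabled is set and UADK hardware is present:
    +//
    +//   UadkAccel accel;
    +//   if (accel.init()) {
    +//     bufferlist compressed;
    +//     accel.compress(src, compressed);           // src: uncompressed payload
    +//     auto it = std::cbegin(compressed);
    +//     bufferlist restored;
    +//     accel.decompress(it, compressed.length(), restored);
    +//   }                                            // destroy() runs in the dtor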
    +
    +#endif
    diff --git a/src/compressor/lz4/CMakeLists.txt b/src/compressor/lz4/CMakeLists.txt
    index ff8e14c298c7..689baa375256 100644
    --- a/src/compressor/lz4/CMakeLists.txt
    +++ b/src/compressor/lz4/CMakeLists.txt
    @@ -2,11 +2,17 @@
     
     set(lz4_sources
       CompressionPluginLZ4.cc
    +  LZ4Compressor.cc
     )
     
     add_library(ceph_lz4 SHARED ${lz4_sources})
     target_link_libraries(ceph_lz4
    -  PRIVATE LZ4::LZ4 compressor $<$:ceph-common>)
    +  PRIVATE
    +  legacy-option-headers
    +  LZ4::LZ4 compressor $<$:ceph-common>)
    +if(HAVE_QATZIP AND HAVE_QAT)
    +  target_link_libraries(ceph_lz4 PRIVATE qat_compressor)
    +endif()
     set_target_properties(ceph_lz4 PROPERTIES
       VERSION 2.0.0
       SOVERSION 2
    diff --git a/src/compressor/lz4/LZ4Compressor.cc b/src/compressor/lz4/LZ4Compressor.cc
    new file mode 100644
    index 000000000000..a209a5ac149f
    --- /dev/null
    +++ b/src/compressor/lz4/LZ4Compressor.cc
    @@ -0,0 +1,149 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
    +// vim: ts=8 sw=2 smarttab
    +/*
    + * Ceph - scalable distributed file system
    + *
    + * Copyright contributors to the Ceph project
    + *
    + * This is free software; you can redistribute it and/or
    + * modify it under the terms of the GNU Lesser General Public
    + * License version 2.1, as published by the Free Software 
    + * Foundation.  See file COPYING.
    + *
    + */
    +
    +#include "LZ4Compressor.h"
    +#include "common/ceph_context.h"
    +#ifdef HAVE_QATZIP
    +  #include "compressor/QatAccel.h"
    +#endif
    +
    +#ifdef HAVE_QATZIP
    +QatAccel LZ4Compressor::qat_accel;
    +#endif
    +
    +LZ4Compressor::LZ4Compressor(CephContext* cct)
    +  : Compressor(COMP_ALG_LZ4, "lz4")
    +{
    +#ifdef HAVE_QATZIP
    +  if (cct->_conf->qat_compressor_enabled && qat_accel.init("lz4"))
    +    qat_enabled = true;
    +  else
    +    qat_enabled = false;
    +#endif
    +}
    +
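    +// The encoded stream produced below is: a u32 chunk count, then one
    +// (origin_len, compressed_len) u32 pair per source chunk, followed by the
    +// concatenated LZ4 blocks.  decompress() reads the same layout back.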
    +int LZ4Compressor::compress(const ceph::buffer::list &src,
    +                            ceph::buffer::list &dst,
    +                            std::optional<int32_t> &compressor_message)
    +{
    +  // older versions of liblz4 introduce bit errors when compressing
    +  // fragmented buffers.  this was fixed in lz4 commit
    +  // af127334670a5e7b710bbd6adb71aa7c3ef0cd72, which first
    +  // appeared in v1.8.2.
    +  //
    +  // workaround: rebuild if not contiguous.
    +  if (!src.is_contiguous()) {
    +    ceph::buffer::list new_src = src;
    +    new_src.rebuild();
    +    return compress(new_src, dst, compressor_message);
    +  }
    +
    +#ifdef HAVE_QATZIP
    +  if (qat_enabled)
    +    return qat_accel.compress(src, dst, compressor_message);
    +#endif
    +  ceph::buffer::ptr outptr = ceph::buffer::create_small_page_aligned(
    +    LZ4_compressBound(src.length()));
    +  LZ4_stream_t lz4_stream;
    +  LZ4_resetStream(&lz4_stream);
    +
    +  using ceph::encode;
    +
    +  auto p = src.begin();
    +  size_t left = src.length();
    +  int pos = 0;
    +  const char *data;
    +  unsigned num = src.get_num_buffers();
    +  encode((uint32_t)num, dst);
    +  while (left) {
    +    uint32_t origin_len = p.get_ptr_and_advance(left, &data);
    +    int compressed_len = LZ4_compress_fast_continue(
    +      &lz4_stream, data, outptr.c_str()+pos, origin_len,
    +      outptr.length()-pos, 1);
    +    if (compressed_len <= 0)
    +      return -1;
    +    pos += compressed_len;
    +    left -= origin_len;
    +    encode(origin_len, dst);
    +    encode((uint32_t)compressed_len, dst);
    +  }
    +  ceph_assert(p.end());
    +
    +  dst.append(outptr, 0, pos);
    +  return 0;
    +}
    +
    +int LZ4Compressor::decompress(const ceph::buffer::list &src,
    +                              ceph::buffer::list &dst,
    +                              std::optional<int32_t> compressor_message)
    +{
    +#ifdef HAVE_QATZIP
    +  if (qat_enabled)
    +    return qat_accel.decompress(src, dst, compressor_message);
    +#endif
    +  auto i = std::cbegin(src);
    +  return decompress(i, src.length(), dst, compressor_message);
    +}
    +
    +int LZ4Compressor::decompress(ceph::buffer::list::const_iterator &p,
    +                              size_t compressed_len,
    +                              ceph::buffer::list &dst,
    +                              std::optional<int32_t> compressor_message)
    +{
    +#ifdef HAVE_QATZIP
    +  if (qat_enabled)
    +    return qat_accel.decompress(p, compressed_len, dst, compressor_message);
    +#endif
    +  using ceph::decode;
    +  uint32_t count;
    +  decode(count, p);
    +  std::vector<std::pair<uint32_t, uint32_t>> compressed_pairs(count);
    +  uint32_t total_origin = 0;
    +  for (auto& [dst_size, src_size] : compressed_pairs) {
    +    decode(dst_size, p);
    +    decode(src_size, p);
    +    total_origin += dst_size;
    +  }
    +  compressed_len -= (sizeof(uint32_t) + sizeof(uint32_t) * count * 2);
    +
    +  ceph::buffer::ptr dstptr(total_origin);
    +  LZ4_streamDecode_t lz4_stream_decode;
    +  LZ4_setStreamDecode(&lz4_stream_decode, nullptr, 0);
    +
    +  ceph::buffer::ptr cur_ptr = p.get_current_ptr();
    +  ceph::buffer::ptr *ptr = &cur_ptr;
    +  std::optional<ceph::buffer::ptr> data_holder;
    +  if (compressed_len != cur_ptr.length()) {
    +    data_holder.emplace(compressed_len);
    +    p.copy_deep(compressed_len, *data_holder);
    +    ptr = &*data_holder;
    +  }
    +
    +  char *c_in = ptr->c_str();
    +  char *c_out = dstptr.c_str();
    +  for (unsigned i = 0; i < count; ++i) {
    +    int r = LZ4_decompress_safe_continue(
    +        &lz4_stream_decode, c_in, c_out, compressed_pairs[i].second, compressed_pairs[i].first);
    +    if (r == (int)compressed_pairs[i].first) {
    +      c_in += compressed_pairs[i].second;
    +      c_out += compressed_pairs[i].first;
    +    } else if (r < 0) {
    +      return -1;
    +    } else {
    +      return -2;
    +    }
    +  }
    +  dst.push_back(std::move(dstptr));
    +  return 0;
    +}
    diff --git a/src/compressor/lz4/LZ4Compressor.h b/src/compressor/lz4/LZ4Compressor.h
    index eca08e1a57ac..6939aae7609a 100644
    --- a/src/compressor/lz4/LZ4Compressor.h
    +++ b/src/compressor/lz4/LZ4Compressor.h
    @@ -23,125 +23,29 @@
     #include "include/encoding.h"
     #include "common/config.h"
     
    +class QatAccel;
     
     class LZ4Compressor : public Compressor {
    - public:
    -  LZ4Compressor(CephContext* cct) : Compressor(COMP_ALG_LZ4, "lz4") {
     #ifdef HAVE_QATZIP
    -    if (cct->_conf->qat_compressor_enabled && qat_accel.init("lz4"))
    -      qat_enabled = true;
    -    else
    -      qat_enabled = false;
    +  bool qat_enabled;
    +  static QatAccel qat_accel;
     #endif
    -  }
    -
    -  int compress(const ceph::buffer::list &src, ceph::buffer::list &dst, std::optional &compressor_message) override {
    -    // older versions of liblz4 introduce bit errors when compressing
    -    // fragmented buffers.  this was fixed in lz4 commit
    -    // af127334670a5e7b710bbd6adb71aa7c3ef0cd72, which first
    -    // appeared in v1.8.2.
    -    //
    -    // workaround: rebuild if not contiguous.
    -    if (!src.is_contiguous()) {
    -      ceph::buffer::list new_src = src;
    -      new_src.rebuild();
    -      return compress(new_src, dst, compressor_message);
    -    }
     
    -#ifdef HAVE_QATZIP
    -    if (qat_enabled)
    -      return qat_accel.compress(src, dst, compressor_message);
    -#endif
    -    ceph::buffer::ptr outptr = ceph::buffer::create_small_page_aligned(
    -      LZ4_compressBound(src.length()));
    -    LZ4_stream_t lz4_stream;
    -    LZ4_resetStream(&lz4_stream);
    -
    -    using ceph::encode;
    -
    -    auto p = src.begin();
    -    size_t left = src.length();
    -    int pos = 0;
    -    const char *data;
    -    unsigned num = src.get_num_buffers();
    -    encode((uint32_t)num, dst);
    -    while (left) {
    -      uint32_t origin_len = p.get_ptr_and_advance(left, &data);
    -      int compressed_len = LZ4_compress_fast_continue(
    -        &lz4_stream, data, outptr.c_str()+pos, origin_len,
    -        outptr.length()-pos, 1);
    -      if (compressed_len <= 0)
    -        return -1;
    -      pos += compressed_len;
    -      left -= origin_len;
    -      encode(origin_len, dst);
    -      encode((uint32_t)compressed_len, dst);
    -    }
    -    ceph_assert(p.end());
    + public:
    +  explicit LZ4Compressor(CephContext* cct);
     
    -    dst.append(outptr, 0, pos);
    -    return 0;
    -  }
    +  int compress(const ceph::buffer::list &src,
    +               ceph::buffer::list &dst,
    +               std::optional<int32_t> &compressor_message) override;
     
    -  int decompress(const ceph::buffer::list &src, ceph::buffer::list &dst, std::optional compressor_message) override {
    -#ifdef HAVE_QATZIP
    -    if (qat_enabled)
    -      return qat_accel.decompress(src, dst, compressor_message);
    -#endif
    -    auto i = std::cbegin(src);
    -    return decompress(i, src.length(), dst, compressor_message);
    -  }
    +  int decompress(const ceph::buffer::list &src,
    +                 ceph::buffer::list &dst,
    +                 std::optional<int32_t> compressor_message) override;
     
       int decompress(ceph::buffer::list::const_iterator &p,
     		 size_t compressed_len,
     		 ceph::buffer::list &dst,
    -		 std::optional compressor_message) override {
    -#ifdef HAVE_QATZIP
    -    if (qat_enabled)
    -      return qat_accel.decompress(p, compressed_len, dst, compressor_message);
    -#endif
    -    using ceph::decode;
    -    uint32_t count;
    -    decode(count, p);
    -    std::vector > compressed_pairs(count);
    -    uint32_t total_origin = 0;
    -    for (auto& [dst_size, src_size] : compressed_pairs) {
    -      decode(dst_size, p);
    -      decode(src_size, p);
    -      total_origin += dst_size;
    -    }
    -    compressed_len -= (sizeof(uint32_t) + sizeof(uint32_t) * count * 2);
    -
    -    ceph::buffer::ptr dstptr(total_origin);
    -    LZ4_streamDecode_t lz4_stream_decode;
    -    LZ4_setStreamDecode(&lz4_stream_decode, nullptr, 0);
    -
    -    ceph::buffer::ptr cur_ptr = p.get_current_ptr();
    -    ceph::buffer::ptr *ptr = &cur_ptr;
    -    std::optional data_holder;
    -    if (compressed_len != cur_ptr.length()) {
    -      data_holder.emplace(compressed_len);
    -      p.copy_deep(compressed_len, *data_holder);
    -      ptr = &*data_holder;
    -    }
    -
    -    char *c_in = ptr->c_str();
    -    char *c_out = dstptr.c_str();
    -    for (unsigned i = 0; i < count; ++i) {
    -      int r = LZ4_decompress_safe_continue(
    -          &lz4_stream_decode, c_in, c_out, compressed_pairs[i].second, compressed_pairs[i].first);
    -      if (r == (int)compressed_pairs[i].first) {
    -        c_in += compressed_pairs[i].second;
    -        c_out += compressed_pairs[i].first;
    -      } else if (r < 0) {
    -        return -1;
    -      } else {
    -        return -2;
    -      }
    -    }
    -    dst.push_back(std::move(dstptr));
    -    return 0;
    -  }
    +		 std::optional<int32_t> compressor_message) override;
     };
     
     #endif
    diff --git a/src/compressor/snappy/CMakeLists.txt b/src/compressor/snappy/CMakeLists.txt
    index d1ba3b2e7466..5f12f6a806d6 100644
    --- a/src/compressor/snappy/CMakeLists.txt
    +++ b/src/compressor/snappy/CMakeLists.txt
    @@ -6,7 +6,9 @@ set(snappy_sources
     
     add_library(ceph_snappy SHARED ${snappy_sources})
     target_link_libraries(ceph_snappy
    -  PRIVATE snappy::snappy compressor $<$:ceph-common>)
    +  PRIVATE
    +    legacy-option-headers
    +    snappy::snappy compressor $<$:ceph-common>)
     set_target_properties(ceph_snappy PROPERTIES
       VERSION 2.0.0
       SOVERSION 2
    diff --git a/src/compressor/snappy/SnappyCompressor.h b/src/compressor/snappy/SnappyCompressor.h
    index 8150f783c157..b635581068ae 100644
    --- a/src/compressor/snappy/SnappyCompressor.h
    +++ b/src/compressor/snappy/SnappyCompressor.h
    @@ -58,19 +58,9 @@ class CEPH_BUFFER_API BufferlistSource : public snappy::Source {
     class SnappyCompressor : public Compressor {
      public:
       SnappyCompressor(CephContext* cct) : Compressor(COMP_ALG_SNAPPY, "snappy") {
    -#ifdef HAVE_QATZIP
    -    if (cct->_conf->qat_compressor_enabled && qat_accel.init("snappy"))
    -      qat_enabled = true;
    -    else
    -      qat_enabled = false;
    -#endif
       }
     
       int compress(const ceph::bufferlist &src, ceph::bufferlist &dst, std::optional &compressor_message) override {
    -#ifdef HAVE_QATZIP
    -    if (qat_enabled)
    -      return qat_accel.compress(src, dst, compressor_message);
    -#endif
         BufferlistSource source(const_cast(src).begin(), src.length());
         ceph::bufferptr ptr = ceph::buffer::create_small_page_aligned(
           snappy::MaxCompressedLength(src.length()));
    @@ -81,10 +71,6 @@ class SnappyCompressor : public Compressor {
       }
     
       int decompress(const ceph::bufferlist &src, ceph::bufferlist &dst, std::optional compressor_message) override {
    -#ifdef HAVE_QATZIP
    -    if (qat_enabled)
    -      return qat_accel.decompress(src, dst, compressor_message);
    -#endif
         auto i = src.begin();
         return decompress(i, src.length(), dst, compressor_message);
       }
    @@ -93,10 +79,6 @@ class SnappyCompressor : public Compressor {
     		 size_t compressed_len,
     		 ceph::bufferlist &dst,
     		 std::optional compressor_message) override {
    -#ifdef HAVE_QATZIP
    -    if (qat_enabled)
    -      return qat_accel.decompress(p, compressed_len, dst, compressor_message);
    -#endif
         BufferlistSource source_1(p, compressed_len);
         uint32_t res_len = 0;
         if (!snappy::GetUncompressedLength(&source_1, &res_len)) {
    diff --git a/src/compressor/zlib/CMakeLists.txt b/src/compressor/zlib/CMakeLists.txt
    index 050ff03fa28f..b08543c0642a 100644
    --- a/src/compressor/zlib/CMakeLists.txt
    +++ b/src/compressor/zlib/CMakeLists.txt
    @@ -91,6 +91,12 @@ endif()
     
     add_library(ceph_zlib SHARED ${zlib_sources})
     target_link_libraries(ceph_zlib ZLIB::ZLIB compressor $<$:ceph-common>)
    +if(HAVE_QATZIP AND HAVE_QAT)
    +  target_link_libraries(ceph_zlib qat_compressor)
    +endif()
    +if(HAVE_UADK)
    +  target_link_libraries(ceph_zlib uadk_compressor)
    +endif()
     target_include_directories(ceph_zlib SYSTEM PRIVATE "${CMAKE_SOURCE_DIR}/src/isa-l/include")
     set_target_properties(ceph_zlib PROPERTIES
       VERSION 2.0.0
    diff --git a/src/compressor/zlib/ZlibCompressor.cc b/src/compressor/zlib/ZlibCompressor.cc
    index 9795d79b3ba7..fb7c1a0886cb 100644
    --- a/src/compressor/zlib/ZlibCompressor.cc
    +++ b/src/compressor/zlib/ZlibCompressor.cc
    @@ -17,6 +17,12 @@
     #include "ZlibCompressor.h"
     #include "osd/osd_types.h"
     #include "isa-l/include/igzip_lib.h"
    +#ifdef HAVE_QATZIP
    +  #include "compressor/QatAccel.h"
    +#endif
    +#ifdef HAVE_UADK
    +  #include "compressor/UadkAccel.h"
    +#endif
     // -----------------------------------------------------------------------------
     
     #include 
    @@ -46,12 +52,37 @@ _prefix(std::ostream* _dout)
     
     // default window size for Zlib 1.2.8, negated for raw deflate
     #define ZLIB_DEFAULT_WIN_SIZE -15
    +#define GZIP_WRAPPER 16
     
     // desired memory usage level. increasing to 9 doesn't speed things up
     // significantly (helps only on >=16K blocks) and sometimes degrades
     // compression ratio.
     #define ZLIB_MEMORY_LEVEL 8
     
    +#ifdef HAVE_QATZIP
    +QatAccel ZlibCompressor::qat_accel;
    +#endif
    +#ifdef HAVE_UADK
    +UadkAccel ZlibCompressor::uadk_accel;
    +#endif
    +
    +ZlibCompressor::ZlibCompressor(CephContext *cct, bool isal)
    +  : Compressor(COMP_ALG_ZLIB, "zlib"), isal_enabled(isal), cct(cct)
    +{
    +#ifdef HAVE_QATZIP
    +  if (cct->_conf->qat_compressor_enabled && qat_accel.init("zlib"))
    +    qat_enabled = true;
    +  else
    +    qat_enabled = false;
    +#endif
    +#ifdef HAVE_UADK
    +  if (cct->_conf->uadk_compressor_enabled && uadk_accel.init())
    +    uadk_enabled = true;
    +  else
    +    uadk_enabled = false;
    +#endif
    +}
    +
     int ZlibCompressor::zlib_compress(const bufferlist &in, bufferlist &out, std::optional &compressor_message)
     {
       int ret;
    @@ -174,6 +205,10 @@ int ZlibCompressor::compress(const bufferlist &in, bufferlist &out, std::optiona
       if (qat_enabled)
         return qat_accel.compress(in, out, compressor_message);
     #endif
    +#ifdef HAVE_UADK
    +  if (uadk_enabled)
    +    return uadk_accel.compress(in, out);
    +#endif
     #if (__x86_64__ && defined(HAVE_NASM_X64_AVX2)) || defined(__aarch64__)
       if (isal_enabled)
         return isal_compress(in, out, compressor_message);
    @@ -187,16 +222,21 @@ int ZlibCompressor::compress(const bufferlist &in, bufferlist &out, std::optiona
     int ZlibCompressor::decompress(bufferlist::const_iterator &p, size_t compressed_size, bufferlist &out, std::optional compressor_message)
     {
     #ifdef HAVE_QATZIP
    -  // QAT can only decompress with the default window size
    -  if (qat_enabled && (!compressor_message || *compressor_message == ZLIB_DEFAULT_WIN_SIZE))
    +  // QAT can only decompress data that carries a gzip header, i.e. streams produced with 'QZ_DEFLATE_GZIP_EXT'
    +  if (qat_enabled && compressor_message.has_value() && *compressor_message == GZIP_WRAPPER + MAX_WBITS)
         return qat_accel.decompress(p, compressed_size, out, compressor_message);
     #endif
    +#ifdef HAVE_UADK
    +  if (uadk_enabled && (!compressor_message || *compressor_message == ZLIB_DEFAULT_WIN_SIZE))
    +    return uadk_accel.decompress(p, compressed_size, out);
    +#endif
     
       int ret;
       unsigned have;
       z_stream strm;
       const char* c_in;
       int begin = 1;
    +  bool multistream = false;
     
       /* allocate inflate state */
       strm.zalloc = Z_NULL;
    @@ -208,6 +248,7 @@ int ZlibCompressor::decompress(bufferlist::const_iterator &p, size_t compressed_
       // choose the variation of compressor
       if (!compressor_message)
         compressor_message = ZLIB_DEFAULT_WIN_SIZE;
    +
       ret = inflateInit2(&strm, *compressor_message);
       if (ret != Z_OK) {
         dout(1) << "Decompression init error: init return "
    @@ -237,7 +278,10 @@ int ZlibCompressor::decompress(bufferlist::const_iterator &p, size_t compressed_
           }
           have = MAX_LEN - strm.avail_out;
           out.append(ptr, 0, have);
    -    } while (strm.avail_out == 0);
    +      // There may be multiple concatenated streams to decompress
    +      multistream = (strm.avail_in != 0 && ret == Z_STREAM_END);
    +      if (multistream) inflateReset(&strm);
    +    } while (strm.avail_out == 0 || multistream);
       }
     
       /* clean up and return */
    diff --git a/src/compressor/zlib/ZlibCompressor.h b/src/compressor/zlib/ZlibCompressor.h
    index da1c8117e882..af06639b43dc 100644
    --- a/src/compressor/zlib/ZlibCompressor.h
    +++ b/src/compressor/zlib/ZlibCompressor.h
    @@ -20,19 +20,23 @@
     #include "common/config.h"
     #include "compressor/Compressor.h"
     
    +class QatAccel;
    +class UadkAccel;
    +
     class ZlibCompressor : public Compressor {
       bool isal_enabled;
       CephContext *const cct;
    -public:
    -  ZlibCompressor(CephContext *cct, bool isal)
    -    : Compressor(COMP_ALG_ZLIB, "zlib"), isal_enabled(isal), cct(cct) {
     #ifdef HAVE_QATZIP
    -    if (cct->_conf->qat_compressor_enabled && qat_accel.init("zlib"))
    -      qat_enabled = true;
    -    else
    -      qat_enabled = false;
    +  bool qat_enabled;
    +  static QatAccel qat_accel;
    +#endif
    +#ifdef HAVE_UADK
    +  bool uadk_enabled;
    +  static UadkAccel uadk_accel;
     #endif
    -  }
    +
    + public:
    +  ZlibCompressor(CephContext *cct, bool isal);
     
       int compress(const ceph::buffer::list &in, ceph::buffer::list &out, std::optional &compressor_message) override;
       int decompress(const ceph::buffer::list &in, ceph::buffer::list &out, std::optional compressor_message) override;
    diff --git a/src/cpp_redis b/src/cpp_redis
    deleted file mode 160000
    index c659475ea43b..000000000000
    --- a/src/cpp_redis
    +++ /dev/null
    @@ -1 +0,0 @@
    -Subproject commit c659475ea43bc77850018aa1433d55cad902ea85
    diff --git a/src/crimson/CMakeLists.txt b/src/crimson/CMakeLists.txt
    index 9e751fcebc91..6bbd7b49ec75 100644
    --- a/src/crimson/CMakeLists.txt
    +++ b/src/crimson/CMakeLists.txt
    @@ -24,6 +24,7 @@ set(crimson_common_srcs
       common/throttle.cc
       common/tmap_helpers.cc
       common/tri_mutex.cc
    +  common/buffer_seastar.cc
       crush/CrushLocation.cc)
     
     # the specialized version of ceph-common, where
    @@ -35,7 +36,6 @@ add_library(crimson-common STATIC
       ${PROJECT_SOURCE_DIR}/src/common/bit_str.cc
       ${PROJECT_SOURCE_DIR}/src/common/bloom_filter.cc
       ${PROJECT_SOURCE_DIR}/src/common/buffer.cc
    -  ${PROJECT_SOURCE_DIR}/src/common/buffer_seastar.cc
       ${PROJECT_SOURCE_DIR}/src/common/ceph_argparse.cc
       ${PROJECT_SOURCE_DIR}/src/common/ceph_context.cc
       ${PROJECT_SOURCE_DIR}/src/common/ceph_crypto.cc
    @@ -121,6 +121,7 @@ add_library(crimson-common STATIC
       ${PROJECT_SOURCE_DIR}/src/osd/HitSet.cc
       ${PROJECT_SOURCE_DIR}/src/osd/OSDMap.cc
       ${PROJECT_SOURCE_DIR}/src/osd/PGPeeringEvent.cc
    +  ${PROJECT_SOURCE_DIR}/src/common/scrub_types.cc
       ${PROJECT_SOURCE_DIR}/src/xxHash/xxhash.c
       ${crimson_common_srcs}
       $
    diff --git a/src/crimson/admin/CMakeLists.txt b/src/crimson/admin/CMakeLists.txt
    index 36a5ae2a99dd..0c4fd10fc58c 100644
    --- a/src/crimson/admin/CMakeLists.txt
    +++ b/src/crimson/admin/CMakeLists.txt
    @@ -3,7 +3,6 @@ add_library(crimson-admin STATIC
       osd_admin.cc
       pg_commands.cc)
     target_link_libraries(crimson-admin
    +  legacy-option-headers
       crimson::cflags
       Boost::MPL)
    -add_dependencies(crimson-admin
    -  legacy-option-headers)
    diff --git a/src/crimson/admin/osd_admin.cc b/src/crimson/admin/osd_admin.cc
    index 0436e5184df8..de9626a2f2d4 100644
    --- a/src/crimson/admin/osd_admin.cc
    +++ b/src/crimson/admin/osd_admin.cc
    @@ -19,6 +19,7 @@
     #include "crimson/osd/pg.h"
     #include "crimson/osd/shard_services.h"
     
    +SET_SUBSYS(osd);
     namespace {
     seastar::logger& logger()
     {
    @@ -93,6 +94,105 @@ class SendBeaconHook : public AdminSocketHook {
     template std::unique_ptr
     make_asok_hook(crimson::osd::OSD& osd);
     
    +/**
    + * An OSD admin hook: run bench
    + * Usage parameters:
    + *   count=count of bytes to write (default 1 GiB)
    + *   size=block size (default 4 MiB)
    + *   object_size=object size
    + *   object_num=number of objects
    + */
    +class RunOSDBenchHook : public AdminSocketHook {
    +public:
    +  explicit RunOSDBenchHook(crimson::osd::OSD& osd) :
    +    AdminSocketHook{"bench",
    +      "name=count,type=CephInt,req=false "
    +      "name=size,type=CephInt,req=false "
    +      "name=object_size,type=CephInt,req=false "
    +      "name=object_num,type=CephInt,req=false",
    +      "run OSD bench"},
    +    osd(osd)
    +  {}
    +  seastar::future call(const cmdmap_t& cmdmap,
    +              std::string_view format,
    +              ceph::bufferlist&& input) const final
    +  {
    +    LOG_PREFIX(RunOSDBenchHook::call);
    +    int64_t count = cmd_getval_or<int64_t>(cmdmap, "count", 1LL << 30);
    +    int64_t bsize = cmd_getval_or<int64_t>(cmdmap, "size", 4LL << 20);
    +    int64_t osize = cmd_getval_or<int64_t>(cmdmap, "object_size", 0);
    +    int64_t onum = cmd_getval_or<int64_t>(cmdmap, "object_num", 0);
    +    auto duration = local_conf()->osd_bench_duration;
    +    auto max_block_size = local_conf()->osd_bench_max_block_size;
    +    if (bsize > static_cast<int64_t>(max_block_size)) {
    +      // let us limit the block size because the next checks rely on it
    +      // having a sane value.  If we allow any block size to be set things
    +      // can still go sideways.
    +      INFO("block 'size' values are capped at {}. If you wish to use"
    +        " a higher value, please adjust 'osd_bench_max_block_size'",
    +        byte_u_t(max_block_size));
    +      return seastar::make_ready_future(-EINVAL, "block size too large");
    +    } else if (bsize < (1LL << 20)) {
    +      // entering the realm of small block sizes.
    +      // limit the count to a sane value, assuming a configurable amount of
    +      // IOPS and duration, so that the OSD doesn't get hung up on this,
    +      // preventing timeouts from going off
    +      int64_t max_count = bsize * duration * local_conf()->osd_bench_small_size_max_iops;
    +      if (count > max_count) {
    +        INFO("bench count {} > osd_bench_small_size_max_iops {}",
    +          count, max_count);
    +        return seastar::make_ready_future(-EINVAL, "count too large");
    +      }
    +    } else {
    +      // 1MB block sizes are big enough so that we get more stuff done.
    +      // However, to avoid the osd from getting hung on this and having
    +      // timers being triggered, we are going to limit the count assuming
    +      // a configurable throughput and duration.
    +      // NOTE: max_count is the total amount of bytes that we believe we
    +      //       will be able to write during 'duration' for the given
    +      //       throughput.  The block size hardly impacts this unless it's
    +      //       way too big.  Given we already check how big the block size
    +      //       is, it's safe to assume everything will check out.
    +      int64_t max_count = local_conf()->osd_bench_large_size_max_throughput * duration;
    +      if (count > max_count) {
    +        INFO("'count' values greater than {} for a block size of {},"
    +          " assuming {}/s, for {} seconds, can cause ill effects"
    +          " on osd. Please adjust 'osd_bench_large_size_max_throughput'"
    +          " with a higher value if you wish to use a higher 'count'.",
    +          max_count, byte_u_t(bsize),
    +          byte_u_t(local_conf()->osd_bench_large_size_max_throughput), duration);
    +        return seastar::make_ready_future(-EINVAL, "count too large");
    +      }
    +    }
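    +    // Worked example, assuming the stock defaults (osd_bench_duration = 30,
    +    // osd_bench_small_size_max_iops = 100; check your configuration): a 4 KiB
    +    // block size caps 'count' at 4096 * 30 * 100 = 12288000 bytes (~11.7 MiB);
    +    // anything larger is rejected with -EINVAL above.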
    +    if (osize && bsize > osize) {
    +      bsize = osize;
    +    }
    +
    +    return osd.run_bench(count, bsize, osize, onum).then(
    +      [format, bsize, count](double elapsed) {
    +      if (elapsed < 0) {
    +        return seastar::make_ready_future
    +          (elapsed, "bench failed with error");
    +      }
    +
    +      unique_ptr<Formatter> f{Formatter::create(format, "json-pretty", "json-pretty")};
    +      f->open_object_section("osd_bench_results");
    +      f->dump_int("bytes_written", count);
    +      f->dump_int("blocksize", bsize);
    +      f->dump_float("elapsed_sec", elapsed);
    +      f->dump_float("bytes_per_sec", (elapsed > 0) ? count / elapsed : 0);
    +      f->dump_float("iops", (elapsed > 0) ? (count / elapsed) / bsize : 0);
    +      f->close_section();
    +      
    +      return seastar::make_ready_future(std::move(f));
    +    });
    +  }
    +private:
    +  crimson::osd::OSD& osd;
    +};
    +template std::unique_ptr<AdminSocketHook>
    +make_asok_hook<RunOSDBenchHook>(crimson::osd::OSD& osd);
    +
     /**
      * send the latest pg stats to mgr
      */
    diff --git a/src/crimson/admin/osd_admin.h b/src/crimson/admin/osd_admin.h
    index a3ddd66b9a6a..1aafc5bee20a 100644
    --- a/src/crimson/admin/osd_admin.h
    +++ b/src/crimson/admin/osd_admin.h
    @@ -17,6 +17,7 @@ class InjectDataErrorHook;
     class InjectMDataErrorHook;
     class OsdStatusHook;
     class SendBeaconHook;
    +class RunOSDBenchHook;
     class DumpInFlightOpsHook;
     class DumpHistoricOpsHook;
     class DumpSlowestHistoricOpsHook;
    diff --git a/src/crimson/admin/pg_commands.cc b/src/crimson/admin/pg_commands.cc
    index f2c84b254db2..440c7a383ff7 100644
    --- a/src/crimson/admin/pg_commands.cc
    +++ b/src/crimson/admin/pg_commands.cc
    @@ -11,9 +11,11 @@
     #include 
     
     #include "crimson/admin/admin_socket.h"
    +#include "crimson/common/log.h"
     #include "crimson/osd/osd.h"
     #include "crimson/osd/pg.h"
     
    +SET_SUBSYS(osd);
     
     using crimson::osd::OSD;
     using crimson::osd::PG;
    @@ -148,6 +150,43 @@ class MarkUnfoundLostCommand final : public PGCommand {
       }
     };
     
    +template <bool deep>
    +class ScrubCommand : public PGCommand {
    +public:
    +  explicit ScrubCommand(crimson::osd::OSD& osd) :
    +    PGCommand{
    +      osd,
    +      deep ? "deep_scrub" : "scrub",
    +      "",
    +      deep ? "deep scrub pg" : "scrub pg"}
    +  {}
    +
    +  seastar::future
    +  do_command(Ref<PG> pg,
    +	     const cmdmap_t& cmdmap,
    +	     std::string_view format,
    +	     ceph::bufferlist&&) const final
    +  {
    +    LOG_PREFIX(ScrubCommand::do_command);
    +    DEBUGDPP("deep: {}", *pg, deep);
    +    return PG::interruptor::with_interruption([pg] {
    +      pg->scrubber.handle_scrub_requested(deep);
    +      return PG::interruptor::now();
    +    }, [FNAME, pg](std::exception_ptr ep) {
    +      DEBUGDPP("interrupted with {}", *pg, ep);
    +    }, pg, pg->get_osdmap_epoch()).then([format] {
    +      std::unique_ptr<Formatter> f{
    +	Formatter::create(format, "json-pretty", "json-pretty")
    +      };
    +      f->open_object_section("scrub");
    +      f->dump_bool("deep", deep);
    +      f->dump_stream("stamp") << ceph_clock_now();
    +      f->close_section();
    +      return seastar::make_ready_future(std::move(f));
    +    });
    +  }
    +};
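    +
    +// Instantiated twice below (deep = true and false): "deep_scrub" queues a deep
    +// scrub and "scrub" a regular one; both simply hand the request to the PG's
    +// scrubber and report the request timestamp back to the caller.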
    +
     } // namespace crimson::admin::pg
     
     namespace crimson::admin {
    @@ -164,4 +203,9 @@ make_asok_hook(crimson::osd::OSD& osd);
     template std::unique_ptr
     make_asok_hook(crimson::osd::OSD& osd);
     
    +template std::unique_ptr<AdminSocketHook>
    +make_asok_hook<pg::ScrubCommand<true>>(crimson::osd::OSD& osd);
    +template std::unique_ptr<AdminSocketHook>
    +make_asok_hook<pg::ScrubCommand<false>>(crimson::osd::OSD& osd);
    +
     } // namespace crimson::admin
    diff --git a/src/crimson/admin/pg_commands.h b/src/crimson/admin/pg_commands.h
    index 873b3c923aaf..eb7912e7aa42 100644
    --- a/src/crimson/admin/pg_commands.h
    +++ b/src/crimson/admin/pg_commands.h
    @@ -6,5 +6,7 @@ namespace crimson::admin::pg {
     
     class QueryCommand;
     class MarkUnfoundLostCommand;
    +template <bool deep>
    +class ScrubCommand;
     
     }  // namespace crimson::admin::pg
    diff --git a/src/crimson/auth/KeyRing.cc b/src/crimson/auth/KeyRing.cc
    index 436e29c1bdd0..b64d2d0f78a8 100644
    --- a/src/crimson/auth/KeyRing.cc
    +++ b/src/crimson/auth/KeyRing.cc
    @@ -10,7 +10,7 @@
     #include 
     #include 
     
    -#include "common/buffer_seastar.h"
    +#include "crimson/common/buffer_seastar.h"
     #include "auth/KeyRing.h"
     #include "include/denc.h"
     #include "crimson/common/buffer_io.h"
    diff --git a/src/common/buffer_seastar.cc b/src/crimson/common/buffer_seastar.cc
    similarity index 83%
    rename from src/common/buffer_seastar.cc
    rename to src/crimson/common/buffer_seastar.cc
    index bc529c937ab2..fa040a4661c2 100644
    --- a/src/common/buffer_seastar.cc
    +++ b/src/crimson/common/buffer_seastar.cc
    @@ -14,6 +14,8 @@
     
     #include 
     #include 
    +#include 
    +#include 
     
     #include "include/buffer_raw.h"
     #include "buffer_seastar.h"
    @@ -24,9 +26,21 @@ namespace ceph::buffer {
     
     class raw_seastar_foreign_ptr : public raw {
       seastar::foreign_ptr ptr;
    +  seastar::alien::instance& alien;
      public:
       raw_seastar_foreign_ptr(temporary_buffer&& buf)
    -    : raw(buf.get_write(), buf.size()), ptr(std::move(buf)) {}
    +    : raw(buf.get_write(), buf.size()), ptr(std::move(buf)),
    +      alien(seastar::engine().alien()) {}
    +
    +  ~raw_seastar_foreign_ptr() {
    +    if (!seastar::engine_is_ready()) {
    +      // we should let a seastar reactor destroy this memory, we are alien.
    +      seastar::alien::run_on(alien, ptr.get_owner_shard(),
    +      [_ptr = std::move(ptr)]() mutable noexcept {
    +        _ptr.reset();
    +      });
    +    }
    +  }
     };
     
     class raw_seastar_local_ptr : public raw {
    diff --git a/src/common/buffer_seastar.h b/src/crimson/common/buffer_seastar.h
    similarity index 100%
    rename from src/common/buffer_seastar.h
    rename to src/crimson/common/buffer_seastar.h
    diff --git a/src/crimson/common/config_proxy.h b/src/crimson/common/config_proxy.h
    index 4c0e655075ad..b04fbee2e8a7 100644
    --- a/src/crimson/common/config_proxy.h
    +++ b/src/crimson/common/config_proxy.h
    @@ -14,6 +14,11 @@ namespace ceph {
     class Formatter;
     }
     
    +namespace ceph::global {
    +int g_conf_set_val(const std::string& key, const std::string& s);
    +int g_conf_rm_val(const std::string& key);
    +}
    +
     namespace crimson::common {
     
     // a facade for managing config. each shard has its own copy of ConfigProxy.
    @@ -54,13 +59,18 @@ class ConfigProxy : public seastar::peering_sharded_service
           // avoid racings with other do_change() calls in parallel.
           ObserverMgr::rev_obs_map rev_obs;
           owner.values.reset(new_values);
    -      owner.obs_mgr.for_each_change(owner.values->changed, owner,
    -                                    [&rev_obs](ConfigObserver *obs,
    +      std::map changes_present;
    +      for (const auto& change : owner.values->changed) {
    +        std::string dummy;
    +        changes_present[change] = owner.get_val(change, &dummy);
    +      }
    +      owner.obs_mgr.for_each_change(changes_present,
    +                                    [&rev_obs](auto obs,
                                                    const std::string &key) {
                                           rev_obs[obs].insert(key);
                                         }, nullptr);
           for (auto& [obs, keys] : rev_obs) {
    -        obs->handle_conf_change(owner, keys);
    +        (*obs)->handle_conf_change(owner, keys);
           }
     
           return seastar::parallel_for_each(boost::irange(1u, seastar::smp::count),
    @@ -70,13 +80,19 @@ class ConfigProxy : public seastar::peering_sharded_service
                 proxy.values.reset();
                 proxy.values = std::move(foreign_values);
     
    +            std::map changes_present;
    +            for (const auto& change : proxy.values->changed) {
    +              std::string dummy;
    +              changes_present[change] = proxy.get_val(change, &dummy);
    +            }
    +
                 ObserverMgr::rev_obs_map rev_obs;
    -            proxy.obs_mgr.for_each_change(proxy.values->changed, proxy,
    -              [&rev_obs](ConfigObserver *obs, const std::string& key) {
    +            proxy.obs_mgr.for_each_change(changes_present,
    +              [&rev_obs](auto obs, const std::string& key) {
                     rev_obs[obs].insert(key);
                   }, nullptr);
    -            for (auto& obs_keys : rev_obs) {
    -              obs_keys.first->handle_conf_change(proxy, obs_keys.second);
    +            for (auto& [obs, keys] : rev_obs) {
    +              (*obs)->handle_conf_change(proxy, keys);
                 }
               });
             }).finally([new_values] {
    @@ -117,6 +133,7 @@ class ConfigProxy : public seastar::peering_sharded_service
         obs_mgr.remove_observer(obs);
       }
       seastar::future<> rm_val(const std::string& key) {
    +    ceph::global::g_conf_rm_val(key);
         return do_change([key, this](ConfigValues& values) {
           auto ret = get_config().rm_val(values, key);
           if (ret < 0) {
    @@ -126,6 +143,7 @@ class ConfigProxy : public seastar::peering_sharded_service
       }
       seastar::future<> set_val(const std::string& key,
     			    const std::string& val) {
    +    ceph::global::g_conf_set_val(key, val);
         return do_change([key, val, this](ConfigValues& values) {
           std::stringstream err;
           auto ret = get_config().set_val(values, obs_mgr, key, val, &err);
    diff --git a/src/crimson/common/coroutine.h b/src/crimson/common/coroutine.h
    new file mode 100644
    index 000000000000..cf8476fcf47d
    --- /dev/null
    +++ b/src/crimson/common/coroutine.h
    @@ -0,0 +1,310 @@
    +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
    +// vim: ts=8 sw=2 smarttab expandtab
    +
    +#pragma once
    +
    +#include <coroutine>
    +
    +#include "crimson/common/errorator.h"
    +#include "crimson/common/interruptible_future.h"
    +
    +
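    +// Glue that lets crimson's errorated / interruptible futures be used with C++20
    +// coroutines.  to_future maps an (Errorator, Interruptor, T) combination to the
    +// matching future type, interrupt_cond_capture saves and restores the current
    +// interrupt condition across suspension points, promise_base is the coroutine
    +// promise built on top of seastar::task, and awaiter adapts a future for
    +// co_await, re-checking the interrupt condition on resume.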
    +namespace crimson {
    +namespace internal {
    +
    +template 
    +struct to_future {
    +  template 
    +  using future = crimson::interruptible::interruptible_future_detail<
    +    typename Interruptor::condition,
    +    typename Errorator::template future>;
    +};
    +
    +template 
    +struct to_future {
    +  template 
    +  using future = typename Errorator::template future;
    +};
    +
    +
    +template 
    +struct to_future {
    +  template 
    +  using future = ::crimson::interruptible::interruptible_future<
    +    typename Interruptor::condition, T>;
    +};
    +
    +template <>
    +struct to_future {
    +  template 
    +  using future = seastar::future;
    +};
    +
    +
    +template 
    +struct cond_checker {
    +  using ref = std::unique_ptr;
    +  virtual std::optional may_interrupt() = 0;
    +  virtual ~cond_checker() = default;
    +};
    +
    +template 
    +struct interrupt_cond_capture {
    +  using InterruptCond = typename Interruptor::condition;
    +  interruptible::InterruptCondRef cond;
    +
    +  template 
    +  struct type_erased_cond_checker final : cond_checker {
    +    interruptible::InterruptCondRef cond;
    +
    +    template 
    +    type_erased_cond_checker(T &&t) : cond(std::forward(t)) {}
    +
    +    std::optional may_interrupt() final {
    +      return cond->template may_interrupt();
    +    }
    +  };
    +
    +  template 
    +  typename cond_checker::ref capture_and_get_checker() {
    +    ceph_assert(interruptible::interrupt_cond.interrupt_cond);
    +    cond = interruptible::interrupt_cond.interrupt_cond;
    +    return typename cond_checker::ref{
    +      new type_erased_cond_checker{cond}
    +    };
    +  }
    +
    +  void restore() {
    +    ceph_assert(cond);
    +    interruptible::interrupt_cond.set(cond);
    +  }
    +
    +  void reset() {
    +    interruptible::interrupt_cond.reset();
    +  }
    +};
    +
    +template <>
    +struct interrupt_cond_capture {
    +  template 
    +  typename cond_checker::ref capture_and_get_checker() {
    +    return nullptr;
    +  }
    +};
    +
    +template 
    +struct seastar_task_ancestor : protected seastar::task {};
    +
    +template <>
    +struct seastar_task_ancestor : public seastar::task {};
    +
    +template 
    +class promise_base : public seastar_task_ancestor {
    +protected:
    +  seastar::promise _promise;
    +
    +public:
    +  interrupt_cond_capture cond;
    +
    +  using errorator_type = Errorator;
    +  using interruptor = Interruptor;
    +  static constexpr bool is_errorated = !std::is_void::value;
    +  static constexpr bool is_interruptible = !std::is_void::value;
    +
    +  using _to_future =  to_future;
    +
    +  template 
    +  using future = typename _to_future::template future;
    +
    +  promise_base() = default;
    +  promise_base(promise_base&&) = delete;
    +  promise_base(const promise_base&) = delete;
    +
    +  void set_exception(std::exception_ptr&& eptr) noexcept {
    +    _promise.set_exception(std::move(eptr));
    +  }
    +
    +  void unhandled_exception() noexcept {
    +    _promise.set_exception(std::current_exception());
    +  }
    +
    +  future get_return_object() noexcept {
    +    return _promise.get_future();
    +  }
    +
    +  std::suspend_never initial_suspend() noexcept { return { }; }
    +  std::suspend_never final_suspend() noexcept { return { }; }
    +
    +  void run_and_dispose() noexcept final {
    +    if constexpr (is_interruptible) {
    +      cond.restore();
    +    }
    +    auto handle = std::coroutine_handle::from_promise(*this);
    +    handle.resume();
    +    if constexpr (is_interruptible) {
    +      cond.reset();
    +    }
    +  }
    +
    +  seastar::task *waiting_task() noexcept override {
    +    return _promise.waiting_task();
    +  }
    +  seastar::task *get_seastar_task() { return this; }
    +};
    +
    +template 
    +class coroutine_traits {
    +public:
    +  class promise_type final : public promise_base {
    +    using base = promise_base;
    +  public:
    +    template 
    +    void return_value(U&&... value) {
    +      base::_promise.set_value(std::forward(value)...);
    +    }
    +  };
    +};
    +
    +
    +template 
    +class coroutine_traits {
    +public:
    +  class promise_type final : public promise_base {
    +    using base = promise_base;
    +  public:
    +    void return_void() noexcept {
    +      base::_promise.set_value();
    +    }
    +  };
    +};
    +
    +template 
    +struct awaiter {
    +  static constexpr bool is_errorated = !std::is_void::value;
    +  static constexpr bool is_interruptible = !std::is_void::value;
    +
    +  template 
    +  using future = typename to_future::template future;
    +
    +  future _future;
    +
    +  typename cond_checker>::ref checker;
    +public:
    +  explicit awaiter(future&& f) noexcept : _future(std::move(f)) { }
    +
    +  awaiter(const awaiter&) = delete;
    +  awaiter(awaiter&&) = delete;
    +
    +  bool await_ready() const noexcept {
    +    return _future.available() && (!CheckPreempt || !seastar::need_preempt());
    +  }
    +
    +  template 
    +  void await_suspend(std::coroutine_handle hndl) noexcept {
    +    if constexpr (is_errorated) {
    +      using dest_errorator_t  = typename U::errorator_type;
    +      static_assert(dest_errorator_t::template contains_once_v,
    +		    "conversion is possible to more-or-eq errorated future!");
    +    }
    +
    +    checker =
    +      hndl.promise().cond.template capture_and_get_checker>();
    +    if (!CheckPreempt || !_future.available()) {
    +      _future.set_coroutine(*hndl.promise().get_seastar_task());
    +    } else {
    +      ::seastar::schedule(hndl.promise().get_seastar_task());
    +    }
    +  }
    +
    +  T await_resume() {
    +    if (auto maybe_fut = checker ? checker->may_interrupt() : std::nullopt) {
    +      // silence warning that we are discarding an exceptional future
    +      if (_future.failed()) _future.get_exception();
    +      if constexpr (is_errorated) {
    +	return (T)maybe_fut->unsafe_get();
    +      } else {
    +	return (T)maybe_fut->get();
    +      }
    +    } else {
    +      if constexpr (is_errorated) {
    +	return (T)_future.unsafe_get();
    +      } else {
    +	return (T)_future.get();
    +      }
    +    }
    +  }
    +};
    +
    +}
    +}
    +
    +template