diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
index 9fe1ce1431..1c9c492bec 100644
--- a/.github/CHANGELOG.md
+++ b/.github/CHANGELOG.md
@@ -36,6 +36,9 @@
 
 ### Improvements
 
+* Update all uses of `size_t` to `std::size_t` everywhere.
+  [(#816)](https://github.com/PennyLaneAI/pennylane-lightning/pull/816)
+
 * Update `ctrl_decomp_zyz` tests with `len(control_wires) > 1`.
   [(#821)](https://github.com/PennyLaneAI/pennylane-lightning/pull/821)
 
@@ -44,14 +47,14 @@
 
 * Multiple calls to the `append_mps_final_state()` API is allowed in `lightning.tensor`.
   [(#830)](https://github.com/PennyLaneAI/pennylane-lightning/pull/830)
-  
+
 * Update `generate_samples` in `LightningKokkos` and `LightningGPU` to support `qml.measurements.Shots` type instances.
   [(#839)](https://github.com/PennyLaneAI/pennylane-lightning/pull/839)
   [(#864)](https://github.com/PennyLaneAI/pennylane-lightning/pull/864)
 
 * LightningQubit gains native support for the `PauliRot` gate.
   [(#834)](https://github.com/PennyLaneAI/pennylane-lightning/pull/834)
-  
+
 * The `setBasisState` and `setStateVector` methods of `StateVectorLQubit` and `StateVectorKokkos` are overloaded to support PennyLane-like parameters.
   [(#843)](https://github.com/PennyLaneAI/pennylane-lightning/pull/843)
 
diff --git a/doc/lightning_qubit/development/add_gate_kernel.rst b/doc/lightning_qubit/development/add_gate_kernel.rst
index c4785ed358..fb600454d1 100644
--- a/doc/lightning_qubit/development/add_gate_kernel.rst
+++ b/doc/lightning_qubit/development/add_gate_kernel.rst
@@ -21,12 +21,12 @@ In this case, you may first create a file and add a class:
         /* This defines the required alignment for this kernel. If there is no special requirement,
            using std::alignment_of_v is sufficient. */
         template <typename PrecisionT>
-        constexpr static size_t required_alignment = std::alignment_of_v<PrecisionT>;
+        constexpr static std::size_t required_alignment = std::alignment_of_v<PrecisionT>;
 
         template <class PrecisionT>
         static void applyPauliX(std::complex<PrecisionT>* data,
-                                size_t num_qubits,
-                                const std::vector<size_t>& wires,
+                                std::size_t num_qubits,
+                                const std::vector<std::size_t>& wires,
                                 [[maybe_unused]] bool inverse) {
             /* Write your implementation */
             ...
diff --git a/doc/lightning_qubit/development/avx_kernels/implementation.rst b/doc/lightning_qubit/development/avx_kernels/implementation.rst
index 12bdf6f8e6..2d762124f0 100644
--- a/doc/lightning_qubit/development/avx_kernels/implementation.rst
+++ b/doc/lightning_qubit/development/avx_kernels/implementation.rst
@@ -42,9 +42,9 @@ The following simple (C++ style) pseudocode shows how the algorithm is implement
 
 .. code-block::
 
-   template<typename PrecisionT, size_t packed_size>
+   template<typename PrecisionT, std::size_t packed_size>
    class ApplyPauliX {
-      template<size_t wire>
+      template<std::size_t wire>
       void applyInternal(...) {
          // Within a row
          permutation = compute a permutation within a row for a given wire
@@ -54,7 +54,7 @@ The following simple (C++ style) pseudocode shows how the algorithm is implement
             save row to the memory
          }
       }
-      void applyExternal(size_t wire, ...) {
+      void applyExternal(std::size_t wire, ...) {
          // Between rows
          for proper index k {
             row1 = load k-th row
diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
index 8c281f63a4..970e51686a 100644
--- a/pennylane_lightning/core/_version.py
+++ b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.38.0-dev49"
+__version__ = "0.38.0-dev50"
diff --git a/pennylane_lightning/core/src/algorithms/JacobianData.hpp b/pennylane_lightning/core/src/algorithms/JacobianData.hpp
index c408167dbe..fba007789e 100644
--- a/pennylane_lightning/core/src/algorithms/JacobianData.hpp
+++ b/pennylane_lightning/core/src/algorithms/JacobianData.hpp
@@ -249,7 +249,7 @@ template <class StateVectorT> class OpsData {
     [[nodiscard]] auto getTotalNumParams() const -> std::size_t {
         return std::accumulate(
             ops_params_.begin(), ops_params_.end(), std::size_t{0U},
-            [](size_t acc, auto &params) { return acc + params.size(); });
+            [](std::size_t acc, auto &params) { return acc + params.size(); });
     }
 };
 
@@ -306,7 +306,8 @@ template <class StateVectorT> class JacobianData {
      * (e.g. StatePrep) or Hamiltonian coefficients.
      * @endrst
      */
-    JacobianData(size_t num_params, std::size_t num_elem, const CFP_t *sv_ptr,
+    JacobianData(std::size_t num_params, std::size_t num_elem,
+                 const CFP_t *sv_ptr,
                  std::vector<std::shared_ptr<Observable<StateVectorT>>> obs,
                  OpsData<StateVectorT> ops, std::vector<std::size_t> trainP)
         : num_parameters(num_params), num_elements(num_elem), psi(sv_ptr),
diff --git a/pennylane_lightning/core/src/algorithms/tests/Test_AdjointJacobian.cpp b/pennylane_lightning/core/src/algorithms/tests/Test_AdjointJacobian.cpp
index 0f56ca388c..09957eb56b 100644
--- a/pennylane_lightning/core/src/algorithms/tests/Test_AdjointJacobian.cpp
+++ b/pennylane_lightning/core/src/algorithms/tests/Test_AdjointJacobian.cpp
@@ -453,7 +453,7 @@ template <typename TypeList> void testAdjointJacobian() {
             const auto obs = std::make_shared<NamedObs<StateVectorT>>(
                 "PauliZ", std::vector<std::size_t>{0});
 
-            for (size_t i = 0; i < thetas.size(); i++) {
+            for (std::size_t i = 0; i < thetas.size(); i++) {
                 const PrecisionT theta = thetas[i];
                 std::vector<PrecisionT> local_params{
                     theta, std::pow(theta, (PrecisionT)3),
diff --git a/pennylane_lightning/core/src/bindings/Bindings.hpp b/pennylane_lightning/core/src/bindings/Bindings.hpp
index 08907f5126..30a0eedde8 100644
--- a/pennylane_lightning/core/src/bindings/Bindings.hpp
+++ b/pennylane_lightning/core/src/bindings/Bindings.hpp
@@ -189,7 +189,7 @@ auto alignedNumpyArray(CPUMemoryModel memory_model, std::size_t size,
  * @param size Size of the array to create
  * @param dt Pybind11's datatype object
  */
-auto allocateAlignedArray(size_t size, const py::dtype &dt,
+auto allocateAlignedArray(std::size_t size, const py::dtype &dt,
                           bool zeroInit = false) -> py::array {
     // TODO: Move memset operations to here to reduce zeroInit pass-throughs.
     auto memory_model = bestCPUMemoryModel();
@@ -480,7 +480,7 @@ void registerBackendAgnosticMeasurements(PyClass &pyclass) {
             auto &&result = M.generate_samples(num_shots);
             const std::size_t ndim = 2;
             const std::vector<std::size_t> shape{num_shots, num_wires};
-            constexpr auto sz = sizeof(size_t);
+            constexpr auto sz = sizeof(std::size_t);
             const std::vector<std::size_t> strides{sz * num_wires, sz};
             // return 2-D NumPy array
             return py::array(py::buffer_info(
@@ -559,7 +559,7 @@ void registerBackendAgnosticAlgorithms(py::module_ &m) {
         .def("__repr__", [](const OpsData<StateVectorT> &ops) {
             using namespace Pennylane::Util;
             std::ostringstream ops_stream;
-            for (size_t op = 0; op < ops.getSize(); op++) {
+            for (std::size_t op = 0; op < ops.getSize(); op++) {
                 ops_stream << "{'name': " << ops.getOpsName()[op];
                 ops_stream << ", 'params': " << ops.getOpsParams()[op];
                 ops_stream << ", 'inv': " << ops.getOpsInverses()[op];
@@ -591,7 +591,7 @@ void registerBackendAgnosticAlgorithms(py::module_ &m) {
            const std::vector<std::vector<bool>> &ops_controlled_values) {
             std::vector<std::vector<ComplexT>> conv_matrices(
                 ops_matrices.size());
-            for (size_t op = 0; op < ops_name.size(); op++) {
+            for (std::size_t op = 0; op < ops_name.size(); op++) {
                 const auto m_buffer = ops_matrices[op].request();
                 if (m_buffer.size) {
                     const auto m_ptr =
diff --git a/pennylane_lightning/core/src/bindings/BindingsMPI.hpp b/pennylane_lightning/core/src/bindings/BindingsMPI.hpp
index bad6e9e1da..8c6d1bb5b1 100644
--- a/pennylane_lightning/core/src/bindings/BindingsMPI.hpp
+++ b/pennylane_lightning/core/src/bindings/BindingsMPI.hpp
@@ -282,7 +282,7 @@ void registerBackendAgnosticMeasurementsMPI(PyClass &pyclass) {
             auto &&result = M.generate_samples(num_shots);
             const std::size_t ndim = 2;
             const std::vector<std::size_t> shape{num_shots, num_wires};
-            constexpr auto sz = sizeof(size_t);
+            constexpr auto sz = sizeof(std::size_t);
             const std::vector<std::size_t> strides{sz * num_wires, sz};
             // return 2-D NumPy array
             return py::array(py::buffer_info(
@@ -351,7 +351,7 @@ void registerBackendAgnosticAlgorithmsMPI(py::module_ &m) {
         .def("__repr__", [](const OpsData<StateVectorT> &ops) {
             using namespace Pennylane::Util;
             std::ostringstream ops_stream;
-            for (size_t op = 0; op < ops.getSize(); op++) {
+            for (std::size_t op = 0; op < ops.getSize(); op++) {
                 ops_stream << "{'name': " << ops.getOpsName()[op];
                 ops_stream << ", 'params': " << ops.getOpsParams()[op];
                 ops_stream << ", 'inv': " << ops.getOpsInverses()[op];
@@ -378,7 +378,7 @@ void registerBackendAgnosticAlgorithmsMPI(py::module_ &m) {
            const std::vector<std::vector<bool>> &ops_controlled_values) {
             std::vector<std::vector<ComplexT>> conv_matrices(
                 ops_matrices.size());
-            for (size_t op = 0; op < ops_name.size(); op++) {
+            for (std::size_t op = 0; op < ops_name.size(); op++) {
                 const auto m_buffer = ops_matrices[op].request();
                 if (m_buffer.size) {
                     const auto m_ptr =
diff --git a/pennylane_lightning/core/src/measurements/MeasurementsBase.hpp b/pennylane_lightning/core/src/measurements/MeasurementsBase.hpp
index 167e8c75ae..50a76610dc 100644
--- a/pennylane_lightning/core/src/measurements/MeasurementsBase.hpp
+++ b/pennylane_lightning/core/src/measurements/MeasurementsBase.hpp
@@ -134,7 +134,7 @@ template <class StateVectorT, class Derived> class MeasurementsBase {
      * @return 1-D vector of samples in binary with each sample
      * separated by a stride equal to the number of qubits.
      */
-    auto generate_samples(size_t num_samples) -> std::vector<std::size_t> {
+    auto generate_samples(std::size_t num_samples) -> std::vector<std::size_t> {
         return static_cast<Derived *>(this)->generate_samples(num_samples);
     };
 
@@ -160,7 +160,7 @@ template <class StateVectorT, class Derived> class MeasurementsBase {
         } else if (obs.getObsName().find("Hamiltonian") != std::string::npos) {
             auto coeffs = obs.getCoeffs();
             auto obsTerms = obs.getObs();
-            for (size_t obs_term_idx = 0; obs_term_idx < coeffs.size();
+            for (std::size_t obs_term_idx = 0; obs_term_idx < coeffs.size();
                  obs_term_idx++) {
                 result += coeffs[obs_term_idx] * expval(*obsTerms[obs_term_idx],
                                                         num_shots, shot_range);
@@ -202,11 +202,11 @@ template <class StateVectorT, class Derived> class MeasurementsBase {
 
         std::vector<PrecisionT> eigenVals = eigenValues[0];
 
-        for (size_t i = 1; i < eigenValues.size(); i++) {
+        for (std::size_t i = 1; i < eigenValues.size(); i++) {
             eigenVals = kronProd(eigenVals, eigenValues[i]);
         }
 
-        for (size_t i = 0; i < num_samples; i++) {
+        for (std::size_t i = 0; i < num_samples; i++) {
             std::size_t idx = 0;
             std::size_t wire_idx = 0;
             for (auto &obs_wire : obs_wires) {
@@ -319,11 +319,11 @@ template <class StateVectorT, class Derived> class MeasurementsBase {
 
         std::size_t num_wires = _statevector.getTotalNumQubits();
 
-        std::vector<PrecisionT> prob_shots(size_t{1} << wires.size(), 0.0);
+        std::vector<PrecisionT> prob_shots(std::size_t{1} << wires.size(), 0.0);
 
         for (auto &it : counts_map) {
             std::size_t bitVal = 0;
-            for (size_t bit = 0; bit < wires.size(); bit++) {
+            for (std::size_t bit = 0; bit < wires.size(); bit++) {
                 // Mapping the value of wires[bit]th bit to local [bit]th bit of
                 // the output
                 bitVal +=
@@ -346,12 +346,12 @@ template <class StateVectorT, class Derived> class MeasurementsBase {
      *
      * @return Floating point std::vector with probabilities.
      */
-    auto probs(size_t num_shots) -> std::vector<PrecisionT> {
+    auto probs(std::size_t num_shots) -> std::vector<PrecisionT> {
         auto counts_map = counts(num_shots);
 
         std::size_t num_wires = _statevector.getTotalNumQubits();
 
-        std::vector<PrecisionT> prob_shots(size_t{1} << num_wires, 0.0);
+        std::vector<PrecisionT> prob_shots(std::size_t{1} << num_wires, 0.0);
 
         for (auto &it : counts_map) {
             prob_shots[it.first] =
@@ -407,7 +407,7 @@ template <class StateVectorT, class Derived> class MeasurementsBase {
         -> std::unordered_map<PrecisionT, std::size_t> {
         std::unordered_map<PrecisionT, std::size_t> outcome_map;
         auto sample_data = sample(obs, num_shots);
-        for (size_t i = 0; i < num_shots; i++) {
+        for (std::size_t i = 0; i < num_shots; i++) {
             auto key = sample_data[i];
             auto it = outcome_map.find(key);
             if (it != outcome_map.end()) {
@@ -425,18 +425,18 @@ template <class StateVectorT, class Derived> class MeasurementsBase {
      *
      * @param num_shots Number of wires the sampled observable was performed on
      *
-     * @return std::unordered_map<size_t, std::size_t> with format ``{'outcome':
-     * num_occurences}``
+     * @return std::unordered_map<std::size_t, std::size_t> with format
+     * ``{'outcome': num_occurrences}``
      */
     auto counts(const std::size_t &num_shots)
-        -> std::unordered_map<size_t, std::size_t> {
-        std::unordered_map<size_t, std::size_t> outcome_map;
+        -> std::unordered_map<std::size_t, std::size_t> {
+        std::unordered_map<std::size_t, std::size_t> outcome_map;
         auto sample_data = sample(num_shots);
 
         std::size_t num_wires = _statevector.getTotalNumQubits();
-        for (size_t i = 0; i < num_shots; i++) {
+        for (std::size_t i = 0; i < num_shots; i++) {
             std::size_t key = 0;
-            for (size_t j = 0; j < num_wires; j++) {
+            for (std::size_t j = 0; j < num_wires; j++) {
                 key += sample_data[i * num_wires + j] << (num_wires - 1 - j);
             }
 
@@ -494,7 +494,8 @@ template <class StateVectorT, class Derived> class MeasurementsBase {
             // Get a slice of samples based on the shot_range vector
             std::size_t shot_idx = 0;
             for (const auto &i : shot_range) {
-                for (size_t j = i * num_qubits; j < (i + 1) * num_qubits; j++) {
+                for (std::size_t j = i * num_qubits; j < (i + 1) * num_qubits;
+                     j++) {
                     // TODO some extra work to make it cache-friendly
                     sub_samples[shot_idx * num_qubits + j - i * num_qubits] =
                         samples[j];
diff --git a/pennylane_lightning/core/src/measurements/tests/Test_MeasurementsBase.cpp b/pennylane_lightning/core/src/measurements/tests/Test_MeasurementsBase.cpp
index 17e5f112dd..674659a9cc 100644
--- a/pennylane_lightning/core/src/measurements/tests/Test_MeasurementsBase.cpp
+++ b/pennylane_lightning/core/src/measurements/tests/Test_MeasurementsBase.cpp
@@ -230,7 +230,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test PauliX"
                         << StateVectorToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObs<StateVectorT> obs("PauliX", {i});
                 Measurements<StateVectorT> Measurer_obs(statevector);
 
@@ -247,7 +247,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test PauliY"
                         << StateVectorToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObs<StateVectorT> obs("PauliY", {i});
                 Measurements<StateVectorT> Measurer_obs(statevector);
 
@@ -265,7 +265,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test PauliZ"
                         << StateVectorToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObs<StateVectorT> obs("PauliZ", {i});
                 Measurements<StateVectorT> Measurer_obs(statevector);
 
@@ -280,7 +280,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test Hadamard"
                         << StateVectorToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObs<StateVectorT> obs("Hadamard", {i});
                 Measurements<StateVectorT> Measurer_obs(statevector);
                 const PrecisionT theta = -M_PI / 4.0;
@@ -297,7 +297,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test Identity"
                         << StateVectorToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObs<StateVectorT> obs("Identity", {i});
                 Measurements<StateVectorT> Measurer_obs(statevector);
 
@@ -474,11 +474,11 @@ template <typename TypeList> void testNamedObsExpval() {
             {-0.64421768, -0.47942553, -0.29552020},
             {0.58498357, 0.77015115, 0.91266780}};
 
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " - Varying wires"
                             << StateVectorToName<StateVectorT>::name) {
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObs<StateVectorT> obs(obs_name[ind_obs],
                                                wires_list[ind_wires]);
@@ -522,13 +522,13 @@ template <typename TypeList> void testNamedObsExpvalShot() {
             {0.58498357, 0.77015115, 0.91266780},
             {0.7620549436, 0.8420840225, 0.8449848566},
             {1.0, 1.0, 1.0}};
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " - Varying wires"
                             << StateVectorToName<StateVectorT>::name) {
                 std::size_t num_shots = 20000;
                 std::vector<std::size_t> shots_range = {};
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObs<StateVectorT> obs(obs_name[ind_obs],
                                                wires_list[ind_wires]);
@@ -542,16 +542,16 @@ template <typename TypeList> void testNamedObsExpvalShot() {
             }
         }
 
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " - Varying wires-with shots_range"
                             << StateVectorToName<StateVectorT>::name) {
                 std::size_t num_shots = 20000;
                 std::vector<std::size_t> shots_range;
-                for (size_t i = 0; i < num_shots; i += 2) {
+                for (std::size_t i = 0; i < num_shots; i += 2) {
                     shots_range.push_back(i);
                 }
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObs<StateVectorT> obs(obs_name[ind_obs],
                                                wires_list[ind_wires]);
@@ -709,7 +709,7 @@ template <typename TypeList> void testHermitianObsExpval() {
             MatrixT Hermitian_matrix{real_term, ComplexT{0, imag_term},
                                      ComplexT{0, -imag_term}, real_term};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObs<StateVectorT> obs(Hermitian_matrix,
                                                wires_list[ind_wires]);
@@ -736,7 +736,7 @@ template <typename TypeList> void testHermitianObsExpval() {
             Hermitian_matrix[10] = ComplexT{1.0, 0};
             Hermitian_matrix[15] = ComplexT{1.0, 0};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObs<StateVectorT> obs(Hermitian_matrix,
                                                wires_list[ind_wires]);
@@ -809,7 +809,7 @@ template <typename TypeList> void testTensorProdObsExpvalShot() {
                         << StateVectorToName<StateVectorT>::name) {
             std::size_t num_shots = 20000;
             std::vector<std::size_t> shots_range;
-            for (size_t i = 0; i < num_shots; i += 2) {
+            for (std::size_t i = 0; i < num_shots; i += 2) {
                 shots_range.push_back(i);
             }
             auto X0 = std::make_shared<NamedObs<StateVectorT>>(
@@ -828,7 +828,7 @@ template <typename TypeList> void testTensorProdObsExpvalShot() {
                         << StateVectorToName<StateVectorT>::name) {
             std::size_t num_shots = 20000;
             std::vector<std::size_t> shots_range;
-            for (size_t i = 0; i < num_shots; i += 2) {
+            for (std::size_t i = 0; i < num_shots; i += 2) {
                 shots_range.push_back(i);
             }
             auto X0 = std::make_shared<NamedObs<StateVectorT>>(
@@ -898,11 +898,11 @@ template <typename TypeList> void testNamedObsVar() {
             {0.5849835, 0.7701511, 0.9126678},
             {0.6577942, 0.4068672, 0.1670374}};
 
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " - Varying wires"
                             << StateVectorToName<StateVectorT>::name) {
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObs<StateVectorT> obs(obs_name[ind_obs],
                                                wires_list[ind_wires]);
@@ -915,7 +915,7 @@ template <typename TypeList> void testNamedObsVar() {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " Shots - Varying wires"
                             << StateVectorToName<StateVectorT>::name) {
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObs<StateVectorT> obs(obs_name[ind_obs],
                                                wires_list[ind_wires]);
@@ -969,7 +969,7 @@ template <typename TypeList> void testHermitianObsVar() {
             MatrixT Hermitian_matrix{real_term, ComplexT{0, imag_term},
                                      ComplexT{0, -imag_term}, real_term};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObs<StateVectorT> obs(Hermitian_matrix,
                                                wires_list[ind_wires]);
@@ -995,7 +995,7 @@ template <typename TypeList> void testHermitianObsVar() {
             Hermitian_matrix[10] = ComplexT{1.0, 0};
             Hermitian_matrix[15] = ComplexT{1.0, 0};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObs<StateVectorT> obs(Hermitian_matrix,
                                                wires_list[ind_wires]);
@@ -1046,7 +1046,7 @@ template <typename TypeList> void testHermitianObsShotVar() {
             MatrixT Hermitian_matrix{real_term, ComplexT{0, imag_term},
                                      ComplexT{0, -imag_term}, real_term};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObs<StateVectorT> obs(Hermitian_matrix,
                                                wires_list[ind_wires]);
@@ -1076,7 +1076,7 @@ template <typename TypeList> void testHermitianObsShotVar() {
             Hermitian_matrix[10] = ComplexT{1.0, 0};
             Hermitian_matrix[15] = ComplexT{1.0, 0};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObs<StateVectorT> obs(Hermitian_matrix,
                                                wires_list[ind_wires]);
@@ -1287,8 +1287,8 @@ template <typename TypeList> void testSamples() {
         std::vector<std::size_t> samples_decimal(num_samples, 0);
 
         // convert samples to decimal and then bin them in counts
-        for (size_t i = 0; i < num_samples; i++) {
-            for (size_t j = 0; j < num_qubits; j++) {
+        for (std::size_t i = 0; i < num_samples; i++) {
+            for (std::size_t j = 0; j < num_qubits; j++) {
                 if (samples[i * num_qubits + j] != 0) {
                     samples_decimal[i] += twos[(num_qubits - 1 - j)];
                 }
@@ -1298,7 +1298,7 @@ template <typename TypeList> void testSamples() {
 
         // compute estimated probabilities from histogram
         std::vector<PrecisionT> probabilities(counts.size());
-        for (size_t i = 0; i < counts.size(); i++) {
+        for (std::size_t i = 0; i < counts.size(); i++) {
             probabilities[i] = counts[i] / (PrecisionT)num_samples;
         }
 
@@ -1349,12 +1349,12 @@ template <typename TypeList> void testSamplesCountsObs() {
             {0.58498357, 0.77015115, 0.91266780},
             {0.7620549436, 0.8420840225, 0.8449848566},
             {1.0, 1.0, 1.0}};
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " Sample Obs - Varying wires"
                             << StateVectorToName<StateVectorT>::name) {
                 std::size_t num_shots = 20000;
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObs<StateVectorT> obs(obs_name[ind_obs],
                                                wires_list[ind_wires]);
@@ -1377,7 +1377,7 @@ template <typename TypeList> void testSamplesCountsObs() {
                             << " Counts Obs - Varying wires"
                             << StateVectorToName<StateVectorT>::name) {
                 std::size_t num_shots = 20000;
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObs<StateVectorT> obs(obs_name[ind_obs],
                                                wires_list[ind_wires]);
@@ -1412,8 +1412,8 @@ template <typename TypeList> void testSamplesCountsObs() {
             std::vector<std::size_t> samples_decimal(num_samples, 0);
 
             // convert samples to decimal and then bin them in counts
-            for (size_t i = 0; i < num_samples; i++) {
-                for (size_t j = 0; j < num_qubits; j++) {
+            for (std::size_t i = 0; i < num_samples; i++) {
+                for (std::size_t j = 0; j < num_qubits; j++) {
                     if (samples[i * num_qubits + j] != 0) {
                         samples_decimal[i] += twos[(num_qubits - 1 - j)];
                     }
@@ -1423,7 +1423,7 @@ template <typename TypeList> void testSamplesCountsObs() {
 
             // compute estimated probabilities from histogram
             std::vector<PrecisionT> probabilities(counts.size());
-            for (size_t i = 0; i < counts.size(); i++) {
+            for (std::size_t i = 0; i < counts.size(); i++) {
                 probabilities[i] = counts[i] / (PrecisionT)num_samples;
             }
 
@@ -1457,7 +1457,7 @@ template <typename TypeList> void testSamplesCountsObs() {
 
             // compute estimated probabilities from histogram
             std::vector<PrecisionT> probabilities(counts.size());
-            for (size_t i = 0; i < counts.size(); i++) {
+            for (std::size_t i = 0; i < counts.size(); i++) {
                 probabilities[i] = counts[i] / (PrecisionT)num_samples;
             }
 
@@ -1522,7 +1522,7 @@ template <typename TypeList> void testHamiltonianObsExpvalShot() {
                         << StateVectorToName<StateVectorT>::name) {
             std::size_t num_shots = 20000;
             std::vector<std::size_t> shots_range;
-            for (size_t i = 0; i < num_shots; i += 2) {
+            for (std::size_t i = 0; i < num_shots; i += 2) {
                 shots_range.push_back(i);
             }
 
diff --git a/pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp b/pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp
index 7c834201af..4a90d8849d 100644
--- a/pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp
+++ b/pennylane_lightning/core/src/measurements/tests/mpi/Test_MeasurementsBaseMPI.cpp
@@ -185,7 +185,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test PauliX"
                         << StateVectorMPIToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObsMPI<StateVectorT> obs("PauliX", {i});
                 MeasurementsMPI<StateVectorT> Measurer_obs(statevector);
 
@@ -202,7 +202,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test PauliY"
                         << StateVectorMPIToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObsMPI<StateVectorT> obs("PauliY", {i});
                 MeasurementsMPI<StateVectorT> Measurer_obs(statevector);
 
@@ -220,7 +220,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test PauliZ"
                         << StateVectorMPIToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObsMPI<StateVectorT> obs("PauliZ", {i});
                 MeasurementsMPI<StateVectorT> Measurer_obs(statevector);
 
@@ -235,7 +235,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test Hadamard"
                         << StateVectorMPIToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObsMPI<StateVectorT> obs("Hadamard", {i});
                 MeasurementsMPI<StateVectorT> Measurer_obs(statevector);
                 const PrecisionT theta = -M_PI / 4.0;
@@ -252,7 +252,7 @@ template <typename TypeList> void testProbabilitiesObs() {
 
         DYNAMIC_SECTION("Test Identity"
                         << StateVectorMPIToName<StateVectorT>::name) {
-            for (size_t i = 0; i < num_qubits; i++) {
+            for (std::size_t i = 0; i < num_qubits; i++) {
                 NamedObsMPI<StateVectorT> obs("Identity", {i});
                 MeasurementsMPI<StateVectorT> Measurer_obs(statevector);
 
@@ -473,11 +473,11 @@ template <typename TypeList> void testNamedObsExpval() {
             {-0.64421768, -0.47942553, -0.29552020},
             {0.58498357, 0.77015115, 0.91266780}};
 
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " - Varying wires"
                             << StateVectorMPIToName<StateVectorT>::name) {
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObsMPI<StateVectorT> obs(obs_name[ind_obs],
                                                   wires_list[ind_wires]);
@@ -548,11 +548,11 @@ template <typename TypeList> void testNamedObsExpvalShot() {
         std::size_t num_shots = 10000;
         std::vector<std::size_t> shots_range = {};
 
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " - Varying wires"
                             << StateVectorMPIToName<StateVectorT>::name) {
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObsMPI<StateVectorT> obs(obs_name[ind_obs],
                                                   wires_list[ind_wires]);
@@ -628,7 +628,7 @@ template <typename TypeList> void testHermitianObsExpval() {
             MatrixT Hermitian_matrix{real_term, ComplexT{0, imag_term},
                                      ComplexT{0, -imag_term}, real_term};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObsMPI<StateVectorT> obs(Hermitian_matrix,
                                                   wires_list[ind_wires]);
@@ -655,7 +655,7 @@ template <typename TypeList> void testHermitianObsExpval() {
             Hermitian_matrix[10] = ComplexT{1.0, 0};
             Hermitian_matrix[15] = ComplexT{1.0, 0};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObsMPI<StateVectorT> obs(Hermitian_matrix,
                                                   wires_list[ind_wires]);
@@ -747,7 +747,7 @@ template <typename TypeList> void testTensorProdObsExpvalShot() {
                         << StateVectorMPIToName<StateVectorT>::name) {
             std::size_t num_shots = 10000;
             std::vector<std::size_t> shots_range;
-            for (size_t i = 0; i < num_shots; i += 2) {
+            for (std::size_t i = 0; i < num_shots; i += 2) {
                 shots_range.push_back(i);
             }
             auto X0 = std::make_shared<NamedObsMPI<StateVectorT>>(
@@ -764,7 +764,7 @@ template <typename TypeList> void testTensorProdObsExpvalShot() {
                         << StateVectorMPIToName<StateVectorT>::name) {
             std::size_t num_shots = 10000;
             std::vector<std::size_t> shots_range;
-            for (size_t i = 0; i < num_shots; i += 2) {
+            for (std::size_t i = 0; i < num_shots; i += 2) {
                 shots_range.push_back(i);
             }
             auto X0 = std::make_shared<NamedObsMPI<StateVectorT>>(
@@ -861,7 +861,7 @@ template <typename TypeList> void testHamiltonianObsExpvalShot() {
                         << StateVectorMPIToName<StateVectorT>::name) {
             std::size_t num_shots = 10000;
             std::vector<std::size_t> shots_range;
-            for (size_t i = 0; i < num_shots; i += 2) {
+            for (std::size_t i = 0; i < num_shots; i += 2) {
                 shots_range.push_back(i);
             }
             auto X0 = std::make_shared<NamedObsMPI<StateVectorT>>(
@@ -879,7 +879,7 @@ template <typename TypeList> void testHamiltonianObsExpvalShot() {
                         << StateVectorMPIToName<StateVectorT>::name) {
             std::size_t num_shots = 10000;
             std::vector<std::size_t> shots_range;
-            for (size_t i = 0; i < num_shots; i += 2) {
+            for (std::size_t i = 0; i < num_shots; i += 2) {
                 shots_range.push_back(i);
             }
             auto X0 = std::make_shared<NamedObsMPI<StateVectorT>>(
@@ -950,11 +950,11 @@ template <typename TypeList> void testNamedObsVar() {
             {0.5849835, 0.7701511, 0.9126678},
             {0.6577942, 0.4068672, 0.1670374}};
 
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " - Varying wires"
                             << StateVectorMPIToName<StateVectorT>::name) {
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObsMPI<StateVectorT> obs(obs_name[ind_obs],
                                                   wires_list[ind_wires]);
@@ -967,7 +967,7 @@ template <typename TypeList> void testNamedObsVar() {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " Shots - Varying wires"
                             << StateVectorMPIToName<StateVectorT>::name) {
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObsMPI<StateVectorT> obs(obs_name[ind_obs],
                                                   wires_list[ind_wires]);
@@ -1043,7 +1043,7 @@ template <typename TypeList> void testHermitianObsVar() {
             MatrixT Hermitian_matrix{real_term, ComplexT{0, imag_term},
                                      ComplexT{0, -imag_term}, real_term};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObsMPI<StateVectorT> obs(Hermitian_matrix,
                                                   wires_list[ind_wires]);
@@ -1069,7 +1069,7 @@ template <typename TypeList> void testHermitianObsVar() {
             Hermitian_matrix[10] = ComplexT{1.0, 0};
             Hermitian_matrix[15] = ComplexT{1.0, 0};
 
-            for (size_t ind_wires = 0; ind_wires < wires_list.size();
+            for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                  ind_wires++) {
                 HermitianObsMPI<StateVectorT> obs(Hermitian_matrix,
                                                   wires_list[ind_wires]);
@@ -1225,8 +1225,8 @@ template <typename TypeList> void testSamples() {
         std::vector<std::size_t> samples_decimal(num_samples, 0);
 
         // convert samples to decimal and then bin them in counts
-        for (size_t i = 0; i < num_samples; i++) {
-            for (size_t j = 0; j < num_qubits; j++) {
+        for (std::size_t i = 0; i < num_samples; i++) {
+            for (std::size_t j = 0; j < num_qubits; j++) {
                 if (samples[i * num_qubits + j] != 0) {
                     samples_decimal[i] += twos[(num_qubits - 1 - j)];
                 }
@@ -1236,7 +1236,7 @@ template <typename TypeList> void testSamples() {
 
         // compute estimated probabilities from histogram
         std::vector<PrecisionT> probabilities(counts.size());
-        for (size_t i = 0; i < counts.size(); i++) {
+        for (std::size_t i = 0; i < counts.size(); i++) {
             probabilities[i] = counts[i] / (PrecisionT)num_samples;
         }
 
@@ -1312,12 +1312,12 @@ template <typename TypeList> void testSamplesCountsObs() {
             {0.58498357, 0.77015115, 0.91266780},
             {0.7620549436, 0.8420840225, 0.8449848566},
             {1.0, 1.0, 1.0}};
-        for (size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
+        for (std::size_t ind_obs = 0; ind_obs < obs_name.size(); ind_obs++) {
             DYNAMIC_SECTION(obs_name[ind_obs]
                             << " Sample Obs - Varying wires"
                             << StateVectorMPIToName<StateVectorT>::name) {
                 std::size_t num_shots = 10000;
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObsMPI<StateVectorT> obs(obs_name[ind_obs],
                                                   wires_list[ind_wires]);
@@ -1338,7 +1338,7 @@ template <typename TypeList> void testSamplesCountsObs() {
                             << " Counts Obs - Varying wires"
                             << StateVectorMPIToName<StateVectorT>::name) {
                 std::size_t num_shots = 10000;
-                for (size_t ind_wires = 0; ind_wires < wires_list.size();
+                for (std::size_t ind_wires = 0; ind_wires < wires_list.size();
                      ind_wires++) {
                     NamedObsMPI<StateVectorT> obs(obs_name[ind_obs],
                                                   wires_list[ind_wires]);
@@ -1371,8 +1371,8 @@ template <typename TypeList> void testSamplesCountsObs() {
             std::vector<std::size_t> samples_decimal(num_samples, 0);
 
             // convert samples to decimal and then bin them in counts
-            for (size_t i = 0; i < num_samples; i++) {
-                for (size_t j = 0; j < num_qubits; j++) {
+            for (std::size_t i = 0; i < num_samples; i++) {
+                for (std::size_t j = 0; j < num_qubits; j++) {
                     if (samples[i * num_qubits + j] != 0) {
                         samples_decimal[i] += twos[(num_qubits - 1 - j)];
                     }
@@ -1382,7 +1382,7 @@ template <typename TypeList> void testSamplesCountsObs() {
 
             // compute estimated probabilities from histogram
             std::vector<PrecisionT> probabilities(counts.size());
-            for (size_t i = 0; i < counts.size(); i++) {
+            for (std::size_t i = 0; i < counts.size(); i++) {
                 probabilities[i] = counts[i] / (PrecisionT)num_samples;
             }
 
@@ -1415,7 +1415,7 @@ template <typename TypeList> void testSamplesCountsObs() {
 
             // compute estimated probabilities from histogram
             std::vector<PrecisionT> probabilities(counts.size());
-            for (size_t i = 0; i < counts.size(); i++) {
+            for (std::size_t i = 0; i < counts.size(); i++) {
                 probabilities[i] = counts[i] / (PrecisionT)num_samples;
             }
 
diff --git a/pennylane_lightning/core/src/observables/Observables.hpp b/pennylane_lightning/core/src/observables/Observables.hpp
index d10cb4b62a..d5ba340c66 100644
--- a/pennylane_lightning/core/src/observables/Observables.hpp
+++ b/pennylane_lightning/core/src/observables/Observables.hpp
@@ -332,7 +332,7 @@ class TensorProdObsBase : public Observable<StateVectorT> {
             return false;
         }
 
-        for (size_t i = 0; i < obs_.size(); i++) {
+        for (std::size_t i = 0; i < obs_.size(); i++) {
             if (*obs_[i] != *other_cast.obs_[i]) {
                 return false;
             }
@@ -460,7 +460,7 @@ class TensorProdObsBase : public Observable<StateVectorT> {
         using Util::operator<<;
         std::ostringstream obs_stream;
         const auto obs_size = obs_.size();
-        for (size_t idx = 0; idx < obs_size; idx++) {
+        for (std::size_t idx = 0; idx < obs_size; idx++) {
             obs_stream << obs_[idx]->getObsName();
             if (idx != obs_size - 1) {
                 obs_stream << " @ ";
@@ -495,7 +495,7 @@ class HamiltonianBase : public Observable<StateVectorT> {
             return false;
         }
 
-        for (size_t i = 0; i < obs_.size(); i++) {
+        for (std::size_t i = 0; i < obs_.size(); i++) {
             if (*obs_[i] != *other_cast.obs_[i]) {
                 return false;
             }
@@ -566,7 +566,7 @@ class HamiltonianBase : public Observable<StateVectorT> {
         std::ostringstream ss;
         ss << "Hamiltonian: { 'coeffs' : " << coeffs_ << ", 'observables' : [";
         const auto term_size = coeffs_.size();
-        for (size_t t = 0; t < term_size; t++) {
+        for (std::size_t t = 0; t < term_size; t++) {
             ss << obs_[t]->getObsName();
             if (t != term_size - 1) {
                 ss << ", ";
diff --git a/pennylane_lightning/core/src/simulators/base/StateVectorBase.hpp b/pennylane_lightning/core/src/simulators/base/StateVectorBase.hpp
index df5dedf4af..2ac156a88c 100644
--- a/pennylane_lightning/core/src/simulators/base/StateVectorBase.hpp
+++ b/pennylane_lightning/core/src/simulators/base/StateVectorBase.hpp
@@ -55,7 +55,8 @@ template <class PrecisionT, class Derived> class StateVectorBase {
      *
      * @param num_qubits Number of qubits
      */
-    explicit StateVectorBase(size_t num_qubits) : num_qubits_{num_qubits} {}
+    explicit StateVectorBase(std::size_t num_qubits)
+        : num_qubits_{num_qubits} {}
 
     /**
      * @brief Get the number of qubits represented by the statevector data.
@@ -141,7 +142,7 @@ template <class PrecisionT, class Derived> class StateVectorBase {
             numOperations != ops_params.size(),
             "Invalid arguments: number of operations, wires, inverses, and "
             "parameters must all be equal");
-        for (size_t i = 0; i < numOperations; i++) {
+        for (std::size_t i = 0; i < numOperations; i++) {
             this->applyOperation(ops[i], ops_wires[i], ops_adjoint[i],
                                  ops_params[i]);
         }
@@ -168,7 +169,7 @@ template <class PrecisionT, class Derived> class StateVectorBase {
             numOperations == ops_adjoint.size(),
             "Invalid arguments: number of operations, wires and inverses"
             "must all be equal");
-        for (size_t i = 0; i < numOperations; i++) {
+        for (std::size_t i = 0; i < numOperations; i++) {
             this->applyOperation(ops[i], ops_wires[i], ops_adjoint[i], {});
         }
     }
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/MPIWorker.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/MPIWorker.hpp
index 6e524b61fe..0124233cc9 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/MPIWorker.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/MPIWorker.hpp
@@ -124,7 +124,7 @@ make_shared_mpi_worker(custatevecHandle_t handle, MPIManager &mpi_manager,
     if (nP2PDeviceBits != 0) {
         std::size_t local_device_id = mpi_manager.getRank() % nDevices;
 
-        for (size_t devId = 0; devId < nDevices; ++devId) {
+        for (std::size_t devId = 0; devId < nDevices; ++devId) {
             if (devId != local_device_id) {
                 int accessEnabled;
                 PL_CUDA_IS_SUCCESS(cudaDeviceCanAccessPeer(
@@ -267,7 +267,7 @@ make_shared_mpi_worker(custatevecHandle_t handle, MPIManager &mpi_manager,
         std::size_t p2pSubSVIndexBegin =
             (mpi_manager.getRank() / nSubSVsP2P) * nSubSVsP2P;
         std::size_t p2pSubSVIndexEnd = p2pSubSVIndexBegin + nSubSVsP2P;
-        for (size_t p2pSubSVIndex = p2pSubSVIndexBegin;
+        for (std::size_t p2pSubSVIndex = p2pSubSVIndexBegin;
              p2pSubSVIndex < p2pSubSVIndexEnd; p2pSubSVIndex++) {
             if (static_cast<std::size_t>(mpi_manager.getRank()) ==
                 p2pSubSVIndex)
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp
index d4bec462a6..90b03961e7 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp
@@ -213,14 +213,15 @@ class StateVectorCudaBase : public StateVectorBase<Precision, Derived> {
                                        const std::vector<Precision> &)>;
     using FMap = std::unordered_map<std::string, ParFunc>;
 
-    StateVectorCudaBase(size_t num_qubits, int device_id = 0,
+    StateVectorCudaBase(std::size_t num_qubits, int device_id = 0,
                         cudaStream_t stream_id = 0, bool device_alloc = true)
         : StateVectorBase<Precision, Derived>(num_qubits),
           data_buffer_{std::make_unique<LightningGPU::DataBuffer<CFP_t>>(
               Pennylane::Util::exp2(num_qubits), device_id, stream_id,
               device_alloc)} {}
 
-    StateVectorCudaBase(size_t num_qubits, LightningGPU::DevTag<int> dev_tag,
+    StateVectorCudaBase(std::size_t num_qubits,
+                        LightningGPU::DevTag<int> dev_tag,
                         bool device_alloc = true)
         : StateVectorBase<Precision, Derived>(num_qubits),
           data_buffer_{std::make_unique<LightningGPU::DataBuffer<CFP_t>>(
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
index 21192a4064..3753f792fd 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
@@ -299,7 +299,8 @@ class StateVectorCudaMPI final
         std::vector<index_type> indices_local;
         std::vector<std::complex<Precision>> values_local;
 
-        for (size_t i = 0; i < static_cast<std::size_t>(num_indices); i++) {
+        for (std::size_t i = 0; i < static_cast<std::size_t>(num_indices);
+             i++) {
             int index = indices[i];
             PL_ASSERT(index >= 0);
             std::size_t rankId =
@@ -1173,7 +1174,7 @@ class StateVectorCudaMPI final
         auto threshold = WiresSwapStatus::Swappable;
         bool isAllTargetsLocal = std::all_of(
             tgtsSwapStatus.begin(), tgtsSwapStatus.end(),
-            [&threshold](size_t status) { return status < threshold; });
+            [&threshold](std::size_t status) { return status < threshold; });
 
         mpi_manager_.Barrier();
 
@@ -1181,7 +1182,7 @@ class StateVectorCudaMPI final
             expvalOnPauliBasis(pauli_words, localTgts, expect_local);
         } else {
             std::size_t wirePairsIdx = 0;
-            for (size_t i = 0; i < pauli_words.size(); i++) {
+            for (std::size_t i = 0; i < pauli_words.size(); i++) {
                 if (tgtsSwapStatus[i] == WiresSwapStatus::UnSwappable) {
                     auto opsNames = pauliStringToOpNames(pauli_words[i]);
                     StateVectorCudaMPI<Precision> tmp(
@@ -1189,7 +1190,8 @@ class StateVectorCudaMPI final
                         this->getNumGlobalQubits(), this->getNumLocalQubits(),
                         this->getData());
 
-                    for (size_t opsIdx = 0; opsIdx < tgts[i].size(); opsIdx++) {
+                    for (std::size_t opsIdx = 0; opsIdx < tgts[i].size();
+                         opsIdx++) {
                         std::vector<std::size_t> wires = {tgts[i][opsIdx]};
                         tmp.applyOperation({opsNames[opsIdx]},
                                            {tgts[i][opsIdx]}, {false});
@@ -1665,11 +1667,11 @@ class StateVectorCudaMPI final
                                      WireStatus::Default);
 
         // Update wire status based on the gate information
-        for (size_t i = 0; i < ctrlsInt.size(); i++) {
+        for (std::size_t i = 0; i < ctrlsInt.size(); i++) {
             statusWires[ctrlsInt[i]] = WireStatus::Control;
         }
         // Update wire status based on the gate information
-        for (size_t i = 0; i < tgtsInt.size(); i++) {
+        for (std::size_t i = 0; i < tgtsInt.size(); i++) {
             statusWires[tgtsInt[i]] = WireStatus::Target;
         }
 
@@ -1825,11 +1827,11 @@ class StateVectorCudaMPI final
                                      WireStatus::Default);
 
         // Update wire status based on the gate information
-        for (size_t i = 0; i < ctrlsInt.size(); i++) {
+        for (std::size_t i = 0; i < ctrlsInt.size(); i++) {
             statusWires[ctrlsInt[i]] = WireStatus::Control;
         }
         // Update wire status based on the gate information
-        for (size_t i = 0; i < tgtsInt.size(); i++) {
+        for (std::size_t i = 0; i < tgtsInt.size(); i++) {
             statusWires[tgtsInt[i]] = WireStatus::Target;
         }
 
@@ -1966,7 +1968,7 @@ class StateVectorCudaMPI final
                                      WireStatus::Default);
 
         // Update wire status based on the gate information
-        for (size_t i = 0; i < tgtsInt.size(); i++) {
+        for (std::size_t i = 0; i < tgtsInt.size(); i++) {
             statusWires[tgtsInt[i]] = WireStatus::Target;
         }
 
@@ -2055,7 +2057,7 @@ class StateVectorCudaMPI final
         // the main loop of index bit swaps
         //
         constexpr std::size_t nLoops = 2;
-        for (size_t loop = 0; loop < nLoops; loop++) {
+        for (std::size_t loop = 0; loop < nLoops; loop++) {
             for (int swapBatchIndex = 0;
                  swapBatchIndex < static_cast<int>(nSwapBatches);
                  swapBatchIndex++) {
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
index e7bcadcb18..716d95c89f 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
@@ -107,7 +107,7 @@ class StateVectorCudaManaged
     using MemoryStorageT = Pennylane::Util::MemoryStorageLocation::Undefined;
 
     StateVectorCudaManaged() = delete;
-    StateVectorCudaManaged(size_t num_qubits)
+    StateVectorCudaManaged(std::size_t num_qubits)
         : StateVectorCudaBase<Precision, StateVectorCudaManaged<Precision>>(
               num_qubits),
           handle_(make_shared_cusv_handle()),
@@ -223,7 +223,7 @@ class StateVectorCudaManaged
      * @param adjoint Indicates whether to use adjoint of gate.
      * @param param Complex phase generator.
      */
-    template <size_t thread_per_block = 256>
+    template <std::size_t thread_per_block = 256>
     void globalPhaseStateVector(const bool adjoint, const Precision param) {
         auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID();
         std::complex<Precision> phase =
@@ -238,7 +238,7 @@ class StateVectorCudaManaged
      *
      * @param phase Controlled complex phase vector.
      */
-    template <size_t thread_per_block = 256>
+    template <std::size_t thread_per_block = 256>
     void cGlobalPhaseStateVector(const bool adjoint,
                                  const std::vector<CFP_t> &phase,
                                  const bool async = false) {
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/AdjointJacobianGPU.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/AdjointJacobianGPU.hpp
index 48258735c7..79f5ee40d4 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/AdjointJacobianGPU.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/AdjointJacobianGPU.hpp
@@ -82,7 +82,7 @@ class AdjointJacobian final
                    DataBuffer<CFP_t, int> &device_buffer_jac_single_param,
                    std::vector<CFP_t> &host_buffer_jac_single_param) {
         host_buffer_jac_single_param.clear();
-        for (size_t obs_idx = 0; obs_idx < num_observables; obs_idx++) {
+        for (std::size_t obs_idx = 0; obs_idx < num_observables; obs_idx++) {
             const StateVectorT &sv1 = sv1s[obs_idx];
             PL_ABORT_IF_NOT(sv1.getDataBuffer().getDevTag().getDeviceID() ==
                                 sv2.getDataBuffer().getDevTag().getDeviceID(),
@@ -98,7 +98,7 @@ class AdjointJacobian final
         device_buffer_jac_single_param.CopyGpuDataToHost(
             host_buffer_jac_single_param.data(),
             host_buffer_jac_single_param.size(), false);
-        for (size_t obs_idx = 0; obs_idx < num_observables; obs_idx++) {
+        for (std::size_t obs_idx = 0; obs_idx < num_observables; obs_idx++) {
             std::size_t idx = param_index + obs_idx * tp_size;
             jac[idx] =
                 -2 * scaling_coeff * host_buffer_jac_single_param[obs_idx].y;
@@ -284,7 +284,7 @@ class AdjointJacobian final
 
         // Create observable-applied state-vectors
         std::vector<StateVectorT> H_lambda;
-        for (size_t n = 0; n < num_observables; n++) {
+        for (std::size_t n = 0; n < num_observables; n++) {
             H_lambda.emplace_back(lambda.getNumQubits(), dt_local, true,
                                   cusvhandle, cublascaller, cusparsehandle);
         }
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/AdjointJacobianGPUMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/AdjointJacobianGPUMPI.hpp
index b39c47456d..77b3a8656b 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/AdjointJacobianGPUMPI.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/AdjointJacobianGPUMPI.hpp
@@ -151,7 +151,7 @@ class AdjointJacobianMPI final
                             lambda_ref.getNumLocalQubits(),
                             lambda_ref.getData());
 
-        for (size_t obs_idx = 0; obs_idx < num_observables; obs_idx++) {
+        for (std::size_t obs_idx = 0; obs_idx < num_observables; obs_idx++) {
             lambda.updateData(lambda_ref);
 
             // Create observable-applied state-vectors
@@ -281,7 +281,7 @@ class AdjointJacobianMPI final
         using SVTypePtr = std::unique_ptr<StateVectorT>;
         std::unique_ptr<SVTypePtr[]> H_lambda(new SVTypePtr[num_observables]);
 
-        for (size_t h_i = 0; h_i < num_observables; h_i++) {
+        for (std::size_t h_i = 0; h_i < num_observables; h_i++) {
             H_lambda[h_i] = std::make_unique<StateVectorT>(
                 dt_local, lambda.getNumGlobalQubits(),
                 lambda.getNumLocalQubits(), lambda.getData());
@@ -317,7 +317,7 @@ class AdjointJacobianMPI final
                             !ops.getOpsInverses()[op_idx]) *
                         (ops.getOpsInverses()[op_idx] ? -1 : 1);
 
-                    for (size_t obs_idx = 0; obs_idx < num_observables;
+                    for (std::size_t obs_idx = 0; obs_idx < num_observables;
                          obs_idx++) {
                         updateJacobian(*H_lambda[obs_idx], mu, jac,
                                        scalingFactor, obs_idx,
@@ -330,7 +330,8 @@ class AdjointJacobianMPI final
                 current_param_idx--;
             }
 
-            for (size_t obs_idx = 0; obs_idx < num_observables; obs_idx++) {
+            for (std::size_t obs_idx = 0; obs_idx < num_observables;
+                 obs_idx++) {
                 BaseType::applyOperationAdj(*H_lambda[obs_idx], ops, op_idx);
             }
         }
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/JacobianDataMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/JacobianDataMPI.hpp
index 6e3d5c492b..1afa54cf3c 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/JacobianDataMPI.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/algorithms/JacobianDataMPI.hpp
@@ -60,7 +60,7 @@ class JacobianDataMPI final : public JacobianData<StateVectorT> {
      * (e.g. StatePrep) or Hamiltonian coefficients.
      * @endrst
      */
-    JacobianDataMPI(size_t num_params, const StateVectorT &sv,
+    JacobianDataMPI(std::size_t num_params, const StateVectorT &sv,
                     std::vector<std::shared_ptr<Observable<StateVectorT>>> obs,
                     OpsData<StateVectorT> ops, std::vector<std::size_t> trainP)
         : JacobianData<StateVectorT>(num_params, sv.getLength(), sv.getData(),
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Generators.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Generators.cpp
index edbb825d05..1129e5a662 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Generators.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Generators.cpp
@@ -1692,7 +1692,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyGeneratorGlobalPhase",
         auto result_gate_svp = gate_svp.getDataVector();
         auto result_gate_svm = gate_svm.getDataVector();
 
-        for (size_t j = 0; j < exp2(num_qubits); j++) {
+        for (std::size_t j = 0; j < exp2(num_qubits); j++) {
             CHECK(-scale * imag(result_gntr_sv[j]) ==
                   Approx(0.5 *
                          (real(result_gate_svp[j]) - real(result_gate_svm[j])) /
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp
index 55f2710653..b552ef5f01 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_NonParam.cpp
@@ -1077,7 +1077,8 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetStateVector",
             createRandomStateVectorData<PrecisionT>(re, num_qubits);
         auto expected_state = init_state;
 
-        for (size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1); i++) {
+        for (std::size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1);
+             i++) {
             std::swap(expected_state[i * 2], expected_state[i * 2 + 1]);
         }
 
@@ -1116,7 +1117,8 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetStateVectorwith_thread_setting",
             createRandomStateVectorData<PrecisionT>(re, num_qubits);
         auto expected_state = init_state;
 
-        for (size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1); i++) {
+        for (std::size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1);
+             i++) {
             std::swap(expected_state[i * 2], expected_state[i * 2 + 1]);
         }
 
@@ -1158,7 +1160,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::SetIthStates",
 
         expected_state[0] = expected_state[1];
 
-        for (size_t i = 1; i < Pennylane::Util::exp2(num_qubits); i++) {
+        for (std::size_t i = 1; i < Pennylane::Util::exp2(num_qubits); i++) {
             expected_state[i] = {0, 0};
         }
 
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Param.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Param.cpp
index 3938dd6168..c93eba882e 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Param.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Param.cpp
@@ -71,7 +71,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRX", "[LightningGPU_Param]", double) {
                               {0, -0.867423225594017}}};
 
         SECTION("Apply directly") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyRX({0}, false, angles[index]);
@@ -80,7 +80,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRX", "[LightningGPU_Param]", double) {
             }
         }
         SECTION("Apply using dispatcher") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_dispatch{init_state.data(),
                                                              init_state.size()};
                 sv_dispatch.applyOperation("RX", {0}, false, {angles[index]});
@@ -99,7 +99,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRX", "[LightningGPU_Param]", double) {
                               {0, 0.867423225594017}}};
 
         SECTION("Apply directly") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyRX({0}, true, {angles[index]});
@@ -108,7 +108,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRX", "[LightningGPU_Param]", double) {
             }
         }
         SECTION("Apply using dispatcher") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_dispatch{init_state.data(),
                                                              init_state.size()};
                 sv_dispatch.applyOperation("RX", {0}, true, {angles[index]});
@@ -136,7 +136,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRY", "[LightningGPU_Param]", float,
             std::vector<cp_t>{{0.10575112905629831, 0.47593196040758534},
                               {0.8711876098966215, -0.0577721051072477}}};
         SECTION("Apply directly") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyRY({0}, false, angles[index]);
@@ -145,7 +145,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRY", "[LightningGPU_Param]", float,
             }
         }
         SECTION("Apply using dispatcher") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_dispatch{init_state.data(),
                                                              init_state.size()};
                 sv_dispatch.applyOperation("RY", {0}, false, {angles[index]});
@@ -163,7 +163,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRY", "[LightningGPU_Param]", float,
             std::vector<cp_t>{{0.10575112905629831, -0.47593196040758534},
                               {-0.8711876098966215, -0.0577721051072477}}};
         SECTION("Apply directly") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyRY({0}, true, {angles[index]});
@@ -172,7 +172,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRY", "[LightningGPU_Param]", float,
             }
         }
         SECTION("Apply using dispatcher") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_dispatch{init_state.data(),
                                                              init_state.size()};
                 sv_dispatch.applyOperation("RY", {0}, true, {angles[index]});
@@ -224,7 +224,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRZ", "[LightningGPU_Param]", float,
 
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly") {
-        for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t index = 0; index < num_qubits; index++) {
             StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                        init_state.size()};
 
@@ -234,7 +234,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRZ", "[LightningGPU_Param]", float,
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t index = 0; index < num_qubits; index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{init_state.data(),
                                                          init_state.size()};
             sv_dispatch.applyOperation("RZ", {index}, false, {angles[index]});
@@ -287,7 +287,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyPhaseShift", "[LightningGPU_Param]",
 
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly") {
-        for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t index = 0; index < num_qubits; index++) {
             StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                        init_state.size()};
 
@@ -297,7 +297,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyPhaseShift", "[LightningGPU_Param]",
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t index = 0; index < num_qubits; index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{init_state.data(),
                                                          init_state.size()};
             sv_dispatch.applyOperation("PhaseShift", {index}, false,
@@ -374,7 +374,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRot", "[LightningGPU_Param]", float,
         std::vector<cp_t>(0b1 << num_qubits),
         std::vector<cp_t>(0b1 << num_qubits)};
 
-    for (size_t i = 0; i < angles.size(); i++) {
+    for (std::size_t i = 0; i < angles.size(); i++) {
         const auto rot_mat =
             (adjoint) ? Gates::getRot<std::complex, TestType>(
                             -angles[i][0], -angles[i][1], -angles[i][2])
@@ -385,7 +385,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRot", "[LightningGPU_Param]", float,
     }
 
     SECTION("Apply directly") {
-        for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t index = 0; index < num_qubits; index++) {
             StateVectorCudaManaged<TestType> sv_direct{num_qubits};
             sv_direct.initSV();
 
@@ -394,7 +394,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRot", "[LightningGPU_Param]", float,
             CHECK(sv_direct.getDataVector() ==
                   Pennylane::Util::approx(expected_results[index]));
         }
-        for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t index = 0; index < num_qubits; index++) {
             StateVectorCudaManaged<TestType> sv_direct{num_qubits};
             sv_direct.initSV();
 
@@ -404,7 +404,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyRot", "[LightningGPU_Param]", float,
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t index = 0; index < num_qubits; index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
 
@@ -526,7 +526,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingXX", "[LightningGPU_Param]", float,
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly adjoint=false") {
         SECTION("IsingXX 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -536,7 +536,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingXX", "[LightningGPU_Param]", float,
             }
         }
         SECTION("IsingXX 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -549,7 +549,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingXX", "[LightningGPU_Param]", float,
     }
     SECTION("Apply directly adjoint=true") {
         SECTION("IsingXX 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -559,7 +559,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingXX", "[LightningGPU_Param]", float,
             }
         }
         SECTION("IsingXX 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingXX({0, 2}, true, angles[index]);
@@ -570,7 +570,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingXX", "[LightningGPU_Param]", float,
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{init_state.data(),
                                                          init_state.size()};
 
@@ -679,7 +679,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingYY", "[LightningGPU_Param]", float,
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly adjoint=false") {
         SECTION("IsingYY 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingYY({0, 1}, false, angles[index]);
@@ -688,7 +688,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingYY", "[LightningGPU_Param]", float,
             }
         }
         SECTION("IsingYY 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingYY({0, 2}, false, angles[index]);
@@ -700,7 +700,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingYY", "[LightningGPU_Param]", float,
     }
     SECTION("Apply directly adjoint=true") {
         SECTION("IsingYY 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingYY({0, 1}, true, angles[index]);
@@ -709,7 +709,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingYY", "[LightningGPU_Param]", float,
             }
         }
         SECTION("IsingYY 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingYY({0, 2}, true, angles[index]);
@@ -720,7 +720,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingYY", "[LightningGPU_Param]", float,
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
 
@@ -755,7 +755,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingZZ", "[LightningGPU_Param]", float,
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly adjoint=false") {
         SECTION("IsingZZ 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingZZ({0, 1}, false, angles[index]);
@@ -764,7 +764,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingZZ", "[LightningGPU_Param]", float,
             }
         }
         SECTION("IsingZZ 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingZZ({0, 2}, false, angles[index]);
@@ -775,7 +775,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingZZ", "[LightningGPU_Param]", float,
     }
     SECTION("Apply directly adjoint=true") {
         SECTION("IsingZZ 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingZZ({0, 1}, true, angles[index]);
@@ -784,7 +784,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingZZ", "[LightningGPU_Param]", float,
             }
         }
         SECTION("IsingZZ 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyIsingZZ({0, 2}, true, angles[index]);
@@ -794,7 +794,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyIsingZZ", "[LightningGPU_Param]", float,
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
 
@@ -998,7 +998,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitation",
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly") {
         SECTION("SingleExcitation 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1008,7 +1008,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitation",
             }
         }
         SECTION("SingleExcitation 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1019,7 +1019,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitation",
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
             sv_dispatch.applyOperation("SingleExcitation", {0, 1}, false,
@@ -1052,7 +1052,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationMinus",
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly adjoint=false") {
         SECTION("SingleExcitationMinus 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1063,7 +1063,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationMinus",
             }
         }
         SECTION("SingleExcitationMinus 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1076,7 +1076,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationMinus",
     }
     SECTION("Apply directly adjoint=true") {
         SECTION("SingleExcitationMinus 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1087,7 +1087,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationMinus",
             }
         }
         SECTION("SingleExcitationMinus 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1099,7 +1099,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationMinus",
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
 
@@ -1133,7 +1133,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationPlus",
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly adjoint=false") {
         SECTION("SingleExcitationPlus 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1144,7 +1144,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationPlus",
             }
         }
         SECTION("SingleExcitationPlus 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1157,7 +1157,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationPlus",
     }
     SECTION("Apply directly adjoint=true") {
         SECTION("SingleExcitationPlus 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1168,7 +1168,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationPlus",
             }
         }
         SECTION("SingleExcitationPlus 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1180,7 +1180,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applySingleExcitationPlus",
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
 
@@ -1207,7 +1207,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitation",
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly") {
         SECTION("DoubleExcitation 0,1,2,3") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1219,7 +1219,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitation",
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
 
@@ -1253,7 +1253,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationMinus",
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly adjoint=false") {
         SECTION("DoubleExcitationMinus 0,1,2,3") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyDoubleExcitationMinus({0, 1, 2, 3}, false,
@@ -1265,7 +1265,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationMinus",
     }
     SECTION("Apply directly adjoint=true") {
         SECTION("DoubleExcitationMinus 0,1,2,3") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyDoubleExcitationMinus({0, 1, 2, 3}, true,
@@ -1276,7 +1276,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationMinus",
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
 
@@ -1310,7 +1310,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationPlus",
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly adjoint=false") {
         SECTION("DoubleExcitationPlus 0,1,2,3") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1323,7 +1323,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationPlus",
     }
     SECTION("Apply directly adjoint=true") {
         SECTION("DoubleExcitationPlus 0,1,2,3") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyDoubleExcitationPlus({0, 1, 2, 3}, true,
@@ -1334,7 +1334,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyDoubleExcitationPlus",
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
             sv_dispatch.applyOperation("DoubleExcitationPlus", {0, 1, 2, 3},
@@ -1367,7 +1367,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyMultiRZ", "[LightningGPU_Param]", float,
     const auto init_state = sv.getDataVector();
     SECTION("Apply directly adjoint=false") {
         SECTION("MultiRZ 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1377,7 +1377,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyMultiRZ", "[LightningGPU_Param]", float,
             }
         }
         SECTION("MultiRZ 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
                 sv_direct.applyMultiRZ({0, 2}, false, angles[index]);
@@ -1389,7 +1389,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyMultiRZ", "[LightningGPU_Param]", float,
     }
     SECTION("Apply directly adjoint=true") {
         SECTION("MultiRZ 0,1") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1399,7 +1399,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyMultiRZ", "[LightningGPU_Param]", float,
             }
         }
         SECTION("MultiRZ 0,2") {
-            for (size_t index = 0; index < angles.size(); index++) {
+            for (std::size_t index = 0; index < angles.size(); index++) {
                 StateVectorCudaManaged<TestType> sv_direct{init_state.data(),
                                                            init_state.size()};
 
@@ -1410,7 +1410,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyMultiRZ", "[LightningGPU_Param]", float,
         }
     }
     SECTION("Apply using dispatcher") {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             StateVectorCudaManaged<TestType> sv_dispatch{num_qubits};
             sv_dispatch.initSV();
 
@@ -1442,7 +1442,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire",
             StateVectorCudaManaged<TestType> sv_expected{num_qubits};
             sv_expected.initSV();
 
-            for (size_t index = 0; index < num_qubits; index++) {
+            for (std::size_t index = 0; index < num_qubits; index++) {
                 sv_expected.applyOperations({{"PauliX"}, {"PauliZ"}},
                                             {{index}, {index}}, {false, false});
 
@@ -1463,7 +1463,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire",
             StateVectorCudaManaged<TestType> sv_expected{num_qubits};
             sv_expected.initSV();
 
-            for (size_t index = 0; index < num_qubits; index++) {
+            for (std::size_t index = 0; index < num_qubits; index++) {
                 sv_expected.applyOperations({{"PauliZ"}, {"PauliX"}},
                                             {{index}, {index}}, {false, false});
                 sv.applyOperation("ZX", {index}, false, {0.0}, zx_gate);
@@ -1482,7 +1482,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire",
             StateVectorCudaManaged<TestType> sv_expected{num_qubits};
             sv_expected.initSV();
 
-            for (size_t index = 0; index < num_qubits; index++) {
+            for (std::size_t index = 0; index < num_qubits; index++) {
                 sv_expected.applyOperations({{"PauliX"}, {"PauliY"}},
                                             {{index}, {index}}, {false, false});
                 sv.applyOperation("XY", {index}, false, {0.0}, xy_gate);
@@ -1501,7 +1501,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire",
             StateVectorCudaManaged<TestType> sv_expected{num_qubits};
             sv_expected.initSV();
 
-            for (size_t index = 0; index < num_qubits; index++) {
+            for (std::size_t index = 0; index < num_qubits; index++) {
                 sv_expected.applyOperations({{"PauliY"}, {"PauliX"}},
                                             {{index}, {index}}, {false, false});
                 sv.applyOperation("YX", {index}, false, {0.0}, yx_gate);
@@ -1521,7 +1521,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire",
             StateVectorCudaManaged<TestType> sv_expected{num_qubits};
             sv_expected.initSV();
 
-            for (size_t index = 0; index < num_qubits; index++) {
+            for (std::size_t index = 0; index < num_qubits; index++) {
                 sv_expected.applyOperations({{"PauliY"}, {"PauliZ"}},
                                             {{index}, {index}}, {false, false});
                 sv.applyOperation("YZ", {index}, false, {0.0}, yz_gate);
@@ -1541,7 +1541,7 @@ TEMPLATE_TEST_CASE("LightningGPU::applyOperation 1 wire",
             StateVectorCudaManaged<TestType> sv_expected{num_qubits};
             sv_expected.initSV();
 
-            for (size_t index = 0; index < num_qubits; index++) {
+            for (std::size_t index = 0; index < num_qubits; index++) {
                 sv_expected.applyOperations({{"PauliZ"}, {"PauliY"}},
                                             {{index}, {index}}, {false, false});
                 sv.applyOperation("ZY", {index}, false, {0.0}, zy_gate);
@@ -1596,7 +1596,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyGlobalPhase",
         reinterpret_cast<ComplexT *>(sv_data.data()), sv_data.size());
     sv.applyOperation("GlobalPhase", {index}, inverse, {param});
     auto result_sv = sv.getDataVector();
-    for (size_t j = 0; j < exp2(num_qubits); j++) {
+    for (std::size_t j = 0; j < exp2(num_qubits); j++) {
         ComplexT tmp = phase * ComplexT(sv_data[j]);
         CHECK((real(result_sv[j])) == Approx(real(tmp)));
         CHECK((imag(result_sv[j])) == Approx(imag(tmp)));
@@ -1629,7 +1629,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::applyControlledGlobalPhase",
         reinterpret_cast<ComplexT *>(sv_data.data()), sv_data.size());
     sv.applyOperation("C(GlobalPhase)", {index}, inverse, {}, phase);
     auto result_sv = sv.getDataVector();
-    for (size_t j = 0; j < exp2(num_qubits); j++) {
+    for (std::size_t j = 0; j < exp2(num_qubits); j++) {
         ComplexT tmp = (inverse) ? conj(phase[j]) : phase[j];
         tmp *= ComplexT(sv_data[j]);
         CHECK((real(result_sv[j])) == Approx(real(tmp)));
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp
index 3531de47c4..9b88afa388 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/mpi/Test_StateVectorCudaMPI_NonParam.cpp
@@ -131,7 +131,8 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::SetStateVector",
         init_state =
             std::vector<cp_t>(st.begin(), st.end(), init_state.get_allocator());
         expected_state = init_state;
-        for (size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1); i++) {
+        for (std::size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1);
+             i++) {
             std::swap(expected_state[i * 2], expected_state[i * 2 + 1]);
             indices[i * 2] = i * 2 + 1;
             indices[i * 2 + 1] = i * 2;
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp
index a229425175..460a4fa8cb 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp
@@ -157,7 +157,7 @@ class Measurements final
      */
     auto probs() -> std::vector<PrecisionT> {
         std::vector<std::size_t> wires;
-        for (size_t i = 0; i < this->_statevector.getNumQubits(); i++) {
+        for (std::size_t i = 0; i < this->_statevector.getNumQubits(); i++) {
             wires.push_back(i);
         }
         return this->probs(wires);
@@ -185,7 +185,7 @@ class Measurements final
      *
      * @return Floating point std::vector with probabilities.
      */
-    std::vector<PrecisionT> probs(size_t num_shots) {
+    std::vector<PrecisionT> probs(std::size_t num_shots) {
         return BaseType::probs(num_shots);
     }
 
@@ -218,7 +218,7 @@ class Measurements final
      * be accessed using the stride sample_id*num_qubits, where sample_id is a
      * number between 0 and num_samples-1.
      */
-    auto generate_samples(size_t num_samples) -> std::vector<std::size_t> {
+    auto generate_samples(std::size_t num_samples) -> std::vector<std::size_t> {
         std::vector<double> rand_nums(num_samples);
         custatevecSamplerDescriptor_t sampler;
 
@@ -240,11 +240,11 @@ class Measurements final
 
         this->setRandomSeed();
         std::uniform_real_distribution<PrecisionT> dis(0.0, 1.0);
-        for (size_t n = 0; n < num_samples; n++) {
+        for (std::size_t n = 0; n < num_samples; n++) {
             rand_nums[n] = dis(this->rng);
         }
         std::vector<std::size_t> samples(num_samples * num_qubits, 0);
-        std::unordered_map<size_t, std::size_t> cache;
+        std::unordered_map<std::size_t, std::size_t> cache;
         std::vector<custatevecIndex_t> bitStrings(num_samples);
 
         void *extraWorkspace = nullptr;
@@ -281,7 +281,7 @@ class Measurements final
         PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerDestroy(sampler));
 
         // Pick samples
-        for (size_t i = 0; i < num_samples; i++) {
+        for (std::size_t i = 0; i < num_samples; i++) {
             auto idx = bitStrings[i];
             // If cached, retrieve sample from cache
             if (cache.count(idx) != 0) {
@@ -292,7 +292,7 @@ class Measurements final
             }
             // If not cached, compute
             else {
-                for (size_t j = 0; j < num_qubits; j++) {
+                for (std::size_t j = 0; j < num_qubits; j++) {
                     samples[i * num_qubits + (num_qubits - 1 - j)] =
                         (idx >> j) & 1U;
                 }
@@ -382,7 +382,7 @@ class Measurements final
             "The lengths of the list of operations and wires do not match.");
         std::vector<PrecisionT> expected_value_list;
 
-        for (size_t index = 0; index < operations_list.size(); index++) {
+        for (std::size_t index = 0; index < operations_list.size(); index++) {
             expected_value_list.emplace_back(
                 expval(operations_list[index], wires_list[index]));
         }
@@ -636,7 +636,7 @@ class Measurements final
 
         std::vector<PrecisionT> expected_value_list;
 
-        for (size_t index = 0; index < operations_list.size(); index++) {
+        for (std::size_t index = 0; index < operations_list.size(); index++) {
             expected_value_list.emplace_back(
                 var(operations_list[index], wires_list[index]));
         }
@@ -664,9 +664,9 @@ class Measurements final
                    const int64_t csrOffsets_size, const index_type *columns_ptr,
                    const std::complex<PrecisionT> *values_ptr,
                    const int64_t numNNZ) {
-        PL_ABORT_IF(
-            (this->_statevector.getLength() != (size_t(csrOffsets_size) - 1)),
-            "Statevector and Hamiltonian have incompatible sizes.");
+        PL_ABORT_IF((this->_statevector.getLength() !=
+                     (std::size_t(csrOffsets_size) - 1)),
+                    "Statevector and Hamiltonian have incompatible sizes.");
 
         StateVectorT ob_sv(this->_statevector.getData(),
                            this->_statevector.getLength());
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp
index 41961e26e3..126ce2e686 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp
@@ -147,7 +147,7 @@ class MeasurementsMPI final
 
         // create new MPI communicator groups
         std::size_t subCommGroupId = 0;
-        for (size_t i = 0; i < wires_global.size(); i++) {
+        for (std::size_t i = 0; i < wires_global.size(); i++) {
             std::size_t mask =
                 1 << (wires_global[i] - this->_statevector.getNumLocalQubits());
             std::size_t bitValue = mpi_manager_.getRank() & mask;
@@ -205,7 +205,7 @@ class MeasurementsMPI final
      */
     auto probs() -> std::vector<PrecisionT> {
         std::vector<std::size_t> wires;
-        for (size_t i = 0; i < this->_statevector.getNumQubits(); i++) {
+        for (std::size_t i = 0; i < this->_statevector.getNumQubits(); i++) {
             wires.push_back(i);
         }
         return this->probs(wires);
@@ -233,7 +233,7 @@ class MeasurementsMPI final
      *
      * @return Floating point std::vector with probabilities.
      */
-    std::vector<PrecisionT> probs(size_t num_shots) {
+    std::vector<PrecisionT> probs(std::size_t num_shots) {
         return BaseType::probs(num_shots);
     }
 
@@ -262,7 +262,7 @@ class MeasurementsMPI final
      * be accessed using the stride sample_id*num_qubits, where sample_id is a
      * number between 0 and num_samples-1.
      */
-    auto generate_samples(size_t num_samples) -> std::vector<std::size_t> {
+    auto generate_samples(std::size_t num_samples) -> std::vector<std::size_t> {
         double epsilon = 1e-15;
         std::size_t nSubSvs = 1UL << (this->_statevector.getNumGlobalQubits());
         std::vector<double> rand_nums(num_samples);
@@ -274,7 +274,7 @@ class MeasurementsMPI final
 
         std::vector<int> bitOrdering(bitStringLen);
 
-        for (size_t i = 0; i < bitOrdering.size(); i++) {
+        for (std::size_t i = 0; i < bitOrdering.size(); i++) {
             bitOrdering[i] = i;
         }
 
@@ -282,7 +282,7 @@ class MeasurementsMPI final
         std::vector<custatevecIndex_t> globalBitStrings(num_samples);
 
         if (mpi_manager_.getRank() == 0) {
-            for (size_t n = 0; n < num_samples; n++) {
+            for (std::size_t n = 0; n < num_samples; n++) {
                 rand_nums[n] = (n + 1.0) / (num_samples + 2.0);
             }
         }
@@ -393,8 +393,8 @@ class MeasurementsMPI final
         mpi_manager_.Allreduce<custatevecIndex_t>(localBitStrings,
                                                   globalBitStrings, "sum");
 
-        for (size_t i = 0; i < num_samples; i++) {
-            for (size_t j = 0; j < bitStringLen; j++) {
+        for (std::size_t i = 0; i < num_samples; i++) {
+            for (std::size_t j = 0; j < bitStringLen; j++) {
                 samples[i * bitStringLen + (bitStringLen - 1 - j)] =
                     (globalBitStrings[i] >> j) & 1U;
             }
@@ -424,7 +424,7 @@ class MeasurementsMPI final
         if (mpi_manager_.getRank() == 0) {
             PL_ABORT_IF_NOT(
                 static_cast<std::size_t>(csrOffsets_size - 1) ==
-                    (size_t{1} << this->_statevector.getTotalNumQubits()),
+                    (std::size_t{1} << this->_statevector.getTotalNumQubits()),
                 "Incorrect size of CSR Offsets.");
             PL_ABORT_IF_NOT(numNNZ > 0, "Empty CSR matrix.");
         }
@@ -499,7 +499,7 @@ class MeasurementsMPI final
             "The lengths of the list of operations and wires do not match.");
         std::vector<PrecisionT> expected_value_list;
 
-        for (size_t index = 0; index < operations_list.size(); index++) {
+        for (std::size_t index = 0; index < operations_list.size(); index++) {
             expected_value_list.emplace_back(
                 expval(operations_list[index], wires_list[index]));
             PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize());
@@ -703,7 +703,7 @@ class MeasurementsMPI final
 
         std::vector<PrecisionT> var_list;
 
-        for (size_t index = 0; index < operations_list.size(); index++) {
+        for (std::size_t index = 0; index < operations_list.size(); index++) {
             var_list.emplace_back(
                 var(operations_list[index], wires_list[index]));
         }
@@ -734,7 +734,7 @@ class MeasurementsMPI final
         if (mpi_manager_.getRank() == 0) {
             PL_ABORT_IF_NOT(
                 static_cast<std::size_t>(csrOffsets_size - 1) ==
-                    (size_t{1} << this->_statevector.getTotalNumQubits()),
+                    (std::size_t{1} << this->_statevector.getTotalNumQubits()),
                 "Incorrect size of CSR Offsets.");
             PL_ABORT_IF_NOT(numNNZ > 0, "Empty CSR matrix.");
         }
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp
index 9f63465d55..3de2f6aab6 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp
@@ -219,7 +219,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::Hamiltonian_expval",
         std::size_t matrix_dim = static_cast<std::size_t>(1U) << num_qubits;
         std::vector<ComplexT> matrix(matrix_dim * matrix_dim);
 
-        for (size_t i = 0; i < matrix.size(); i++) {
+        for (std::size_t i = 0; i < matrix.size(); i++) {
             if (i % matrix_dim == i / matrix_dim)
                 matrix[i] = ComplexT{1, 0};
             else
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Measure.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Measure.cpp
index f72bff7f1d..f23497f0c7 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Measure.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Measure.cpp
@@ -149,7 +149,7 @@ TEMPLATE_TEST_CASE("Pauli word based API", "[Measurements]", float, double) {
             Measurer.expval(operations_list, wires_list, coeffs.data());
         exp_values_ref = {0.49272486, 0.42073549, 0.28232124};
         PrecisionT expected_values = 0;
-        for (size_t i = 0; i < coeffs.size(); i++) {
+        for (std::size_t i = 0; i < coeffs.size(); i++) {
             expected_values += exp_values_ref[i] * (coeffs[i].real());
         }
         CHECK(exp_values == Approx(expected_values).margin(1e-7));
@@ -159,7 +159,7 @@ TEMPLATE_TEST_CASE("Pauli word based API", "[Measurements]", float, double) {
             Measurer.expval(operations_list, wires_list, coeffs.data());
         exp_values_ref = {-0.64421768, -0.47942553, -0.29552020};
         expected_values = 0;
-        for (size_t i = 0; i < coeffs.size(); i++) {
+        for (std::size_t i = 0; i < coeffs.size(); i++) {
             expected_values += exp_values_ref[i] * (coeffs[i].real());
         }
         CHECK(exp_values == Approx(expected_values).margin(1e-7));
@@ -169,7 +169,7 @@ TEMPLATE_TEST_CASE("Pauli word based API", "[Measurements]", float, double) {
             Measurer.expval(operations_list, wires_list, coeffs.data());
         exp_values_ref = {0.58498357, 0.77015115, 0.91266780};
         expected_values = 0;
-        for (size_t i = 0; i < coeffs.size(); i++) {
+        for (std::size_t i = 0; i < coeffs.size(); i++) {
             expected_values += exp_values_ref[i] * (coeffs[i].real());
         }
         CHECK(exp_values == Approx(expected_values).margin(1e-7));
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.hpp
index fcb6a95b2d..60ffd6ac4c 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.hpp
@@ -195,7 +195,8 @@ class Hamiltonian final : public HamiltonianBase<StateVectorT> {
                                                 sv.getDataBuffer().getDevTag());
         buffer->zeroInit();
 
-        for (size_t term_idx = 0; term_idx < this->coeffs_.size(); term_idx++) {
+        for (std::size_t term_idx = 0; term_idx < this->coeffs_.size();
+             term_idx++) {
             StateVectorT tmp(sv);
             this->obs_[term_idx]->applyInPlace(tmp);
             scaleAndAddC_CUDA(
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.hpp
index 5d6c6b66b7..19d3ea24fa 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.hpp
@@ -197,7 +197,8 @@ class HamiltonianMPI final : public HamiltonianBase<StateVectorT> {
                                       sv.getDataBuffer().getDevTag());
         buffer.zeroInit();
 
-        for (size_t term_idx = 0; term_idx < this->coeffs_.size(); term_idx++) {
+        for (std::size_t term_idx = 0; term_idx < this->coeffs_.size();
+             term_idx++) {
             DevTag<int> dt_local(sv.getDataBuffer().getDevTag());
             dt_local.refresh();
             StateVectorT tmp(dt_local, sv.getNumGlobalQubits(),
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/Test_StateVectorCudaManaged.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/Test_StateVectorCudaManaged.cpp
index 924dab1376..4003395b53 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/Test_StateVectorCudaManaged.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/Test_StateVectorCudaManaged.cpp
@@ -237,7 +237,7 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::StateVectorCudaManaged",
     using ComplexT = typename StateVectorT::ComplexT;
     std::mt19937_64 re{1337};
 
-    SECTION("StateVectorCudaManaged<TestType> {size_t}") {
+    SECTION("StateVectorCudaManaged<TestType> {std::size_t}") {
         REQUIRE(std::is_constructible_v<StateVectorT, std::size_t>);
         const std::size_t num_qubits = 4;
         StateVectorT sv(num_qubits);
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp
index be6c68d1c5..6dd5a01590 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/tests/mpi/Test_StateVectorCudaMPI.cpp
@@ -63,12 +63,12 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVectorCudaMPI::Constructibility",
     }
     SECTION(
         "StateVectorBackend<TestType> {MPIManager, DevTag<int>, std::size_t, "
-        "size_t, std::size_t}") {
+        "std::size_t, std::size_t}") {
         REQUIRE(std::is_constructible_v<StateVectorT, MPIManager, DevTag<int>,
                                         std::size_t, std::size_t, std::size_t>);
     }
     SECTION("StateVectorBackend<TestType> {MPI_Comm, DevTag<int>, std::size_t, "
-            "size_t, std::size_t}") {
+            "std::size_t, std::size_t}") {
         REQUIRE(std::is_constructible_v<StateVectorT, MPI_Comm, DevTag<int>,
                                         std::size_t, std::size_t, std::size_t>);
     }
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/MPI_helpers.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/MPI_helpers.hpp
index fd9b4edcbf..25a407c6a5 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/MPI_helpers.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/MPI_helpers.hpp
@@ -46,13 +46,13 @@ inline std::vector<int2> createWirePairs(const int numLocalQubits,
             int2 wirepair = make_int2(localbit, globalbit);
             wirePairs.push_back(wirepair);
             if (statusWires[globalbit] == WireStatus::Control) {
-                for (size_t k = 0; k < ctrls.size(); k++) {
+                for (std::size_t k = 0; k < ctrls.size(); k++) {
                     if (ctrls[k] == globalbit) {
                         ctrls[k] = localbit;
                     }
                 }
             } else {
-                for (size_t k = 0; k < tgts.size(); k++) {
+                for (std::size_t k = 0; k < tgts.size(); k++) {
                     if (tgts[k] == globalbit) {
                         tgts[k] = localbit;
                     }
@@ -143,8 +143,9 @@ inline void tgtsVecProcess(const std::size_t numLocalQubits,
                 tgtsSwapStatus.push_back(WiresSwapStatus::Swappable);
 
                 std::vector<int> localVec(vec.size());
-                std::transform(vec.begin(), vec.end(), localVec.begin(),
-                               [&](size_t x) { return static_cast<int>(x); });
+                std::transform(
+                    vec.begin(), vec.end(), localVec.begin(),
+                    [&](std::size_t x) { return static_cast<int>(x); });
                 auto wirePairs = createWirePairs(numLocalQubits, numTotalQubits,
                                                  localVec, statusWires);
                 std::vector<std::size_t> localVecSizeT(localVec.size());
diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosObsManager.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosObsManager.hpp
index 387c4e282a..e7e260b9b7 100644
--- a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosObsManager.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosObsManager.hpp
@@ -64,7 +64,7 @@ template <typename PrecisionT> class LightningKokkosObsManager final {
     isValidObservables(const std::vector<ObsIdType> &obsKeys) const -> bool {
         return std::all_of(obsKeys.begin(), obsKeys.end(), [this](auto i) {
             return (i >= 0 &&
-                    static_cast<size_t>(i) < this->observables_.size());
+                    static_cast<std::size_t>(i) < this->observables_.size());
         });
     }
 
@@ -83,9 +83,9 @@ template <typename PrecisionT> class LightningKokkosObsManager final {
     /**
      * @brief Get the number of observables.
      *
-     * @return size_t
+     * @return std::size_t
      */
-    [[nodiscard]] auto numObservables() const -> size_t {
+    [[nodiscard]] auto numObservables() const -> std::size_t {
         return this->observables_.size();
     }
 
@@ -97,7 +97,7 @@ template <typename PrecisionT> class LightningKokkosObsManager final {
      * @return ObsIdType
      */
     [[nodiscard]] auto createNamedObs(ObsId obsId,
-                                      const std::vector<size_t> &wires)
+                                      const std::vector<std::size_t> &wires)
         -> ObsIdType {
         auto &&obs_str = std::string(
             Lightning::lookup_obs<Lightning::simulator_observable_support_size>(
@@ -119,7 +119,7 @@ template <typename PrecisionT> class LightningKokkosObsManager final {
      */
     [[nodiscard]] auto
     createHermitianObs(const std::vector<std::complex<PrecisionT>> &matrix,
-                       const std::vector<size_t> &wires) -> ObsIdType {
+                       const std::vector<std::size_t> &wires) -> ObsIdType {
         std::vector<Kokkos::complex<PrecisionT>> matrix_k;
         matrix_k.reserve(matrix.size());
         for (const auto &elem : matrix) {
@@ -151,7 +151,7 @@ template <typename PrecisionT> class LightningKokkosObsManager final {
         obs_vec.reserve(key_size);
 
         for (const auto &key : obsKeys) {
-            RT_FAIL_IF(static_cast<size_t>(key) >= obs_size || key < 0,
+            RT_FAIL_IF(static_cast<std::size_t>(key) >= obs_size || key < 0,
                        "Invalid observable key");
 
             auto &&[obs, type] = this->observables_[key];
@@ -188,7 +188,7 @@ template <typename PrecisionT> class LightningKokkosObsManager final {
         obs_vec.reserve(key_size);
 
         for (auto key : obsKeys) {
-            RT_FAIL_IF(static_cast<size_t>(key) >= obs_size || key < 0,
+            RT_FAIL_IF(static_cast<std::size_t>(key) >= obs_size || key < 0,
                        "Invalid observable key");
 
             auto &&[obs, type] = this->observables_[key];
diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.hpp
index c20b0ded7b..890c3a267f 100644
--- a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/LightningKokkosSimulator.hpp
@@ -112,10 +112,11 @@ class LightningKokkosSimulator final : public Catalyst::Runtime::QuantumDevice {
     LightningKokkosSimulator &operator=(LightningKokkosSimulator &&) = delete;
 
     auto AllocateQubit() -> QubitIdType override;
-    auto AllocateQubits(size_t num_qubits) -> std::vector<QubitIdType> override;
+    auto AllocateQubits(std::size_t num_qubits)
+        -> std::vector<QubitIdType> override;
     void ReleaseQubit(QubitIdType q) override;
     void ReleaseAllQubits() override;
-    [[nodiscard]] auto GetNumQubits() const -> size_t override;
+    [[nodiscard]] auto GetNumQubits() const -> std::size_t override;
     void StartTapeRecording() override;
     void StopTapeRecording() override;
     void SetDeviceShots(std::size_t shots) override;
@@ -154,21 +155,21 @@ class LightningKokkosSimulator final : public Catalyst::Runtime::QuantumDevice {
     void Probs(DataView<double, 1> &probs) override;
     void PartialProbs(DataView<double, 1> &probs,
                       const std::vector<QubitIdType> &wires) override;
-    void Sample(DataView<double, 2> &samples, size_t shots) override;
+    void Sample(DataView<double, 2> &samples, std::size_t shots) override;
     void PartialSample(DataView<double, 2> &samples,
                        const std::vector<QubitIdType> &wires,
-                       size_t shots) override;
+                       std::size_t shots) override;
     void Counts(DataView<double, 1> &eigvals, DataView<int64_t, 1> &counts,
-                size_t shots) override;
+                std::size_t shots) override;
     void PartialCounts(DataView<double, 1> &eigvals,
                        DataView<int64_t, 1> &counts,
                        const std::vector<QubitIdType> &wires,
-                       size_t shots) override;
+                       std::size_t shots) override;
     auto Measure(QubitIdType wire,
                  std::optional<int32_t> postselect = std::nullopt)
         -> Result override;
     void Gradient(std::vector<DataView<double, 1>> &gradients,
-                  const std::vector<size_t> &trainParams) override;
+                  const std::vector<std::size_t> &trainParams) override;
 
     auto CacheManagerInfo()
         -> std::tuple<std::size_t, std::size_t, std::size_t,
diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/tests/Test_LightningKokkosMeasures.cpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/tests/Test_LightningKokkosMeasures.cpp
index 2655372252..7208732a3b 100644
--- a/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/tests/Test_LightningKokkosMeasures.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/catalyst/tests/Test_LightningKokkosMeasures.cpp
@@ -1754,7 +1754,7 @@ TEST_CASE("Counts and PartialCounts tests with numWires=0-4 shots=100",
 }
 
 TEST_CASE("Measurement with a seeded device", "[Measures]") {
-    for (size_t _ = 0; _ < 5; _++) {
+    for (std::size_t _ = 0; _ < 5; _++) {
         std::unique_ptr<LKSimulator> sim = std::make_unique<LKSimulator>();
         std::unique_ptr<LKSimulator> sim1 = std::make_unique<LKSimulator>();
 
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/StateVectorLQubit.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/StateVectorLQubit.hpp
index 042f4643be..78521ebe8b 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/StateVectorLQubit.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/StateVectorLQubit.hpp
@@ -92,7 +92,7 @@ class StateVectorLQubit : public StateVectorBase<PrecisionT, Derived> {
      * @param threading Threading option
      * @param memory_model Memory model
      */
-    void setKernels(size_t num_qubits, Threading threading,
+    void setKernels(std::size_t num_qubits, Threading threading,
                     CPUMemoryModel memory_model) {
         using KernelMap::OperationKernelMap;
         kernel_for_gates_ =
@@ -260,7 +260,7 @@ class StateVectorLQubit : public StateVectorBase<PrecisionT, Derived> {
     }
 
   protected:
-    explicit StateVectorLQubit(size_t num_qubits, Threading threading,
+    explicit StateVectorLQubit(std::size_t num_qubits, Threading threading,
                                CPUMemoryModel memory_model)
         : BaseType(num_qubits), threading_{threading},
           memory_model_{memory_model} {
@@ -684,9 +684,9 @@ class StateVectorLQubit : public StateVectorBase<PrecisionT, Derived> {
         // **__**__ for stride 2
         // ****____ for stride 4
         const std::size_t k = branch ? 0 : 1;
-        for (size_t idx = 0; idx < half_section_size; idx++) {
+        for (std::size_t idx = 0; idx < half_section_size; idx++) {
             const std::size_t offset = stride * (k + 2 * idx);
-            for (size_t ids = 0; ids < stride; ids++) {
+            for (std::size_t ids = 0; ids < stride; ids++) {
                 arr[offset + ids] = {0., 0.};
             }
         }
@@ -705,7 +705,7 @@ class StateVectorLQubit : public StateVectorBase<PrecisionT, Derived> {
                     "vector has norm close to zero and can't be normalized");
 
         ComplexT inv_norm = 1. / norm;
-        for (size_t k = 0; k < BaseType::getLength(); k++) {
+        for (std::size_t k = 0; k < BaseType::getLength(); k++) {
             arr[k] *= inv_norm;
         }
     }
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/AdjointJacobianLQubit.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/AdjointJacobianLQubit.hpp
index f09a9cc5f3..87c1b80691 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/AdjointJacobianLQubit.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/AdjointJacobianLQubit.hpp
@@ -108,7 +108,7 @@ class AdjointJacobian final
             {
                 #pragma omp for
             #endif
-                for (size_t h_i = 0; h_i < num_observables; h_i++) {
+                for (std::size_t h_i = 0; h_i < num_observables; h_i++) {
                     try {
                         states[h_i].updateData(reference_state.getData(),
                                                reference_state.getLength());
@@ -164,7 +164,7 @@ class AdjointJacobian final
         {
             #pragma omp for
         #endif
-            for (size_t st_idx = 0; st_idx < num_states; st_idx++) {
+            for (std::size_t st_idx = 0; st_idx < num_states; st_idx++) {
                 try {
                     BaseType::applyOperationAdj(states[st_idx], operations, op_idx);
                 } catch (...) {
@@ -271,7 +271,7 @@ class AdjointJacobian final
                 std::make_unique<std::vector<std::vector<ComplexT>>>(
                     num_observables, std::vector<ComplexT>(lambda.getLength()));
             H_lambda = std::make_unique<std::vector<StateVectorT>>();
-            for (size_t ind = 0; ind < num_observables; ind++) {
+            for (std::size_t ind = 0; ind < num_observables; ind++) {
                 (*H_lambda_storage)[ind][0] = {1.0, 0};
 
                 StateVectorT sv((*H_lambda_storage)[ind].data(),
@@ -334,7 +334,7 @@ class AdjointJacobian final
                 #endif
                     // clang-format on
 
-                    for (size_t obs_idx = 0; obs_idx < num_observables;
+                    for (std::size_t obs_idx = 0; obs_idx < num_observables;
                          obs_idx++) {
                         updateJacobian(*H_lambda, mu, jac, scalingFactor,
                                        obs_idx, mat_row_idx);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/tests/Test_AdjointJacobianLQubit.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/tests/Test_AdjointJacobianLQubit.cpp
index e30a066ddd..f81f36c847 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/tests/Test_AdjointJacobianLQubit.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/tests/Test_AdjointJacobianLQubit.cpp
@@ -65,11 +65,11 @@ TEMPLATE_PRODUCT_TEST_CASE(
 
     std::uniform_real_distribution<PrecisionT> dist(-1.0, 1.0);
 
-    for (size_t k = 0; k < n_terms; k++) {
+    for (std::size_t k = 0; k < n_terms; k++) {
         auto term_pauli = randomIntVector(re, num_qubits, 0, 3);
 
         std::vector<std::shared_ptr<Observable<StateVectorT>>> term_comp;
-        for (size_t i = 0; i < num_qubits; i++) {
+        for (std::size_t i = 0; i < num_qubits; i++) {
             if (term_pauli[i] == 0) {
                 continue;
             }
@@ -83,7 +83,7 @@ TEMPLATE_PRODUCT_TEST_CASE(
         coeffs.emplace_back(dist(re));
         terms.emplace_back(TensorProdObs<StateVectorT>::create(term_comp));
     }
-    std::vector<ComplexT> psi(size_t{1} << num_qubits);
+    std::vector<ComplexT> psi(std::size_t{1} << num_qubits);
     std::normal_distribution<PrecisionT> ndist;
     for (auto &e : psi) {
         e = ndist(re);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/tests/Test_VectorJacobianProduct.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/tests/Test_VectorJacobianProduct.cpp
index 06e52bbb25..02f9cee251 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/tests/Test_VectorJacobianProduct.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/algorithms/tests/Test_VectorJacobianProduct.cpp
@@ -72,7 +72,7 @@ auto createRandomOps(RandomEngine &re, std::size_t length, std::size_t wires)
     std::uniform_real_distribution<PrecisionT> param_dist(0.0, 2 * M_PI);
     std::uniform_int_distribution<int> inverse_dist(0, 1);
 
-    for (size_t i = 0; i < length; i++) {
+    for (std::size_t i = 0; i < length; i++) {
         const auto gate_op = gates_to_use[gate_dist(re)];
         const auto gate_name = lookup(Constant::gate_names, gate_op);
         ops_names.emplace_back(gate_name);
@@ -146,7 +146,7 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVector VJP", "[Algorithms]",
             JacobianData<StateVectorT> jd{1,  4,        ini_st.data(),
                                           {}, ops_data, {0}};
 
-            for (size_t i = 0; i < 4; i++) {
+            for (std::size_t i = 0; i < 4; i++) {
                 std::fill(dy.begin(), dy.end(), ComplexT{0.0, 0.0});
                 dy[i] = {1.0, 0.0};
                 std::vector<ComplexT> vjp(1);
@@ -165,7 +165,7 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVector VJP", "[Algorithms]",
             JacobianData<StateVectorT> jd{1,  4,        final_st.data(),
                                           {}, ops_data, {0}};
 
-            for (size_t i = 0; i < 4; i++) {
+            for (std::size_t i = 0; i < 4; i++) {
                 std::fill(dy.begin(), dy.end(), ComplexT{0.0, 0.0});
                 dy[i] = {1.0, 0.0};
                 std::vector<ComplexT> vjp(1);
@@ -212,7 +212,7 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVector VJP", "[Algorithms]",
 
             auto dy = std::vector<ComplexT>(4);
 
-            for (size_t i = 0; i < 4; i++) {
+            for (std::size_t i = 0; i < 4; i++) {
                 std::fill(dy.begin(), dy.end(), ComplexT{0.0, 0.0});
                 dy[i] = {1.0, 0.0};
                 std::vector<ComplexT> vjp(2);
@@ -234,7 +234,7 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVector VJP", "[Algorithms]",
 
             auto dy = std::vector<ComplexT>(4);
 
-            for (size_t i = 0; i < 4; i++) {
+            for (std::size_t i = 0; i < 4; i++) {
                 std::fill(dy.begin(), dy.end(), ComplexT{0.0, 0.0});
                 dy[i] = {1.0, 0.0};
                 std::vector<ComplexT> vjp(2);
@@ -362,7 +362,7 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVector VJP", "[Algorithms]",
 
         StateVectorT sv(ini_st.data(), ini_st.size());
 
-        for (size_t op_idx = 0; op_idx < ops_data.getOpsName().size();
+        for (std::size_t op_idx = 0; op_idx < ops_data.getOpsName().size();
              op_idx++) {
             sv.applyOperation(ops_data.getOpsName()[op_idx],
                               ops_data.getOpsWires()[op_idx], false,
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/bindings/LQubitBindings.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/bindings/LQubitBindings.hpp
index 77216405fc..2418cf65a0 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/bindings/LQubitBindings.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/bindings/LQubitBindings.hpp
@@ -327,7 +327,7 @@ void registerBackendSpecificMeasurements(PyClass &pyclass) {
 
             const std::size_t ndim = 2;
             const std::vector<std::size_t> shape{num_shots, num_wires};
-            constexpr auto sz = sizeof(size_t);
+            constexpr auto sz = sizeof(std::size_t);
             const std::vector<std::size_t> strides{sz * num_wires, sz};
             // return 2-D NumPy array
             return py::array(py::buffer_info(
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/DynamicDispatcher.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/DynamicDispatcher.hpp
index d9a8986af2..6097d5da21 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/DynamicDispatcher.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/DynamicDispatcher.hpp
@@ -55,7 +55,7 @@ constexpr auto generatorNamesWithoutPrefix() {
     std::array<std::pair<GeneratorOperation, std::string_view>,
                GateConstant::generator_names.size()>
         res{};
-    for (size_t i = 0; i < GateConstant::generator_names.size(); i++) {
+    for (std::size_t i = 0; i < GateConstant::generator_names.size(); i++) {
         // NOLINTBEGIN(cppcoreguidelines-pro-bounds-constant-array-index)
         const auto [gntr_op, gntr_name] = GateConstant::generator_names[i];
         res[i].first = gntr_op;
@@ -634,7 +634,7 @@ template <typename PrecisionT> class DynamicDispatcher {
                         numOperations != params.size(),
                     "Invalid arguments: number of operations, wires, and "
                     "parameters must all be equal");
-        for (size_t i = 0; i < numOperations; i++) {
+        for (std::size_t i = 0; i < numOperations; i++) {
             applyOperation(kernel, data, num_qubits, ops[i], wires[i],
                            inverse[i], params[i]);
         }
@@ -658,7 +658,7 @@ template <typename PrecisionT> class DynamicDispatcher {
         PL_ABORT_IF(numOperations != wires.size(),
                     "Invalid arguments: number of operations, wires, and "
                     "parameters must all be equal");
-        for (size_t i = 0; i < numOperations; i++) {
+        for (std::size_t i = 0; i < numOperations; i++) {
             applyOperation(kernel, data, num_qubits, ops[i], wires[i],
                            inverse[i], {});
         }
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/GateIndices.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/GateIndices.cpp
index 9ed8c719cc..bf9c44b4e8 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/GateIndices.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/GateIndices.cpp
@@ -20,7 +20,7 @@ auto getIndicesAfterExclusion(const std::vector<std::size_t> &indicesToExclude,
                               std::size_t num_qubits)
     -> std::vector<std::size_t> {
     std::set<std::size_t> indices;
-    for (size_t i = 0; i < num_qubits; i++) {
+    for (std::size_t i = 0; i < num_qubits; i++) {
         indices.emplace(i);
     }
     for (const std::size_t &excludedIndex : indicesToExclude) {
@@ -41,7 +41,7 @@ auto generateBitPatterns(const std::vector<std::size_t> &qubitIndices,
         const std::size_t value =
             Pennylane::Util::maxDecimalForQubit(*index_it, num_qubits);
         const std::size_t currentSize = indices.size();
-        for (size_t j = 0; j < currentSize; j++) {
+        for (std::size_t j = 0; j < currentSize; j++) {
             indices.emplace_back(indices[j] + value);
         }
     }
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/KernelMap.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/KernelMap.hpp
index fb59e35ba9..ff2becab8f 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/KernelMap.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/KernelMap.hpp
@@ -167,7 +167,7 @@ class PriorityDispatchSet {
         ordered_vec_.insert(iter_to_insert, elem);
     }
 
-    [[nodiscard]] KernelType getKernel(size_t num_qubits) const {
+    [[nodiscard]] KernelType getKernel(std::size_t num_qubits) const {
         for (const auto &elem : ordered_vec_) {
             if (elem.getIntegerInterval()(num_qubits)) {
                 return elem.getKernelType();
@@ -229,7 +229,7 @@ class OperationKernelMap {
     EnumDispatchKernalMap kernel_map_;
 
     /* TODO: Cache logic can be improved */
-    mutable std::deque<std::tuple<size_t, uint32_t, EnumKernelMap>> cache_;
+    mutable std::deque<std::tuple<std::size_t, uint32_t, EnumKernelMap>> cache_;
     mutable std::mutex cache_mutex_;
 
     /**
@@ -417,7 +417,7 @@ class OperationKernelMap {
      * @param memory_model Memory model of the underlying data
      * @return A kernel map for given keys
      */
-    [[nodiscard]] auto getKernelMap(size_t num_qubits, Threading threading,
+    [[nodiscard]] auto getKernelMap(std::size_t num_qubits, Threading threading,
                                     CPUMemoryModel memory_model) const
         -> EnumKernelMap {
         const uint32_t dispatch_key = toDispatchKey(threading, memory_model);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsLM.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsLM.hpp
index a6a440c45b..7bd416d7d9 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsLM.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsLM.hpp
@@ -78,30 +78,32 @@ class GateImplementationsLM : public PauliGenerator<GateImplementationsLM> {
         return {rev_wires, rev_wire_shifts};
     }
 
-    static std::pair<size_t, std::size_t> revWireParity(size_t rev_wire) {
+    static std::pair<std::size_t, std::size_t>
+    revWireParity(std::size_t rev_wire) {
         const auto parity = Pennylane::Util::revWireParity(
             std::array<std::size_t, 1>{rev_wire});
         return {parity[1], parity[0]};
     }
-    static std::tuple<size_t, std::size_t, std::size_t>
-    revWireParity(size_t rev_wire0, std::size_t rev_wire1) {
+    static std::tuple<std::size_t, std::size_t, std::size_t>
+    revWireParity(std::size_t rev_wire0, std::size_t rev_wire1) {
         const auto parity = Pennylane::Util::revWireParity(
             std::array<std::size_t, 2>{rev_wire0, rev_wire1});
         return {parity[2], parity[1], parity[0]};
     }
     template <const std::size_t wire_size = 3>
-    static constexpr auto revWireParity(size_t rev_wire0, std::size_t rev_wire1,
+    static constexpr auto revWireParity(std::size_t rev_wire0,
+                                        std::size_t rev_wire1,
                                         std::size_t rev_wire2)
-        -> std::array<size_t, wire_size + 1> {
+        -> std::array<std::size_t, wire_size + 1> {
         return Pennylane::Util::revWireParity(
             std::array<std::size_t, wire_size>{rev_wire0, rev_wire1,
                                                rev_wire2});
     }
     template <const std::size_t wire_size = 4>
-    static constexpr auto revWireParity(size_t rev_wire0, std::size_t rev_wire1,
-                                        std::size_t rev_wire2,
-                                        std::size_t rev_wire3)
-        -> std::array<size_t, wire_size + 1> {
+    static constexpr auto
+    revWireParity(std::size_t rev_wire0, std::size_t rev_wire1,
+                  std::size_t rev_wire2, std::size_t rev_wire3)
+        -> std::array<std::size_t, wire_size + 1> {
         return Pennylane::Util::revWireParity(
             std::array<std::size_t, wire_size>{rev_wire0, rev_wire1, rev_wire2,
                                                rev_wire3});
@@ -676,7 +678,7 @@ class GateImplementationsLM : public PauliGenerator<GateImplementationsLM> {
                 Pennylane::Util::revWireParity(rev_wires);
 
             PL_LOOP_PARALLEL(1)
-            for (size_t k = 0; k < exp2(num_qubits - nw_tot); k++) {
+            for (std::size_t k = 0; k < exp2(num_qubits - nw_tot); k++) {
                 std::size_t i0 = (k & parity[0]);
                 for (std::size_t i = 1; i < parity.size(); i++) {
                     i0 |= ((k << i) & parity[i]);
@@ -1306,7 +1308,7 @@ class GateImplementationsLM : public PauliGenerator<GateImplementationsLM> {
             const std::vector<std::size_t> parity =
                 Pennylane::Util::revWireParity(rev_wires);
             PL_LOOP_PARALLEL(1)
-            for (size_t k = 0; k < exp2(num_qubits - nw_tot); k++) {
+            for (std::size_t k = 0; k < exp2(num_qubits - nw_tot); k++) {
                 std::size_t i00 = (k & parity[0]);
                 for (std::size_t i = 1; i < parity.size(); i++) {
                     i00 |= ((k << i) & parity[i]);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsPI.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsPI.hpp
index 51644831fa..3249e9fdcf 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsPI.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/GateImplementationsPI.hpp
@@ -281,23 +281,23 @@ class GateImplementationsPI : public PauliGenerator<GateImplementationsPI> {
 
             // Apply + scatter
             if (inverse) {
-                for (size_t i = 0; i < indices.size(); i++) {
+                for (std::size_t i = 0; i < indices.size(); i++) {
                     std::size_t index = indices[i];
                     shiftedState[index] = 0;
 
-                    for (size_t j = 0; j < indices.size(); j++) {
+                    for (std::size_t j = 0; j < indices.size(); j++) {
                         const std::size_t baseIndex = j * indices.size();
                         shiftedState[index] +=
                             std::conj(matrix[baseIndex + i]) * v[j];
                     }
                 }
             } else {
-                for (size_t i = 0; i < indices.size(); i++) {
+                for (std::size_t i = 0; i < indices.size(); i++) {
                     std::size_t index = indices[i];
                     shiftedState[index] = 0;
 
                     const std::size_t baseIndex = i * indices.size();
-                    for (size_t j = 0; j < indices.size(); j++) {
+                    for (std::size_t j = 0; j < indices.size(); j++) {
                         shiftedState[index] += matrix[baseIndex + j] * v[j];
                     }
                 }
@@ -876,7 +876,7 @@ class GateImplementationsPI : public PauliGenerator<GateImplementationsPI> {
 
         for (const std::size_t &externalIndex : externalIndices) {
             std::complex<PrecisionT> *shiftedState = arr + externalIndex;
-            for (size_t k = 0; k < indices.size(); k++) {
+            for (std::size_t k = 0; k < indices.size(); k++) {
                 shiftedState[indices[k]] *= shifts[std::popcount(k) % 2];
             }
         }
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVXUtil.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVXUtil.hpp
index 20e8d8734b..b125425409 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVXUtil.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVXUtil.hpp
@@ -62,10 +62,11 @@ template <> struct AVXIntrinsic<double, 8> {
  * @brief one or minus one parity for reverse wire in packed data.
  */
 template <typename PrecisionT, std::size_t packed_size>
-constexpr auto internalParity(size_t rev_wire)
+constexpr auto internalParity(std::size_t rev_wire)
     -> AVXIntrinsicType<PrecisionT, packed_size>;
 #ifdef PL_USE_AVX2
-template <> constexpr auto internalParity<float, 8>(size_t rev_wire) -> __m256 {
+template <>
+constexpr auto internalParity<float, 8>(std::size_t rev_wire) -> __m256 {
     switch (rev_wire) {
     case 0:
         // When Z is applied to the 0th qubit
@@ -89,7 +90,7 @@ constexpr auto internalParity<double, 4>([[maybe_unused]] std::size_t rev_wire)
 #ifdef PL_USE_AVX512F
 // LCOV_EXCL_START
 template <>
-constexpr auto internalParity<float, 16>(size_t rev_wire) -> __m512 {
+constexpr auto internalParity<float, 16>(std::size_t rev_wire) -> __m512 {
     // AVX512 with float
     // clang-format off
     switch(rev_wire) {
@@ -116,7 +117,7 @@ constexpr auto internalParity<float, 16>(size_t rev_wire) -> __m512 {
     };
 };
 template <>
-constexpr auto internalParity<double, 8>(size_t rev_wire) -> __m512d {
+constexpr auto internalParity<double, 8>(std::size_t rev_wire) -> __m512d {
     // AVX512 with double
     switch (rev_wire) {
     case 0:
@@ -207,10 +208,10 @@ constexpr auto set1(PrecisionT val) {
     return Set1<PrecisionT, packed_size>::create(val);
 }
 
-template <size_t packed_size> struct InternalWires {
+template <std::size_t packed_size> struct InternalWires {
     constexpr static auto value = log2PerfectPower(packed_size / 2);
 };
-template <size_t packed_size>
+template <std::size_t packed_size>
 constexpr auto internal_wires_v = InternalWires<packed_size>::value;
 
 #ifdef PL_USE_AVX2
@@ -306,7 +307,7 @@ template <typename PrecisionT, std::size_t packed_size, typename Func>
 auto toParity(Func &&func) -> AVXIntrinsicType<PrecisionT, packed_size> {
     std::array<PrecisionT, packed_size> data{};
     PL_LOOP_SIMD
-    for (size_t idx = 0; idx < packed_size / 2; idx++) {
+    for (std::size_t idx = 0; idx < packed_size / 2; idx++) {
         data[2 * idx + 0] = static_cast<PrecisionT>(1.0) -
                             2 * static_cast<PrecisionT>(func(idx));
         data[2 * idx + 1] = static_cast<PrecisionT>(1.0) -
@@ -325,7 +326,7 @@ template <typename PrecisionT, std::size_t packed_size, typename Func>
 auto setValueOneTwo(Func &&func) -> AVXIntrinsicType<PrecisionT, packed_size> {
     std::array<PrecisionT, packed_size> data{};
     PL_LOOP_SIMD
-    for (size_t idx = 0; idx < packed_size / 2; idx++) {
+    for (std::size_t idx = 0; idx < packed_size / 2; idx++) {
         data[2 * idx + 0] = static_cast<PrecisionT>(func(idx));
         data[2 * idx + 1] = data[2 * idx + 0];
     }
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCNOT.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCNOT.hpp
index 3921fa3553..bc497fce2c 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCNOT.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCNOT.hpp
@@ -42,11 +42,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCNOT {
     constexpr static auto packed_size_ = packed_size;
     constexpr static bool symmetric = false;
 
-    template <size_t control, std::size_t target>
+    template <std::size_t control, std::size_t target>
     static consteval auto applyInternalInternalPermutation() {
         std::array<uint8_t, packed_size> perm{};
 
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 perm[2 * k + 0] = 2 * (k ^ (1U << target)) + 0;
                 perm[2 * k + 1] = 2 * (k ^ (1U << target)) + 1;
@@ -58,23 +58,23 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCNOT {
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    template <size_t control, std::size_t target>
+    template <std::size_t control, std::size_t target>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       [[maybe_unused]] bool inverse) {
         constexpr static auto perm =
             applyInternalInternalPermutation<control, target>();
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             PrecisionAVXConcept::store(arr + n, Permutation::permute<perm>(v));
         }
     }
 
-    template <size_t control>
+    template <std::size_t control>
     static consteval auto applyInternalExternalMask() {
         std::array<bool, packed_size> mask{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 mask[2 * k + 0] = true;
                 mask[2 * k + 1] = true;
@@ -91,7 +91,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCNOT {
      * on internal wires (inside of packed bytes) but the target acts on
      * external wires.
      */
-    template <size_t control>
+    template <std::size_t control>
     static void
     applyInternalExternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t target, [[maybe_unused]] bool inverse) {
@@ -107,7 +107,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCNOT {
 
         constexpr static auto mask = applyInternalExternalMask<control>();
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -123,17 +124,17 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCNOT {
     /**
      * @brief Permutation that flip the target bit.
      */
-    template <size_t target>
+    template <std::size_t target>
     static consteval auto applyExternalInternalPermutation() {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             perm[2 * k + 0] = 2 * (k ^ (1U << target)) + 0;
             perm[2 * k + 1] = 2 * (k ^ (1U << target)) + 1;
         }
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    template <size_t target>
+    template <std::size_t target>
     static void
     applyExternalInternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t control, [[maybe_unused]] bool inverse) {
@@ -146,7 +147,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCNOT {
 
         constexpr static auto perm = applyExternalInternalPermutation<target>();
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | control_shift;
@@ -174,7 +176,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCNOT {
         const std::size_t parity_middle =
             fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRX.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRX.hpp
index 195806d1b4..1ba6e12dcf 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRX.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRX.hpp
@@ -61,10 +61,10 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
      * [Re(v[0]), Im(v[0]), Re(v[1]), Im(v[1]), Im(v[3]), Re(v[3]), Im(v[2]),
      * Re(v[2])]
      */
-    template <size_t control, std::size_t target>
+    template <std::size_t control, std::size_t target>
     static consteval auto applyInternalInternalPermutation() {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 perm[2 * k + 0] = 2 * (k ^ (1U << target)) + 1;
                 perm[2 * k + 1] = 2 * (k ^ (1U << target)) + 0;
@@ -80,12 +80,12 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
      * @brief Factor for (2).
      * [0, 0, 0, 0, sin(phi/2), -sin(phi/2), sin(phi/2), -sin(phi/2)]
      */
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static auto applyInternalInternalOffDiagFactor(ParamT angle) {
         std::array<PrecisionT, packed_size> arr{};
 
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 arr[2 * k + 0] = std::sin(angle / 2);
                 arr[2 * k + 1] = -std::sin(angle / 2);
@@ -101,11 +101,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
      * @brief Factor for (1)
      * [1, 1, 1, 1, cos(phi/2), cos(phi/2), cos(phi/2), cos(phi/2)]
      */
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static auto applyInternalInternalDiagFactor(ParamT angle) {
         std::array<PrecisionT, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 arr[2 * k + 0] = std::cos(angle / 2);
                 arr[2 * k + 1] = std::cos(angle / 2);
@@ -117,7 +117,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
         return setValue(arr);
     }
 
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits, bool inverse,
                                       ParamT angle) {
@@ -133,7 +133,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
         const auto diag_factor =
             applyInternalInternalDiagFactor<control, target>(angle);
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             const auto diag_w = diag_factor * v;
             const auto off_diag_w =
@@ -145,11 +145,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
     /**
      * @brief Factor for (1) when the target bit is 0/1.
      */
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static auto applyInternalExternalDiagFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 // if control is 1
                 arr[2 * k + 0] = std::cos(angle / 2);
@@ -165,11 +165,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
     /**
      * @brief Factor for (2) when the target bit is 0/1.
      */
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static auto applyInternalExternalOffDiagFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 // if control is 1
                 arr[2 * k + 0] = std::sin(angle / 2);
@@ -187,7 +187,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
      * on internal wires (inside of packed bytes) but the target acts on
      * external wires.
      */
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static void
     applyInternalExternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t target, bool inverse, ParamT angle) {
@@ -211,7 +211,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & target_wire_parity_inv) | (target_wire_parity & k);
             const std::size_t i1 = i0 | target_rev_wire_shift;
@@ -231,19 +232,19 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
     /**
      * @brief Permutation that flips the target bit.
      */
-    template <size_t target>
+    template <std::size_t target>
     static consteval auto applyExternalInternalOffDiagPerm() {
         std::array<uint8_t, packed_size> arr{};
 
         uint8_t s = (uint8_t{1U} << target);
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             arr[2 * k + 0] = 2 * (k ^ s) + 1;
             arr[2 * k + 1] = 2 * (k ^ s) + 0;
         }
         return Permutation::compilePermutation<PrecisionT>(arr);
     }
 
-    template <size_t target, typename ParamT>
+    template <std::size_t target, typename ParamT>
     static void
     applyExternalInternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t control, bool inverse, ParamT angle) {
@@ -265,7 +266,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
         const auto offdiag_factor =
             imagFactor<PrecisionT, packed_size>(-std::sin(angle / 2));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | control_shift;
@@ -309,7 +311,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRX {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRY.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRY.hpp
index de9ebeb80a..5baf8aa8c6 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRY.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRY.hpp
@@ -58,10 +58,10 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
     /**
      * @brief Permutation for (2). Flip the target bit if control bit is 1.
      */
-    template <size_t control, std::size_t target>
+    template <std::size_t control, std::size_t target>
     static consteval auto applyInternalInternalPermutation() {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 perm[2 * k + 0] = 2 * (k ^ (1U << target)) + 0;
                 perm[2 * k + 1] = 2 * (k ^ (1U << target)) + 1;
@@ -77,11 +77,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
      * @brief Factor for (2).
      * [0, 0, 0, 0, -sin(phi/2), -sin(phi/2), sin(phi/2), sin(phi/2)]
      */
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static auto applyInternalInternalOffDiagFactor(ParamT angle) {
         std::array<PrecisionT, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 if ((k >> target) & 1U) {
                     // if target bit is 1 (was 0) -> sin(phi/2)
@@ -100,12 +100,12 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
         return setValue(arr);
     }
 
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static auto applyInternalInternalDiagFactor(ParamT angle) {
         std::array<PrecisionT, packed_size> arr{};
 
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 arr[2 * k + 0] = std::cos(angle / 2);
                 arr[2 * k + 1] = std::cos(angle / 2);
@@ -117,7 +117,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
         return setValue(arr);
     }
 
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits, bool inverse,
                                       ParamT angle) {
@@ -133,7 +133,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
         const auto diag_factor =
             applyInternalInternalDiagFactor<control, target>(angle);
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             const auto diag_w = diag_factor * v;
             const auto off_diag_w =
@@ -146,11 +146,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
      * @brief Factor for (1).
      * [1, 1, 1, 1, cos(phi/2), cos(phi/2), cos(phi/2), cos(phi/2)]
      */
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static auto applyInternalExternalDiagFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 // if control is 1
                 arr[2 * k + 0] = std::cos(angle / 2);
@@ -166,11 +166,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
     /**
      * @brief Factor for (2) when the target bit is 1
      */
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static auto applyInternalExternalOffDiagFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 // if control is 1
                 arr[2 * k + 0] = std::sin(angle / 2);
@@ -188,7 +188,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
      * on internal wires (inside of packed bytes) but the target acts on
      * external wires.
      */
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static void
     applyInternalExternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t target, bool inverse, ParamT angle) {
@@ -211,7 +211,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
             applyInternalExternalOffDiagFactor<control>(angle);
         const auto off_diag_factor_m = -off_diag_factor_p;
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & target_wire_parity_inv) | (target_wire_parity & k);
             const std::size_t i1 = i0 | target_rev_wire_shift;
@@ -229,11 +230,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
     /**
      * @brief Factor for (2) when the control bit is 1
      */
-    template <size_t target, typename ParamT>
+    template <std::size_t target, typename ParamT>
     static auto applyExternalInternalOffDiagFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> target) & 1U) { // target bit is 1 (was 0)
                 arr[2 * k + 0] = std::sin(angle / 2);
                 arr[2 * k + 1] = std::sin(angle / 2);
@@ -245,7 +246,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
         return setValue(arr);
     }
 
-    template <size_t target, typename ParamT>
+    template <std::size_t target, typename ParamT>
     static void
     applyExternalInternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t control, bool inverse, ParamT angle) {
@@ -269,7 +270,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
             applyExternalInternalOffDiagFactor<target>(angle);
 
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | control_shift;
@@ -310,7 +312,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
         const auto sin_factor =
             set1<PrecisionT, packed_size>(std::sin(angle / 2));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRZ.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRZ.hpp
index b82fda7760..a75c7fb97f 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRZ.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRZ.hpp
@@ -39,10 +39,10 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
     /**
      * @brief Permutation for applying `i` when the control bit is 1
      */
-    template <size_t control>
+    template <std::size_t control>
     static consteval auto applyInternalImagPermutation() {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 perm[2 * k + 0] = 2 * k + 1;
                 perm[2 * k + 1] = 2 * k + 0;
@@ -58,12 +58,12 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
      * @brief Factor for real parts
      * [1, 1, 1, 1, cos(phi/2), cos(phi/2), cos(phi/2), cos(phi/2)]
      */
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static auto applyInternalInternalRealFactor(ParamT angle) {
         std::array<PrecisionT, packed_size> arr{};
 
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 arr[2 * k + 0] = std::cos(angle / 2);
                 arr[2 * k + 1] = std::cos(angle / 2);
@@ -79,12 +79,12 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
      * @brief Factor for imaginary parts
      * [0, 0, 0, 0, sin(phi/2), -sin(phi/2), -sin(phi/2), sin(phi/2)]
      */
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static auto applyInternalInternalImagFactor(ParamT angle) {
         std::array<PrecisionT, packed_size> arr{};
 
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {    // if control bit is 1
                 if ((k >> target) & 1U) { // if target bit is 1
                     arr[2 * k + 0] = -std::sin(angle / 2);
@@ -101,7 +101,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
         return setValue(arr);
     }
 
-    template <size_t control, std::size_t target, class ParamT>
+    template <std::size_t control, std::size_t target, class ParamT>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits, bool inverse,
                                       ParamT angle) {
@@ -116,7 +116,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
         const auto imag_factor =
             applyInternalInternalImagFactor<control, target>(angle);
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             PrecisionAVXConcept::store(
                 arr + n,
@@ -127,11 +127,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
     /**
      * @brief Factor for real parts when the target bit is 1
      */
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static auto applyInternalExternalRealFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 // if control is 1
                 arr[2 * k + 0] = std::cos(angle / 2);
@@ -144,11 +144,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
         return setValue(arr);
     }
 
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static auto applyInternalExternalImagFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 // if control is 1
                 arr[2 * k + 0] = std::sin(angle / 2);
@@ -166,7 +166,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
      * on internal wires (inside of packed bytes) but the target acts on
      * external wires.
      */
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static void
     applyInternalExternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t target, bool inverse, ParamT angle) {
@@ -187,7 +187,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
         const auto imag_factor =
             applyInternalExternalImagFactor<control>(angle);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & target_wire_parity_inv) | (target_wire_parity & k);
             const std::size_t i1 = i0 | target_rev_wire_shift;
@@ -207,7 +208,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
     /**
      * @brief Factor for real parts when the control bit is 1
      */
-    template <size_t target, typename ParamT>
+    template <std::size_t target, typename ParamT>
     static auto applyExternalInternalRealFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         arr.fill(std::cos(angle / 2));
@@ -217,11 +218,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
     /**
      * @brief Factor for real parts when the control bit is 1
      */
-    template <size_t target, typename ParamT>
+    template <std::size_t target, typename ParamT>
     static auto applyExternalInternalImagFactor(ParamT angle) {
         std::array<Precision, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> target) & 1U) { // target bit is 1
                 arr[2 * k + 0] = -std::sin(angle / 2);
                 arr[2 * k + 1] = std::sin(angle / 2);
@@ -233,7 +234,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
         return setValue(arr);
     }
 
-    template <size_t target, typename ParamT>
+    template <std::size_t target, typename ParamT>
     static void
     applyExternalInternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t control, bool inverse, ParamT angle) {
@@ -253,7 +254,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
         const auto real_factor = applyExternalInternalRealFactor<target>(angle);
         const auto imag_factor = applyExternalInternalImagFactor<target>(angle);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | control_shift;
@@ -298,7 +300,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCRZ {
             imagFactor<PrecisionT, packed_size>(-std::sin(angle / 2));
         const auto imag_factor_m = -imag_factor_p;
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCY.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCY.hpp
index aa92eec2be..13ea110895 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCY.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCY.hpp
@@ -40,10 +40,10 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
      * @brief Permutation for multiplying `i` and flip the target bit if control
      * is 1
      */
-    template <size_t control, std::size_t target>
+    template <std::size_t control, std::size_t target>
     static consteval auto applyInternalInternalPermuation() {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 perm[2 * k + 0] = 2 * (k ^ (1U << target)) + 1;
                 perm[2 * k + 1] = 2 * (k ^ (1U << target)) + 0;
@@ -58,10 +58,10 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
     /**
      * @brief Factor to applying `-i` and `i`
      */
-    template <size_t control, std::size_t target>
+    template <std::size_t control, std::size_t target>
     static consteval auto applyInternalInternalFactor() {
         std::array<PrecisionT, packed_size> signs{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {    // if control bit is 1
                 if ((k >> target) & 1U) { // if target bit is 1 (was 0) == -> i
                     signs[2 * k + 0] = Precision{-1.0};
@@ -78,7 +78,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
         return setValue(signs);
     }
 
-    template <size_t control, std::size_t target>
+    template <std::size_t control, std::size_t target>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       [[maybe_unused]] bool inverse) {
@@ -87,7 +87,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
         constexpr static auto factor =
             applyInternalInternalFactor<control, target>();
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             PrecisionAVXConcept::store(arr + n,
                                        factor * Permutation::permute<perm>(v));
@@ -98,10 +98,10 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
      * @brief Mask for blending. Using this mask, blending v0 and v1 will output
      * v0 if the control bit is 0 v1 otherwise.
      */
-    template <size_t control>
+    template <std::size_t control>
     static consteval auto applyInternalExternalMask() {
         std::array<bool, packed_size> mask{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 mask[2 * k + 0] = true;
                 mask[2 * k + 1] = true;
@@ -113,10 +113,10 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
     /**
      * @brief Permutation when the target bit is 1
      */
-    template <size_t control>
+    template <std::size_t control>
     static consteval auto applyInternalExternalPermutation() {
         std::array<uint8_t, packed_size> permutation{};
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) { // if control bit is 1
                 permutation[2 * k + 0] = 2 * k + 1;
                 permutation[2 * k + 1] = 2 * k + 0;
@@ -131,13 +131,13 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
     /**
      * @brief Sign factor when the target bit is 0
      */
-    template <size_t control>
+    template <std::size_t control>
     static consteval auto applyInternalExternalSign_target0() {
         // Signs when the target is 0
         std::array<Precision, packed_size> signs = {
             1.0,
         };
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 signs[2 * k + 0] = 1.0;
                 signs[2 * k + 1] = -1.0;
@@ -152,13 +152,13 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
     /**
      * @brief Sign factor when the target bit is 1
      */
-    template <size_t control>
+    template <std::size_t control>
     static consteval auto applyInternalExternalSign_target1() {
         // Signs when the target is 1
         std::array<Precision, packed_size> signs = {
             1.0,
         };
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> control) & 1U) {
                 signs[2 * k + 0] = -1.0;
                 signs[2 * k + 1] = 1.0;
@@ -175,7 +175,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
      * on internal wires (inside of packed bytes) but the target acts on
      * external wires.
      */
-    template <size_t control>
+    template <std::size_t control>
     static void
     applyInternalExternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t target, [[maybe_unused]] bool inverse) {
@@ -196,7 +196,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
         constexpr static auto sign1 =
             applyInternalExternalSign_target1<control>();
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & target_wire_parity_inv) | (target_wire_parity & k);
             const std::size_t i1 = i0 | target_rev_wire_shift;
@@ -215,11 +216,12 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
     /**
      * @brief Sign factor when the control bit is 1.
      */
-    template <size_t target> static consteval auto applyExternalInternalSign() {
+    template <std::size_t target>
+    static consteval auto applyExternalInternalSign() {
         std::array<Precision, packed_size> signs = {
             1.0,
         };
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((k >> target) & 1U) { // target is 1 (was 0)
                 signs[2 * k + 0] = -1.0;
                 signs[2 * k + 1] = 1.0;
@@ -231,7 +233,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
         return setValue(signs);
     }
 
-    template <size_t target>
+    template <std::size_t target>
     static void
     applyExternalInternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                           std::size_t control, [[maybe_unused]] bool inverse) {
@@ -247,7 +249,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
             swapRealImag(flip(identity<packed_size>(), target)));
         constexpr static auto factor = applyExternalInternalSign<target>();
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | control_shift;
@@ -280,7 +283,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCY {
             swapRealImag(identity<packed_size>()));
         constexpr static auto factor = imagFactor<PrecisionT, packed_size>();
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCZ.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCZ.hpp
index a48dc4d955..bf7081fb89 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCZ.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCZ.hpp
@@ -31,22 +31,23 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCZ {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       [[maybe_unused]] bool inverse) {
-        const auto parity = toParity<PrecisionT, packed_size>([](size_t idx) {
-            return ((idx >> rev_wire0) & 1U) & ((idx >> rev_wire1) & 1U);
-        });
+        const auto parity =
+            toParity<PrecisionT, packed_size>([](std::size_t idx) {
+                return ((idx >> rev_wire0) & 1U) & ((idx >> rev_wire1) & 1U);
+            });
 
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             PrecisionAVXConcept::store(arr + n, v * parity);
         }
     }
 
-    template <size_t min_rev_wire>
+    template <std::size_t min_rev_wire>
     static void applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire,
@@ -60,7 +61,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCZ {
         const auto parity =
             internalParity<PrecisionT, packed_size>(min_rev_wire);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -90,7 +92,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyCZ {
             fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max);
 
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyControlledPhaseShift.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyControlledPhaseShift.hpp
index c2a8823f59..f621776bc7 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyControlledPhaseShift.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyControlledPhaseShift.hpp
@@ -39,11 +39,11 @@ struct ApplyControlledPhaseShift {
     /**
      * @brief Permutation applying imaginary `i` to |11>
      */
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static consteval auto applyInternalInternalPermutation() {
         // Swap real and imaginary part of 11
         std::array<uint8_t, packed_size> perm{};
-        for (size_t k = 0; k < (packed_size / 2); k++) {
+        for (std::size_t k = 0; k < (packed_size / 2); k++) {
             if ((((k >> rev_wire0) & 1U) & ((k >> rev_wire1) & 1U)) == 1) {
                 // Only swap real and image for 11
                 perm[2 * k + 0] = 2 * k + 1;
@@ -57,7 +57,7 @@ struct ApplyControlledPhaseShift {
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    template <size_t rev_wire0, std::size_t rev_wire1, class ParamT>
+    template <std::size_t rev_wire0, std::size_t rev_wire1, class ParamT>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits, bool inverse,
                                       ParamT angle) {
@@ -66,7 +66,7 @@ struct ApplyControlledPhaseShift {
         const auto real_factor = [angle]() {
             std::array<PrecisionT, packed_size> arr{};
             PL_LOOP_SIMD
-            for (size_t k = 0; k < (packed_size / 2); k++) {
+            for (std::size_t k = 0; k < (packed_size / 2); k++) {
                 if ((((k >> rev_wire0) & 1U) & ((k >> rev_wire1) & 1U)) == 1) {
                     // for 11
                     arr[2 * k + 0] = std::cos(angle);
@@ -81,7 +81,7 @@ struct ApplyControlledPhaseShift {
         const auto imag_factor = [isin]() {
             std::array<PrecisionT, packed_size> arr{};
             PL_LOOP_SIMD
-            for (size_t k = 0; k < (packed_size / 2); k++) {
+            for (std::size_t k = 0; k < (packed_size / 2); k++) {
                 if ((((k >> rev_wire0) & 1U) & ((k >> rev_wire1) & 1U)) == 1) {
                     // for 11
                     arr[2 * k + 0] = -isin;
@@ -97,7 +97,7 @@ struct ApplyControlledPhaseShift {
         constexpr static auto perm =
             applyInternalInternalPermutation<rev_wire0, rev_wire1>();
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
 
             const auto prod_cos = real_factor * v;
@@ -110,10 +110,10 @@ struct ApplyControlledPhaseShift {
     /**
      * @brief Permutation applying product `i` when the target bit is 1
      */
-    template <size_t min_rev_wire>
+    template <std::size_t min_rev_wire>
     static consteval auto applyInternalExternalPermutation() {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t k = 0; k < (packed_size / 2); k++) {
+        for (std::size_t k = 0; k < (packed_size / 2); k++) {
             if (((k >> min_rev_wire) & 1U) == 1) {
                 // Only swap real and imag when 1
                 perm[2 * k + 0] = 2 * k + 1;
@@ -127,7 +127,7 @@ struct ApplyControlledPhaseShift {
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    template <size_t min_rev_wire, class ParamT>
+    template <std::size_t min_rev_wire, class ParamT>
     static void applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire, bool inverse,
@@ -142,7 +142,7 @@ struct ApplyControlledPhaseShift {
         const auto real_factor = [angle]() {
             std::array<Precision, packed_size> arr{};
             PL_LOOP_SIMD
-            for (size_t k = 0; k < (packed_size / 2); k++) {
+            for (std::size_t k = 0; k < (packed_size / 2); k++) {
                 if (((k >> min_rev_wire) & 1U) == 1) {
                     // for 11
                     arr[2 * k + 0] = std::cos(angle);
@@ -159,7 +159,7 @@ struct ApplyControlledPhaseShift {
         const auto imag_factor = [isin]() {
             std::array<Precision, packed_size> arr{};
             PL_LOOP_SIMD
-            for (size_t k = 0; k < (packed_size / 2); k++) {
+            for (std::size_t k = 0; k < (packed_size / 2); k++) {
                 if (((k >> min_rev_wire) & 1U) == 1) {
                     // for 11
                     arr[2 * k + 0] = -isin;
@@ -176,7 +176,8 @@ struct ApplyControlledPhaseShift {
         constexpr static auto perm =
             applyInternalExternalPermutation<min_rev_wire>();
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -218,7 +219,8 @@ struct ApplyControlledPhaseShift {
         constexpr static auto perm = compilePermutation<Precision>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingXX.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingXX.hpp
index 2d38a9a183..de5af08290 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingXX.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingXX.hpp
@@ -34,7 +34,7 @@ struct ApplyGeneratorIsingXX {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static auto applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       [[maybe_unused]] bool adj) -> PrecisionT {
@@ -42,7 +42,7 @@ struct ApplyGeneratorIsingXX {
         constexpr static auto perm = compilePermutation<Precision, packed_size>(
             flip(flip(identity<packed_size>(), rev_wire0), rev_wire1));
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             PrecisionAVXConcept::store(arr + n, permute<perm>(v));
         }
@@ -50,7 +50,7 @@ struct ApplyGeneratorIsingXX {
             0.5); // NOLINT(readability-magic-numbers)
     }
 
-    template <size_t min_rev_wire>
+    template <std::size_t min_rev_wire>
     static auto applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire,
@@ -66,7 +66,8 @@ struct ApplyGeneratorIsingXX {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             flip(identity<packed_size>(), min_rev_wire));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -101,7 +102,8 @@ struct ApplyGeneratorIsingXX {
         const std::size_t parity_middle =
             fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingYY.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingYY.hpp
index e30c1033ca..a18b9bd4bc 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingYY.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingYY.hpp
@@ -34,7 +34,7 @@ struct ApplyGeneratorIsingYY {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static auto applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       [[maybe_unused]] bool adj) -> PrecisionT {
@@ -42,14 +42,14 @@ struct ApplyGeneratorIsingYY {
         constexpr static auto perm = compilePermutation<Precision, packed_size>(
             flip(flip(identity<packed_size>(), rev_wire0), rev_wire1));
 
-        auto parityFunc = [](size_t idx) -> std::size_t {
+        auto parityFunc = [](std::size_t idx) -> std::size_t {
             return std::size_t{1U} - (((idx >> rev_wire0) & std::size_t{1U}) ^
                                       ((idx >> rev_wire1) & std::size_t{1U}));
         };
 
         const auto signs = toParity<PrecisionT, packed_size>(parityFunc);
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             PrecisionAVXConcept::store(arr + n, signs * permute<perm>(v));
         }
@@ -57,7 +57,7 @@ struct ApplyGeneratorIsingYY {
             0.5); // NOLINT(readability-magic-numbers)
     }
 
-    template <size_t min_rev_wire>
+    template <std::size_t min_rev_wire>
     static auto applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire,
@@ -77,7 +77,8 @@ struct ApplyGeneratorIsingYY {
             -internalParity<Precision, packed_size>(min_rev_wire);
         const auto sign1 = internalParity<Precision, packed_size>(min_rev_wire);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -112,7 +113,8 @@ struct ApplyGeneratorIsingYY {
         const std::size_t parity_middle =
             fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingZZ.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingZZ.hpp
index 041e2d78f2..17401b8e64 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingZZ.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorIsingZZ.hpp
@@ -34,18 +34,19 @@ struct ApplyGeneratorIsingZZ {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static auto applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       [[maybe_unused]] bool adj) -> PrecisionT {
         using namespace Permutation;
 
-        const auto signs = toParity<Precision, packed_size>([](size_t idx) {
-            return (((idx >> rev_wire0) & std::size_t{1U}) ^
-                    ((idx >> rev_wire1) & std::size_t{1U}));
-        });
+        const auto signs =
+            toParity<Precision, packed_size>([](std::size_t idx) {
+                return (((idx >> rev_wire0) & std::size_t{1U}) ^
+                        ((idx >> rev_wire1) & std::size_t{1U}));
+            });
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             PrecisionAVXConcept::store(arr + n, signs * v);
         }
@@ -53,7 +54,7 @@ struct ApplyGeneratorIsingZZ {
             0.5); // NOLINT(readability-magic-numbers)
     }
 
-    template <size_t min_rev_wire>
+    template <std::size_t min_rev_wire>
     static auto applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire,
@@ -70,7 +71,8 @@ struct ApplyGeneratorIsingZZ {
         const auto sign1 =
             -internalParity<Precision, packed_size>(min_rev_wire);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -105,7 +107,8 @@ struct ApplyGeneratorIsingZZ {
         const std::size_t parity_middle =
             fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorPhaseShift.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorPhaseShift.hpp
index 215f77f529..aff52594d2 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorPhaseShift.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyGeneratorPhaseShift.hpp
@@ -32,11 +32,11 @@ struct ApplyGeneratorPhaseShift {
 
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static consteval auto factorInternal() ->
         typename PrecisionAVXConcept::IntrinsicType {
         std::array<PrecisionT, packed_size> factors{};
-        for (size_t k = 0; k < packed_size_ / 2; k++) {
+        for (std::size_t k = 0; k < packed_size_ / 2; k++) {
             if (((k >> rev_wire) & std::size_t{1U}) == 0) {
                 factors[2 * k + 0] = 0.0;
                 factors[2 * k + 1] = 0.0;
@@ -48,13 +48,13 @@ struct ApplyGeneratorPhaseShift {
         return setValue(factors);
     }
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static auto applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               [[maybe_unused]] bool inverse) -> PrecisionT {
         constexpr auto factor = factorInternal<rev_wire>();
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
             PrecisionAVXConcept::store(arr + k, factor * v);
         }
@@ -71,7 +71,8 @@ struct ApplyGeneratorPhaseShift {
         constexpr auto zero =
             typename PrecisionAVXConcept::IntrinsicType{PrecisionT{0.0}};
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             PrecisionAVXConcept::store(arr + i0, zero);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyHadamard.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyHadamard.hpp
index bcb80b2122..d1260c560c 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyHadamard.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyHadamard.hpp
@@ -30,7 +30,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyHadamard {
 
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               [[maybe_unused]] bool inverse) {
@@ -46,7 +46,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyHadamard {
             compilePermutation<PrecisionT>(
                 flip(identity<packed_size>(), rev_wire));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
 
             const auto w_diag = mat_diag * v;
@@ -70,7 +70,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyHadamard {
         const auto p_isqrt2 = set1<PrecisionT, packed_size>(isqrt2);
         const auto m_isqrt2 = set1<PrecisionT, packed_size>(-isqrt2);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingXX.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingXX.hpp
index ddc5d8f217..06495ebbbc 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingXX.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingXX.hpp
@@ -32,21 +32,21 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXX {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static consteval auto permutationInternalInternal() {
         std::array<uint8_t, packed_size> perm = {
             0,
         };
 
         std::size_t m = (1U << rev_wire0) | (1U << rev_wire1);
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             perm[2 * k + 0] = 2 * (k ^ m) + 1;
             perm[2 * k + 1] = 2 * (k ^ m) + 0;
         }
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    template <size_t rev_wire0, std::size_t rev_wire1, class ParamT>
+    template <std::size_t rev_wire0, std::size_t rev_wire1, class ParamT>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits, bool inverse,
                                       ParamT angle) {
@@ -59,7 +59,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXX {
         constexpr static auto perm =
             permutationInternalInternal<rev_wire0, rev_wire1>();
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
 
             const auto prod_cos = real_cos * v;
@@ -68,7 +68,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXX {
             PrecisionAVXConcept::store(arr + n, prod_cos + prod_sin);
         }
     }
-    template <size_t min_rev_wire, class ParamT>
+    template <std::size_t min_rev_wire, class ParamT>
     static void applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire, bool inverse,
@@ -89,7 +89,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXX {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(flip(identity<packed_size>(), min_rev_wire)));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -138,7 +139,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXX {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingXY.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingXY.hpp
index f9a136c199..f589bf18b1 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingXY.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingXY.hpp
@@ -32,14 +32,14 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static consteval auto permutationInternalInternal() {
         std::array<uint8_t, packed_size> perm = {
             0,
         };
 
         std::size_t m = (1U << rev_wire0) | (1U << rev_wire1);
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             if ((((k >> rev_wire0) & 1U) ^ ((k >> rev_wire1) & 1U)) == 0) {
                 perm[2 * k + 0] = 2 * k + 0;
                 perm[2 * k + 1] = 2 * k + 1;
@@ -52,7 +52,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    template <size_t rev_wire0, std::size_t rev_wire1, class ParamT>
+    template <std::size_t rev_wire0, std::size_t rev_wire1, class ParamT>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits, bool inverse,
                                       ParamT angle) {
@@ -63,7 +63,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
                 0.0,
             };
             PL_LOOP_SIMD
-            for (size_t k = 0; k < packed_size / 2; k++) {
+            for (std::size_t k = 0; k < packed_size / 2; k++) {
                 if ((((k >> rev_wire0) & 1U) ^ ((k >> rev_wire1) & 1U)) == 0) {
                     // 00 or 11
                     arr[2 * k + 0] = 1.0;
@@ -81,7 +81,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
                 0.0,
             };
             PL_LOOP_SIMD
-            for (size_t k = 0; k < packed_size / 2; k++) {
+            for (std::size_t k = 0; k < packed_size / 2; k++) {
                 if ((((k >> rev_wire0) & 1U) ^ ((k >> rev_wire1) & 1U)) == 0) {
                     // 00 or 11
                     arr[2 * k + 0] = 0.0;
@@ -98,7 +98,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         constexpr static auto perm =
             permutationInternalInternal<rev_wire0, rev_wire1>();
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
 
             const auto prod_real = real_factor * v;
@@ -108,12 +108,12 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         }
     }
 
-    template <size_t min_rev_wire>
+    template <std::size_t min_rev_wire>
     static consteval auto permutationInternalExternal() {
         std::array<uint8_t, packed_size> perm{};
 
         std::size_t m = 1U << min_rev_wire;
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             // swap 01 and 10 and apply imaginary
             perm[2 * k + 0] = 2 * (k ^ m) + 1;
             perm[2 * k + 1] = 2 * (k ^ m) + 0;
@@ -121,7 +121,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    template <size_t min_rev_wire, class ParamT>
+    template <std::size_t min_rev_wire, class ParamT>
     static void applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire, bool inverse,
@@ -140,7 +140,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
                 0.0,
             };
             PL_LOOP_SIMD
-            for (size_t k = 0; k < packed_size / 2; k++) {
+            for (std::size_t k = 0; k < packed_size / 2; k++) {
                 if (((k >> min_rev_wire) & 1U) == 0) {
                     arr[2 * k + 0] = 1.0;
                     arr[2 * k + 1] = 1.0;
@@ -155,7 +155,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         const auto real_factor1 = [angle]() {
             std::array<PrecisionT, packed_size> arr{};
             PL_LOOP_SIMD
-            for (size_t k = 0; k < packed_size / 2; k++) {
+            for (std::size_t k = 0; k < packed_size / 2; k++) {
                 if (((k >> min_rev_wire) & 1U) == 0) {
                     arr[2 * k + 0] = std::cos(angle / 2);
                     arr[2 * k + 1] = std::cos(angle / 2);
@@ -170,7 +170,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         const auto imag_factor0 = [isin]() {
             std::array<PrecisionT, packed_size> arr{};
             PL_LOOP_SIMD
-            for (size_t k = 0; k < packed_size / 2; k++) {
+            for (std::size_t k = 0; k < packed_size / 2; k++) {
                 if (((k >> min_rev_wire) & 1U) == 0) {
                     arr[2 * k + 0] = 0.0;
                     arr[2 * k + 1] = 0.0;
@@ -185,7 +185,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         const auto imag_factor1 = [isin]() {
             std::array<PrecisionT, packed_size> arr = {};
             PL_LOOP_SIMD
-            for (size_t k = 0; k < packed_size / 2; k++) {
+            for (std::size_t k = 0; k < packed_size / 2; k++) {
                 if (((k >> min_rev_wire) & 1U) == 0) {
                     arr[2 * k + 0] = -isin;
                     arr[2 * k + 1] = isin;
@@ -200,7 +200,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         constexpr static auto perm =
             permutationInternalExternal<min_rev_wire>();
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -251,7 +252,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingXY {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp
index bd93c0ff78..1ee2efc568 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingYY.hpp
@@ -32,21 +32,21 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingYY {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static consteval auto permutationInternalInternal() {
         std::array<uint8_t, packed_size> perm = {
             0,
         };
 
         std::size_t m = (1U << rev_wire0) | (1U << rev_wire1);
-        for (size_t k = 0; k < packed_size / 2; k++) {
+        for (std::size_t k = 0; k < packed_size / 2; k++) {
             perm[2 * k + 0] = 2 * (k ^ m) + 1;
             perm[2 * k + 1] = 2 * (k ^ m) + 0;
         }
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    template <size_t rev_wire0, std::size_t rev_wire1, class ParamT>
+    template <std::size_t rev_wire0, std::size_t rev_wire1, class ParamT>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits, bool inverse,
                                       ParamT angle) {
@@ -61,7 +61,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingYY {
         // otherwise
         const auto imag_sin =
             imagFactor<PrecisionT, packed_size>(isin) *
-            toParity<PrecisionT, packed_size>([](size_t n) {
+            toParity<PrecisionT, packed_size>([](std::size_t n) {
                 std::size_t b = ((n >> rev_wire0) ^ (n >> rev_wire1)) & 1U;
                 if (b == 0) {
                     return 1;
@@ -72,7 +72,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingYY {
         constexpr static auto perm =
             permutationInternalInternal<rev_wire0, rev_wire1>();
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
 
             const auto prod_cos = real_cos * v;
@@ -81,7 +81,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingYY {
             PrecisionAVXConcept::store(arr + n, prod_cos + prod_sin);
         }
     }
-    template <size_t min_rev_wire, class ParamT>
+    template <std::size_t min_rev_wire, class ParamT>
     static void applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire, bool inverse,
@@ -107,7 +107,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingYY {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(flip(identity<packed_size>(), min_rev_wire)));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -158,7 +159,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingYY {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingZZ.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingZZ.hpp
index 99f42315e6..5e59e3053f 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingZZ.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyIsingZZ.hpp
@@ -34,20 +34,21 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingZZ {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1, class ParamT>
+    template <std::size_t rev_wire0, std::size_t rev_wire1, class ParamT>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits, bool inverse,
                                       ParamT angle) {
         const auto isin = inverse ? std::sin(angle / 2) : -std::sin(angle / 2);
-        const auto parity = toParity<PrecisionT, packed_size>([=](size_t idx) {
-            return ((idx >> rev_wire0) & 1U) ^ ((idx >> rev_wire1) & 1U);
-        });
+        const auto parity =
+            toParity<PrecisionT, packed_size>([=](std::size_t idx) {
+                return ((idx >> rev_wire0) & 1U) ^ ((idx >> rev_wire1) & 1U);
+            });
         const auto real_cos =
             set1<PrecisionT, packed_size>(std::cos(angle / 2));
         const auto imag_sin =
             imagFactor<PrecisionT, packed_size>(isin) * parity;
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
 
             const auto prod_cos = real_cos * v;
@@ -57,7 +58,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingZZ {
         }
     }
 
-    template <size_t min_rev_wire, class ParamT>
+    template <std::size_t min_rev_wire, class ParamT>
     static void applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire, bool inverse,
@@ -77,7 +78,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingZZ {
             imag_sin * internalParity<PrecisionT, packed_size>(min_rev_wire);
         const auto imag_sin_parity1 = imag_sin_parity0 * -1.0;
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -126,7 +128,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyIsingZZ {
         const auto p_isin = imagFactor<PrecisionT, packed_size>(isin);
         const auto m_isin = imagFactor<PrecisionT, packed_size>(-isin);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliX.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliX.hpp
index 2e27f620da..734a0289ff 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliX.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliX.hpp
@@ -31,7 +31,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPauliX {
 
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               [[maybe_unused]] bool inverse) {
@@ -40,7 +40,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPauliX {
             compilePermutation<PrecisionT>(
                 flip(identity<packed_size>(), rev_wire));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
             PrecisionAVXConcept::store(arr + k,
                                        permute<compiled_permutation>(v));
@@ -56,7 +56,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPauliX {
         const std::size_t wire_parity = fillTrailingOnes(rev_wire);
         const std::size_t wire_parity_inv = fillLeadingOnes(rev_wire + 1);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliY.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliY.hpp
index 62de1c0d19..48ee79ba3f 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliY.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliY.hpp
@@ -31,7 +31,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPauliY {
 
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               [[maybe_unused]] bool inverse) {
@@ -42,7 +42,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPauliY {
             compilePermutation<PrecisionT>(
                 swapRealImag(flip(identity<packed_size>(), rev_wire)));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
             const auto w = permute<compiled_permutation>(v);
             PrecisionAVXConcept::store(arr + k, w * factor);
@@ -67,7 +67,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPauliY {
             compilePermutation<PrecisionT>(
                 swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliZ.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliZ.hpp
index 050a7ebca8..79a5c5aeb1 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliZ.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPauliZ.hpp
@@ -31,13 +31,13 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPauliZ {
 
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               [[maybe_unused]] bool inverse) {
         const auto factor = internalParity<PrecisionT, packed_size>(rev_wire);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
             PrecisionAVXConcept::store(arr + k, factor * v);
         }
@@ -54,7 +54,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPauliZ {
 
         const auto factor = set1<PrecisionT, packed_size>(-1.0);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp
index 2f927678c5..6a4427539b 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyPhaseShift.hpp
@@ -37,9 +37,9 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPhaseShift {
      *
      * FIXME: clang++-12 currently does not accept consteval here.
      */
-    static constexpr auto applyInternalPermutation(size_t rev_wire) {
+    static constexpr auto applyInternalPermutation(std::size_t rev_wire) {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t n = 0; n < packed_size / 2; n++) {
+        for (std::size_t n = 0; n < packed_size / 2; n++) {
             if (((n >> rev_wire) & 1U) == 0) {
                 perm[2 * n + 0] = 2 * n + 0;
                 perm[2 * n + 1] = 2 * n + 1;
@@ -55,11 +55,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPhaseShift {
     /**
      * @brief Factor for applying [1, 1, cos(phi/2), cos(phi/2)]
      */
-    static auto cosFactor(size_t rev_wire, PrecisionT angle)
+    static auto cosFactor(std::size_t rev_wire, PrecisionT angle)
         -> AVXIntrinsicType<PrecisionT, packed_size> {
         std::array<PrecisionT, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t n = 0; n < packed_size / 2; n++) {
+        for (std::size_t n = 0; n < packed_size / 2; n++) {
             if (((n >> rev_wire) & 1U) == 0) {
                 arr[2 * n + 0] = 1.0;
                 arr[2 * n + 1] = 1.0;
@@ -74,11 +74,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPhaseShift {
     /**
      * @brief Factor for applying [0, 0, -sin(phi/2), sin(phi/2)]
      */
-    static auto isinFactor(size_t rev_wire, PrecisionT angle)
+    static auto isinFactor(std::size_t rev_wire, PrecisionT angle)
         -> AVXIntrinsicType<PrecisionT, packed_size> {
         std::array<PrecisionT, packed_size> arr{};
         PL_LOOP_SIMD
-        for (size_t n = 0; n < packed_size / 2; n++) {
+        for (std::size_t n = 0; n < packed_size / 2; n++) {
             if (((n >> rev_wire) & 1U) == 0) {
                 arr[2 * n + 0] = 0.0;
                 arr[2 * n + 1] = 0.0;
@@ -90,7 +90,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPhaseShift {
         return setValue(arr);
     }
 
-    template <size_t rev_wire, typename ParamT>
+    template <std::size_t rev_wire, typename ParamT>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits, bool inverse,
                               ParamT angle) {
@@ -99,7 +99,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPhaseShift {
         const auto isin_factor =
             isinFactor(rev_wire, (inverse ? -angle : angle));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
             const auto w =
                 cos_factor * v + isin_factor * Permutation::permute<perm>(v);
@@ -126,7 +126,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyPhaseShift {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRX.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRX.hpp
index f0bb7ab586..e0cef43b5f 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRX.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRX.hpp
@@ -32,7 +32,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRX {
 
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire, class ParamT>
+    template <std::size_t rev_wire, class ParamT>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               [[maybe_unused]] bool inverse, ParamT angle) {
@@ -47,7 +47,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRX {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(flip(identity<packed_size>(), rev_wire)));
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < (1U << num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < (1U << num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             const auto w_diag = diag_real * v;
             const auto w_offdiag = offdiag_imag * permute<perm>(v);
@@ -76,7 +76,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRX {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRY.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRY.hpp
index 100d1b1ec2..d51daee837 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRY.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRY.hpp
@@ -32,7 +32,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRY {
 
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire, class ParamT>
+    template <std::size_t rev_wire, class ParamT>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               [[maybe_unused]] bool inverse, ParamT angle) {
@@ -49,7 +49,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRY {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             flip(identity<packed_size>(), rev_wire));
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < (1U << num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < (1U << num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             const auto w_diag = diag_real * v;
             const auto w_offdiag = offdiag_real * permute<perm>(v);
@@ -76,7 +76,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRY {
         const auto p_sin_factor = set1<PrecisionT, packed_size>(sin);
         const auto m_sin_factor = set1<PrecisionT, packed_size>(-sin);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRZ.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRZ.hpp
index 91d760ed72..da75c8c109 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRZ.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyRZ.hpp
@@ -32,7 +32,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRZ {
 
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire, class ParamT>
+    template <std::size_t rev_wire, class ParamT>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               [[maybe_unused]] bool inverse, ParamT angle) {
@@ -48,7 +48,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRZ {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < (1U << num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < (1U << num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             const auto w = permute<perm>(v);
             PrecisionAVXConcept::store(arr + n,
@@ -78,7 +78,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyRZ {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyS.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyS.hpp
index 1b64e1b920..e3726845a7 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyS.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyS.hpp
@@ -36,9 +36,9 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyS {
      *
      * FIXME: clang++-12 currently does not accept consteval here.
      */
-    static constexpr auto applyInternalPermutation(size_t rev_wire) {
+    static constexpr auto applyInternalPermutation(std::size_t rev_wire) {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t n = 0; n < packed_size / 2; n++) {
+        for (std::size_t n = 0; n < packed_size / 2; n++) {
             if (((n >> rev_wire) & 1U) == 0) {
                 perm[2 * n + 0] = 2 * n + 0;
                 perm[2 * n + 1] = 2 * n + 1;
@@ -50,11 +50,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyS {
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    static auto createFactor(size_t rev_wire, bool inverse)
+    static auto createFactor(std::size_t rev_wire, bool inverse)
         -> AVXIntrinsicType<PrecisionT, packed_size> {
         std::array<PrecisionT, packed_size> data{};
         PL_LOOP_SIMD
-        for (size_t n = 0; n < packed_size / 2; n++) {
+        for (std::size_t n = 0; n < packed_size / 2; n++) {
             if (((n >> rev_wire) & 1U) == 0) {
                 data[2 * n + 0] = 1.0;
                 data[2 * n + 1] = 1.0;
@@ -71,13 +71,13 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyS {
         return PrecisionAVXConcept::loadu(data.data());
     }
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits, bool inverse) {
         constexpr static auto perm = applyInternalPermutation(rev_wire);
         const auto factor = createFactor(rev_wire, inverse);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
             PrecisionAVXConcept::store(arr + k,
                                        factor * Permutation::permute<perm>(v));
@@ -99,7 +99,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyS {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplySWAP.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplySWAP.hpp
index ef5b7069fa..7eecf0d23f 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplySWAP.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplySWAP.hpp
@@ -37,11 +37,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplySWAP {
     /**
      * @brief Permutation that swaps bits in two wires
      */
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static consteval auto applyInternalInternalPermutation() {
         const auto identity_perm = Permutation::identity<packed_size>();
         std::array<uint8_t, packed_size> perm{};
-        for (size_t i = 0; i < packed_size / 2; i++) {
+        for (std::size_t i = 0; i < packed_size / 2; i++) {
             // swap rev_wire1 and rev_wire0 bits
             const std::size_t b = ((i >> rev_wire0) ^ (i >> rev_wire1)) & 1U;
             const std::size_t j = i ^ ((b << rev_wire0) | (b << rev_wire1));
@@ -51,7 +51,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplySWAP {
         return Permutation::compilePermutation<PrecisionT, packed_size>(perm);
     }
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       [[maybe_unused]] bool inverse) {
@@ -59,7 +59,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplySWAP {
         constexpr static auto perm =
             applyInternalInternalPermutation<rev_wire0, rev_wire1>();
         PL_LOOP_PARALLEL(1)
-        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
+        for (std::size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + n);
             PrecisionAVXConcept::store(arr + n, permute<perm>(v));
         }
@@ -68,9 +68,9 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplySWAP {
     /**
      * @brief Setting a mask. Mask is 1 if bits in min_rev_wire is set
      */
-    template <size_t min_rev_wire> static consteval auto createMask0() {
+    template <std::size_t min_rev_wire> static consteval auto createMask0() {
         std::array<bool, packed_size> m{};
-        for (size_t i = 0; i < packed_size / 2; i++) {
+        for (std::size_t i = 0; i < packed_size / 2; i++) {
             if ((i & (1U << min_rev_wire)) != 0) {
                 m[2 * i + 0] = true;
                 m[2 * i + 1] = true;
@@ -85,9 +85,9 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplySWAP {
     /**
      * @brief Setting a mask. Mask is 1 if bits in min_rev_wire is unset
      */
-    template <size_t min_rev_wire> static consteval auto createMask1() {
+    template <std::size_t min_rev_wire> static consteval auto createMask1() {
         std::array<bool, packed_size> m = {};
-        for (size_t i = 0; i < packed_size / 2; i++) {
+        for (std::size_t i = 0; i < packed_size / 2; i++) {
             if ((i & (1U << min_rev_wire)) != 0) {
                 m[2 * i + 0] = false;
                 m[2 * i + 1] = false;
@@ -99,7 +99,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplySWAP {
         return compileMask<PrecisionT, packed_size>(m);
     }
 
-    template <size_t min_rev_wire>
+    template <std::size_t min_rev_wire>
     static void applyInternalExternal(std::complex<PrecisionT> *arr,
                                       std::size_t num_qubits,
                                       std::size_t max_rev_wire,
@@ -117,7 +117,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplySWAP {
         constexpr static auto compiled_perm = compilePermutation<PrecisionT>(
             flip(identity<packed_size>(), min_rev_wire));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
             const std::size_t i1 = i0 | max_rev_wire_shift;
@@ -151,7 +152,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplySWAP {
         const std::size_t parity_middle =
             fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 2);
+             k += packed_size / 2) {
             const std::size_t i00 = ((k << 2U) & parity_high) |
                                     ((k << 1U) & parity_middle) |
                                     (k & parity_low);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplySingleQubitOp.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplySingleQubitOp.hpp
index 04ade55acb..b800f1f6cb 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplySingleQubitOp.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplySingleQubitOp.hpp
@@ -30,7 +30,7 @@ struct ApplySingleQubitOp {
     using PrecisionAVXConcept =
         typename AVXConcept<PrecisionT, packed_size>::Type;
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits,
                               const std::complex<PrecisionT> *matrix,
@@ -38,12 +38,12 @@ struct ApplySingleQubitOp {
         using namespace Permutation;
 
         const AVXIntrinsicType<PrecisionT, packed_size> diag_real =
-            setValueOneTwo<PrecisionT, packed_size>([=](size_t idx) {
+            setValueOneTwo<PrecisionT, packed_size>([=](std::size_t idx) {
                 return (((idx >> rev_wire) & 1U) == 0) ? real(matrix[0])
                                                        : real(matrix[3]);
             });
         const AVXIntrinsicType<PrecisionT, packed_size> diag_imag =
-            setValueOneTwo<PrecisionT, packed_size>([=](size_t idx) {
+            setValueOneTwo<PrecisionT, packed_size>([=](std::size_t idx) {
                 if (inverse) {
                     return (((idx >> rev_wire) & 1U) == 0) ? -imag(matrix[0])
                                                            : -imag(matrix[3]);
@@ -53,7 +53,7 @@ struct ApplySingleQubitOp {
             }) *
             imagFactor<PrecisionT, packed_size>();
         const AVXIntrinsicType<PrecisionT, packed_size> offdiag_real =
-            setValueOneTwo<PrecisionT, packed_size>([=](size_t idx) {
+            setValueOneTwo<PrecisionT, packed_size>([=](std::size_t idx) {
                 if (inverse) {
                     return (((idx >> rev_wire) & 1U) == 0) ? real(matrix[2])
                                                            : real(matrix[1]);
@@ -62,7 +62,7 @@ struct ApplySingleQubitOp {
                                                        : real(matrix[2]);
             });
         const AVXIntrinsicType<PrecisionT, packed_size> offdiag_imag =
-            setValueOneTwo<PrecisionT, packed_size>([=](size_t idx) {
+            setValueOneTwo<PrecisionT, packed_size>([=](std::size_t idx) {
                 if (inverse) {
                     return (((idx >> rev_wire) & 1U) == 0) ? -imag(matrix[2])
                                                            : -imag(matrix[1]);
@@ -81,7 +81,7 @@ struct ApplySingleQubitOp {
             compilePermutation<PrecisionT>(
                 swapRealImag(flip(identity<packed_size>(), rev_wire)));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
             const auto w_diag =
                 diag_real * v + diag_imag * permute<swap_real_imag>(v);
@@ -143,7 +143,8 @@ struct ApplySingleQubitOp {
         constexpr static auto swap_real_imag = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyT.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyT.hpp
index 8b624f93b2..8f9a55bae4 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyT.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyT.hpp
@@ -37,9 +37,9 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyT {
      *
      * FIXME: clang++-12 currently does not accept consteval here.
      */
-    static constexpr auto applyInternalPermutation(size_t rev_wire) {
+    static constexpr auto applyInternalPermutation(std::size_t rev_wire) {
         std::array<uint8_t, packed_size> perm{};
-        for (size_t n = 0; n < packed_size / 2; n++) {
+        for (std::size_t n = 0; n < packed_size / 2; n++) {
             if (((n >> rev_wire) & 1U) == 0) {
                 perm[2 * n + 0] = 2 * n + 0;
                 perm[2 * n + 1] = 2 * n + 1;
@@ -52,11 +52,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyT {
         return Permutation::compilePermutation<PrecisionT>(perm);
     }
 
-    static auto applyInternalRealFactor(size_t rev_wire)
+    static auto applyInternalRealFactor(std::size_t rev_wire)
         -> AVXIntrinsicType<PrecisionT, packed_size> {
         std::array<PrecisionT, packed_size> data{};
         PL_LOOP_SIMD
-        for (size_t n = 0; n < packed_size / 2; n++) {
+        for (std::size_t n = 0; n < packed_size / 2; n++) {
             if (((n >> rev_wire) & 1U) == 0) {
                 data[2 * n + 0] = 1.0;
                 data[2 * n + 1] = 1.0;
@@ -68,11 +68,11 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyT {
         return PrecisionAVXConcept::loadu(data.data());
     }
 
-    static auto applyInternalImagFactor(size_t rev_wire, bool inverse)
+    static auto applyInternalImagFactor(std::size_t rev_wire, bool inverse)
         -> AVXIntrinsicType<PrecisionT, packed_size> {
         std::array<PrecisionT, packed_size> data{};
         PL_LOOP_SIMD
-        for (size_t n = 0; n < packed_size / 2; n++) {
+        for (std::size_t n = 0; n < packed_size / 2; n++) {
             if (((n >> rev_wire) & 1U) == 0) {
                 data[2 * n + 0] = 0.0;
                 data[2 * n + 1] = 0.0;
@@ -89,7 +89,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyT {
         return PrecisionAVXConcept::loadu(data.data());
     }
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static void applyInternal(std::complex<PrecisionT> *arr,
                               const std::size_t num_qubits, bool inverse) {
         constexpr static auto perm = applyInternalPermutation(rev_wire);
@@ -97,7 +97,7 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyT {
         const auto cos_factor = applyInternalRealFactor(rev_wire);
         const auto isin_factor = applyInternalImagFactor(rev_wire, inverse);
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
+        for (std::size_t k = 0; k < (1U << num_qubits); k += packed_size / 2) {
             const auto v = PrecisionAVXConcept::load(arr + k);
             const auto w =
                 cos_factor * v + isin_factor * Permutation::permute<perm>(v);
@@ -121,7 +121,8 @@ template <typename PrecisionT, std::size_t packed_size> struct ApplyT {
         constexpr static auto perm = compilePermutation<PrecisionT>(
             swapRealImag(identity<packed_size>()));
         PL_LOOP_PARALLEL(1)
-        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
+        for (std::size_t k = 0; k < exp2(num_qubits - 1);
+             k += packed_size / 2) {
             const std::size_t i0 =
                 ((k << 1U) & wire_parity_inv) | (wire_parity & k);
             const std::size_t i1 = i0 | rev_wire_shift;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/Permutation.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/Permutation.hpp
index 5bb6d817b7..b477e4cda8 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/Permutation.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/Permutation.hpp
@@ -120,21 +120,21 @@ constexpr bool isWithinLane(const std::array<uint8_t, size> &permutation) {
     std::array<uint32_t, size_within_lane> lane = {
         0,
     };
-    for (size_t i = 0; i < size_within_lane; i++) {
+    for (std::size_t i = 0; i < size_within_lane; i++) {
         lane[i] = permutation[i];
     }
     {
         auto lane2 = lane;
         std::sort(lane2.begin(), lane2.end());
-        for (size_t i = 0; i < size_within_lane; i++) {
+        for (std::size_t i = 0; i < size_within_lane; i++) {
             if (lane2[i] != i) {
                 return false;
             }
         }
     }
 
-    for (size_t k = 0; k < permutation.size(); k += size_within_lane) {
-        for (size_t idx = 0; idx < size_within_lane; idx++) {
+    for (std::size_t k = 0; k < permutation.size(); k += size_within_lane) {
+        for (std::size_t idx = 0; idx < size_within_lane; idx++) {
             if (lane[idx] + k != permutation[idx + k]) {
                 return false;
             }
@@ -144,14 +144,14 @@ constexpr bool isWithinLane(const std::array<uint8_t, size> &permutation) {
 }
 
 ///@cond DEV
-template <size_t size>
+template <std::size_t size>
 constexpr uint8_t
 getPermutation2x(const std::array<uint8_t, size> &permutation) {
     uint8_t res = static_cast<uint8_t>(permutation[1] << 1U) | permutation[0];
     // NOLINTNEXTLINE(readability-magic-numbers, hicpp-signed-bitwise)
     return (res << 6U) | (res << 4U) | (res << 2U) | res;
 }
-template <size_t size>
+template <std::size_t size>
 constexpr uint8_t
 getPermutation4x(const std::array<uint8_t, size> &permutation) {
     uint8_t res = 0;
@@ -261,7 +261,7 @@ compilePermutation<double, 8>(const std::array<uint8_t, 8> &permutation)
 // LCOV_EXCL_STOP
 #endif // Specializations for AVX512 end
 
-template <size_t packed_size>
+template <std::size_t packed_size>
 constexpr auto identity() -> std::array<uint8_t, packed_size> {
     std::array<uint8_t, packed_size> res{};
     for (uint8_t i = 0; i < packed_size; i++) {
@@ -278,12 +278,12 @@ constexpr auto identity() -> std::array<uint8_t, packed_size> {
  * @param perm Previous permutation
  * @param rev_wire Reverse wire
  */
-template <size_t packed_size>
+template <std::size_t packed_size>
 constexpr auto flip(const std::array<uint8_t, packed_size> &perm,
                     std::size_t rev_wire) -> std::array<uint8_t, packed_size> {
     std::array<uint8_t, packed_size> res{};
 
-    for (size_t k = 0; k < packed_size / 2; k++) {
+    for (std::size_t k = 0; k < packed_size / 2; k++) {
         res[2 * k + 0] = perm[2 * (k ^ (1U << rev_wire)) + 0];
         res[2 * k + 1] = perm[2 * (k ^ (1U << rev_wire)) + 1];
     }
@@ -297,7 +297,7 @@ constexpr auto flip(const std::array<uint8_t, packed_size> &perm,
  * @tparam packed_size Number of elements in a packed type
  * @param perm Previous permutation
  */
-template <size_t packed_size>
+template <std::size_t packed_size>
 constexpr auto swapRealImag(const std::array<uint8_t, packed_size> &perm)
     -> std::array<uint8_t, packed_size> {
     std::array<uint8_t, packed_size> res{};
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_AVXSingleQubitGateHelpers.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_AVXSingleQubitGateHelpers.cpp
index 0f675046f4..d6db3836c5 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_AVXSingleQubitGateHelpers.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_AVXSingleQubitGateHelpers.cpp
@@ -27,7 +27,7 @@ struct MockSingleQubitGateWithoutParam {
     using Precision = PrecisionT;
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static std::tuple<std::string, std::size_t, bool>
     applyInternal(std::complex<PrecisionT> *arr, const std::size_t num_qubits,
                   bool inverse) {
@@ -53,7 +53,7 @@ struct MockSingleQubitGateWithParam {
     using Precision = PrecisionT;
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire, class ParamT>
+    template <std::size_t rev_wire, class ParamT>
     static std::tuple<std::string, std::size_t, bool>
     applyInternal(std::complex<PrecisionT> *arr, const std::size_t num_qubits,
                   bool inverse, ParamT angle) {
@@ -86,7 +86,7 @@ struct MockSingleQubitGateSomethingWrong {
     using Precision = PrecisionT;
     constexpr static std::size_t packed_size_ = packed_size;
 
-    template <size_t rev_wire>
+    template <std::size_t rev_wire>
     static std::tuple<std::string, std::size_t, bool>
     applyInternal(std::complex<PrecisionT> *arr, const std::size_t num_qubits,
                   bool inverse) {
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_AVXTwoQubitGateHelpers.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_AVXTwoQubitGateHelpers.cpp
index 2085f12f0e..2c282d73c9 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_AVXTwoQubitGateHelpers.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_AVXTwoQubitGateHelpers.cpp
@@ -33,7 +33,7 @@ struct MockSymmetricTwoQubitGateWithoutParam {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyInternalInternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, bool inverse) {
@@ -43,7 +43,7 @@ struct MockSymmetricTwoQubitGateWithoutParam {
         return {"applyInternalInternal", rev_wire0, rev_wire1, inverse};
     }
 
-    template <size_t rev_wire0>
+    template <std::size_t rev_wire0>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyInternalExternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, std::size_t rev_wire1,
@@ -71,7 +71,7 @@ struct MockSymmetricTwoQubitGateWithParam {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = true;
 
-    template <size_t rev_wire0, std::size_t rev_wire1, typename ParamT>
+    template <std::size_t rev_wire0, std::size_t rev_wire1, typename ParamT>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyInternalInternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, bool inverse,
@@ -83,7 +83,7 @@ struct MockSymmetricTwoQubitGateWithParam {
         return {"applyInternalInternal", rev_wire0, rev_wire1, inverse};
     }
 
-    template <size_t rev_wire0, typename ParamT>
+    template <std::size_t rev_wire0, typename ParamT>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyInternalExternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, std::size_t rev_wire1,
@@ -114,7 +114,7 @@ struct MockAsymmetricTwoQubitGateWithoutParam {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = false;
 
-    template <size_t rev_wire0, std::size_t rev_wire1>
+    template <std::size_t rev_wire0, std::size_t rev_wire1>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyInternalInternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, bool inverse) {
@@ -124,7 +124,7 @@ struct MockAsymmetricTwoQubitGateWithoutParam {
         return {"applyInternalInternal", rev_wire0, rev_wire1, inverse};
     }
 
-    template <size_t control>
+    template <std::size_t control>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyInternalExternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, std::size_t target,
@@ -135,7 +135,7 @@ struct MockAsymmetricTwoQubitGateWithoutParam {
         return {"applyInternalExternal", control, target, inverse};
     }
 
-    template <size_t target>
+    template <std::size_t target>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyExternalInternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, std::size_t control,
@@ -163,7 +163,7 @@ struct MockAsymmetricTwoQubitGateWithParam {
     constexpr static std::size_t packed_size_ = packed_size;
     constexpr static bool symmetric = false;
 
-    template <size_t rev_wire0, std::size_t rev_wire1, typename ParamT>
+    template <std::size_t rev_wire0, std::size_t rev_wire1, typename ParamT>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyInternalInternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, bool inverse,
@@ -175,7 +175,7 @@ struct MockAsymmetricTwoQubitGateWithParam {
         return {"applyInternalInternal", rev_wire0, rev_wire1, inverse};
     }
 
-    template <size_t control, typename ParamT>
+    template <std::size_t control, typename ParamT>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyInternalExternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, std::size_t target,
@@ -187,7 +187,7 @@ struct MockAsymmetricTwoQubitGateWithParam {
         return {"applyInternalExternal", control, target, inverse};
     }
 
-    template <size_t target, typename ParamT>
+    template <std::size_t target, typename ParamT>
     static std::tuple<std::string, std::size_t, std::size_t, bool>
     applyExternalInternal(std::complex<PrecisionT> *arr,
                           const std::size_t num_qubits, std::size_t control,
@@ -326,7 +326,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateHelper template functions",
                    MockAsymmetricTwoQubitGateWithParam<TestType, 4>>);
 }
 
-std::pair<size_t, std::size_t> sort(size_t a, std::size_t b) {
+std::pair<std::size_t, std::size_t> sort(std::size_t a, std::size_t b) {
     return {std::min(a, b), std::max(a, b)};
 }
 
@@ -358,7 +358,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyExternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{2, 3});
+                        std::pair<std::size_t, std::size_t>{2, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {1, 0} -> rev_wires = {2, 3}
@@ -366,7 +366,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyExternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{2, 3});
+                        std::pair<std::size_t, std::size_t>{2, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {0, 3} -> rev_wires = {0, 3}
@@ -374,7 +374,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 3});
+                        std::pair<std::size_t, std::size_t>{0, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {3, 0} -> rev_wires = {3, 0}
@@ -382,7 +382,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 3});
+                        std::pair<std::size_t, std::size_t>{0, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {2, 3} -> rev_wires = {0, 1}
@@ -390,7 +390,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalInternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 1});
+                        std::pair<std::size_t, std::size_t>{0, 1});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {3, 2} -> rev_wires = {1, 0}
@@ -398,7 +398,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalInternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 1});
+                        std::pair<std::size_t, std::size_t>{0, 1});
                 REQUIRE(std::get<3>(res) == inverse);
             }
         }
@@ -422,7 +422,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{2, 3});
+                        std::pair<std::size_t, std::size_t>{2, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {1, 0} -> rev_wires = {3, 2}
@@ -430,7 +430,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{2, 3});
+                        std::pair<std::size_t, std::size_t>{2, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {0, 3} -> rev_wires = {0, 3}
@@ -438,7 +438,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 3});
+                        std::pair<std::size_t, std::size_t>{0, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {3, 0} -> rev_wires = {3, 0}
@@ -446,7 +446,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 3});
+                        std::pair<std::size_t, std::size_t>{0, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {1, 3} -> rev_wires = {0, 2}
@@ -454,7 +454,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalInternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 2});
+                        std::pair<std::size_t, std::size_t>{0, 2});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {3, 1} -> rev_wires = {0, 2}
@@ -462,7 +462,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithoutParamHelper",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalInternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 2});
+                        std::pair<std::size_t, std::size_t>{0, 2});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 2, wires = {0, 1} -> fallback
@@ -652,7 +652,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyExternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{2, 3});
+                        std::pair<std::size_t, std::size_t>{2, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {1, 0} -> rev_wires = {2, 3}
@@ -660,7 +660,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyExternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{2, 3});
+                        std::pair<std::size_t, std::size_t>{2, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {0, 3} -> rev_wires = {0, 3}
@@ -668,7 +668,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 3});
+                        std::pair<std::size_t, std::size_t>{0, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {3, 0} -> rev_wires = {3, 0}
@@ -676,7 +676,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 3});
+                        std::pair<std::size_t, std::size_t>{0, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {2, 3} -> rev_wires = {0, 1}
@@ -684,7 +684,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalInternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 1});
+                        std::pair<std::size_t, std::size_t>{0, 1});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {3, 2} -> rev_wires = {1, 0}
@@ -692,7 +692,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalInternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 1});
+                        std::pair<std::size_t, std::size_t>{0, 1});
                 REQUIRE(std::get<3>(res) == inverse);
             }
         }
@@ -716,7 +716,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{2, 3});
+                        std::pair<std::size_t, std::size_t>{2, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {1, 0} -> rev_wires = {3, 2}
@@ -724,7 +724,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{2, 3});
+                        std::pair<std::size_t, std::size_t>{2, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {0, 3} -> rev_wires = {0, 3}
@@ -732,7 +732,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 3});
+                        std::pair<std::size_t, std::size_t>{0, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {3, 0} -> rev_wires = {3, 0}
@@ -740,7 +740,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalExternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 3});
+                        std::pair<std::size_t, std::size_t>{0, 3});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {1, 3} -> rev_wires = {0, 2}
@@ -748,7 +748,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalInternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 2});
+                        std::pair<std::size_t, std::size_t>{0, 2});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 4, wires = {3, 1} -> rev_wires = {0, 2}
@@ -756,7 +756,7 @@ TEMPLATE_TEST_CASE("Test TwoQubitGateWithParamHelper", "[TwoQubitGateHelper]",
                 REQUIRE(std::get<0>(res) ==
                         std::string("applyInternalInternal"));
                 REQUIRE(sort(std::get<1>(res), std::get<2>(res)) ==
-                        std::pair<size_t, std::size_t>{0, 2});
+                        std::pair<std::size_t, std::size_t>{0, 2});
                 REQUIRE(std::get<3>(res) == inverse);
             }
             { // num_qubits = 2, wires = {0, 1} -> fallback
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_DynamicDispatcher.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_DynamicDispatcher.cpp
index 0c41b75b25..ea2bfb7226 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_DynamicDispatcher.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_DynamicDispatcher.cpp
@@ -73,7 +73,7 @@ TEMPLATE_TEST_CASE("Print registered kernels", "[DynamicDispatcher]", float,
 
     std::ostringstream ss;
     ss << "Registered kernels: ";
-    for (size_t n = 0; n < kernels.size(); n++) {
+    for (std::size_t n = 0; n < kernels.size(); n++) {
         ss << dispatcher.getKernelName(kernels[n]);
         if (n != kernels.size() - 1) {
             ss << ", ";
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_CompareKernels.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_CompareKernels.cpp
index 759c83712c..f96083ab5b 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_CompareKernels.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_CompareKernels.cpp
@@ -123,7 +123,7 @@ void testApplyGate(RandomEngine &re, GateOperation gate_op,
         }
 
         // And compare them
-        for (size_t i = 0; i < res.size() - 1; i++) {
+        for (std::size_t i = 0; i < res.size() - 1; i++) {
             REQUIRE(res[i] ==
                     approx(res[i + 1]).margin(static_cast<PrecisionT>(1e-5)));
         }
@@ -143,8 +143,8 @@ TEMPLATE_TEST_CASE("Test all kernels give the same results for gates",
             }
             return lookup(Pennylane::Gates::Constant::gate_wires, gate_op);
         }();
-        for (size_t num_qubits = min_num_qubits; num_qubits <= max_num_qubits;
-             num_qubits++) {
+        for (std::size_t num_qubits = min_num_qubits;
+             num_qubits <= max_num_qubits; num_qubits++) {
             testApplyGate<TestType>(re, gate_op, num_qubits);
         }
     });
@@ -198,7 +198,8 @@ void testMatrixOp(RandomEngine &re, std::size_t num_qubits,
             }
 
             // And compare them
-            for (size_t idx = 0; idx < implementing_kernels.size() - 1; idx++) {
+            for (std::size_t idx = 0; idx < implementing_kernels.size() - 1;
+                 idx++) {
                 REQUIRE(res[idx] == approx(res[idx + 1]).margin(1e-7));
             }
         }
@@ -212,7 +213,7 @@ TEMPLATE_TEST_CASE("Test all kernels give the same results for matrices",
     const std::size_t num_qubits = 5;
 
     for (bool inverse : {true, false}) {
-        for (size_t num_wires = 1; num_wires <= 5; num_wires++) {
+        for (std::size_t num_wires = 1; num_wires <= 5; num_wires++) {
             testMatrixOp<TestType>(re, num_qubits, num_wires, inverse);
         }
     }
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Generator.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Generator.cpp
index d58bdd0cac..71681edc7d 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Generator.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Generator.cpp
@@ -58,7 +58,7 @@ constexpr auto findGateOpForGenerator() -> GateOperation {
     return GateOperation{};
 }
 
-template <size_t gntr_idx> constexpr auto generatorGatePairsIter() {
+template <std::size_t gntr_idx> constexpr auto generatorGatePairsIter() {
     if constexpr (gntr_idx < Constant::generator_names.size()) {
         constexpr auto gntr_op =
             std::get<0>(Constant::generator_names[gntr_idx]);
@@ -105,8 +105,8 @@ void testGeneratorEqualsGateDerivativeForKernel(
 
     DYNAMIC_SECTION("Test generator of " << gate_name << " for kernel "
                                          << kernel_name) {
-        for (size_t num_qubits = min_num_qubits; num_qubits < max_num_qubits;
-             num_qubits++) {
+        for (std::size_t num_qubits = min_num_qubits;
+             num_qubits < max_num_qubits; num_qubits++) {
             const auto wires = createWires(gate_op, num_qubits);
             const auto ini_st =
                 createRandomStateVectorData<PrecisionT>(re, num_qubits);
@@ -131,7 +131,7 @@ void testGeneratorEqualsGateDerivativeForKernel(
             dispatcher.applyOperation(kernel, diff_st_2.data(), num_qubits,
                                       gate_op, wires, inverse, {-eps});
 
-            std::vector<ComplexT> gate_der_st(size_t{1U} << num_qubits);
+            std::vector<ComplexT> gate_der_st(std::size_t{1U} << num_qubits);
 
             std::transform(diff_st_1.cbegin(), diff_st_1.cend(),
                            diff_st_2.cbegin(), gate_der_st.begin(),
@@ -184,7 +184,8 @@ constexpr auto findGateOpForControlledGenerator() -> ControlledGateOperation {
     return ControlledGateOperation{};
 }
 
-template <size_t gntr_idx> constexpr auto controlledGeneratorGatePairsIter() {
+template <std::size_t gntr_idx>
+constexpr auto controlledGeneratorGatePairsIter() {
     if constexpr (gntr_idx < Constant::controlled_generator_names.size()) {
         constexpr auto gntr_op =
             std::get<0>(Constant::controlled_generator_names[gntr_idx]);
@@ -233,8 +234,8 @@ void testControlledGeneratorEqualsGateDerivativeForKernel(
 
     DYNAMIC_SECTION("Test controlled generator of "
                     << gate_name << " for kernel " << kernel_name) {
-        for (size_t num_qubits = min_num_qubits; num_qubits < max_num_qubits;
-             num_qubits++) {
+        for (std::size_t num_qubits = min_num_qubits;
+             num_qubits < max_num_qubits; num_qubits++) {
             const auto wires = createWires(gate_op, num_qubits);
             const std::vector<std::size_t> controls = {num_qubits - 1};
             const std::vector<bool> values = {true};
@@ -265,7 +266,7 @@ void testControlledGeneratorEqualsGateDerivativeForKernel(
                                            gate_op, controls, values, wires,
                                            inverse, {-eps});
 
-            std::vector<ComplexT> gate_der_st(size_t{1U} << num_qubits);
+            std::vector<ComplexT> gate_der_st(std::size_t{1U} << num_qubits);
 
             std::transform(diff_st_1.cbegin(), diff_st_1.cend(),
                            diff_st_2.cbegin(), gate_der_st.begin(),
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Nonparam.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Nonparam.cpp
index 94b0a3bd70..305d161f2e 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Nonparam.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Nonparam.cpp
@@ -88,7 +88,7 @@ template <typename PrecisionT, class GateImplementation>
 void testApplyIdentity() {
     using ComplexT = std::complex<PrecisionT>;
     const std::size_t num_qubits = 3;
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st_pre = createZeroState<ComplexT>(num_qubits);
         auto st_post = createZeroState<ComplexT>(num_qubits);
 
@@ -96,7 +96,7 @@ void testApplyIdentity() {
                                           false);
         CHECK(std::equal(st_pre.begin(), st_pre.end(), st_post.begin()));
     }
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st_pre = createZeroState<ComplexT>(num_qubits);
         auto st_post = createZeroState<ComplexT>(num_qubits);
         GateImplementation::applyHadamard(st_pre.data(), num_qubits, {index},
@@ -117,7 +117,7 @@ void testApplyPauliX() {
     const std::size_t num_qubits = 3;
     DYNAMIC_SECTION(GateImplementation::name
                     << ", PauliX - " << PrecisionToName<PrecisionT>::value) {
-        for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t index = 0; index < num_qubits; index++) {
             auto st = createZeroState<ComplexT>(num_qubits);
 
             GateImplementation::applyPauliX(st.data(), num_qubits, {index},
@@ -146,7 +146,7 @@ void testApplyPauliY() {
         {m, m, p, p, m, m, p, p},
         {m, p, m, p, m, p, m, p}};
 
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createPlusState<PrecisionT>(num_qubits);
 
         GateImplementation::applyPauliY(st.data(), num_qubits, {index}, false);
@@ -169,7 +169,7 @@ void testApplyPauliZ() {
         {p, p, m, m, p, p, m, m},
         {p, m, p, m, p, m, p, m}};
 
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createPlusState<PrecisionT>(num_qubits);
         GateImplementation::applyPauliZ(st.data(), num_qubits, {index}, false);
 
@@ -182,7 +182,7 @@ template <typename PrecisionT, class GateImplementation>
 void testApplyHadamard() {
     using ComplexT = std::complex<PrecisionT>;
     const std::size_t num_qubits = 3;
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createZeroState<ComplexT>(num_qubits);
 
         GateImplementation::applyHadamard(st.data(), num_qubits, {index},
@@ -211,7 +211,7 @@ template <typename PrecisionT, class GateImplementation> void testApplyS() {
         {r, r, i, i, r, r, i, i},
         {r, i, r, i, r, i, r, i}};
 
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createPlusState<PrecisionT>(num_qubits);
 
         GateImplementation::applyS(st.data(), num_qubits, {index}, false);
@@ -234,7 +234,7 @@ template <typename PrecisionT, class GateImplementation> void testApplyT() {
         {r, r, i, i, r, r, i, i},
         {r, i, r, i, r, i, r, i}};
 
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createPlusState<PrecisionT>(num_qubits);
 
         GateImplementation::applyT(st.data(), num_qubits, {index}, false);
@@ -274,7 +274,7 @@ template <typename PrecisionT, class GateImplementation> void testApplyCNOT() {
         auto st = createProductState<PrecisionT>("+00");
 
         // Test using |+00> state to generate 3-qubit GHZ state
-        for (size_t index = 1; index < num_qubits; index++) {
+        for (std::size_t index = 1; index < num_qubits; index++) {
             GateImplementation::applyCNOT(st.data(), num_qubits,
                                           {index - 1, index}, false);
         }
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Param.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Param.cpp
index 34d4c9e603..0e21555be7 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Param.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateImplementations_Param.cpp
@@ -128,7 +128,7 @@ void testApplyPhaseShift() {
         scaleVector(vec, coef);
     }
 
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createPlusState<PrecisionT>(num_qubits);
 
         GateImplementation::applyPhaseShift(st.data(), num_qubits, {index},
@@ -153,7 +153,7 @@ void testApplyRX() {
         std::vector<ComplexT>{{0.49757104789172696, 0.0},
                               {0, -0.867423225594017}}};
 
-    for (size_t index = 0; index < angles.size(); index++) {
+    for (std::size_t index = 0; index < angles.size(); index++) {
         auto st = createZeroState<ComplexT>(num_qubits);
 
         GateImplementation::applyRX(st.data(), num_qubits, {0}, false,
@@ -190,7 +190,7 @@ void testApplyRY() {
         getBestAllocator<ComplexT>()};
     DYNAMIC_SECTION(GateImplementation::name
                     << ", RY - " << PrecisionToName<PrecisionT>::value) {
-        for (size_t index = 0; index < angles.size(); index++) {
+        for (std::size_t index = 0; index < angles.size(); index++) {
             auto st = init_state;
             GateImplementation::applyRY(st.data(), num_qubits, {0}, false,
                                         {angles[index]});
@@ -237,7 +237,7 @@ void testApplyRZ() {
         scaleVector(vec, coef);
     }
 
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createPlusState<PrecisionT>(num_qubits);
 
         GateImplementation::applyRZ(st.data(), num_qubits, {index}, false,
@@ -246,7 +246,7 @@ void testApplyRZ() {
         CHECK(st == approx(expected[index]));
     }
 
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createPlusState<PrecisionT>(num_qubits);
 
         GateImplementation::applyRZ(st.data(), num_qubits, {index}, true,
@@ -272,14 +272,14 @@ void testApplyRot() {
         std::vector<ComplexT>(1U << num_qubits),
         std::vector<ComplexT>(1U << num_qubits)};
 
-    for (size_t i = 0; i < angles.size(); i++) {
+    for (std::size_t i = 0; i < angles.size(); i++) {
         const auto rot_mat = getRot<std::complex, PrecisionT>(
             angles[i][0], angles[i][1], angles[i][2]);
         expected[i][0] = rot_mat[0];
-        expected[i][size_t{1U} << (num_qubits - i - 1)] = rot_mat[2];
+        expected[i][std::size_t{1U} << (num_qubits - i - 1)] = rot_mat[2];
     }
 
-    for (size_t index = 0; index < num_qubits; index++) {
+    for (std::size_t index = 0; index < num_qubits; index++) {
         auto st = createZeroState<ComplexT>(num_qubits);
         GateImplementation::applyRot(st.data(), num_qubits, {index}, false,
                                      angles[index][0], angles[index][1],
@@ -1502,8 +1502,8 @@ void testApplyCRot() {
         std::vector<ComplexT> expected(8);
         const auto rot_mat =
             getRot<std::complex, PrecisionT>(angles[0], angles[1], angles[2]);
-        expected[size_t{1U} << (num_qubits - 1)] = rot_mat[0];
-        expected[(size_t{1U} << num_qubits) - 2] = rot_mat[2];
+        expected[std::size_t{1U} << (num_qubits - 1)] = rot_mat[0];
+        expected[(std::size_t{1U} << num_qubits) - 2] = rot_mat[2];
 
         GateImplementation::applyPauliX(st.data(), num_qubits, {0}, false);
 
@@ -2888,7 +2888,7 @@ TEMPLATE_TEST_CASE("StateVectorLQubitManaged::applyGlobalPhase",
         reinterpret_cast<ComplexT *>(sv_data.data()), sv_data.size());
     sv.applyOperation("GlobalPhase", {index}, inverse, {param});
     auto result_sv = sv.getDataVector();
-    for (size_t j = 0; j < exp2(num_qubits); j++) {
+    for (std::size_t j = 0; j < exp2(num_qubits); j++) {
         ComplexT tmp = phase * ComplexT(sv_data[j]);
         CHECK((real(result_sv[j])) == Approx(real(tmp)));
         CHECK((imag(result_sv[j])) == Approx(imag(tmp)));
@@ -2922,7 +2922,7 @@ TEMPLATE_TEST_CASE("StateVectorLQubitManaged::applyControlledGlobalPhase",
         reinterpret_cast<ComplexT *>(sv_data.data()), sv_data.size());
     sv.applyOperation("GlobalPhase", {0, 1}, {0, 1}, {2}, inverse, {-pi2});
     auto result_sv = sv.getDataVector();
-    for (size_t j = 0; j < exp2(num_qubits); j++) {
+    for (std::size_t j = 0; j < exp2(num_qubits); j++) {
         ComplexT tmp = (inverse) ? conj(phase[j]) : phase[j];
         tmp *= ComplexT(sv_data[j]);
         CHECK((real(result_sv[j])) == Approx(real(tmp)));
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateIndices.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateIndices.cpp
index f25b630c69..89c3c3ca46 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateIndices.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_GateIndices.cpp
@@ -31,7 +31,7 @@ TEST_CASE("generateBitPatterns", "[GateUtil]") {
         CHECK(bit_pattern == std::vector<std::size_t>{0});
     }
     SECTION("Qubit indices {i}") {
-        for (size_t i = 0; i < num_qubits; i++) {
+        for (std::size_t i = 0; i < num_qubits; i++) {
             std::vector<std::size_t> expected{0, std::size_t{1U}
                                                      << (num_qubits - i - 1)};
             auto bit_pattern = generateBitPatterns({i}, num_qubits);
@@ -68,7 +68,7 @@ TEST_CASE("getIndicesAfterExclusion", "[GateUtil]") {
         CHECK(indices == expected);
     }
     SECTION("Qubit indices {i}") {
-        for (size_t i = 0; i < num_qubits; i++) {
+        for (std::size_t i = 0; i < num_qubits; i++) {
             std::vector<std::size_t> expected{0, 1, 2, 3};
             expected.erase(expected.begin() + i);
 
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_Internal.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_Internal.cpp
index e81302268e..ebcb5ced45 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_Internal.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_Internal.cpp
@@ -116,21 +116,21 @@ TEMPLATE_TEST_CASE("createProductState", "[Test_Internal]", float, double) {
     }
 }
 
-size_t binomialCeff(size_t n, std::size_t r) {
+std::size_t binomialCeff(std::size_t n, std::size_t r) {
     std::size_t num = 1;
     std::size_t dem = 1;
-    for (size_t k = 0; k < r; k++) {
+    for (std::size_t k = 0; k < r; k++) {
         num *= (n - k);
     }
-    for (size_t k = 1; k <= r; k++) {
+    for (std::size_t k = 1; k <= r; k++) {
         dem *= k;
     }
     return num / dem;
 }
 
-size_t permSize(size_t n, std::size_t r) {
+std::size_t permSize(std::size_t n, std::size_t r) {
     std::size_t res = 1;
-    for (size_t k = 0; k < r; k++) {
+    for (std::size_t k = 0; k < r; k++) {
         res *= (n - k);
     }
     return res;
@@ -141,7 +141,7 @@ size_t permSize(size_t n, std::size_t r) {
  */
 TEST_CASE("createAllWires", "[Test_Internal]") {
     SECTION("order = false") {
-        const std::vector<std::pair<size_t, std::size_t>> test_pairs{
+        const std::vector<std::pair<std::size_t, std::size_t>> test_pairs{
             {4, 2},  {8, 3},  {12, 1}, {12, 2}, {12, 3},  {12, 4},  {12, 5},
             {12, 6}, {12, 7}, {12, 8}, {12, 9}, {12, 10}, {12, 11}, {12, 12}};
 
@@ -161,13 +161,13 @@ TEST_CASE("createAllWires", "[Test_Internal]") {
                           return std::lexicographical_compare(
                               v1.begin(), v1.end(), v2.begin(), v2.end());
                       }); // sort lexicographically
-            for (size_t i = 0; i < v.size() - 1; i++) {
+            for (std::size_t i = 0; i < v.size() - 1; i++) {
                 REQUIRE(v[i] != v[i + 1]); // all combinations must be different
             }
         }
     }
     SECTION("order = true") {
-        const std::vector<std::pair<size_t, std::size_t>> test_pairs{
+        const std::vector<std::pair<std::size_t, std::size_t>> test_pairs{
             {4, 2}, {8, 3}, {12, 1}, {12, 2}, {12, 3}, {12, 4}, {12, 5}};
 
         for (const auto &[n, r] : test_pairs) {
@@ -184,7 +184,7 @@ TEST_CASE("createAllWires", "[Test_Internal]") {
                           return std::lexicographical_compare(
                               v1.begin(), v1.end(), v2.begin(), v2.end());
                       }); // sort lexicographically
-            for (size_t i = 0; i < v.size() - 1; i++) {
+            for (std::size_t i = 0; i < v.size() - 1; i++) {
                 REQUIRE(v[i] != v[i + 1]); // all permutations must be different
             }
         }
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_KernelMap.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_KernelMap.cpp
index a1d11895cd..e833aa4520 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_KernelMap.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/gates/tests/Test_KernelMap.cpp
@@ -56,7 +56,7 @@ TEST_CASE("Test default kernels for gates are well defined", "[KernelMap]") {
         OperationKernelMap<Pennylane::Gates::GateOperation>::getInstance();
     for_each_enum<Threading, CPUMemoryModel>(
         [&instance](Threading threading, CPUMemoryModel memory_model) {
-            for (size_t num_qubits = 1; num_qubits < 27; num_qubits++) {
+            for (std::size_t num_qubits = 1; num_qubits < 27; num_qubits++) {
                 REQUIRE_NOTHROW(
                     instance.getKernelMap(num_qubits, threading, memory_model));
             }
@@ -69,7 +69,7 @@ TEST_CASE("Test default kernels for generators are well defined",
         OperationKernelMap<Pennylane::Gates::GeneratorOperation>::getInstance();
     for_each_enum<Threading, CPUMemoryModel>(
         [&instance](Threading threading, CPUMemoryModel memory_model) {
-            for (size_t num_qubits = 1; num_qubits < 27; num_qubits++) {
+            for (std::size_t num_qubits = 1; num_qubits < 27; num_qubits++) {
                 REQUIRE_NOTHROW(
                     instance.getKernelMap(num_qubits, threading, memory_model));
             }
@@ -82,7 +82,7 @@ TEST_CASE("Test default kernels for matrix operation are well defined",
         OperationKernelMap<Pennylane::Gates::MatrixOperation>::getInstance();
     for_each_enum<Threading, CPUMemoryModel>(
         [&instance](Threading threading, CPUMemoryModel memory_model) {
-            for (size_t num_qubits = 1; num_qubits < 27; num_qubits++) {
+            for (std::size_t num_qubits = 1; num_qubits < 27; num_qubits++) {
                 REQUIRE_NOTHROW(
                     instance.getKernelMap(num_qubits, threading, memory_model));
             }
@@ -216,7 +216,7 @@ TEST_CASE("Test KernelMap is consistent in extreme usecase", "[KernelMap]") {
 #ifdef _OPENMP
 #pragma omp for
 #endif
-        for (size_t i = 0; i < num_iter; i++) {
+        for (std::size_t i = 0; i < num_iter; i++) {
             const auto num_qubit = num_qubits[num_qubit_dist(re)];
             const auto threading = threadings[threading_dist(re)];
             const auto memory_model = memory_models[memory_model_dist(re)];
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/TransitionKernels.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/TransitionKernels.hpp
index 65f2fa5867..1957be2feb 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/TransitionKernels.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/TransitionKernels.hpp
@@ -50,7 +50,7 @@ template <typename fp_t> class TransitionKernel {
 
   public:
     //  outputs the next state and the qratio
-    virtual std::pair<size_t, fp_t> operator()(size_t) = 0;
+    virtual std::pair<std::size_t, fp_t> operator()(std::size_t) = 0;
     virtual ~TransitionKernel() = default;
 };
 
@@ -72,13 +72,13 @@ class LocalTransitionKernel : public TransitionKernel<fp_t> {
     std::uniform_int_distribution<std::size_t> distrib_binary_;
 
   public:
-    explicit LocalTransitionKernel(size_t num_qubits)
+    explicit LocalTransitionKernel(std::size_t num_qubits)
         : num_qubits_(num_qubits), gen_(std::mt19937(rd_())),
           distrib_num_qubits_(
               std::uniform_int_distribution<std::size_t>(0, num_qubits - 1)),
           distrib_binary_(std::uniform_int_distribution<std::size_t>(0, 1)) {}
 
-    std::pair<size_t, fp_t> operator()(size_t init_idx) final {
+    std::pair<std::size_t, fp_t> operator()(std::size_t init_idx) final {
         std::size_t qubit_site = distrib_num_qubits_(gen_);
         std::size_t qubit_value = distrib_binary_(gen_);
         std::size_t current_bit = (static_cast<unsigned>(init_idx) >>
@@ -86,13 +86,14 @@ class LocalTransitionKernel : public TransitionKernel<fp_t> {
                                   1U;
 
         if (qubit_value == current_bit) {
-            return std::pair<size_t, fp_t>(init_idx, 1);
+            return std::pair<std::size_t, fp_t>(init_idx, 1);
         }
         if (current_bit == 0) {
-            return std::pair<size_t, fp_t>(init_idx + std::pow(2, qubit_site),
-                                           1);
+            return std::pair<std::size_t, fp_t>(
+                init_idx + std::pow(2, qubit_site), 1);
         }
-        return std::pair<size_t, fp_t>(init_idx - std::pow(2, qubit_site), 1);
+        return std::pair<std::size_t, fp_t>(init_idx - std::pow(2, qubit_site),
+                                            1);
     }
 };
 
@@ -119,7 +120,7 @@ class NonZeroRandomTransitionKernel : public TransitionKernel<fp_t> {
         auto data = sv;
         sv_length_ = sv_length;
         // find nonzero candidates
-        for (size_t i = 0; i < sv_length_; i++) {
+        for (std::size_t i = 0; i < sv_length_; i++) {
             if (std::abs(data[i]) > min_error) {
                 non_zeros_.push_back(i);
             }
@@ -128,10 +129,10 @@ class NonZeroRandomTransitionKernel : public TransitionKernel<fp_t> {
         distrib_ = std::uniform_int_distribution<std::size_t>(
             0, non_zeros_.size() - 1);
     }
-    std::pair<size_t, fp_t>
+    std::pair<std::size_t, fp_t>
     operator()([[maybe_unused]] std::size_t init_idx) final {
         auto trans_idx = distrib_(gen_);
-        return std::pair<size_t, fp_t>(non_zeros_[trans_idx], 1);
+        return std::pair<std::size_t, fp_t>(non_zeros_[trans_idx], 1);
     }
 };
 
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/tests/Test_MeasurementsLQubit.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/tests/Test_MeasurementsLQubit.cpp
index 7ffdd28a48..8156b23b37 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/tests/Test_MeasurementsLQubit.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/tests/Test_MeasurementsLQubit.cpp
@@ -643,8 +643,8 @@ TEMPLATE_PRODUCT_TEST_CASE("Sample with Metropolis (Local Kernel)",
     std::vector<std::size_t> samples_decimal(num_samples, 0);
 
     // convert samples to decimal and then bin them in counts
-    for (size_t i = 0; i < num_samples; i++) {
-        for (size_t j = 0; j < num_qubits; j++) {
+    for (std::size_t i = 0; i < num_samples; i++) {
+        for (std::size_t j = 0; j < num_qubits; j++) {
             if (samples[i * num_qubits + j] != 0) {
                 samples_decimal[i] += twos[(num_qubits - 1 - j)];
             }
@@ -654,7 +654,7 @@ TEMPLATE_PRODUCT_TEST_CASE("Sample with Metropolis (Local Kernel)",
 
     // compute estimated probabilities from histogram
     std::vector<PrecisionT> probabilities(counts.size());
-    for (size_t i = 0; i < counts.size(); i++) {
+    for (std::size_t i = 0; i < counts.size(); i++) {
         probabilities[i] = counts[i] / (PrecisionT)num_samples;
     }
 
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.hpp
index 67466924d0..1bc1d6156d 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.hpp
@@ -165,7 +165,8 @@ template <class StateVectorT, bool use_openmp> struct HamiltonianApplyInPlace {
             auto allocator = sv.allocator();
             std::vector<ComplexT, decltype(allocator)> res(
                 sv.getLength(), ComplexT{0.0, 0.0}, allocator);
-            for (size_t term_idx = 0; term_idx < coeffs.size(); term_idx++) {
+            for (std::size_t term_idx = 0; term_idx < coeffs.size();
+                 term_idx++) {
                 StateVectorT tmp(sv);
                 terms[term_idx]->applyInPlace(tmp);
                 scaleAndAdd(tmp.getLength(), ComplexT{coeffs[term_idx], 0.0},
@@ -176,7 +177,8 @@ template <class StateVectorT, bool use_openmp> struct HamiltonianApplyInPlace {
                                  typename StateVectorT::MemoryStorageT,
                                  MemoryStorageLocation::External>) {
             std::vector<ComplexT> res(sv.getLength(), ComplexT{0.0, 0.0});
-            for (size_t term_idx = 0; term_idx < coeffs.size(); term_idx++) {
+            for (std::size_t term_idx = 0; term_idx < coeffs.size();
+                 term_idx++) {
                 std::vector<ComplexT> tmp_data_storage(
                     sv.getData(), sv.getData() + sv.getLength());
                 StateVectorT tmp(tmp_data_storage.data(),
@@ -220,7 +222,8 @@ struct HamiltonianApplyInPlace<StateVectorLQubitManaged<PrecisionT>, true> {
                 length, ComplexT{}, allocator);
 
 #pragma omp for
-            for (size_t term_idx = 0; term_idx < terms.size(); term_idx++) {
+            for (std::size_t term_idx = 0; term_idx < terms.size();
+                 term_idx++) {
                 try {
                     tmp.updateData(sv.getDataVector());
                     terms[term_idx]->applyInPlace(tmp);
@@ -280,7 +283,8 @@ struct HamiltonianApplyInPlace<StateVectorLQubitRaw<PrecisionT>, true> {
             }
 
 #pragma omp for
-            for (size_t term_idx = 0; term_idx < terms.size(); term_idx++) {
+            for (std::size_t term_idx = 0; term_idx < terms.size();
+                 term_idx++) {
                 std::copy(sv.getData(), sv.getData() + sv.getLength(),
                           tmp_data_storage->data());
                 try {
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/tests/Test_StateVectorLQubit.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/tests/Test_StateVectorLQubit.cpp
index 775fdfd8c1..efaf7451b6 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/tests/Test_StateVectorLQubit.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/tests/Test_StateVectorLQubit.cpp
@@ -262,7 +262,7 @@ TEMPLATE_PRODUCT_TEST_CASE("StateVectorLQubit::applyMatrix with a pointer",
 
     SECTION("Test with different number of wires") {
         const std::size_t num_qubits = 5;
-        for (size_t num_wires = 1; num_wires < num_qubits; num_wires++) {
+        for (std::size_t num_wires = 1; num_wires < num_qubits; num_wires++) {
             VectorT st_data_1 =
                 createRandomStateVectorData<PrecisionT>(re, num_qubits);
             VectorT st_data_2 = st_data_1;
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/tests/Test_StateVectorLQubitManaged.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/tests/Test_StateVectorLQubitManaged.cpp
index 23477db393..3c9f4abd90 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/tests/Test_StateVectorLQubitManaged.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/tests/Test_StateVectorLQubitManaged.cpp
@@ -48,7 +48,7 @@ TEMPLATE_TEST_CASE("StateVectorLQubitManaged::StateVectorLQubitManaged",
     using PrecisionT = TestType;
     std::mt19937_64 re{1337};
 
-    SECTION("StateVectorLQubitManaged<TestType> {size_t}") {
+    SECTION("StateVectorLQubitManaged<TestType> {std::size_t}") {
         REQUIRE(std::is_constructible_v<StateVectorLQubitManaged<TestType>,
                                         std::size_t>);
         const std::size_t num_qubits = 4;
@@ -58,7 +58,7 @@ TEMPLATE_TEST_CASE("StateVectorLQubitManaged::StateVectorLQubitManaged",
         REQUIRE(sv.getLength() == 16);
         REQUIRE(sv.getDataVector().size() == 16);
     }
-    SECTION("StateVectorLQubitManaged<TestType> {size_t}") {
+    SECTION("StateVectorLQubitManaged<TestType> {std::size_t}") {
         using TestVectorT = TestVector<std::complex<PrecisionT>>;
         REQUIRE(std::is_constructible_v<StateVectorLQubitManaged<TestType>,
                                         TestVectorT>);
@@ -123,7 +123,7 @@ TEMPLATE_TEST_CASE("StateVectorLQubitManaged::setBasisState",
         TestVectorT init_state =
             createRandomStateVectorData<PrecisionT>(re, num_qubits);
 
-        TestVectorT expected_state(size_t{1U} << num_qubits, 0.0,
+        TestVectorT expected_state(std::size_t{1U} << num_qubits, 0.0,
                                    getBestAllocator<ComplexT>());
         std::size_t index = GENERATE(0, 1, 2, 3, 4, 5, 6, 7);
         expected_state[index] = {1.0, 0.0};
@@ -149,7 +149,8 @@ TEMPLATE_TEST_CASE("StateVectorLQubitManaged::SetStateVector",
 
         auto expected_state = init_state;
 
-        for (size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1); i++) {
+        for (std::size_t i = 0; i < Pennylane::Util::exp2(num_qubits - 1);
+             i++) {
             std::swap(expected_state[i * 2], expected_state[i * 2 + 1]);
         }
 
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/LinearAlgebra.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/LinearAlgebra.hpp
index 25b86cf698..ca1724c190 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/LinearAlgebra.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/LinearAlgebra.hpp
@@ -95,7 +95,7 @@ omp_innerProd(const std::complex<T> *v1, const std::complex<T> *v2,
 #pragma omp parallel for num_threads(nthreads) default(none)                   \
     shared(v1, v2, data_size) reduction(sm : result)
 #endif
-    for (size_t i = 0; i < data_size; i++) {
+    for (std::size_t i = 0; i < data_size; i++) {
         result = ConstSum(result, ConstMult(*(v1 + i), *(v2 + i)));
     }
 }
@@ -169,7 +169,7 @@ omp_innerProdC(const std::complex<T> *v1, const std::complex<T> *v2,
 #pragma omp parallel for num_threads(nthreads) default(none)                   \
     shared(v1, v2, data_size) reduction(sm : result)
 #endif
-    for (size_t i = 0; i < data_size; i++) {
+    for (std::size_t i = 0; i < data_size; i++) {
         result = ConstSum(result, ConstMultConj(*(v1 + i), *(v2 + i)));
     }
 }
@@ -632,9 +632,9 @@ inline void omp_matrixMatProd(const std::complex<T> *m_left,
 #pragma omp parallel for default(none) shared(m_left, m_right, m_out)          \
     firstprivate(m, n, k)
 #endif
-        for (size_t row = 0; row < m; row++) {
-            for (size_t col = 0; col < n; col++) {
-                for (size_t blk = 0; blk < k; blk++) {
+        for (std::size_t row = 0; row < m; row++) {
+            for (std::size_t col = 0; col < n; col++) {
+                for (std::size_t blk = 0; blk < k; blk++) {
                     m_out[row * n + col] +=
                         m_left[row * k + blk] * m_right[col * n + blk];
                 }
@@ -646,9 +646,9 @@ inline void omp_matrixMatProd(const std::complex<T> *m_left,
 #pragma omp parallel for default(none) shared(m_left, m_right, m_out)          \
     firstprivate(m, n, k)
 #endif
-        for (size_t row = 0; row < m; row++) {
-            for (size_t col = 0; col < n; col++) {
-                for (size_t blk = 0; blk < k; blk++) {
+        for (std::size_t row = 0; row < m; row++) {
+            for (std::size_t col = 0; col < n; col++) {
+                for (std::size_t blk = 0; blk < k; blk++) {
                     m_out[row * n + col] += m_left[row * k + blk] *
                                             std::conj(m_right[col * n + blk]);
                 }
@@ -660,13 +660,13 @@ inline void omp_matrixMatProd(const std::complex<T> *m_left,
 #pragma omp parallel for default(none) shared(m_left, m_right, m_out)          \
     firstprivate(m, n, k)
 #endif
-        for (size_t row = 0; row < m; row += STRIDE) {
+        for (std::size_t row = 0; row < m; row += STRIDE) {
             std::size_t i;
             std::size_t j;
             std::size_t l;
             std::complex<T> t;
-            for (size_t col = 0; col < n; col += STRIDE) {
-                for (size_t blk = 0; blk < k; blk += STRIDE) {
+            for (std::size_t col = 0; col < n; col += STRIDE) {
+                for (std::size_t blk = 0; blk < k; blk += STRIDE) {
                     // cache-blocking:
                     for (i = row; i < std::min(row + STRIDE, m); i++) {
                         for (j = col; j < std::min(col + STRIDE, n); j++) {
@@ -779,17 +779,17 @@ inline auto matrixMatProd(const std::vector<std::complex<T>> m_left,
 template <
     class T,
     std::size_t STD_CROSSOVER = 1U << 12U> // NOLINT(readability-magic-numbers)
-void omp_scaleAndAdd(size_t dim, std::complex<T> a, const std::complex<T> *x,
-                     std::complex<T> *y) {
+void omp_scaleAndAdd(std::size_t dim, std::complex<T> a,
+                     const std::complex<T> *x, std::complex<T> *y) {
     if (dim < STD_CROSSOVER) {
-        for (size_t i = 0; i < dim; i++) {
+        for (std::size_t i = 0; i < dim; i++) {
             y[i] += a * x[i];
         }
     } else {
 #if defined(_OPENMP)
 #pragma omp parallel for default(none) firstprivate(a, dim, x, y)
 #endif
-        for (size_t i = 0; i < dim; i++) {
+        for (std::size_t i = 0; i < dim; i++) {
             y[i] += a * x[i];
         }
     }
@@ -807,8 +807,8 @@ void omp_scaleAndAdd(size_t dim, std::complex<T> a, const std::complex<T> *x,
  * @param y Vector to be added
  */
 template <class T>
-void blas_scaleAndAdd(size_t dim, std::complex<T> a, const std::complex<T> *x,
-                      std::complex<T> *y) {
+void blas_scaleAndAdd(std::size_t dim, std::complex<T> a,
+                      const std::complex<T> *x, std::complex<T> *y) {
     if constexpr (std::is_same_v<T, float>) {
         cblas_caxpy(dim, &a, x, 1, y, 1);
     } else if (std::is_same_v<T, double>) {
@@ -834,7 +834,7 @@ void blas_scaleAndAdd(size_t dim, std::complex<T> a, const std::complex<T> *x,
  * @param y Vector to be added
  */
 template <class T>
-void scaleAndAdd(size_t dim, std::complex<T> a, const std::complex<T> *x,
+void scaleAndAdd(std::size_t dim, std::complex<T> a, const std::complex<T> *x,
                  std::complex<T> *y) {
     if constexpr (USE_CBLAS) {
         blas_scaleAndAdd(dim, a, x, y);
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_LinearAlgebra.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_LinearAlgebra.cpp
index 5152085942..e5d6ae1eaa 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_LinearAlgebra.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_LinearAlgebra.cpp
@@ -36,7 +36,7 @@ using Pennylane::Util::randomUnitary;
 TEMPLATE_TEST_CASE("Inner product", "[Util][LinearAlgebra]", float, double) {
     SECTION("innerProd") {
         SECTION("Iterative increment") {
-            for (size_t i = 0; i < 12; i++) {
+            for (std::size_t i = 0; i < 12; i++) {
                 auto sz = static_cast<std::size_t>(1U << i);
                 std::vector<std::complex<TestType>> data1(sz, {1.0, 1.0});
                 std::vector<std::complex<TestType>> data2(sz, {1.0, 1.0});
@@ -60,7 +60,7 @@ TEMPLATE_TEST_CASE("Inner product", "[Util][LinearAlgebra]", float, double) {
     }
     SECTION("innerProd-inline") {
         SECTION("Iterative increment") {
-            for (size_t i = 0; i < 12; i++) {
+            for (std::size_t i = 0; i < 12; i++) {
                 auto sz = static_cast<std::size_t>(1U << i);
                 std::vector<std::complex<TestType>> data1(sz, {1.0, 1.0});
                 std::vector<std::complex<TestType>> data2(sz, {1.0, 1.0});
@@ -86,12 +86,12 @@ TEMPLATE_TEST_CASE("Inner product", "[Util][LinearAlgebra]", float, double) {
     }
     SECTION("innerProdC") {
         SECTION("Iterative increment") {
-            for (size_t i = 0; i < 12; i++) {
+            for (std::size_t i = 0; i < 12; i++) {
                 auto sz = static_cast<std::size_t>(1U << i);
                 std::vector<std::complex<TestType>> data1(sz, {1.0, 1.0});
                 std::vector<std::complex<TestType>> data2(sz, {1.0, 1.0});
-                std::complex<TestType> expected_result(size_t{1U} << (i + 1),
-                                                       0);
+                std::complex<TestType> expected_result(
+                    std::size_t{1U} << (i + 1), 0);
                 std::complex<TestType> result = Util::innerProdC(data1, data2);
                 CAPTURE(result);
                 CAPTURE(expected_result);
@@ -115,12 +115,12 @@ TEMPLATE_TEST_CASE("Inner product", "[Util][LinearAlgebra]", float, double) {
     }
     SECTION("innerProdC-inline") {
         SECTION("Iterative increment") {
-            for (size_t i = 0; i < 12; i++) {
+            for (std::size_t i = 0; i < 12; i++) {
                 auto sz = static_cast<std::size_t>(1U << i);
                 std::vector<std::complex<TestType>> data1(sz, {1.0, 1.0});
                 std::vector<std::complex<TestType>> data2(sz, {1.0, 1.0});
-                std::complex<TestType> expected_result(size_t{1U} << (i + 1),
-                                                       0);
+                std::complex<TestType> expected_result(
+                    std::size_t{1U} << (i + 1), 0);
                 std::complex<TestType> result = Util::innerProdC<TestType, 1>(
                     data1.data(), data2.data(), sz);
                 CAPTURE(result);
@@ -151,7 +151,7 @@ TEMPLATE_TEST_CASE("Product", "[Util][LinearAlgebra]", float, double) {
     using Util::Trans;
     SECTION("matrixVecProd") {
         SECTION("Simple Iterative with NoTranspose") {
-            for (size_t m = 2; m < 8; m++) {
+            for (std::size_t m = 2; m < 8; m++) {
                 std::vector<std::complex<TestType>> mat(m * m, {1.0, 1.0});
                 std::vector<std::complex<TestType>> v_in(m, {1.0, 1.0});
                 std::vector<std::complex<TestType>> v_expected(
@@ -165,7 +165,7 @@ TEMPLATE_TEST_CASE("Product", "[Util][LinearAlgebra]", float, double) {
             }
         }
         SECTION("Simple Iterative with Transpose") {
-            for (size_t m = 2; m < 8; m++) {
+            for (std::size_t m = 2; m < 8; m++) {
                 std::vector<std::complex<TestType>> mat(m * m, {1.0, 1.0});
                 std::vector<std::complex<TestType>> v_in(m, {1.0, 1.0});
                 std::vector<std::complex<TestType>> v_expected(
@@ -295,7 +295,7 @@ TEMPLATE_TEST_CASE("Product", "[Util][LinearAlgebra]", float, double) {
     }
     SECTION("vecMatrixProd") {
         SECTION("Simple Iterative") {
-            for (size_t m = 2; m < 8; m++) {
+            for (std::size_t m = 2; m < 8; m++) {
                 std::vector<TestType> mat(m * m, TestType{1.0});
                 std::vector<TestType> v_in(m, TestType{1.0});
                 std::vector<TestType> v_expected(m, static_cast<TestType>(m));
@@ -309,7 +309,7 @@ TEMPLATE_TEST_CASE("Product", "[Util][LinearAlgebra]", float, double) {
             }
         }
         SECTION("Zero Vector") {
-            for (size_t m = 2; m < 8; m++) {
+            for (std::size_t m = 2; m < 8; m++) {
                 std::vector<TestType> mat(m * m, 1);
                 std::vector<TestType> v_in(m, 0);
                 std::vector<TestType> v_expected(m, 0);
@@ -388,7 +388,7 @@ TEMPLATE_TEST_CASE("Product", "[Util][LinearAlgebra]", float, double) {
     }
     SECTION("matrixMatProd") {
         SECTION("Simple Iterative (Trans::Transpose)") {
-            for (size_t m = 2; m < 8; m++) {
+            for (std::size_t m = 2; m < 8; m++) {
                 std::vector<std::complex<TestType>> m_left(m * m, {1.0, 1.0});
                 std::vector<std::complex<TestType>> m_right(m * m, {1.0, 1.0});
                 std::vector<std::complex<TestType>> m_out_exp(
@@ -403,7 +403,7 @@ TEMPLATE_TEST_CASE("Product", "[Util][LinearAlgebra]", float, double) {
             }
         }
         SECTION("Simple Iterative (Trans::Adjoint)") {
-            for (size_t m = 2; m < 8; m++) {
+            for (std::size_t m = 2; m < 8; m++) {
                 std::vector<std::complex<TestType>> m_left(m * m, {1.0, 1.0});
                 std::vector<std::complex<TestType>> m_right(m * m, {1.0, 1.0});
                 std::vector<std::complex<TestType>> m_out_exp(
@@ -582,9 +582,9 @@ TEMPLATE_TEST_CASE("Product", "[Util][LinearAlgebra]", float, double) {
 TEMPLATE_TEST_CASE("Transpose", "[Util][LinearAlgebra]", float, double) {
     SECTION("CFTranspose") {
         SECTION("Simple Matrix") {
-            for (size_t m = 2; m < 10; m++) {
+            for (std::size_t m = 2; m < 10; m++) {
                 std::vector<TestType> mat(m * m, {0});
-                for (size_t i = 0; i < m; i++) {
+                for (std::size_t i = 0; i < m; i++) {
                     mat[i * m + i] = 1.0;
                 }
                 std::vector<TestType> mat_t(m * m);
@@ -635,9 +635,9 @@ TEMPLATE_TEST_CASE("Transpose", "[Util][LinearAlgebra]", float, double) {
     }
     SECTION("Transpose") {
         SECTION("Simple Matrix") {
-            for (size_t m = 2; m < 8; m++) {
+            for (std::size_t m = 2; m < 8; m++) {
                 std::vector<std::complex<TestType>> mat(m * m, {0, 0});
-                for (size_t i = 0; i < m; i++) {
+                for (std::size_t i = 0; i < m; i++) {
                     mat[i * m + i] = {1, 1};
                 }
                 std::vector<std::complex<TestType>> mat_t =
@@ -690,9 +690,9 @@ TEMPLATE_TEST_CASE("Transpose", "[Util][LinearAlgebra]", float, double) {
     }
     SECTION("Transpose<complex<T>>") {
         SECTION("Simple Matrix") {
-            for (size_t m = 2; m < 8; m++) {
+            for (std::size_t m = 2; m < 8; m++) {
                 std::vector<std::complex<TestType>> mat(m * m, {0, 0});
-                for (size_t i = 0; i < m; i++) {
+                for (std::size_t i = 0; i < m; i++) {
                     mat[i * m + i] = {1.0, 1.0};
                 }
                 std::vector<std::complex<TestType>> mat_t =
@@ -831,7 +831,7 @@ TEMPLATE_TEST_CASE("randomUnitary", "[Util][LinearAlgebra]", float, double) {
 
     std::mt19937 re{1337};
 
-    for (size_t num_qubits = 1; num_qubits <= 5; num_qubits++) {
+    for (std::size_t num_qubits = 1; num_qubits <= 5; num_qubits++) {
         const std::size_t dim = (1U << num_qubits);
         const auto unitary = randomUnitary<PrecisionT>(re, num_qubits);
 
@@ -847,7 +847,7 @@ TEMPLATE_TEST_CASE("randomUnitary", "[Util][LinearAlgebra]", float, double) {
 
         std::vector<std::complex<PrecisionT>> identity(
             dim * dim, std::complex<PrecisionT>{});
-        for (size_t i = 0; i < dim; i++) {
+        for (std::size_t i = 0; i < dim; i++) {
             identity[i * dim + i] = std::complex<PrecisionT>{1.0, 0.0};
         }
 
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp
index d639a34578..56c41a0df5 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/utils/tests/Test_SparseLinAlg.cpp
@@ -74,7 +74,7 @@ TEMPLATE_TEST_CASE("apply_Sparse_Matrix", "[Sparse]", float, double) {
     write_CSR_vectors(row_map, entries, values, data_size);
 
     SECTION("Testing sparse matrix dense vector product:") {
-        for (size_t vec = 0; vec < vectors.size(); vec++) {
+        for (std::size_t vec = 0; vec < vectors.size(); vec++) {
             std::vector<complex<TestType>> result = apply_Sparse_Matrix(
                 vectors[vec].data(), vectors[vec].size(), row_map.data(),
                 row_map.size(), entries.data(), values.data(), values.size());
diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tensor/tncuda/tests/Tests_TensorCuda.cpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tensor/tncuda/tests/Tests_TensorCuda.cpp
index c2713bf725..f44daf2bad 100644
--- a/pennylane_lightning/core/src/simulators/lightning_tensor/tensor/tncuda/tests/Tests_TensorCuda.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tensor/tncuda/tests/Tests_TensorCuda.cpp
@@ -55,7 +55,7 @@ TEMPLATE_TEST_CASE("TensorCuda::baseMethods", "[TensorCuda]", float, double) {
     const std::size_t rank = 3;
     const std::vector<std::size_t> modes = {0, 1, 2};
     const std::vector<std::size_t> extents = {2, 2, 2};
-    const size_t length = 8;
+    const std::size_t length = 8;
     DevTag<int> dev_tag{0, 0};
 
     TensorCuda<TestType> tensor{rank, modes, extents, dev_tag};
diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/MPSTNCuda.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/MPSTNCuda.hpp
index 178d987a10..81a115e685 100644
--- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/MPSTNCuda.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/MPSTNCuda.hpp
@@ -259,7 +259,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
             /* cutensornetStateAttributes_t */
             CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO,
             /* const void * */ &algo,
-            /* size_t */ sizeof(algo)));
+            /* std::size_t */ sizeof(algo)));
 
         PL_ABORT_IF_NOT(cutoff_mode == "rel" || cutoff_mode == "abs",
                         "cutoff_mode should either 'rel' or 'abs'.");
@@ -274,7 +274,7 @@ class MPSTNCuda final : public TNCudaBase<Precision, MPSTNCuda<Precision>> {
             /* cutensornetState_t */ BaseType::getQuantumState(),
             /* cutensornetStateAttributes_t */ svd_cutoff_mode,
             /* const void * */ &cutoff,
-            /* size_t */ sizeof(cutoff)));
+            /* std::size_t */ sizeof(cutoff)));
 
         BaseType::computeState(
             const_cast<int64_t **>(getSitesExtentsPtr().data()),
diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp
index 35f296930c..b545ef890d 100644
--- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/TNCudaBase.hpp
@@ -366,7 +366,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
         } else {
             DataBuffer<CFP_t, int> tmp(tensor_data_size, getDevTag(), true);
 
-            const std::size_t projected_modes_size = size_t(1)
+            const std::size_t projected_modes_size = std::size_t(1)
                                                      << projected_modes.size();
             for (std::size_t idx = 0; idx < projected_modes_size; idx++) {
                 for (std::size_t j = 0; j < projected_modes.size(); j++) {
@@ -417,7 +417,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
             /* cutensornetStateAccessor_t */ accessor,
             /* cutensornetAccessorAttributes_t */ accessor_attribute,
             /* const void * */ &numHyperSamples,
-            /* size_t */ sizeof(numHyperSamples)));
+            /* std::size_t */ sizeof(numHyperSamples)));
 
         // prepare the computation
         cutensornetWorkspaceDescriptor_t workDesc;
@@ -431,7 +431,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
         PL_CUTENSORNET_IS_SUCCESS(cutensornetAccessorPrepare(
             /* const cutensornetHandle_t */ getTNCudaHandle(),
             /* cutensornetStateAccessor_t*/ accessor,
-            /* size_t */ scratchSize,
+            /* std::size_t */ scratchSize,
             /* cutensornetWorkspaceDescriptor_t */ workDesc,
             /* cudaStream_t unused as of v24.03 */ 0x0));
 
@@ -520,7 +520,7 @@ class TNCudaBase : public TensornetBase<PrecisionT, Derived> {
         PL_CUTENSORNET_IS_SUCCESS(cutensornetStatePrepare(
             /* const cutensornetHandle_t */ getTNCudaHandle(),
             /* cutensornetState_t */ getQuantumState(),
-            /* size_t maxWorkspaceSizeDevice */ scratchSize,
+            /* std::size_t maxWorkspaceSizeDevice */ scratchSize,
             /* cutensornetWorkspaceDescriptor_t */ workDesc,
             /*  cudaStream_t unused as of v24.03*/ 0x0));
 
diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/base/tests/Tests_tensornetBase.cpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/base/tests/Tests_tensornetBase.cpp
index 0c919914dd..8edf472afb 100644
--- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/base/tests/Tests_tensornetBase.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/base/tests/Tests_tensornetBase.cpp
@@ -31,7 +31,7 @@ template <typename TypeList> void testTensornetBase() {
     if constexpr (!std::is_same_v<TypeList, void>) {
         using MPS_T = typename TypeList::Type;
 
-        const size_t num_qubits = 4;
+        const std::size_t num_qubits = 4;
         const std::size_t maxBondDim = 2;
         std::vector<std::size_t> qubitDims = {2, 2, 2, 2};
         DevTag<int> dev_tag{0, 0};
diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp
index 377ccc66ce..30e5d824f1 100644
--- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp
@@ -161,7 +161,7 @@ template <class PrecisionT> class TNCudaGateCache {
      * @brief Returns the key (index of the gate) of the first element in the
      * `device_gates_`.
      *
-     * @return size_t Key of the first element in the `device_gates_`.
+     * @return std::size_t Key of the first element in the `device_gates_`.
      */
     auto get_cache_head_idx() const -> std::size_t {
         auto it = device_gates_.begin();
diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/measurements/MeasurementsTNCuda.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/measurements/MeasurementsTNCuda.hpp
index e7234367bc..c64203941d 100644
--- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/measurements/MeasurementsTNCuda.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/measurements/MeasurementsTNCuda.hpp
@@ -240,7 +240,7 @@ template <class TensorNetT> class MeasurementsTNCuda {
             /* cutensornetExpectationAttributes_t */
             CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES,
             /* const void * */ &numHyperSamples,
-            /* size_t */ sizeof(numHyperSamples)));
+            /* std::size_t */ sizeof(numHyperSamples)));
 
         cutensornetWorkspaceDescriptor_t workDesc;
         PL_CUTENSORNET_IS_SUCCESS(cutensornetCreateWorkspaceDescriptor(
@@ -254,7 +254,7 @@ template <class TensorNetT> class MeasurementsTNCuda {
         PL_CUTENSORNET_IS_SUCCESS(cutensornetExpectationPrepare(
             /* const cutensornetHandle_t */ tensor_network_.getTNCudaHandle(),
             /* cutensornetStateExpectation_t */ expectation,
-            /* size_t maxWorkspaceSizeDevice */ scratchSize,
+            /* std::size_t maxWorkspaceSizeDevice */ scratchSize,
             /* cutensornetWorkspaceDescriptor_t */ workDesc,
             /* cudaStream_t [unused] */ 0x0));
 
@@ -264,9 +264,9 @@ template <class TensorNetT> class MeasurementsTNCuda {
         PL_ABORT_IF(worksize > scratchSize,
                     "Insufficient workspace size on Device.\n");
 
-        const std::size_t d_scratch_length = worksize / sizeof(size_t) + 1;
-        DataBuffer<size_t, int> d_scratch(d_scratch_length,
-                                          tensor_network_.getDevTag(), true);
+        const std::size_t d_scratch_length = worksize / sizeof(std::size_t) + 1;
+        DataBuffer<std::size_t, int> d_scratch(
+            d_scratch_length, tensor_network_.getDevTag(), true);
 
         setWorkSpaceMemory(tensor_network_.getTNCudaHandle(), workDesc,
                            reinterpret_cast<void *>(d_scratch.getData()),
diff --git a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/observables/ObservablesTNCuda.hpp b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/observables/ObservablesTNCuda.hpp
index e17e0c7d5c..cc4b8d2b93 100644
--- a/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/observables/ObservablesTNCuda.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/observables/ObservablesTNCuda.hpp
@@ -299,7 +299,7 @@ class TensorProdObsTNCuda : public ObservableTNCuda<TensorNetT> {
             return false;
         }
 
-        for (size_t i = 0; i < obs_.size(); i++) {
+        for (std::size_t i = 0; i < obs_.size(); i++) {
             if (*obs_[i] != *other_cast.obs_[i]) {
                 return false;
             }
@@ -424,7 +424,7 @@ class TensorProdObsTNCuda : public ObservableTNCuda<TensorNetT> {
         using Pennylane::Util::operator<<;
         std::ostringstream obs_stream;
         const auto obs_size = obs_.size();
-        for (size_t idx = 0; idx < obs_size; idx++) {
+        for (std::size_t idx = 0; idx < obs_size; idx++) {
             obs_stream << obs_[idx]->getObsName();
             if (idx != obs_size - 1) {
                 obs_stream << " @ ";
@@ -466,7 +466,7 @@ class HamiltonianTNCuda : public ObservableTNCuda<TensorNetT> {
             return false;
         }
 
-        for (size_t i = 0; i < obs_.size(); i++) {
+        for (std::size_t i = 0; i < obs_.size(); i++) {
             if (*obs_[i] != *other_cast.obs_[i]) {
                 return false;
             }
@@ -541,7 +541,7 @@ class HamiltonianTNCuda : public ObservableTNCuda<TensorNetT> {
         ss << "Hamiltonian: { 'coeffs' : " << BaseType::coeffs_
            << ", 'observables' : [";
         const auto term_size = BaseType::coeffs_.size();
-        for (size_t t = 0; t < term_size; t++) {
+        for (std::size_t t = 0; t < term_size; t++) {
             ss << obs_[t]->getObsName();
             if (t != term_size - 1) {
                 ss << ", ";
diff --git a/pennylane_lightning/core/src/utils/BitUtil.hpp b/pennylane_lightning/core/src/utils/BitUtil.hpp
index 162b7433ba..7905549b60 100644
--- a/pennylane_lightning/core/src/utils/BitUtil.hpp
+++ b/pennylane_lightning/core/src/utils/BitUtil.hpp
@@ -31,7 +31,7 @@ namespace Pennylane::Util {
  * @param val Size of the state vector. Expected to be a power of 2.
  * @return std::size_t Log2(val), or the state vector's number of qubits.
  */
-inline auto constexpr log2PerfectPower(size_t val) -> std::size_t {
+inline auto constexpr log2PerfectPower(std::size_t val) -> std::size_t {
     return static_cast<std::size_t>(std::countr_zero(val));
 }
 
@@ -42,7 +42,7 @@ inline auto constexpr log2PerfectPower(size_t val) -> std::size_t {
  * @return true
  * @return false
  */
-inline auto constexpr isPerfectPowerOf2(size_t value) -> bool {
+inline auto constexpr isPerfectPowerOf2(std::size_t value) -> bool {
     return std::has_single_bit(value);
 }
 
@@ -54,7 +54,7 @@ inline auto constexpr isPerfectPowerOf2(size_t value) -> bool {
  * @param nbits Number of bits to fill
  */
 template <class IntegerType = std::size_t>
-inline auto constexpr fillTrailingOnes(size_t nbits) -> IntegerType {
+inline auto constexpr fillTrailingOnes(std::size_t nbits) -> IntegerType {
     static_assert(std::is_integral_v<IntegerType> &&
                   std::is_unsigned_v<IntegerType>);
 
@@ -70,7 +70,7 @@ inline auto constexpr fillTrailingOnes(size_t nbits) -> IntegerType {
  * @param pos Position up to which bit one is filled.
  */
 template <class IntegerType = std::size_t>
-inline auto constexpr fillLeadingOnes(size_t pos) -> std::size_t {
+inline auto constexpr fillLeadingOnes(std::size_t pos) -> std::size_t {
     static_assert(std::is_integral_v<IntegerType> &&
                   std::is_unsigned_v<IntegerType>);
 
@@ -80,7 +80,7 @@ inline auto constexpr fillLeadingOnes(size_t pos) -> std::size_t {
 /**
  * @brief Swap bits in i-th and j-th position in place
  */
-inline auto constexpr bitswap(size_t bits, const std::size_t i,
+inline auto constexpr bitswap(std::size_t bits, const std::size_t i,
                               const std::size_t j) -> std::size_t {
     std::size_t x = ((bits >> i) ^ (bits >> j)) & 1U;
     return bits ^ ((x << i) | (x << j));
diff --git a/pennylane_lightning/core/src/utils/ConstantTestHelpers.hpp b/pennylane_lightning/core/src/utils/ConstantTestHelpers.hpp
index d6eea62a3b..2c862ba43c 100644
--- a/pennylane_lightning/core/src/utils/ConstantTestHelpers.hpp
+++ b/pennylane_lightning/core/src/utils/ConstantTestHelpers.hpp
@@ -81,9 +81,9 @@ template <typename T, std::size_t size>
 constexpr std::size_t count_unique(const std::array<T, size> &arr) {
     std::size_t res = 0;
 
-    for (size_t i = 0; i < size; i++) {
+    for (std::size_t i = 0; i < size; i++) {
         bool counted = false;
-        for (size_t j = 0; j < i; j++) {
+        for (std::size_t j = 0; j < i; j++) {
             if (arr[j] == arr[i]) {
                 counted = true;
                 break;
@@ -111,7 +111,7 @@ constexpr std::size_t count_unique(const std::array<T, size> &arr) {
     auto arr_cpd = arr;
     std::size_t dup_cnt = 0;
     std::sort(std::begin(arr_cpd), std::end(arr_cpd));
-    for (size_t i = 0; i < size - 1; i++) {
+    for (std::size_t i = 0; i < size - 1; i++) {
         if (arr_cpd[i] == arr_cpd[i + 1]) {
             dup_cnt++;
         }
diff --git a/pennylane_lightning/core/src/utils/ConstantUtil.hpp b/pennylane_lightning/core/src/utils/ConstantUtil.hpp
index a3f127e1b4..3451c859d5 100644
--- a/pennylane_lightning/core/src/utils/ConstantUtil.hpp
+++ b/pennylane_lightning/core/src/utils/ConstantUtil.hpp
@@ -46,7 +46,7 @@ namespace Pennylane::Util {
 template <typename Key, typename Value, std::size_t size>
 constexpr auto lookup(const std::array<std::pair<Key, Value>, size> &arr,
                       const Key &key) -> Value {
-    for (size_t idx = 0; idx < size; idx++) {
+    for (std::size_t idx = 0; idx < size; idx++) {
         if (std::get<0>(arr[idx]) == key) {
             return std::get<1>(arr[idx]);
         }
@@ -68,7 +68,7 @@ template <typename Key, typename Value, std::size_t size>
 constexpr auto
 reverse_lookup(const std::array<std::pair<Key, Value>, size> &arr,
                const Value &value) -> Key {
-    for (size_t idx = 0; idx < size; idx++) {
+    for (std::size_t idx = 0; idx < size; idx++) {
         if (std::get<1>(arr[idx]) == value) {
             return std::get<0>(arr[idx]);
         }
@@ -87,7 +87,7 @@ reverse_lookup(const std::array<std::pair<Key, Value>, size> &arr,
 template <typename U, std::size_t size>
 constexpr auto array_has_elem(const std::array<U, size> &arr, const U &elem)
     -> bool {
-    for (size_t idx = 0; idx < size; idx++) {
+    for (std::size_t idx = 0; idx < size; idx++) {
         if (arr[idx] == elem) {
             return true;
         }
@@ -108,7 +108,7 @@ template <typename Key, typename Value, std::size_t size>
 constexpr auto
 array_contains(const std::array<std::pair<Key, Value>, size> &arr,
                const Value &value) -> bool {
-    for (size_t idx = 0; idx < size; idx++) {
+    for (std::size_t idx = 0; idx < size; idx++) {
         if (std::get<1>(arr[idx]) == value) {
             return true;
         }
diff --git a/pennylane_lightning/core/src/utils/TestHelpers.hpp b/pennylane_lightning/core/src/utils/TestHelpers.hpp
index d4cff49a03..556ab24035 100644
--- a/pennylane_lightning/core/src/utils/TestHelpers.hpp
+++ b/pennylane_lightning/core/src/utils/TestHelpers.hpp
@@ -48,7 +48,7 @@ template <class T, class Alloc = std::allocator<T>> struct PLApprox {
             return false;
         }
 
-        for (size_t i = 0; i < lhs.size(); i++) {
+        for (std::size_t i = 0; i < lhs.size(); i++) {
             if constexpr (Util::is_complex_v<T>) {
                 if (lhs[i].real() != Approx(comp_[i].real())
                                          .epsilon(epsilon_)
@@ -226,7 +226,7 @@ isApproxEqual(const Data_t *data1, const std::size_t length1,
         return false;
     }
 
-    for (size_t idx = 0; idx < length1; idx++) {
+    for (std::size_t idx = 0; idx < length1; idx++) {
         if (!isApproxEqual(data1[idx], data2[idx], eps)) {
             return false;
         }
@@ -281,8 +281,8 @@ void scaleVector(std::vector<std::complex<Data_t>, Alloc> &data,
  * @brief create |0>^N
  */
 template <typename ComplexT>
-auto createZeroState(size_t num_qubits) -> TestVector<ComplexT> {
-    TestVector<ComplexT> res(size_t{1U} << num_qubits, {0.0, 0.0},
+auto createZeroState(std::size_t num_qubits) -> TestVector<ComplexT> {
+    TestVector<ComplexT> res(std::size_t{1U} << num_qubits, {0.0, 0.0},
                              getBestAllocator<ComplexT>());
     res[0] = ComplexT{1.0, 0.0};
     return res;
@@ -292,8 +292,8 @@ auto createZeroState(size_t num_qubits) -> TestVector<ComplexT> {
  * @brief create |+>^N
  */
 template <typename ComplexT>
-auto createPlusState_(size_t num_qubits) -> TestVector<ComplexT> {
-    TestVector<ComplexT> res(size_t{1U} << num_qubits, 1.0,
+auto createPlusState_(std::size_t num_qubits) -> TestVector<ComplexT> {
+    TestVector<ComplexT> res(std::size_t{1U} << num_qubits, 1.0,
                              getBestAllocator<ComplexT>());
     for (auto &elem : res) {
         elem /= std::sqrt(1U << num_qubits);
@@ -305,7 +305,7 @@ auto createPlusState_(size_t num_qubits) -> TestVector<ComplexT> {
  * @brief create |+>^N
  */
 template <typename PrecisionT>
-auto createPlusState(size_t num_qubits)
+auto createPlusState(std::size_t num_qubits)
     -> TestVector<std::complex<PrecisionT>> {
     TestVector<std::complex<PrecisionT>> res(
         std::size_t{1U} << num_qubits, 1.0,
@@ -326,7 +326,7 @@ auto createRandomStateVectorData(RandomEngine &re, std::size_t num_qubits)
         std::size_t{1U} << num_qubits, 0.0,
         getBestAllocator<std::complex<PrecisionT>>());
     std::uniform_real_distribution<PrecisionT> dist;
-    for (size_t idx = 0; idx < (size_t{1U} << num_qubits); idx++) {
+    for (std::size_t idx = 0; idx < (std::size_t{1U} << num_qubits); idx++) {
         res[idx] = {dist(re), dist(re)};
     }
 
@@ -355,9 +355,9 @@ auto createProductState(std::string_view str) -> TestVector<ComplexT> {
     std::vector<PrecisionT> minus{INVSQRT2<PrecisionT>(),
                                   -INVSQRT2<PrecisionT>()};
 
-    for (size_t k = 0; k < (size_t{1U} << str.length()); k++) {
+    for (std::size_t k = 0; k < (std::size_t{1U} << str.length()); k++) {
         PrecisionT elem = 1.0;
-        for (size_t n = 0; n < str.length(); n++) {
+        for (std::size_t n = 0; n < str.length(); n++) {
             char c = str[n];
             const std::size_t wire = str.length() - 1 - n;
             switch (c) {
@@ -390,7 +390,7 @@ auto createProductState(std::string_view str) -> TestVector<ComplexT> {
  * @return std::vector<typename StateVectorT::ComplexT>>
  */
 template <class StateVectorT>
-auto createNonTrivialState(size_t num_qubits = 3) {
+auto createNonTrivialState(std::size_t num_qubits = 3) {
     using PrecisionT = typename StateVectorT::PrecisionT;
     using ComplexT = typename StateVectorT::ComplexT;
 
@@ -406,7 +406,7 @@ auto createNonTrivialState(size_t num_qubits = 3) {
     std::vector<std::vector<PrecisionT>> phase;
 
     PrecisionT initial_phase = 0.7;
-    for (size_t n_qubit = 0; n_qubit < num_qubits; n_qubit++) {
+    for (std::size_t n_qubit = 0; n_qubit < num_qubits; n_qubit++) {
         gates.emplace_back("RX");
         gates.emplace_back("RY");
 
@@ -498,7 +498,7 @@ bool operator==(const std::vector<T, AllocA> &lhs,
     if (lhs.size() != rhs.size()) {
         return false;
     }
-    for (size_t idx = 0; idx < lhs.size(); idx++) {
+    for (std::size_t idx = 0; idx < lhs.size(); idx++) {
         if (lhs[idx] != rhs[idx]) {
             return false;
         }
@@ -519,7 +519,7 @@ template <class T>
 auto linspace(T start, T end, std::size_t num_points) -> std::vector<T> {
     std::vector<T> data(num_points);
     T step = (end - start) / (num_points - 1);
-    for (size_t i = 0; i < num_points; i++) {
+    for (std::size_t i = 0; i < num_points; i++) {
         data[i] = start + (step * i);
     }
     return data;
@@ -532,7 +532,7 @@ std::vector<int> randomIntVector(RandomEngine &re, std::size_t size, int min,
     std::vector<int> res;
 
     res.reserve(size);
-    for (size_t i = 0; i < size; i++) {
+    for (std::size_t i = 0; i < size; i++) {
         res.emplace_back(dist(re));
     }
     return res;
@@ -566,9 +566,9 @@ auto randomUnitary(RandomEngine &re, std::size_t num_qubits)
     // This algorithm is unstable but works for a small matrix.
     // Use QR decomposition when we have LAPACK support.
 
-    for (size_t row2 = 0; row2 < dim; row2++) {
+    for (std::size_t row2 = 0; row2 < dim; row2++) {
         ComplexT *row2_p = res.data() + row2 * dim;
-        for (size_t row1 = 0; row1 < row2; row1++) {
+        for (std::size_t row1 = 0; row1 < row2; row1++) {
             const ComplexT *row1_p = res.data() + row1 * dim;
             ComplexT dot12 = std::inner_product(
                 row1_p, row1_p + dim, row2_p, std::complex<PrecisionT>(),
@@ -586,7 +586,7 @@ auto randomUnitary(RandomEngine &re, std::size_t num_qubits)
     }
 
     // Normalize each row
-    for (size_t row = 0; row < dim; row++) {
+    for (std::size_t row = 0; row < dim; row++) {
         ComplexT *row_p = res.data() + row * dim;
         PrecisionT norm2 = std::sqrt(squaredNorm(row_p, dim));
 
diff --git a/pennylane_lightning/core/src/utils/TestHelpersWires.hpp b/pennylane_lightning/core/src/utils/TestHelpersWires.hpp
index 1e90d56255..24d18b3044 100644
--- a/pennylane_lightning/core/src/utils/TestHelpersWires.hpp
+++ b/pennylane_lightning/core/src/utils/TestHelpersWires.hpp
@@ -114,7 +114,7 @@ class CombinationGenerator : public WiresGenerator {
     std::vector<std::vector<std::size_t>> all_perms_;
 
   public:
-    void comb(size_t n, std::size_t r) {
+    void comb(std::size_t n, std::size_t r) {
         if (r == 0) {
             all_perms_.push_back(v_);
             return;
@@ -129,7 +129,7 @@ class CombinationGenerator : public WiresGenerator {
         comb(n - 1, r);
     }
 
-    CombinationGenerator(size_t n, std::size_t r) {
+    CombinationGenerator(std::size_t n, std::size_t r) {
         v_.resize(r);
         comb(n, r);
     }
@@ -156,12 +156,12 @@ class PermutationGenerator : public WiresGenerator {
     std::vector<std::size_t> v;
 
   public:
-    void perm(size_t n, std::size_t r) {
+    void perm(std::size_t n, std::size_t r) {
         if (r == 0) {
             all_perms_.push_back(v);
             return;
         }
-        for (size_t i = 0; i < n; i++) {
+        for (std::size_t i = 0; i < n; i++) {
             v[r - 1] = available_elems_[i];
             std::swap(available_elems_[n - 1], available_elems_[i]);
             perm(n - 1, r - 1);
@@ -169,7 +169,7 @@ class PermutationGenerator : public WiresGenerator {
         }
     }
 
-    PermutationGenerator(size_t n, std::size_t r) {
+    PermutationGenerator(std::size_t n, std::size_t r) {
         v.resize(r);
 
         available_elems_.resize(n);
@@ -191,7 +191,7 @@ class PermutationGenerator : public WiresGenerator {
  * @param gate_op Gate operation
  * @param order Whether the ordering matters (if true, permutation is used)
  */
-auto inline createAllWires(size_t n_qubits,
+auto inline createAllWires(std::size_t n_qubits,
                            Pennylane::Gates::GateOperation gate_op, bool order)
     -> std::vector<std::vector<std::size_t>> {
     if (array_has_elem(Pennylane::Gates::Constant::multi_qubit_gates,
@@ -199,12 +199,12 @@ auto inline createAllWires(size_t n_qubits,
         // make all possible 2^N permutations
         std::vector<std::vector<std::size_t>> res;
         res.reserve((1U << n_qubits) - 1);
-        for (size_t k = 1; k < (static_cast<std::size_t>(1U) << n_qubits);
+        for (std::size_t k = 1; k < (static_cast<std::size_t>(1U) << n_qubits);
              k++) {
             std::vector<std::size_t> wires;
             wires.reserve(std::popcount(k));
 
-            for (size_t i = 0; i < n_qubits; i++) {
+            for (std::size_t i = 0; i < n_qubits; i++) {
                 if (((k >> i) & 1U) == 1U) {
                     wires.emplace_back(i);
                 }
diff --git a/pennylane_lightning/core/src/utils/Util.hpp b/pennylane_lightning/core/src/utils/Util.hpp
index 22a12d4082..e0d3a1170e 100644
--- a/pennylane_lightning/core/src/utils/Util.hpp
+++ b/pennylane_lightning/core/src/utils/Util.hpp
@@ -245,7 +245,7 @@ inline auto exp2(const std::size_t &n) -> std::size_t {
  * @param value Value to calculate for.
  * @return std::size_t
  */
-inline auto log2(size_t value) -> std::size_t {
+inline auto log2(std::size_t value) -> std::size_t {
     return static_cast<std::size_t>(std::log2(value));
 }
 
@@ -257,7 +257,7 @@ inline auto log2(size_t value) -> std::size_t {
  * @param qubits the number of qubits in the circuit
  * @return decimal value for the qubit at specified index
  */
-inline auto maxDecimalForQubit(size_t qubitIndex, std::size_t qubits)
+inline auto maxDecimalForQubit(std::size_t qubitIndex, std::size_t qubits)
     -> std::size_t {
     PL_ASSERT(qubitIndex < qubits);
     return exp2(qubits - qubitIndex - 1);
@@ -315,7 +315,7 @@ inline auto operator<<(std::ostream &os, const std::vector<T> &vec)
     -> std::ostream & {
     os << '[';
     if (!vec.empty()) {
-        for (size_t i = 0; i < vec.size() - 1; i++) {
+        for (std::size_t i = 0; i < vec.size() - 1; i++) {
             os << vec[i] << ", ";
         }
         os << vec.back();
@@ -396,7 +396,7 @@ inline auto sorting_indices(const T *arr, std::size_t length)
 
     // indices will be sorted in accordance to the array provided.
     sort(indices.begin(), indices.end(),
-         [&arr](size_t i1, std::size_t i2) { return arr[i1] < arr[i2]; });
+         [&arr](std::size_t i1, std::size_t i2) { return arr[i1] < arr[i2]; });
 
     return indices;
 }
@@ -429,12 +429,12 @@ inline auto
 getIndicesAfterExclusion(const std::vector<std::size_t> &indicesToExclude,
                          std::size_t num_qubits) -> std::vector<std::size_t> {
     std::vector<std::size_t> indices;
-    for (size_t i = 0; i < num_qubits; i++) {
+    for (std::size_t i = 0; i < num_qubits; i++) {
         indices.emplace_back(i);
     }
 
     for (auto j : indicesToExclude) {
-        for (size_t i = 0; i < indices.size(); i++) {
+        for (std::size_t i = 0; i < indices.size(); i++) {
             if (j == indices[i]) {
                 indices.erase(indices.begin() + static_cast<int>(i));
             }
@@ -461,13 +461,14 @@ inline auto generateBitsPatterns(const std::vector<std::size_t> &qubitIndices,
     indices.reserve(exp2(qubitIndices.size()));
     indices.emplace_back(0);
 
-    for (size_t index_it0 = 0; index_it0 < qubitIndices.size(); index_it0++) {
+    for (std::size_t index_it0 = 0; index_it0 < qubitIndices.size();
+         index_it0++) {
         std::size_t index_it = qubitIndices.size() - 1 - index_it0;
         const std::size_t value =
             maxDecimalForQubit(qubitIndices[index_it], num_qubits);
 
         const std::size_t currentSize = indices.size();
-        for (size_t j = 0; j < currentSize; j++) {
+        for (std::size_t j = 0; j < currentSize; j++) {
             indices.emplace_back(indices[j] + value);
         }
     }
@@ -481,7 +482,7 @@ inline auto generateBitsPatterns(const std::vector<std::size_t> &qubitIndices,
  * @param new_axes new axes distribution.
  * @return unsigned int with the new transposed index.
  */
-inline auto transposed_state_index(size_t ind,
+inline auto transposed_state_index(std::size_t ind,
                                    const std::vector<std::size_t> &new_axes)
     -> std::size_t {
     std::size_t new_index = 0;
@@ -508,7 +509,7 @@ auto transpose_state_tensor(const std::vector<T> &tensor,
                             const std::vector<std::size_t> &new_axes)
     -> std::vector<T> {
     std::vector<T> transposed_tensor(tensor.size());
-    for (size_t ind = 0; ind < tensor.size(); ind++) {
+    for (std::size_t ind = 0; ind < tensor.size(); ind++) {
         transposed_tensor[ind] = tensor[transposed_state_index(ind, new_axes)];
     }
     return transposed_tensor;
@@ -529,8 +530,8 @@ auto kronProd(const std::vector<T> &diagA, const std::vector<T> &diagB)
     -> std::vector<T> {
     std::vector<T> result(diagA.size() * diagB.size(), 0);
 
-    for (size_t i = 0; i < diagA.size(); i++) {
-        for (size_t j = 0; j < diagB.size(); j++) {
+    for (std::size_t i = 0; i < diagA.size(); i++) {
+        for (std::size_t j = 0; j < diagB.size(); j++) {
             result[i * diagB.size() + j] = diagA[i] * diagB[j];
         }
     }
@@ -550,11 +551,11 @@ auto kronProd(const std::vector<T> &diagA, const std::vector<T> &diagB)
  * @return is_Hermitian Is the matrix a Hermitian matrix or not.
  */
 template <typename T>
-bool is_Hermitian(size_t n, std::size_t lda,
+bool is_Hermitian(std::size_t n, std::size_t lda,
                   const std::vector<std::complex<T>> &mat) {
     // TODO OMP support
-    for (size_t i = 0; i < n; i++) {
-        for (size_t j = i + 1; j < lda; j++) {
+    for (std::size_t i = 0; i < n; i++) {
+        for (std::size_t j = i + 1; j < lda; j++) {
             if (mat[j + i * lda] != std::conj(mat[i + j * n])) {
                 return false;
             }
diff --git a/pennylane_lightning/core/src/utils/UtilLinearAlg.hpp b/pennylane_lightning/core/src/utils/UtilLinearAlg.hpp
index 818ff1b25d..d0f58e6dcc 100644
--- a/pennylane_lightning/core/src/utils/UtilLinearAlg.hpp
+++ b/pennylane_lightning/core/src/utils/UtilLinearAlg.hpp
@@ -102,8 +102,8 @@ void compute_diagonalizing_gates(int n, int lda,
     std::vector<std::complex<T>> ah(n * lda, {0.0, 0.0});
 
     // TODO optmize transpose
-    for (size_t i = 0; i < static_cast<std::size_t>(n); i++) {
-        for (size_t j = 0; j <= i; j++) {
+    for (std::size_t i = 0; i < static_cast<std::size_t>(n); i++) {
+        for (std::size_t j = 0; j <= i; j++) {
             ah[j * n + i] = Ah[i * lda + j];
         }
     }
diff --git a/pennylane_lightning/core/src/utils/cuda_utils/CSRMatrix.hpp b/pennylane_lightning/core/src/utils/cuda_utils/CSRMatrix.hpp
index ae86d74e9f..0b09c463ea 100644
--- a/pennylane_lightning/core/src/utils/cuda_utils/CSRMatrix.hpp
+++ b/pennylane_lightning/core/src/utils/cuda_utils/CSRMatrix.hpp
@@ -42,10 +42,10 @@ template <class Precision, class index_type> class CSRMatrix {
     std::vector<std::complex<Precision>> values_;
 
   public:
-    CSRMatrix(size_t num_rows, std::size_t nnz)
+    CSRMatrix(std::size_t num_rows, std::size_t nnz)
         : columns_(nnz, 0), csrOffsets_(num_rows + 1, 0), values_(nnz){};
 
-    CSRMatrix(size_t num_rows, std::size_t nnz, index_type *column_ptr,
+    CSRMatrix(std::size_t num_rows, std::size_t nnz, index_type *column_ptr,
               index_type *csrOffsets_ptr, std::complex<Precision> *value_ptr)
         : columns_(column_ptr, column_ptr + nnz),
           csrOffsets_(csrOffsets_ptr, csrOffsets_ptr + num_rows + 1),
@@ -109,8 +109,9 @@ auto splitCSRMatrix(MPIManager &mpi_manager, const std::size_t &num_rows,
     std::size_t current_global_row, current_global_col;
     std::size_t block_row_id, block_col_id;
     std::size_t local_row_id, local_col_id;
-    for (size_t row = 0; row < num_rows; row++) {
-        for (size_t col_idx = static_cast<std::size_t>(csrOffsets_ptr[row]);
+    for (std::size_t row = 0; row < num_rows; row++) {
+        for (std::size_t col_idx =
+                 static_cast<std::size_t>(csrOffsets_ptr[row]);
              col_idx < static_cast<std::size_t>(csrOffsets_ptr[row + 1]);
              col_idx++) {
             current_global_row = row;
@@ -141,14 +142,14 @@ auto splitCSRMatrix(MPIManager &mpi_manager, const std::size_t &num_rows,
     }
 
     // Add OpenMP support here later.
-    for (size_t block_row_id = 0; block_row_id < num_row_blocks;
+    for (std::size_t block_row_id = 0; block_row_id < num_row_blocks;
          block_row_id++) {
-        for (size_t block_col_id = 0; block_col_id < num_col_blocks;
+        for (std::size_t block_col_id = 0; block_col_id < num_col_blocks;
              block_col_id++) {
             auto &localSpMat = splitSparseMatrix[block_row_id][block_col_id];
             std::size_t local_csr_offset_size =
                 localSpMat.getCsrOffsets().size();
-            for (size_t i0 = 1; i0 < local_csr_offset_size; i0++) {
+            for (std::size_t i0 = 1; i0 < local_csr_offset_size; i0++) {
                 localSpMat.getCsrOffsets()[i0] +=
                     localSpMat.getCsrOffsets()[i0 - 1];
             }
@@ -179,7 +180,7 @@ auto scatterCSRMatrix(MPIManager &mpi_manager,
 
     if (mpi_manager.getRank() == root) {
         nnzs.reserve(matrix.size());
-        for (size_t j = 0; j < matrix.size(); j++) {
+        for (std::size_t j = 0; j < matrix.size(); j++) {
             nnzs.push_back(matrix[j].getValues().size());
         }
     }
@@ -194,7 +195,7 @@ auto scatterCSRMatrix(MPIManager &mpi_manager,
         localCSRMatrix.getColumns() = matrix[0].getColumns();
     }
 
-    for (size_t k = 1; k < num_col_blocks; k++) {
+    for (std::size_t k = 1; k < num_col_blocks; k++) {
         std::size_t dest = k;
         std::size_t source = root;
 
diff --git a/pennylane_lightning/core/src/utils/cuda_utils/LinearAlg.hpp b/pennylane_lightning/core/src/utils/cuda_utils/LinearAlg.hpp
index d1441b6aa8..1dad632532 100644
--- a/pennylane_lightning/core/src/utils/cuda_utils/LinearAlg.hpp
+++ b/pennylane_lightning/core/src/utils/cuda_utils/LinearAlg.hpp
@@ -330,7 +330,7 @@ inline SharedCusparseHandle make_shared_cusparse_handle() {
  * alpha*SparseMat*X + beta)
  *
  * @tparam index_type Integer type for offsets, indices and number of elements
- * (size_t for the moment).
+ * (std::size_t for the moment).
  * @tparam Precision Floating data-type.
  * @tparam DevTypeID Integer type of device id.
  *
@@ -461,7 +461,7 @@ SparseMV_cuSparse(const index_type *csrOffsets_ptr,
  * alpha*SparseMat*X + beta)
  *
  * @tparam index_type Integer type for offsets, indices and number of elements
- * (size_t for the moment).
+ * (std::size_t for the moment).
  * @tparam Precision Floating data-type.
  * @tparam DevTypeID Integer type of device id.
  *
diff --git a/pennylane_lightning/core/src/utils/cuda_utils/MPILinearAlg.hpp b/pennylane_lightning/core/src/utils/cuda_utils/MPILinearAlg.hpp
index f3db919525..eda55aec34 100644
--- a/pennylane_lightning/core/src/utils/cuda_utils/MPILinearAlg.hpp
+++ b/pennylane_lightning/core/src/utils/cuda_utils/MPILinearAlg.hpp
@@ -24,7 +24,7 @@ namespace Pennylane::LightningGPU::Util {
  * alpha*SparseMat*X + beta)
  *
  * @tparam index_type Integer type for offsets, indices and number of elements
- * (size_t for the moment).
+ * (std::size_t for the moment).
  * @tparam Precision Floating data-type.
  * @tparam DevTypeID Integer type of device id.
  *
@@ -57,7 +57,7 @@ inline void SparseMV_cuSparseMPI(
     mpi_manager.Barrier();
 
     std::vector<CSRMatrix<Precision, index_type>> localCSRMatVector;
-    for (size_t i = 0; i < mpi_manager.getSize(); i++) {
+    for (std::size_t i = 0; i < mpi_manager.getSize(); i++) {
         auto localCSRMat = scatterCSRMatrix<Precision, index_type>(
             mpi_manager, csrmatrix_blocks[i], length_local, 0);
         localCSRMatVector.push_back(localCSRMat);
@@ -67,7 +67,7 @@ inline void SparseMV_cuSparseMPI(
     DataBuffer<CFP_t, int> d_res_per_block{length_local, device_id, stream_id,
                                            true};
 
-    for (size_t i = 0; i < mpi_manager.getSize(); i++) {
+    for (std::size_t i = 0; i < mpi_manager.getSize(); i++) {
         // Need to investigate if non-blocking MPI operation can improve
         // performace here.
         auto &localCSRMatrix = localCSRMatVector[i];
diff --git a/pennylane_lightning/core/src/utils/cuda_utils/MPIManager.hpp b/pennylane_lightning/core/src/utils/cuda_utils/MPIManager.hpp
index f8d5427e67..80c9de32a2 100644
--- a/pennylane_lightning/core/src/utils/cuda_utils/MPIManager.hpp
+++ b/pennylane_lightning/core/src/utils/cuda_utils/MPIManager.hpp
@@ -366,7 +366,7 @@ class MPIManager final {
     /**
      * @brief Get the MPI version.
      */
-    auto getVersion() const -> std::tuple<size_t, std::size_t> {
+    auto getVersion() const -> std::tuple<std::size_t, std::size_t> {
         return {version_, subversion_};
     }
 
@@ -815,7 +815,7 @@ class MPIManager final {
      * @param key Rank assignment control.
      * @return new MPIManager object.
      */
-    auto split(size_t color, std::size_t key) -> MPIManager {
+    auto split(std::size_t color, std::size_t key) -> MPIManager {
         MPI_Comm newcomm;
         int colorInt = static_cast<int>(color);
         int keyInt = static_cast<int>(key);
diff --git a/pennylane_lightning/core/src/utils/cuda_utils/cuda_helpers.hpp b/pennylane_lightning/core/src/utils/cuda_utils/cuda_helpers.hpp
index ba329affb1..df9d96d1e6 100644
--- a/pennylane_lightning/core/src/utils/cuda_utils/cuda_helpers.hpp
+++ b/pennylane_lightning/core/src/utils/cuda_utils/cuda_helpers.hpp
@@ -372,7 +372,7 @@ static std::pair<int, int> getGPUArch(int device_number = 0) {
 /**
  * @brief Get free memory size on GPU device
  *
- * @return size_t
+ * @return std::size_t
  */
 inline std::size_t getFreeMemorySize() {
     std::size_t freeBytes{0}, totalBytes{0};
diff --git a/pennylane_lightning/core/src/utils/cuda_utils/tests/mpi/Test_CSRMatrix.cpp b/pennylane_lightning/core/src/utils/cuda_utils/tests/mpi/Test_CSRMatrix.cpp
index d47635f7b5..e31dbe94b4 100644
--- a/pennylane_lightning/core/src/utils/cuda_utils/tests/mpi/Test_CSRMatrix.cpp
+++ b/pennylane_lightning/core/src/utils/cuda_utils/tests/mpi/Test_CSRMatrix.cpp
@@ -80,19 +80,19 @@ TEMPLATE_TEST_CASE("CRSMatrix::Split", "[CRSMatrix]", float, double) {
             std::vector<index_type> localcsrOffsets = {0, 2, 4, 6, 8};
             std::vector<index_type> local_indices = {0, 3, 1, 2, 1, 2, 0, 3};
 
-            for (size_t i = 0; i < localcsrOffsets.size(); i++) {
+            for (std::size_t i = 0; i < localcsrOffsets.size(); i++) {
                 CHECK(CSRMatVector[0][0].getCsrOffsets()[i] ==
                       localcsrOffsets[i]);
                 CHECK(CSRMatVector[1][1].getCsrOffsets()[i] ==
                       localcsrOffsets[i]);
             }
 
-            for (size_t i = 0; i < local_indices.size(); i++) {
+            for (std::size_t i = 0; i < local_indices.size(); i++) {
                 CHECK(CSRMatVector[0][0].getColumns()[i] == local_indices[i]);
                 CHECK(CSRMatVector[1][1].getColumns()[i] == local_indices[i]);
             }
 
-            for (size_t i = 0; i < 8; i++) {
+            for (std::size_t i = 0; i < 8; i++) {
                 CHECK(CSRMatVector[0][0].getValues()[i] == values[i]);
                 CHECK(CSRMatVector[1][1].getValues()[i] == values[i + 8]);
             }
@@ -114,7 +114,7 @@ TEMPLATE_TEST_CASE("CRSMatrix::Split", "[CRSMatrix]", float, double) {
         std::size_t local_num_rows = num_rows / size;
 
         std::vector<CSRMatrix<TestType, index_type>> localCSRMatVector;
-        for (size_t i = 0; i < mpi_manager.getSize(); i++) {
+        for (std::size_t i = 0; i < mpi_manager.getSize(); i++) {
             auto localCSRMat = scatterCSRMatrix<TestType, index_type>(
                 mpi_manager, csrmatrix_blocks[i], local_num_rows, 0);
             localCSRMatVector.push_back(localCSRMat);
@@ -124,31 +124,31 @@ TEMPLATE_TEST_CASE("CRSMatrix::Split", "[CRSMatrix]", float, double) {
         std::vector<index_type> local_indices = {0, 3, 1, 2, 1, 2, 0, 3};
 
         if (rank == 0) {
-            for (size_t i = 0; i < localcsrOffsets.size(); i++) {
+            for (std::size_t i = 0; i < localcsrOffsets.size(); i++) {
                 CHECK(localCSRMatVector[0].getCsrOffsets()[i] ==
                       localcsrOffsets[i]);
             }
 
-            for (size_t i = 0; i < local_indices.size(); i++) {
+            for (std::size_t i = 0; i < local_indices.size(); i++) {
                 CHECK(localCSRMatVector[0].getColumns()[i] == local_indices[i]);
             }
 
-            for (size_t i = 0; i < 8; i++) {
+            for (std::size_t i = 0; i < 8; i++) {
                 CHECK(localCSRMatVector[0].getValues()[i] == values[i]);
             }
 
             CHECK(localCSRMatVector[1].getValues().size() == 0);
         } else {
-            for (size_t i = 0; i < localcsrOffsets.size(); i++) {
+            for (std::size_t i = 0; i < localcsrOffsets.size(); i++) {
                 CHECK(localCSRMatVector[1].getCsrOffsets()[i] ==
                       localcsrOffsets[i]);
             }
 
-            for (size_t i = 0; i < local_indices.size(); i++) {
+            for (std::size_t i = 0; i < local_indices.size(); i++) {
                 CHECK(localCSRMatVector[1].getColumns()[i] == local_indices[i]);
             }
 
-            for (size_t i = 0; i < 8; i++) {
+            for (std::size_t i = 0; i < 8; i++) {
                 CHECK(localCSRMatVector[1].getValues()[i] == values[i + 8]);
             }
 
diff --git a/pennylane_lightning/core/src/utils/cuda_utils/tests/mpi/Test_MPIManager.cpp b/pennylane_lightning/core/src/utils/cuda_utils/tests/mpi/Test_MPIManager.cpp
index 9b31f3199c..9642f4c0aa 100644
--- a/pennylane_lightning/core/src/utils/cuda_utils/tests/mpi/Test_MPIManager.cpp
+++ b/pennylane_lightning/core/src/utils/cuda_utils/tests/mpi/Test_MPIManager.cpp
@@ -59,7 +59,7 @@ TEMPLATE_TEST_CASE("MPIManager::Scatter", "[MPIManager]", float, double) {
         int root = 0;
         cp_t result(2.0 * rank, 2.0 * rank + 1);
         if (rank == root) {
-            for (size_t i = 0; i < sendBuf.size(); i++) {
+            for (std::size_t i = 0; i < sendBuf.size(); i++) {
                 cp_t data(2.0 * i, 2.0 * i + 1);
                 sendBuf[i] = data;
             }
@@ -76,7 +76,7 @@ TEMPLATE_TEST_CASE("MPIManager::Scatter", "[MPIManager]", float, double) {
         int root = 0;
         cp_t result(2.0 * rank, 2.0 * rank + 1);
         if (rank == root) {
-            for (size_t i = 0; i < sendBuf.size(); i++) {
+            for (std::size_t i = 0; i < sendBuf.size(); i++) {
                 cp_t data(2.0 * i, 2.0 * i + 1);
                 sendBuf[i] = data;
             }
@@ -104,7 +104,7 @@ TEMPLATE_TEST_CASE("MPIManager::Allgather", "[MPIManager]", float, double) {
 
         mpi_manager.Allgather<cp_t>(sendBuf, recvBuf);
 
-        for (size_t i = 0; i < recvBuf.size(); i++) {
+        for (std::size_t i = 0; i < recvBuf.size(); i++) {
             CHECK(recvBuf[i].real() == static_cast<PrecisionT>(i));
             CHECK(recvBuf[i].imag() == static_cast<PrecisionT>(0));
         }
@@ -116,7 +116,7 @@ TEMPLATE_TEST_CASE("MPIManager::Allgather", "[MPIManager]", float, double) {
 
         mpi_manager.Allgather<cp_t>(sendBuf, recvBuf);
 
-        for (size_t i = 0; i < recvBuf.size(); i++) {
+        for (std::size_t i = 0; i < recvBuf.size(); i++) {
             CHECK(recvBuf[i].real() == static_cast<PrecisionT>(i));
             CHECK(recvBuf[i].imag() == static_cast<PrecisionT>(0));
         }
@@ -126,7 +126,7 @@ TEMPLATE_TEST_CASE("MPIManager::Allgather", "[MPIManager]", float, double) {
         cp_t sendBuf = {static_cast<PrecisionT>(rank), 0};
 
         auto recvBuf = mpi_manager.allgather<cp_t>(sendBuf);
-        for (size_t i = 0; i < recvBuf.size(); i++) {
+        for (std::size_t i = 0; i < recvBuf.size(); i++) {
             CHECK(recvBuf[i].real() == static_cast<PrecisionT>(i));
             CHECK(recvBuf[i].imag() == static_cast<PrecisionT>(0));
         }
@@ -136,7 +136,7 @@ TEMPLATE_TEST_CASE("MPIManager::Allgather", "[MPIManager]", float, double) {
         std::vector<cp_t> sendBuf(1, {static_cast<PrecisionT>(rank), 0});
         auto recvBuf = mpi_manager.allgather<cp_t>(sendBuf);
 
-        for (size_t i = 0; i < recvBuf.size(); i++) {
+        for (std::size_t i = 0; i < recvBuf.size(); i++) {
             CHECK(recvBuf[i].real() == static_cast<PrecisionT>(i));
             CHECK(recvBuf[i].imag() == static_cast<PrecisionT>(0));
         }
diff --git a/pennylane_lightning/core/src/utils/tests/Test_BitUtil.cpp b/pennylane_lightning/core/src/utils/tests/Test_BitUtil.cpp
index 4cb8e3e47e..6d043b3ed7 100644
--- a/pennylane_lightning/core/src/utils/tests/Test_BitUtil.cpp
+++ b/pennylane_lightning/core/src/utils/tests/Test_BitUtil.cpp
@@ -27,7 +27,7 @@ TEST_CASE("Utility bit operations", "[Util][BitUtil]") {
     SECTION("isPerfectPowerOf2") {
         std::size_t n = 1U;
         CHECK(Util::isPerfectPowerOf2(n));
-        for (size_t k = 0; k < sizeof(size_t) - 2; k++) {
+        for (std::size_t k = 0; k < sizeof(std::size_t) - 2; k++) {
             n *= 2;
             CHECK(Util::isPerfectPowerOf2(n));
             CHECK(!Util::isPerfectPowerOf2(n + 1));
@@ -38,7 +38,7 @@ TEST_CASE("Utility bit operations", "[Util][BitUtil]") {
         CHECK(!Util::isPerfectPowerOf2(1077U));
         CHECK(!Util::isPerfectPowerOf2(1000000000U));
 
-        if constexpr (sizeof(size_t) == 8) {
+        if constexpr (sizeof(std::size_t) == 8) {
             // if std::size_t is uint64_t
             CHECK(!Util::isPerfectPowerOf2(1234556789012345678U));
         }
diff --git a/pennylane_lightning/core/src/utils/tests/Test_Util.cpp b/pennylane_lightning/core/src/utils/tests/Test_Util.cpp
index 58e5df3a04..4f52318fdd 100644
--- a/pennylane_lightning/core/src/utils/tests/Test_Util.cpp
+++ b/pennylane_lightning/core/src/utils/tests/Test_Util.cpp
@@ -70,13 +70,13 @@ TEMPLATE_TEST_CASE("Constant values", "[Util]", float, double) {
 // NOLINTNEXTLINE: Avoid complexity errors
 TEMPLATE_TEST_CASE("Utility math functions", "[Util]", float, double) {
     SECTION("exp2: 2^n") {
-        for (size_t i = 0; i < 10; i++) {
+        for (std::size_t i = 0; i < 10; i++) {
             CHECK(Util::exp2(i) == static_cast<std::size_t>(std::pow(2, i)));
         }
     }
     SECTION("maxDecimalForQubit") {
-        for (size_t num_qubits = 0; num_qubits < 4; num_qubits++) {
-            for (size_t index = 0; index < num_qubits; index++) {
+        for (std::size_t num_qubits = 0; num_qubits < 4; num_qubits++) {
+            for (std::size_t index = 0; index < num_qubits; index++) {
                 CHECK(Util::maxDecimalForQubit(index, num_qubits) ==
                       static_cast<std::size_t>(
                           std::pow(2, num_qubits - index - 1)));
diff --git a/pennylane_lightning/core/src/utils/tests/Test_UtilLinearAlg.cpp b/pennylane_lightning/core/src/utils/tests/Test_UtilLinearAlg.cpp
index 40835a9f79..9e91a95a24 100644
--- a/pennylane_lightning/core/src/utils/tests/Test_UtilLinearAlg.cpp
+++ b/pennylane_lightning/core/src/utils/tests/Test_UtilLinearAlg.cpp
@@ -45,11 +45,11 @@ TEMPLATE_TEST_CASE("Util::compute_diagonalizing_gates", "[Util][LinearAlgebra]",
         std::vector<std::complex<TestType>> Unitaries;
         compute_diagonalizing_gates(N, LDA, A, eigenVals, Unitaries);
 
-        for (size_t i = 0; i < expectedEigenVals.size(); i++) {
+        for (std::size_t i = 0; i < expectedEigenVals.size(); i++) {
             CHECK(eigenVals[i] == Approx(expectedEigenVals[i]).margin(1e-6));
         }
 
-        for (size_t i = 0; i < Unitaries.size(); i++) {
+        for (std::size_t i = 0; i < Unitaries.size(); i++) {
             CHECK(Unitaries[i].real() ==
                   Approx(expectedUnitaries[i].real()).margin(1e-6));
             CHECK(Unitaries[i].imag() ==