From 8a688c813d6695c80ee17c28ee550dedcf038ad3 Mon Sep 17 00:00:00 2001
From: Anurudh Peduri <anurudhp97@gmail.com>
Date: Thu, 7 Nov 2024 16:51:42 +0100
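Subject: [PATCH] Upgrade `AddK` - use dtype and simplify controls

`AddK` now takes the register `dtype` (`QInt`, `QUInt` or `QMontgomeryUInt`)
in place of `bitsize`/`signed`, and a boolean `is_controlled` (single control
register named `ctrl`) in place of `cvs`. Other control specs go through
`.controlled(CtrlSpec(...))`. Call sites and tests are updated accordingly.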
---
 qualtran/bloqs/arithmetic/addition.ipynb      | 120 ++++------
 qualtran/bloqs/arithmetic/addition.py         | 223 ++++++------------
 qualtran/bloqs/arithmetic/addition_test.py    |  36 ++-
 qualtran/bloqs/arithmetic/negate.py           |   8 +-
 .../bloqs/block_encoding/sparse_matrix.py     |   5 +-
 qualtran/bloqs/mod_arithmetic/mod_addition.py |  22 +-
 qualtran/bloqs/mod_arithmetic/mod_division.py |   4 +-
 .../mod_arithmetic/mod_multiplication.py      |  19 +-
 .../bloqs/mod_arithmetic/mod_subtraction.py   |  49 ++--
 .../rotations/rz_via_phase_gradient_test.py   |  10 +-
 qualtran/resource_counting/classify_bloqs.py  |   2 +-
 11 files changed, 199 insertions(+), 299 deletions(-)

diff --git a/qualtran/bloqs/arithmetic/addition.ipynb b/qualtran/bloqs/arithmetic/addition.ipynb
index 4cd90a386..d379603de 100644
--- a/qualtran/bloqs/arithmetic/addition.ipynb
+++ b/qualtran/bloqs/arithmetic/addition.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "c231998b",
+   "id": "c9483021",
    "metadata": {
     "cq.autogen": "title_cell"
    },
@@ -13,7 +13,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b572d769",
+   "id": "2dd760e6",
    "metadata": {
     "cq.autogen": "top_imports"
    },
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d3f4ce95",
+   "id": "95d8998c",
    "metadata": {
     "cq.autogen": "Add.bloq_doc.md"
    },
@@ -55,7 +55,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6426bb53",
+   "id": "4cb5b704",
    "metadata": {
     "cq.autogen": "Add.bloq_doc.py"
    },
@@ -66,7 +66,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "3c8caad1",
+   "id": "69f415a2",
    "metadata": {
     "cq.autogen": "Add.example_instances.md"
    },
@@ -77,46 +77,46 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "73929b65",
+   "id": "b0c9815d",
    "metadata": {
-    "cq.autogen": "Add.add_small"
+    "cq.autogen": "Add.add_symb"
    },
    "outputs": [],
    "source": [
-    "add_small = Add(QUInt(bitsize=4))"
+    "n = sympy.Symbol('n')\n",
+    "add_symb = Add(QInt(bitsize=n))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "fbd4dbe9",
+   "id": "b481bd7d",
    "metadata": {
-    "cq.autogen": "Add.add_large"
+    "cq.autogen": "Add.add_small"
    },
    "outputs": [],
    "source": [
-    "add_large = Add(QUInt(bitsize=64))"
+    "add_small = Add(QUInt(bitsize=4))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e749abdb",
+   "id": "90bc9e29",
    "metadata": {
-    "cq.autogen": "Add.add_symb"
+    "cq.autogen": "Add.add_large"
    },
    "outputs": [],
    "source": [
-    "n = sympy.Symbol('n')\n",
-    "add_symb = Add(QInt(bitsize=n))"
+    "add_large = Add(QUInt(bitsize=64))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e5b746c1",
+   "id": "2fbbb318",
    "metadata": {
-    "collapsed": false
+    "cq.autogen": "Add.add_diff_size_regs"
    },
    "outputs": [],
    "source": [
@@ -125,7 +125,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "c3c1a7e4",
+   "id": "253212ec",
    "metadata": {
     "cq.autogen": "Add.graphical_signature.md"
    },
@@ -136,7 +136,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "0f92be09",
+   "id": "ed24479f",
    "metadata": {
     "cq.autogen": "Add.graphical_signature.py"
    },
@@ -149,7 +149,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "350cb374",
+   "id": "1c55412d",
    "metadata": {
     "cq.autogen": "Add.call_graph.md"
    },
@@ -160,7 +160,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c36a3fc4",
+   "id": "90569a0c",
    "metadata": {
     "cq.autogen": "Add.call_graph.py"
    },
@@ -174,7 +174,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "ffc76cc5",
+   "id": "8ae20541",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.bloq_doc.md"
    },
@@ -202,7 +202,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c2450d62",
+   "id": "7c51ccda",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.bloq_doc.py"
    },
@@ -213,7 +213,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "b5c79e25",
+   "id": "6e0af527",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.example_instances.md"
    },
@@ -224,7 +224,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e31acd2b",
+   "id": "f062b497",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.add_oop_symb"
    },
@@ -237,7 +237,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ef8871a6",
+   "id": "0bde421f",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.add_oop_small"
    },
@@ -249,7 +249,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e57b8c8d",
+   "id": "ce284cd8",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.add_oop_large"
    },
@@ -260,7 +260,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "01915f46",
+   "id": "052cf86a",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.graphical_signature.md"
    },
@@ -271,7 +271,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d117e345",
+   "id": "672073d0",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.graphical_signature.py"
    },
@@ -284,7 +284,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "3b6469e0",
+   "id": "ab488ede",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.call_graph.md"
    },
@@ -295,7 +295,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ff84301f",
+   "id": "311c752e",
    "metadata": {
     "cq.autogen": "OutOfPlaceAdder.call_graph.py"
    },
@@ -309,7 +309,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "2813f173",
+   "id": "62b014b7",
    "metadata": {
     "cq.autogen": "AddK.bloq_doc.md"
    },
@@ -324,13 +324,12 @@
     "only clifford operations.\n",
     "\n",
     "#### Parameters\n",
-    " - `bitsize`: Number of bits used to represent each integer.\n",
+    " - `dtype`: data type of the input register `x`\n",
     " - `k`: The classical integer value to be added to x.\n",
-    " - `cvs`: A tuple of control values. Each entry specifies whether that control line is a \"positive\" control (`cv[i]=1`) or a \"negative\" control (`cv[i]=0`).\n",
-    " - `signed`: A boolean condition which controls whether the x register holds a value represented in 2's Complement or Unsigned. This affects the ability to add a negative constant. \n",
+    " - `is_controlled`: if True, construct a singly-controlled bloq. \n",
     "\n",
     "#### Registers\n",
-    " - `x`: A bitsize-sized input register (register x above). \n",
+    " - `x`: register of type `self.dtype` \n",
     "\n",
     "#### References\n",
     " - [Improved quantum circuits for elliptic curve discrete logarithms](https://arxiv.org/abs/2001.09580). Haner et al. 2020. Section 3: Components. \"Integer addition\" and Fig 2a.\n"
@@ -339,7 +338,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "cd255bf9",
+   "id": "3155864a",
    "metadata": {
     "cq.autogen": "AddK.bloq_doc.py"
    },
@@ -350,7 +349,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7538f9a5",
+   "id": "9009b701",
    "metadata": {
     "cq.autogen": "AddK.example_instances.md"
    },
@@ -361,43 +360,43 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4305289f",
+   "id": "7e2a9fdf",
    "metadata": {
     "cq.autogen": "AddK.add_k"
    },
    "outputs": [],
    "source": [
     "n, k = sympy.symbols('n k')\n",
-    "add_k = AddK(bitsize=n, k=k)"
+    "add_k = AddK(QUInt(n), k=k)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f6048819",
+   "id": "f4e218da",
    "metadata": {
     "cq.autogen": "AddK.add_k_small"
    },
    "outputs": [],
    "source": [
-    "add_k_small = AddK(bitsize=4, k=2, signed=False)"
+    "add_k_small = AddK(QUInt(4), k=2)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b67fd469",
+   "id": "97073a76",
    "metadata": {
     "cq.autogen": "AddK.add_k_large"
    },
    "outputs": [],
    "source": [
-    "add_k_large = AddK(bitsize=64, k=-23, signed=True)"
+    "add_k_large = AddK(QInt(64), k=-23)"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "b8b04228",
+   "id": "57f1032d",
    "metadata": {
     "cq.autogen": "AddK.graphical_signature.md"
    },
@@ -408,7 +407,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e93e7f2e",
+   "id": "d5d7dad4",
    "metadata": {
     "cq.autogen": "AddK.graphical_signature.py"
    },
@@ -421,7 +420,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "13552795",
+   "id": "f1a51ffb",
    "metadata": {
     "cq.autogen": "AddK.call_graph.md"
    },
@@ -432,7 +431,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d8d6584e",
+   "id": "cbfc2d39",
    "metadata": {
     "cq.autogen": "AddK.call_graph.py"
    },
@@ -443,37 +442,16 @@
     "show_call_graph(add_k_g)\n",
     "show_counts_sigma(add_k_sigma)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8853ae5d",
-   "metadata": {
-    "cq.autogen": "Add.add_diff_size_regs"
-   },
-   "outputs": [],
-   "source": [
-    "add_diff_size_regs = Add(QUInt(bitsize=4), QUInt(bitsize=16))"
-   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.7"
+   "name": "python"
   }
  },
  "nbformat": 4,
diff --git a/qualtran/bloqs/arithmetic/addition.py b/qualtran/bloqs/arithmetic/addition.py
index 7b208bd36..d2292ea95 100644
--- a/qualtran/bloqs/arithmetic/addition.py
+++ b/qualtran/bloqs/arithmetic/addition.py
@@ -11,19 +11,9 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+from collections import Counter
 from functools import cached_property
-from typing import (
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Sequence,
-    Set,
-    Tuple,
-    TYPE_CHECKING,
-    Union,
-)
+from typing import Dict, Iterator, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
 
 import attrs
 import cirq
@@ -42,7 +32,7 @@
     CtrlSpec,
     DecomposeTypeError,
     GateWithRegisters,
-    QBit,
+    QAny,
     QInt,
     QMontgomeryUInt,
     QUInt,
@@ -52,24 +42,18 @@
     Soquet,
     SoquetT,
 )
-from qualtran.bloqs.basic_gates import CNOT, XGate
-from qualtran.bloqs.mcmt import MultiControlX
+from qualtran.bloqs.basic_gates import CNOT
 from qualtran.bloqs.mcmt.and_bloq import And
 from qualtran.cirq_interop import decompose_from_cirq_style_method
 from qualtran.drawing import directional_text_box, Text
 from qualtran.resource_counting.generalizers import ignore_split_join
 from qualtran.simulation.classical_sim import add_ints
+from qualtran.symbolics import is_symbolic, SymbolicInt
 
 if TYPE_CHECKING:
     from qualtran.drawing import WireSymbol
-    from qualtran.resource_counting import (
-        BloqCountDictT,
-        BloqCountT,
-        MutableBloqCountDictT,
-        SympySymbolAllocator,
-    )
+    from qualtran.resource_counting import BloqCountDictT, SympySymbolAllocator
     from qualtran.simulation.classical_sim import ClassicalValT
-    from qualtran.symbolics import SymbolicInt
 
 
 @frozen
@@ -405,180 +389,119 @@ class AddK(Bloq):
     only clifford operations.
 
     Args:
-        bitsize: Number of bits used to represent each integer.
+        dtype: data type of the input register `x`
         k: The classical integer value to be added to x.
-        cvs: A tuple of control values. Each entry specifies whether that control line is a
-            "positive" control (`cv[i]=1`) or a "negative" control (`cv[i]=0`).
-        signed: A boolean condition which controls whether the x register holds a value represented
-            in 2's Complement or Unsigned. This affects the ability to add a negative constant.
+        is_controlled: if True, construct a singly-controlled bloq.
 
     Registers:
-        x: A bitsize-sized input register (register x above).
+        x: register of type `self.dtype`
 
     References:
         [Improved quantum circuits for elliptic curve discrete logarithms](https://arxiv.org/abs/2001.09580).
         Haner et al. 2020. Section 3: Components. "Integer addition" and Fig 2a.
     """
 
-    bitsize: 'SymbolicInt'
+    dtype: Union[QInt, QUInt, QMontgomeryUInt]
     k: 'SymbolicInt'
-    cvs: Tuple[int, ...] = field(converter=_cvs_converter, default=())
-    signed: bool = False
+    is_controlled: bool = False
+
+    def __attrs_post_init__(self):
+        if not isinstance(self.dtype, (QInt, QUInt, QMontgomeryUInt)):
+            raise NotImplementedError(
+                "Only QInt, QUInt and QMontgomeryUInt types are supported for composite addition."
+            )
 
     @cached_property
     def signature(self) -> 'Signature':
-        if len(self.cvs) > 0:
-            return Signature(
-                [
-                    Register('ctrls', QBit(), shape=(len(self.cvs),)),
-                    Register('x', QInt(self.bitsize) if self.signed else QUInt(self.bitsize)),
-                ]
-            )
-        else:
-            return Signature(
-                [Register('x', QInt(bitsize=self.bitsize) if self.signed else QUInt(self.bitsize))]
-            )
+        return Signature.build_from_dtypes(ctrl=QAny(1 if self.is_controlled else 0), x=self.dtype)
 
     def on_classical_vals(
         self, x: 'ClassicalValT', **vals: 'ClassicalValT'
     ) -> Dict[str, 'ClassicalValT']:
-        if isinstance(self.k, sympy.Expr) or isinstance(self.bitsize, sympy.Expr):
+        if is_symbolic(self.k) or is_symbolic(self.dtype):
             raise ValueError(f"Classical simulation isn't supported for symbolic block {self}")
-        N = 2**self.bitsize
-        if len(self.cvs) > 0:
-            ctrls = vals['ctrls']
-        else:
-            return {
-                'x': add_ints(int(x), int(self.k), num_bits=self.bitsize, is_signed=self.signed)
-            }
 
-        if np.all(self.cvs == ctrls):
-            x = add_ints(int(x), int(self.k), num_bits=self.bitsize, is_signed=self.signed)
+        if not self.is_controlled or vals['ctrl']:
+            is_signed = isinstance(self.dtype, QInt)
+            x = add_ints(int(x), int(self.k), num_bits=self.dtype.num_qubits, is_signed=is_signed)
+
+        return vals | {'x': x}
+
+    @cached_property
+    def _load_k_bloq(self) -> Bloq:
+        from qualtran.bloqs.arithmetic.bitwise import XorK
+
+        k = self.k
+        if not is_symbolic(k) and k < 0 and isinstance(self.dtype, (QUInt, QMontgomeryUInt)):
+            # Since this is unsigned addition, adding `-v` is equivalent to adding `2**bitsize - v`
+            k %= 2**self.dtype.bitsize
 
-        return {'ctrls': ctrls, 'x': x}
+        xork = XorK(self.dtype, k)
+        return xork.controlled() if self.is_controlled else xork
 
     def build_composite_bloq(
-        self, bb: 'BloqBuilder', x: Soquet, **regs: SoquetT
+        self, bb: 'BloqBuilder', x: Soquet, **soqs: Soquet
     ) -> Dict[str, 'SoquetT']:
-        if isinstance(self.k, sympy.Expr) or isinstance(self.bitsize, sympy.Expr):
+        if is_symbolic(self.k) or is_symbolic(self.dtype):
             raise DecomposeTypeError(f"Cannot decompose symbolic {self}.")
 
-        # Assign registers to variables and allocate ancilla bits for classical integer k.
-        if len(self.cvs) > 0:
-            ctrls = regs['ctrls']
-        else:
-            ctrls = None
-        k = bb.allocate(dtype=x.reg.dtype)
-
-        # Get binary representation of k and split k into separate wires.
-        k_split = bb.split(k)
-        if self.signed:
-            binary_rep = QInt(self.bitsize).to_bits(self.k)
-        else:
-            val = self.k
-            if val < 0:
-                # Since this is unsigned addition adding -v is equivalent to
-                # adding 2^bitsize - v
-                val %= 2**self.bitsize
-            binary_rep = QUInt(self.bitsize).to_bits(val)
-
-        # Apply XGates to qubits in k where the bitstring has value 1. Apply CNOTs when the gate is
-        # controlled.
-        for i in range(self.bitsize):
-            if binary_rep[i] == 1:
-                if len(self.cvs) > 0 and ctrls is not None:
-                    ctrls, k_split[i] = bb.add(
-                        MultiControlX(cvs=self.cvs), controls=ctrls, target=k_split[i]
-                    )
-                else:
-                    k_split[i] = bb.add(XGate(), q=k_split[i])
-
-        # Rejoin the qubits representing k for in-place addition.
-        k = bb.join(k_split, dtype=x.reg.dtype)
-        if not isinstance(x.reg.dtype, (QInt, QUInt, QMontgomeryUInt)):
-            raise ValueError(
-                "Only QInt, QUInt and QMontgomerUInt types are supported for composite addition."
-            )
-        k, x = bb.add(Add(x.reg.dtype, x.reg.dtype), a=k, b=x)
-
-        # Resplit the k qubits in order to undo the original bit flips to go from the binary
-        # representation back to the zero state.
-        k_split = bb.split(k)
-        for i in range(self.bitsize):
-            if binary_rep[i] == 1:
-                if len(self.cvs) > 0 and ctrls is not None:
-                    ctrls, k_split[i] = bb.add(
-                        MultiControlX(cvs=self.cvs), controls=ctrls, target=k_split[i]
-                    )
-                else:
-                    k_split[i] = bb.add(XGate(), q=k_split[i])
-
-        # Free the ancilla qubits.
-        k = bb.join(k_split, dtype=x.reg.dtype)
+        # Allocate an ancilla register and load `k` into it (conditioned on `ctrl` if controlled).
+        k = bb.allocate(dtype=self.dtype)
+        load_soqs = {'x': k}
+        if self.is_controlled:
+            load_soqs |= {'ctrl': soqs.pop('ctrl')}
+        load_soqs = bb.add_d(self._load_k_bloq, **load_soqs)
+        k = load_soqs.pop('x')
+
+        # quantum-quantum addition
+        k, x = bb.add(Add(self.dtype, self.dtype), a=k, b=x)
+
+        # Uncompute `k` so that the ancilla register is back in the zero state before it is freed.
+        load_soqs['x'] = k
+        load_soqs = bb.add_d(self._load_k_bloq.adjoint(), **load_soqs)
+        k = load_soqs.pop('x')
+        assert isinstance(k, Soquet)
         bb.free(k)
 
-        # Return the output registers.
-        if len(self.cvs) > 0 and ctrls is not None:
-            return {'ctrls': ctrls, 'x': x}
-        else:
-            return {'x': x}
-
-    def build_call_graph(
-        self, ssa: 'SympySymbolAllocator'
-    ) -> Union['BloqCountDictT', Set['BloqCountT']]:
-        loading_cost: MutableBloqCountDictT
-        if len(self.cvs) == 0:
-            loading_cost = {XGate(): self.bitsize}  # upper bound; depends on the data.
-        elif len(self.cvs) == 1:
-            loading_cost = {CNOT(): self.bitsize}  # upper bound; depends on the data.
-        else:
-            # Otherwise, use the decomposition
-            return super().build_call_graph(ssa=ssa)
-        loading_cost[Add(QUInt(self.bitsize))] = 1
-        return loading_cost
-
-    def get_ctrl_system(self, ctrl_spec: 'CtrlSpec') -> Tuple['Bloq', 'AddControlledT']:
-        if self.cvs:
-            # We're already controlled, use default fallback
-            return super().get_ctrl_system(ctrl_spec)
-
-        if ctrl_spec.num_ctrl_reg != 1:
-            # Multiple control registers, use default fallback
-            return super().get_ctrl_system(ctrl_spec)
+        return {'x': x} | load_soqs
 
-        ((qdtype, cv_shape),) = ctrl_spec.activation_function_dtypes()
-        if qdtype != QBit():
-            # Control values aren't bits, use default fallback
-            return super().get_ctrl_system(ctrl_spec)
+    def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
+        counts = Counter[Bloq]()
 
-        # Supported via this class's custom `cvs` attribute.
-        bloq = attrs.evolve(self, cvs=ctrl_spec.cvs)
+        counts[self._load_k_bloq] += 1
+        counts[Add(self.dtype, self.dtype)] += 1
+        counts[self._load_k_bloq.adjoint()] += 1
 
-        def _add_ctrled(
-            bb: 'BloqBuilder', ctrl_soqs: Sequence['SoquetT'], in_soqs: Dict[str, 'SoquetT']
-        ) -> Tuple[Iterable['SoquetT'], Iterable['SoquetT']]:
-            ctrls, x = bb.add_t(bloq, ctrls=np.asarray(ctrl_soqs), **in_soqs)
-            return np.asarray(ctrls).tolist(), (x,)
+        return counts
 
-        return bloq, _add_ctrled
+    def get_ctrl_system(self, ctrl_spec: 'CtrlSpec') -> Tuple['Bloq', 'AddControlledT']:
+        from qualtran.bloqs.mcmt.specialized_ctrl import get_ctrl_system_1bit_cv_from_bloqs
+
+        return get_ctrl_system_1bit_cv_from_bloqs(
+            self,
+            ctrl_spec,
+            current_ctrl_bit=1 if self.is_controlled else None,
+            bloq_with_ctrl=attrs.evolve(self, is_controlled=True),
+            ctrl_reg_name='ctrl',
+        )
 
 
 @bloq_example(generalizer=ignore_split_join)
 def _add_k() -> AddK:
     n, k = sympy.symbols('n k')
-    add_k = AddK(bitsize=n, k=k)
+    add_k = AddK(QUInt(n), k=k)
     return add_k
 
 
 @bloq_example(generalizer=ignore_split_join)
 def _add_k_small() -> AddK:
-    add_k_small = AddK(bitsize=4, k=2, signed=False)
+    add_k_small = AddK(QUInt(4), k=2)
     return add_k_small
 
 
 @bloq_example(generalizer=ignore_split_join)
 def _add_k_large() -> AddK:
-    add_k_large = AddK(bitsize=64, k=-23, signed=True)
+    add_k_large = AddK(QInt(64), k=-23)
     return add_k_large
 
 
diff --git a/qualtran/bloqs/arithmetic/addition_test.py b/qualtran/bloqs/arithmetic/addition_test.py
index 7f3576862..0a055595d 100644
--- a/qualtran/bloqs/arithmetic/addition_test.py
+++ b/qualtran/bloqs/arithmetic/addition_test.py
@@ -20,7 +20,7 @@
 import sympy
 
 import qualtran.testing as qlt_testing
-from qualtran import BloqBuilder, CtrlSpec, QInt, QUInt
+from qualtran import Bloq, BloqBuilder, CtrlSpec, QInt, QUInt
 from qualtran.bloqs.arithmetic.addition import (
     _add_diff_size_regs,
     _add_k,
@@ -36,6 +36,7 @@
     AddK,
     OutOfPlaceAdder,
 )
+from qualtran.bloqs.basic_gates import XGate
 from qualtran.bloqs.mcmt.and_bloq import And
 from qualtran.cirq_interop.t_complexity_protocol import TComplexity
 from qualtran.cirq_interop.testing import assert_circuit_inp_out_cirqsim, GateHelper
@@ -315,16 +316,19 @@ def test_out_of_place_adder():
 
 def test_controlled_add_k():
     n, k = sympy.symbols('n k')
-    addk = AddK(n, k)
-    assert addk.controlled() == AddK(n, k, cvs=(1,))
-    assert addk.controlled(CtrlSpec(cvs=0)) == AddK(n, k, cvs=(0,))
+    addk = AddK(QUInt(n), k)
+    assert addk.controlled() == AddK(QUInt(n), k, is_controlled=True)
+    _, sigma = addk.controlled(CtrlSpec(cvs=0)).call_graph(max_depth=1)
+    assert sigma == {addk.controlled(): 1, XGate(): 2}
 
 
 @pytest.mark.parametrize('bitsize', [5])
 @pytest.mark.parametrize('k', [5, 8])
 @pytest.mark.parametrize('cvs', [[], [0, 1], [1, 0], [1, 1]])
 def test_add_k_decomp_unsigned(bitsize, k, cvs):
-    bloq = AddK(bitsize=bitsize, k=k, cvs=cvs, signed=False)
+    bloq: Bloq = AddK(QUInt(bitsize), k=k)
+    if cvs:
+        bloq = bloq.controlled(CtrlSpec(cvs=cvs))
     qlt_testing.assert_valid_bloq_decomposition(bloq)
 
 
@@ -332,7 +336,9 @@ def test_add_k_decomp_unsigned(bitsize, k, cvs):
 @pytest.mark.parametrize('k', [-5, 8])
 @pytest.mark.parametrize('cvs', [[], [0, 1], [1, 0], [1, 1]])
 def test_add_k_decomp_signed(bitsize, k, cvs):
-    bloq = AddK(bitsize=bitsize, k=k, cvs=cvs, signed=True)
+    bloq: Bloq = AddK(QInt(bitsize), k=k)
+    if cvs:
+        bloq = bloq.controlled(CtrlSpec(cvs=cvs))
     qlt_testing.assert_valid_bloq_decomposition(bloq)
 
 
@@ -340,16 +346,18 @@ def test_add_k_decomp_signed(bitsize, k, cvs):
     'bitsize,k,x,cvs,ctrls,result',
     [
         (5, 1, 2, (), (), 3),
-        (5, 3, 2, (1,), (1,), 5),
+        (5, 3, 2, (1,), 1, 5),
         (5, 2, 0, (1, 0), (1, 0), 2),
         (5, 1, 2, (1, 0, 1), (0, 0, 0), 2),
     ],
 )
 def test_classical_add_k_unsigned(bitsize, k, x, cvs, ctrls, result):
-    bloq = AddK(bitsize=bitsize, k=k, cvs=cvs, signed=False)
+    bloq: Bloq = AddK(QUInt(bitsize), k=k)
+    if cvs:
+        bloq = bloq.controlled(CtrlSpec(cvs=cvs))
     cbloq = bloq.decompose_bloq()
-    bloq_classical = bloq.call_classically(ctrls=ctrls, x=x)
-    cbloq_classical = cbloq.call_classically(ctrls=ctrls, x=x)
+    bloq_classical = bloq.call_classically(ctrl=ctrls, x=x)
+    cbloq_classical = cbloq.call_classically(ctrl=ctrls, x=x)
 
     assert len(bloq_classical) == len(cbloq_classical)
     for i in range(len(bloq_classical)):
@@ -369,10 +377,12 @@ def test_classical_add_signed_overflow(bitsize):
     'bitsize,k,x,cvs,ctrls,result', [(5, 2, 0, (1, 0), (1, 0), 2), (6, -3, 2, (), (), -1)]
 )
 def test_classical_add_k_signed(bitsize, k, x, cvs, ctrls, result):
-    bloq = AddK(bitsize=bitsize, k=k, cvs=cvs, signed=True)
+    bloq: Bloq = AddK(QInt(bitsize), k=k)
+    if cvs:
+        bloq = bloq.controlled(CtrlSpec(cvs=cvs))
     cbloq = bloq.decompose_bloq()
-    bloq_classical = bloq.call_classically(ctrls=ctrls, x=x)
-    cbloq_classical = cbloq.call_classically(ctrls=ctrls, x=x)
+    bloq_classical = bloq.call_classically(ctrl=ctrls, x=x)
+    cbloq_classical = cbloq.call_classically(ctrl=ctrls, x=x)
 
     assert len(bloq_classical) == len(cbloq_classical)
     for i in range(len(bloq_classical)):
diff --git a/qualtran/bloqs/arithmetic/negate.py b/qualtran/bloqs/arithmetic/negate.py
index c3910fc1e..0d42dcb3f 100644
--- a/qualtran/bloqs/arithmetic/negate.py
+++ b/qualtran/bloqs/arithmetic/negate.py
@@ -12,11 +12,11 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 from functools import cached_property
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Union
 
 from attrs import frozen
 
-from qualtran import Bloq, bloq_example, BloqDocSpec, QDType, QInt, Signature
+from qualtran import Bloq, bloq_example, BloqDocSpec, QInt, QMontgomeryUInt, QUInt, Signature
 from qualtran.bloqs.arithmetic import AddK
 from qualtran.bloqs.arithmetic.bitwise import BitwiseNot
 
@@ -53,7 +53,7 @@ class Negate(Bloq):
         Operator "Unary Minus". Last accessed 17 July 2024.
     """
 
-    dtype: QDType
+    dtype: Union[QUInt, QInt, QMontgomeryUInt]
 
     @cached_property
     def signature(self) -> 'Signature':
@@ -61,7 +61,7 @@ def signature(self) -> 'Signature':
 
     def build_composite_bloq(self, bb: 'BloqBuilder', x: 'SoquetT') -> dict[str, 'SoquetT']:
         x = bb.add(BitwiseNot(self.dtype), x=x)  # ~x
-        x = bb.add(AddK(self.dtype.num_qubits, k=1, signed=isinstance(self.dtype, QInt)), x=x)  # -x
+        x = bb.add(AddK(self.dtype, k=1), x=x)  # -x
         return {'x': x}
 
 
diff --git a/qualtran/bloqs/block_encoding/sparse_matrix.py b/qualtran/bloqs/block_encoding/sparse_matrix.py
index 60f8a39cf..7c126c77a 100644
--- a/qualtran/bloqs/block_encoding/sparse_matrix.py
+++ b/qualtran/bloqs/block_encoding/sparse_matrix.py
@@ -30,6 +30,7 @@
     DecomposeTypeError,
     QAny,
     QBit,
+    QInt,
     QUInt,
     Register,
     Signature,
@@ -334,7 +335,7 @@ def call_classically(self, l: ClassicalValT, i: ClassicalValT) -> Tuple[Classica
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> BloqCountDictT:
         return {
             Add(QUInt(self.system_bitsize), QUInt(self.system_bitsize)): 1,
-            AddK(self.system_bitsize, -self.bandsize, signed=True): 1,
+            AddK(QInt(self.system_bitsize), -self.bandsize): 1,
         }
 
     def build_composite_bloq(self, bb: BloqBuilder, l: SoquetT, i: SoquetT) -> Dict[str, SoquetT]:
@@ -342,7 +343,7 @@ def build_composite_bloq(self, bb: BloqBuilder, l: SoquetT, i: SoquetT) -> Dict[
             raise DecomposeTypeError(f"Cannot decompose symbolic {self=}")
 
         i, l = bb.add(Add(QUInt(self.system_bitsize), QUInt(self.system_bitsize)), a=i, b=l)
-        l = bb.add(AddK(self.system_bitsize, -self.bandsize, signed=True), x=l)
+        l = bb.add(AddK(QInt(self.system_bitsize), -self.bandsize), x=l)
 
         return {"l": l, "i": i}
 
diff --git a/qualtran/bloqs/mod_arithmetic/mod_addition.py b/qualtran/bloqs/mod_arithmetic/mod_addition.py
index a8186cdfe..cb7ab6396 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_addition.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_addition.py
@@ -117,7 +117,7 @@ def build_composite_bloq(self, bb: 'BloqBuilder', x: Soquet, y: Soquet) -> Dict[
         x = bb.join(x_split[1:], dtype=QMontgomeryUInt(bitsize=self.bitsize))
 
         # Add constant -p to the y register.
-        y = bb.add(AddK(bitsize=self.bitsize + 1, k=-1 * self.mod, signed=False, cvs=()), x=y)
+        y = bb.add(AddK(QMontgomeryUInt(self.bitsize + 1), k=-1 * self.mod), x=y)
 
         # Controlled addition of classical constant p if the sign of y after the last addition is
         # negative.
@@ -125,11 +125,9 @@ def build_composite_bloq(self, bb: 'BloqBuilder', x: Soquet, y: Soquet) -> Dict[
         sign = y_split[0]
         y = bb.join(y_split[1:], dtype=QMontgomeryUInt(bitsize=self.bitsize))
 
-        sign_split = bb.split(sign)
-        sign_split, y = bb.add(
-            AddK(bitsize=self.bitsize, k=self.mod, signed=False, cvs=(1,)), x=y, ctrls=sign_split
+        sign, y = bb.add(
+            AddK(QMontgomeryUInt(self.bitsize), k=self.mod).controlled(), ctrl=sign, x=y
         )
-        sign = bb.join(sign_split)
 
         # Check if y < x; if yes flip the bit of the signed ancilla bit. Then bitflip the sign bit
         # again before freeing.
@@ -148,8 +146,8 @@ def build_composite_bloq(self, bb: 'BloqBuilder', x: Soquet, y: Soquet) -> Dict[
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
             Add(QUInt(self.bitsize + 1)): 1,
-            AddK(self.bitsize + 1, k=-self.mod): 1,
-            AddK(self.bitsize, k=self.mod).controlled(): 1,
+            AddK(QUInt(self.bitsize + 1), k=-self.mod): 1,
+            AddK(QUInt(self.bitsize), k=self.mod).controlled(): 1,
             LinearDepthGreaterThan(self.bitsize): 1,
             XGate(): 1,
         }
@@ -512,12 +510,12 @@ def build_composite_bloq(
             a=x,
             b=y,
         )
-        y = bb.add(AddK(self.dtype.bitsize + 1, -self.mod, signed=False), x=y)
+        y = bb.add(AddK(QUInt(self.dtype.bitsize + 1), -self.mod), x=y)
         y_arr = bb.split(y)
         ancilla, y_arr = y_arr[0], y_arr[1:]
         y = bb.join(y_arr)
-        (ancilla,), y = bb.add(
-            AddK(self.dtype.bitsize, self.mod, signed=False, cvs=(1,)), ctrls=(ancilla,), x=y
+        ancilla, y = bb.add(
+            AddK(QUInt(self.dtype.bitsize), self.mod).controlled(), ctrl=ancilla, x=y
         )
 
         ctrl, x, y, ancilla = bb.add(
@@ -538,8 +536,8 @@ def build_composite_bloq(
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
             CAdd(QUInt(self.dtype.bitsize), QUInt(self.dtype.bitsize + 1), cv=self.cv): 1,
-            AddK(self.dtype.bitsize + 1, -self.mod, signed=False): 1,
-            AddK(self.dtype.bitsize, self.mod, cvs=(1,), signed=False): 1,
+            AddK(QUInt(self.dtype.bitsize + 1), -self.mod): 1,
+            AddK(QUInt(self.dtype.bitsize), self.mod).controlled(): 1,
             CLinearDepthGreaterThan(QUInt(self.dtype.bitsize), cv=self.cv): 1,
             XGate(): 1,
         }
diff --git a/qualtran/bloqs/mod_arithmetic/mod_division.py b/qualtran/bloqs/mod_arithmetic/mod_division.py
index 005323b96..c099c7562 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_division.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_division.py
@@ -563,7 +563,7 @@ def build_composite_bloq(
             )
 
         r = bb.add(BitwiseNot(QMontgomeryUInt(self.bitsize)), x=r)
-        r = bb.add(AddK(self.bitsize, self.mod + 1, signed=False), x=r)
+        r = bb.add(AddK(QMontgomeryUInt(self.bitsize), self.mod + 1), x=r)
 
         u = bb.add(XorK(QMontgomeryUInt(self.bitsize), 1), x=u)
         s = bb.add(XorK(QMontgomeryUInt(self.bitsize), self.mod), x=s)
@@ -610,7 +610,7 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
             self._kaliski_iteration: 2 * self.bitsize,
             BitwiseNot(QMontgomeryUInt(self.bitsize)): 1,
-            AddK(self.bitsize, self.mod + 1, signed=False): 1,
+            AddK(QMontgomeryUInt(self.bitsize), self.mod + 1): 1,
             XGate(): 1,
             XorK(QMontgomeryUInt(self.bitsize), self.mod): 2,
             XorK(QMontgomeryUInt(self.bitsize), 1): 2,
diff --git a/qualtran/bloqs/mod_arithmetic/mod_multiplication.py b/qualtran/bloqs/mod_arithmetic/mod_multiplication.py
index 3de2e5b96..e02e60712 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_multiplication.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_multiplication.py
@@ -30,6 +30,7 @@
     BloqDocSpec,
     DecomposeNotImplementedError,
     QBit,
+    QInt,
     QMontgomeryUInt,
     QUInt,
     Register,
@@ -103,7 +104,7 @@ def build_composite_bloq(self, bb: 'BloqBuilder', x: Soquet) -> Dict[str, 'Soque
         )
 
         # Add constant -p to the x register.
-        x = bb.add(AddK(bitsize=self.dtype.bitsize + 2, k=-self.mod, signed=False), x=x)
+        x = bb.add(AddK(QInt(self.dtype.bitsize + 2), k=-self.mod), x=x)
 
         # Split the three bit pieces again so that we can use the sign to control our constant
         # addition circuit.
@@ -112,10 +113,8 @@ def build_composite_bloq(self, bb: 'BloqBuilder', x: Soquet) -> Dict[str, 'Soque
         x = bb.join(x_split[1:], dtype=attrs.evolve(self.dtype, bitsize=self.dtype.bitsize + 1))
 
         # Add constant p to the x register if the result of the last modular reduction is negative.
-        (sign,), x = bb.add(
-            AddK(bitsize=self.dtype.bitsize + 1, k=self.mod, signed=False, cvs=(1,)),
-            ctrls=(sign,),
-            x=x,
+        sign, x = bb.add(
+            AddK(QUInt(self.dtype.bitsize + 1), k=self.mod).controlled(), ctrl=sign, x=x
         )
 
         # Split the lower bit ancilla from the x register for use in resetting the other ancilla bit
@@ -145,8 +144,8 @@ def wire_symbol(
 
     def build_call_graph(self, ssa: SympySymbolAllocator) -> BloqCountDictT:
         return {
-            AddK(self.dtype.bitsize + 2, -self.mod, signed=False): 1,
-            AddK(self.dtype.bitsize + 1, self.mod, cvs=(1,), signed=False): 1,
+            AddK(QInt(self.dtype.bitsize + 2), -self.mod): 1,
+            AddK(QUInt(self.dtype.bitsize + 1), self.mod).controlled(): 1,
             CNOT(): 1,
             XGate(): 2,
         }
@@ -520,8 +519,8 @@ def build_composite_bloq(
         target = bb.join(target_arr[-self.bitsize :])
         reduced = bb.add(XGate(), q=reduced)
         target, reduced = bb.add(LessThanConstant(self.bitsize, self.mod), x=target, target=reduced)
-        (reduced,), target = bb.add(
-            AddK(self.bitsize, self.mod, cvs=(1,), signed=False), ctrls=(reduced,), x=target
+        reduced, target = bb.add(
+            AddK(QUInt(self.bitsize), self.mod).controlled(), ctrl=reduced, x=target
         )
 
         return {'x': x, 'y': y, 'target': target, 'qrom_indices': qrom_indices, 'reduced': reduced}
@@ -529,7 +528,7 @@ def build_composite_bloq(
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> BloqCountDictT:
         num_windows = (self.bitsize + self.window_size - 1) // self.window_size
         return {
-            AddK(self.bitsize, self.mod, cvs=(1,), signed=False): 1,
+            AddK(QUInt(self.bitsize), self.mod).controlled(): 1,
             LessThanConstant(bitsize=self.bitsize, less_than_val=self.mod): 1,
             XGate(): 1,
             self._window: num_windows,
diff --git a/qualtran/bloqs/mod_arithmetic/mod_subtraction.py b/qualtran/bloqs/mod_arithmetic/mod_subtraction.py
index 01863dc04..c4da9e861 100644
--- a/qualtran/bloqs/mod_arithmetic/mod_subtraction.py
+++ b/qualtran/bloqs/mod_arithmetic/mod_subtraction.py
@@ -22,6 +22,7 @@
     bloq_example,
     BloqBuilder,
     BloqDocSpec,
+    CtrlSpec,
     DecomposeTypeError,
     QBit,
     QMontgomeryUInt,
@@ -79,38 +80,28 @@ def build_composite_bloq(self, bb: 'BloqBuilder', x: Soquet) -> Dict[str, 'Soque
         ancilla = bb.allocate(1)
         ancilla = bb.add(XGate(), q=ancilla)
 
-        x_arr = bb.split(x)
-        x_arr, ancilla = bb.add(
-            MultiControlX(cvs=[0] * self.dtype.bitsize), controls=x_arr, target=ancilla
+        x, ancilla = bb.add(
+            XGate().controlled(CtrlSpec(qdtypes=self.dtype, cvs=0)), ctrl=x, q=ancilla
         )
-        x = bb.join(x_arr)
 
         ancilla, x = bb.add(MultiTargetCNOT(self.dtype.bitsize), control=ancilla, targets=x)
-        (ancilla,), x = bb.add(
-            AddK(self.dtype.bitsize, self.mod + 1, cvs=(1,), signed=False), ctrls=(ancilla,), x=x
+        ancilla, x = bb.add(
+            AddK(QUInt(self.dtype.bitsize), self.mod + 1).controlled(), ctrl=ancilla, x=x
         )
 
-        x_arr = bb.split(x)
-        x_arr, ancilla = bb.add(
-            MultiControlX(cvs=[0] * self.dtype.bitsize).adjoint(), controls=x_arr, target=ancilla
+        x, ancilla = bb.add(
+            XGate().controlled(CtrlSpec(qdtypes=self.dtype, cvs=0)), ctrl=x, q=ancilla
         )
-        x = bb.join(x_arr)
 
         ancilla = bb.add(XGate(), q=ancilla)
         bb.free(ancilla)
         return {'x': x}
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
-        cvs: Union[list[int], HasLength]
-        if isinstance(self.dtype.bitsize, int):
-            cvs = [0] * self.dtype.bitsize
-        else:
-            cvs = HasLength(self.dtype.bitsize)
         return {
-            MultiControlX(cvs): 1,
-            MultiControlX(cvs).adjoint(): 1,
+            XGate().controlled(CtrlSpec(qdtypes=self.dtype, cvs=0)): 2,
             MultiTargetCNOT(self.dtype.bitsize): 1,
-            AddK(self.dtype.bitsize, k=self.mod + 1, cvs=(1), signed=False): 1,
+            AddK(QUInt(self.dtype.bitsize), k=self.mod + 1).controlled(): 1,
             XGate(): 2,
         }
 
@@ -178,9 +169,7 @@ def build_composite_bloq(
         (ctrl, ancilla), apply_op = bb.add(And(self.cv, 1), ctrl=(ctrl, ancilla))
 
         apply_op, x = bb.add(MultiTargetCNOT(self.dtype.bitsize), control=apply_op, targets=x)
-        (apply_op,), x = bb.add(
-            AddK(self.dtype.bitsize, self.mod + 1, cvs=(1,), signed=False), ctrls=(apply_op,), x=x
-        )
+        apply_op, x = bb.add(AddK(self.dtype, self.mod + 1).controlled(), ctrl=apply_op, x=x)
 
         ctrl, ancilla = bb.add(And(self.cv, 1).adjoint(), ctrl=(ctrl, ancilla), target=apply_op)
 
@@ -206,7 +195,7 @@ def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
             And(self.cv, 1): 1,
             And(self.cv, 1).adjoint(): 1,
             MultiTargetCNOT(self.dtype.bitsize): 1,
-            AddK(self.dtype.bitsize, k=self.mod + 1, cvs=(1,), signed=False): 1,
+            AddK(self.dtype, k=self.mod + 1).controlled(): 1,
             XGate(): 2,
         }
 
@@ -279,17 +268,17 @@ def signature(self) -> 'Signature':
 
     def build_composite_bloq(self, bb: 'BloqBuilder', x: Soquet, y: Soquet) -> Dict[str, 'SoquetT']:
         x = bb.add(BitwiseNot(self.dtype), x=x)
-        x = bb.add(AddK(self.dtype.bitsize, self.mod + 1, signed=False), x=x)
+        x = bb.add(AddK(self.dtype, self.mod + 1), x=x)
         x, y = bb.add(ModAdd(self.dtype.bitsize, self.mod), x=x, y=y)
-        x = bb.add(AddK(self.dtype.bitsize, self.mod + 1, signed=False).adjoint(), x=x)
+        x = bb.add(AddK(self.dtype, self.mod + 1).adjoint(), x=x)
         x = bb.add(BitwiseNot(self.dtype), x=x)
         return {'x': x, 'y': y}
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
             BitwiseNot(self.dtype): 2,
-            AddK(self.dtype.bitsize, self.mod + 1, signed=False): 1,
-            AddK(self.dtype.bitsize, self.mod + 1, signed=False).adjoint(): 1,
+            AddK(self.dtype, self.mod + 1): 1,
+            AddK(self.dtype, self.mod + 1).adjoint(): 1,
             ModAdd(self.dtype.bitsize, self.mod): 1,
         }
 
@@ -359,17 +348,17 @@ def build_composite_bloq(
         self, bb: 'BloqBuilder', ctrl: Soquet, x: Soquet, y: Soquet
     ) -> Dict[str, 'SoquetT']:
         x = bb.add(BitwiseNot(self.dtype), x=x)
-        x = bb.add(AddK(self.dtype.bitsize, self.mod + 1, signed=False), x=x)
+        x = bb.add(AddK(self.dtype, self.mod + 1), x=x)
         ctrl, x, y = bb.add(CModAdd(self.dtype, self.mod, self.cv), ctrl=ctrl, x=x, y=y)
-        x = bb.add(AddK(self.dtype.bitsize, self.mod + 1, signed=False).adjoint(), x=x)
+        x = bb.add(AddK(self.dtype, self.mod + 1).adjoint(), x=x)
         x = bb.add(BitwiseNot(self.dtype), x=x)
         return {'ctrl': ctrl, 'x': x, 'y': y}
 
     def build_call_graph(self, ssa: 'SympySymbolAllocator') -> 'BloqCountDictT':
         return {
             BitwiseNot(self.dtype): 2,
-            AddK(self.dtype.bitsize, self.mod + 1, signed=False): 1,
-            AddK(self.dtype.bitsize, self.mod + 1, signed=False).adjoint(): 1,
+            AddK(self.dtype, self.mod + 1): 1,
+            AddK(self.dtype, self.mod + 1).adjoint(): 1,
             CModAdd(self.dtype, self.mod, self.cv): 1,
         }
 
diff --git a/qualtran/bloqs/rotations/rz_via_phase_gradient_test.py b/qualtran/bloqs/rotations/rz_via_phase_gradient_test.py
index 0e905a1e5..110583b25 100644
--- a/qualtran/bloqs/rotations/rz_via_phase_gradient_test.py
+++ b/qualtran/bloqs/rotations/rz_via_phase_gradient_test.py
@@ -17,13 +17,13 @@
 from attrs import frozen
 
 from qualtran import Bloq, BloqBuilder, QBit, QFxp, QUInt, Signature, Soquet, SoquetT
-from qualtran.bloqs.basic_gates import IntState, Rz, TGate
+from qualtran.bloqs.basic_gates import IntState, Rz
 from qualtran.bloqs.rotations.phase_gradient import PhaseGradientState
 from qualtran.bloqs.rotations.rz_via_phase_gradient import (
     _rz_via_phase_gradient,
     RzViaPhaseGradient,
 )
-from qualtran.resource_counting import BloqCount, get_cost_value
+from qualtran.resource_counting import GateCounts, get_cost_value, QECGatesCost
 
 
 def test_examples(bloq_autotester):
@@ -34,8 +34,10 @@ def test_costs():
     n = sympy.Symbol("n")
     dtype = QUInt(n)
     bloq = RzViaPhaseGradient(angle_dtype=dtype, phasegrad_dtype=dtype)
-    # TODO need to improve this to `4 * n - 8` (i.e. Toffoli cost of `n - 2`)
-    assert get_cost_value(bloq, BloqCount.for_gateset('t')) == {TGate(): 4 * n - 4}
+    # TODO need to improve this to `n - 2` Ands.
+    assert get_cost_value(bloq, QECGatesCost()) == GateCounts(
+        and_bloq=n - 1, clifford=9 * n - 8, measurement=n - 1
+    )
 
 
 @frozen
diff --git a/qualtran/resource_counting/classify_bloqs.py b/qualtran/resource_counting/classify_bloqs.py
index eae56e52e..bb2a57323 100644
--- a/qualtran/resource_counting/classify_bloqs.py
+++ b/qualtran/resource_counting/classify_bloqs.py
@@ -229,7 +229,7 @@ def bloq_is_rotation(b: Bloq) -> bool:
     )
 
     if isinstance(b, Controlled):
-        if b.ctrl_spec.num_qubits > 1:
+        if b.ctrl_spec.num_qubits != 1:
             return False
 
         # TODO https://github.com/quantumlib/Qualtran/issues/878