diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 5a21d310..e509a852 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,33 +1,40 @@
-name: build-expander-exec
+name: build-release-binary
 on:
   release:
-    types: [released]
+    types: [published]
 
 env:
-  RUSTFLAGS: "-Dwarnings"
+  RUSTFLAGS: "-Dwarnings"  # no target-cpu=native: release assets must run off the build host
   RUST_BACKTRACE: 1
 
 jobs:
   build:
     strategy:
       matrix:
-        platform: [macos, 7950x3d]
         include:
-          - platform: macos
-            ci_image: macos-latest
-            flags: ''
-          - platform: linux-avx2
-            ci_image: ubuntu-latest
-            flags: 'RUSTFLAGS="-C target-feature=+avx2"'
-    runs-on: ${{ matrix.ci_image }}
+          - os: macos-latest
+            binary_name: expander-macos
+          - os: 7950x3d
+            feature: avx2
+            binary_name: expander-linux-avx2
+          - os: 7950x3d
+            feature: avx512f
+            binary_name: expander-linux-avx512
+    runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable
       - uses: Swatinem/rust-cache@v2
+        with:
+          # Cache key prefix; change it to manually start a new cache.
+          prefix-key: "mpi-v5.0.5" # update me if brew formula changes to a new version
       - name: Install MPI
         run: python3 ./scripts/install.py
+      - name: Set RUSTFLAGS for AVX
+        if: matrix.feature != ''
+        run: echo "RUSTFLAGS=$RUSTFLAGS -C target-feature=+${{ matrix.feature }}" >> "$GITHUB_ENV"
       - name: Prepare binary
-        run: ${{ matrix.flags }} cargo build --release --bin expander-exec
+        run: cargo build --release --bin expander-exec
       - name: Upload release asset
         uses: actions/github-script@v6
         with:
@@ -37,6 +44,6 @@ jobs:
               owner: context.repo.owner,
               repo: context.repo.repo,
               release_id: ${{ github.event.release.id }},
-              name: 'expander-exec-${{ matrix.platform }}',
+              name: '${{ matrix.binary_name }}',
               data: await fs.readFile('target/release/expander-exec')
             });
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5d70e72c..5e1e881c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,6 +47,7 @@ jobs:
           cargo run --bin=dev-setup --release
           cargo build --all-features --release
           cargo test --all-features --release --workspace
+          ./scripts/test_recursion.py
 
   gkr-e2e:
     name: Benchmark (${{ matrix.os }}${{ matrix.feature != '' && format(', {0}', matrix.feature) || '' }}, ${{ matrix.field }})
diff --git a/.github/workflows/nightly_e2e.yml b/.github/workflows/nightly_e2e.yml
index 3016ec99..9735f582 100644
--- a/.github/workflows/nightly_e2e.yml
+++ b/.github/workflows/nightly_e2e.yml
@@ -21,8 +21,14 @@ jobs:
     - uses: actions/checkout@v2
 
     - name: Set up Rust
-      uses: dtolnay/rust-toolchain@stable
+      uses: dtolnay/rust-toolchain@master
+      with:
+        toolchain: nightly-2024-09-01
+        components: rustfmt, clippy
     - uses: Swatinem/rust-cache@v2
+      with:
+        # Cache key prefix; change it to manually start a new cache.
+        prefix-key: "mpi-v5.0.5" # update me if brew formula changes to a new version
     
     - name: Set up Go
       uses: actions/setup-go@v2
diff --git a/.gitignore b/.gitignore
index e39866b7..3b60d22e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,7 +37,6 @@ output.txt
 
 # idea
 .idea/
-/data
 
 # vscode settings
 .vscode/
diff --git a/Cargo.lock b/Cargo.lock
index 91ab73b9..fe692cc6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -61,9 +61,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
 
 [[package]]
 name = "anstream"
-version = "0.6.15"
+version = "0.6.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
+checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -76,36 +76,36 @@ dependencies = [
 
 [[package]]
 name = "anstyle"
-version = "1.0.8"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
+checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
 
 [[package]]
 name = "anstyle-parse"
-version = "0.2.5"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
+checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
 dependencies = [
  "utf8parse",
 ]
 
 [[package]]
 name = "anstyle-query"
-version = "1.1.1"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
+checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
 dependencies = [
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
 name = "anstyle-wincon"
-version = "3.0.4"
+version = "3.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
+checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125"
 dependencies = [
  "anstyle",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -320,7 +320,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.79",
+ "syn 2.0.87",
  "which",
 ]
 
@@ -395,9 +395,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
 [[package]]
 name = "bytes"
-version = "1.7.2"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3"
+checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
 
 [[package]]
 name = "cast"
@@ -407,9 +407,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.1.28"
+version = "1.1.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1"
+checksum = "baee610e9452a8f6f0a1b6194ec09ff9e2d85dea54432acdae41aa0761c95d70"
 dependencies = [
  "shlex",
 ]
@@ -525,7 +525,7 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -536,9 +536,9 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
 
 [[package]]
 name = "colorchoice"
-version = "1.0.2"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
+checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
 
 [[package]]
 name = "config"
@@ -695,6 +695,17 @@ dependencies = [
  "subtle",
 ]
 
+[[package]]
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+]
+
 [[package]]
 name = "either"
 version = "1.13.0"
@@ -703,9 +714,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
 
 [[package]]
 name = "encoding_rs"
-version = "0.8.34"
+version = "0.8.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
+checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
 dependencies = [
  "cfg-if",
 ]
@@ -1003,9 +1014,9 @@ dependencies = [
 
 [[package]]
 name = "hashbrown"
-version = "0.15.0"
+version = "0.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
+checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3"
 
 [[package]]
 name = "headers"
@@ -1111,9 +1122,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
 
 [[package]]
 name = "hyper"
-version = "0.14.30"
+version = "0.14.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9"
+checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85"
 dependencies = [
  "bytes",
  "futures-channel",
@@ -1156,14 +1167,143 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "icu_collections"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_locid_transform_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
+
+[[package]]
+name = "icu_normalizer"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "utf16_iter",
+ "utf8_iter",
+ "write16",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
+
+[[package]]
+name = "icu_properties"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_locid_transform",
+ "icu_properties_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
+
+[[package]]
+name = "icu_provider"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_provider_macros",
+ "stable_deref_trait",
+ "tinystr",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_provider_macros"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+]
+
 [[package]]
 name = "idna"
-version = "0.5.0"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
+checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
 dependencies = [
- "unicode-bidi",
- "unicode-normalization",
+ "icu_normalizer",
+ "icu_properties",
 ]
 
 [[package]]
@@ -1173,7 +1313,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
 ]
 
 [[package]]
@@ -1228,9 +1368,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
 
 [[package]]
 name = "js-sys"
-version = "0.3.71"
+version = "0.3.72"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0cb94a0ffd3f3ee755c20f7d8752f45cac88605a4dcf808abcff72873296ec7b"
+checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9"
 dependencies = [
  "wasm-bindgen",
 ]
@@ -1252,9 +1392,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
 
 [[package]]
 name = "libc"
-version = "0.2.159"
+version = "0.2.162"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5"
+checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398"
 
 [[package]]
 name = "libffi"
@@ -1291,6 +1431,12 @@ version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
 
+[[package]]
+name = "litemap"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704"
+
 [[package]]
 name = "lock_api"
 version = "0.4.12"
@@ -1530,6 +1676,15 @@ version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
+[[package]]
+name = "pcs"
+version = "0.1.0"
+dependencies = [
+ "arith",
+ "polynomials",
+ "rand",
+]
+
 [[package]]
 name = "percent-encoding"
 version = "2.3.1"
@@ -1538,29 +1693,29 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
 
 [[package]]
 name = "pin-project"
-version = "1.1.6"
+version = "1.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec"
+checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95"
 dependencies = [
  "pin-project-internal",
 ]
 
 [[package]]
 name = "pin-project-internal"
-version = "1.1.6"
+version = "1.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8"
+checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
 ]
 
 [[package]]
 name = "pin-project-lite"
-version = "0.2.14"
+version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02"
+checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff"
 
 [[package]]
 name = "pin-utils"
@@ -1623,19 +1778,19 @@ dependencies = [
 
 [[package]]
 name = "prettyplease"
-version = "0.2.22"
+version = "0.2.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba"
+checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033"
 dependencies = [
  "proc-macro2",
- "syn 2.0.79",
+ "syn 2.0.87",
 ]
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.87"
+version = "1.0.89"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a"
+checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e"
 dependencies = [
  "unicode-ident",
 ]
@@ -1725,9 +1880,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.11.0"
+version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
+checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -1775,9 +1930,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.37"
+version = "0.38.39"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
+checksum = "375116bee2be9ed569afe2154ea6a99dfdffd257f533f187498c2a8f5feaf4ee"
 dependencies = [
  "bitflags",
  "errno",
@@ -1821,29 +1976,29 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
 
 [[package]]
 name = "serde"
-version = "1.0.210"
+version = "1.0.214"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
+checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.210"
+version = "1.0.214"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
+checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.128"
+version = "1.0.132"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8"
+checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
 dependencies = [
  "itoa",
  "memchr",
@@ -1937,6 +2092,12 @@ version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
 
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
 [[package]]
 name = "static_assertions"
 version = "1.1.0"
@@ -1981,15 +2142,26 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.79"
+version = "2.0.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590"
+checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
 dependencies = [
  "proc-macro2",
  "quote",
  "unicode-ident",
 ]
 
+[[package]]
+name = "synstructure"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+]
+
 [[package]]
 name = "tap"
 version = "1.0.1"
@@ -1998,22 +2170,22 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
 
 [[package]]
 name = "thiserror"
-version = "1.0.64"
+version = "1.0.68"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84"
+checksum = "02dd99dc800bbb97186339685293e1cc5d9df1f8fae2d0aecd9ff1c77efea892"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.64"
+version = "1.0.68"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3"
+checksum = "a7c61ec9a6f64d2793d8a45faba21efbe3ced62a886d44c36a009b2b519b4c7e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -2026,35 +2198,30 @@ dependencies = [
 ]
 
 [[package]]
-name = "tinytemplate"
-version = "1.2.1"
+name = "tinystr"
+version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
 dependencies = [
- "serde",
- "serde_json",
+ "displaydoc",
+ "zerovec",
 ]
 
 [[package]]
-name = "tinyvec"
-version = "1.8.0"
+name = "tinytemplate"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
 dependencies = [
- "tinyvec_macros",
+ "serde",
+ "serde_json",
 ]
 
-[[package]]
-name = "tinyvec_macros"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
-
 [[package]]
 name = "tokio"
-version = "1.40.0"
+version = "1.41.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998"
+checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33"
 dependencies = [
  "backtrace",
  "bytes",
@@ -2076,7 +2243,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -2181,18 +2348,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
 
 [[package]]
 name = "unicase"
-version = "2.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89"
-dependencies = [
- "version_check",
-]
-
-[[package]]
-name = "unicode-bidi"
-version = "0.3.17"
+version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893"
+checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df"
 
 [[package]]
 name = "unicode-ident"
@@ -2200,15 +2358,6 @@ version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
 
-[[package]]
-name = "unicode-normalization"
-version = "0.1.24"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
-dependencies = [
- "tinyvec",
-]
-
 [[package]]
 name = "unroll"
 version = "0.1.5"
@@ -2221,9 +2370,9 @@ dependencies = [
 
 [[package]]
 name = "url"
-version = "2.5.2"
+version = "2.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c"
+checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada"
 dependencies = [
  "form_urlencoded",
  "idna",
@@ -2236,6 +2385,18 @@ version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
 
+[[package]]
+name = "utf16_iter"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
 [[package]]
 name = "utf8parse"
 version = "0.2.2"
@@ -2304,9 +2465,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.94"
+version = "0.2.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef073ced962d62984fb38a36e5fdc1a2b23c9e0e1fa0689bb97afa4202ef6887"
+checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -2315,24 +2476,24 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.94"
+version = "0.2.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4bfab14ef75323f4eb75fa52ee0a3fb59611977fd3240da19b2cf36ff85030e"
+checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358"
 dependencies = [
  "bumpalo",
  "log",
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.94"
+version = "0.2.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7bec9830f60924d9ceb3ef99d55c155be8afa76954edffbb5936ff4509474e7"
+checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -2340,28 +2501,28 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.94"
+version = "0.2.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c74f6e152a76a2ad448e223b0fc0b6b5747649c3d769cc6bf45737bf97d0ed6"
+checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.94"
+version = "0.2.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a42f6c679374623f295a8623adfe63d9284091245c3504bde47c17a3ce2777d9"
+checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d"
 
 [[package]]
 name = "web-sys"
-version = "0.3.71"
+version = "0.3.72"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44188d185b5bdcae1052d08bcbcf9091a5524038d4572cc4f4f2bb9d5554ddd9"
+checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -2479,6 +2640,18 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "write16"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
+
+[[package]]
+name = "writeable"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
+
 [[package]]
 name = "wyz"
 version = "0.5.1"
@@ -2488,6 +2661,30 @@ dependencies = [
  "tap",
 ]
 
+[[package]]
+name = "yoke"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5"
+dependencies = [
+ "serde",
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+ "synstructure",
+]
+
 [[package]]
 name = "zerocopy"
 version = "0.7.35"
@@ -2506,7 +2703,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
+]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
+ "synstructure",
 ]
 
 [[package]]
@@ -2526,5 +2744,27 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.79",
+ "syn 2.0.87",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.87",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 15c0d5fc..a7fbd95b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,8 +10,9 @@ members = [
     "circuit", 
     "config", 
     "gkr", 
+    "pcs",
     "sumcheck",
-    "transcript"
+    "transcript",
 ]
 resolver = "2"
 
diff --git a/arith/src/serde.rs b/arith/src/serde.rs
index a12ed26b..03e77cbe 100644
--- a/arith/src/serde.rs
+++ b/arith/src/serde.rs
@@ -56,6 +56,31 @@ field_serde_for_integer!(u64, 8);
 field_serde_for_integer!(usize, 8);
 field_serde_for_integer!(u8, 1);
 
+impl<V: FieldSerde> FieldSerde for Vec<V> {
+    // The encoding below depends on the length, so this constant is left unimplemented.
+    const SERIALIZED_SIZE: usize = unimplemented!();
+
+    /// Writes `self.len()` followed by every element in order, stopping at the first error.
+    fn serialize_into<W: Write>(&self, mut writer: W) -> FieldSerdeResult<()> {
+        self.len().serialize_into(&mut writer)?;
+        self.iter()
+            .try_for_each(|item| item.serialize_into(&mut writer))
+    }
+
+    /// Reads a `usize` count, then that many elements, stopping at the first error.
+    fn deserialize_from<R: Read>(mut reader: R) -> FieldSerdeResult<Self> {
+        let count = usize::deserialize_from(&mut reader)?;
+        (0..count)
+            .map(|_| V::deserialize_from(&mut reader))
+            .collect()
+    }
+
+    /// Left unimplemented for `Vec`; calling it panics via `unimplemented!`.
+    fn try_deserialize_from_ecc_format<R: Read>(_reader: R) -> FieldSerdeResult<Self> {
+        unimplemented!()
+    }
+}
+
 // Consider use const generics after it gets stable
 impl FieldSerde for [u64; 4] {
     const SERIALIZED_SIZE: usize = 32;
diff --git a/bi-kzg/Cargo.toml b/bi-kzg/Cargo.toml
index ba9324c7..cd14034d 100644
--- a/bi-kzg/Cargo.toml
+++ b/bi-kzg/Cargo.toml
@@ -23,7 +23,6 @@ criterion.workspace = true
 name = "bi_kzg_bench"
 harness = false
 
-
 [features]
 default = [ 
     # "ark-std/print-trace",
diff --git a/bi-kzg/src/bi_fft.rs b/bi-kzg/src/bi_fft.rs
index 1ac7fd5d..d5e42e8b 100644
--- a/bi-kzg/src/bi_fft.rs
+++ b/bi-kzg/src/bi_fft.rs
@@ -53,7 +53,9 @@ fn mul_assign_vec<F: Field>(a: &mut [F], b: &F) {
 //
 /// Performs a radix-$2$ Fast-Fourier Transformation (FFT) on a vector of size
 /// $n = 2^k$, when provided `log_n` = $k$ and an element of multiplicative
-/// order $n$ called `omega` ($\omega$). The result is that the vector `a`, when
+/// order $n$ called `omega` ($\omega$).
+///
+/// The result is that the vector `a`, when
 /// interpreted as the coefficients of a polynomial of degree $n - 1$, is
 /// transformed into the evaluations of this polynomial at each of the $n$
 /// distinct powers of $\omega$. This transformation is invertible by providing
diff --git a/bi-kzg/src/coeff_form_bi_kzg.rs b/bi-kzg/src/coeff_form_bi_kzg.rs
index 4f4eb908..0ac1365f 100644
--- a/bi-kzg/src/coeff_form_bi_kzg.rs
+++ b/bi-kzg/src/coeff_form_bi_kzg.rs
@@ -12,14 +12,14 @@ use halo2curves::CurveAffine;
 use itertools::Itertools;
 use rand::RngCore;
 
-use crate::poly::{lagrange_coefficients, univariate_quotient};
-use crate::structs::BivariateLagrangePolynomial;
-use crate::structs::BivariatePolynomial;
+use crate::poly::{
+    lagrange_coefficients, univariate_quotient, BivariateLagrangePolynomial, BivariatePolynomial,
+};
 use crate::util::parallelize;
 use crate::{
     pcs::PolynomialCommitmentScheme,
     util::{powers_of_field_elements, tensor_product_parallel},
-    BiKZGCommitment, BiKZGProof, BiKZGSRS, BiKZGVerifierParam,
+    BiKZGCommitment, BiKZGProof, BiKZGVerifierParam, CoefFormBiKZGSRS,
 };
 
 /// Commit to the bi-variate polynomial in its coefficient form.
@@ -32,8 +32,8 @@ impl<E: MultiMillerLoop> PolynomialCommitmentScheme for CoeffFormBiKZG<E>
 where
     E::G1Affine: CurveAffine<ScalarExt = E::Fr, CurveExt = E::G1>,
 {
-    type SRS = BiKZGSRS<E>;
-    type ProverParam = BiKZGSRS<E>;
+    type SRS = CoefFormBiKZGSRS<E>;
+    type ProverParam = CoefFormBiKZGSRS<E>;
     type VerifierParam = BiKZGVerifierParam<E>;
     type Polynomial = BivariatePolynomial<E::Fr>;
     type Commitment = BiKZGCommitment<E>;
@@ -123,7 +123,7 @@ where
             affine_bases
         };
 
-        BiKZGSRS {
+        CoefFormBiKZGSRS {
             powers_of_g: coeff_bases,
             powers_of_g_lagrange_over_both_roots: lagrange_bases,
             h: E::G2Affine::generator(),
diff --git a/bi-kzg/src/lagrange_form_bi_kzg.rs b/bi-kzg/src/lagrange_form_bi_kzg.rs
index ffa0a738..b836c364 100644
--- a/bi-kzg/src/lagrange_form_bi_kzg.rs
+++ b/bi-kzg/src/lagrange_form_bi_kzg.rs
@@ -1,10 +1,7 @@
-//! We don't need this file for now. We will use the `CoeffFormBiKZG`.
-
 use std::{borrow::Borrow, marker::PhantomData};
 
 use ark_std::{end_timer, start_timer};
 use halo2curves::ff::Field;
-use halo2curves::ff::PrimeField;
 use halo2curves::group::prime::PrimeCurveAffine;
 use halo2curves::group::Curve;
 use halo2curves::group::Group;
@@ -14,13 +11,13 @@ use halo2curves::CurveAffine;
 use itertools::Itertools;
 use rand::RngCore;
 
-use crate::poly::{lagrange_coefficients, univariate_quotient};
-use crate::structs::BivariateLagrangePolynomial;
-use crate::util::parallelize;
+use crate::parallelize;
+use crate::poly::{lagrange_coefficients, BivariateLagrangePolynomial};
+use crate::primitive_root_of_unity;
 use crate::{
     pcs::PolynomialCommitmentScheme,
     util::{powers_of_field_elements, tensor_product_parallel},
-    BiKZGCommitment, BiKZGProof, BiKZGSRS, BiKZGVerifierParam,
+    BiKZGCommitment, BiKZGProof, BiKZGVerifierParam, LagrangeFormBiKZGSRS,
 };
 
 /// Commit to the bi-variate polynomial in its lagrange form.
@@ -33,8 +30,8 @@ impl<E: MultiMillerLoop> PolynomialCommitmentScheme for LagrangeFormBiKZG<E>
 where
     E::G1Affine: CurveAffine<ScalarExt = E::Fr, CurveExt = E::G1>,
 {
-    type SRS = BiKZGSRS<E>;
-    type ProverParam = BiKZGSRS<E>;
+    type SRS = LagrangeFormBiKZGSRS<E>;
+    type ProverParam = LagrangeFormBiKZGSRS<E>;
     type VerifierParam = BiKZGVerifierParam<E>;
     type Polynomial = BivariateLagrangePolynomial<E::Fr>;
     type Commitment = BiKZGCommitment<E>;
@@ -53,52 +50,37 @@ where
 
         let tau_0 = E::Fr::random(&mut rng);
         let tau_1 = E::Fr::random(&mut rng);
+
         let g1 = E::G1Affine::generator();
 
         // roots of unity for supported_n and supported_m
-        let (omega_0, omega_1) = {
-            let omega = E::Fr::ROOT_OF_UNITY;
-            let omega_0 = omega.pow_vartime(&[(1 << E::Fr::S) / supported_n as u64]);
-            let omega_1 = omega.pow_vartime(&[(1 << E::Fr::S) / supported_m as u64]);
-
-            assert!(
-                omega_0.pow_vartime(&[supported_n as u64]) == E::Fr::ONE,
-                "omega_0 is not root of unity for supported_n"
-            );
-            assert!(
-                omega_1.pow_vartime(&[supported_m as u64]) == E::Fr::ONE,
-                "omega_1 is not root of unity for supported_m"
-            );
-            (omega_0, omega_1)
-        };
+        let omega_0 = primitive_root_of_unity(supported_n);
+        let omega_1 = primitive_root_of_unity(supported_m);
 
         // computes the vector of L_i^N(tau_0) * L_j^M(tau_1) for i in 0..supported_n and j in 0..supported_m
-        let (scalars, lagrange_scalars) = {
+        let (lagrange_tau_0, lagrange_scalars) = {
             let powers_of_omega_0 = powers_of_field_elements(&omega_0, supported_n);
-            let powers_of_tau_0 = powers_of_field_elements(&tau_0, supported_n);
             let lagrange_tau_0 = lagrange_coefficients(&powers_of_omega_0, &tau_0);
             let powers_of_omega_1 = powers_of_field_elements(&omega_1, supported_m);
-            let powers_of_tau_1 = powers_of_field_elements(&tau_1, supported_m);
             let lagrange_tau_1 = lagrange_coefficients(&powers_of_omega_1, &tau_1);
-            let scalars = tensor_product_parallel(&powers_of_tau_0, &powers_of_tau_1);
             let lagrange_scalars = tensor_product_parallel(&lagrange_tau_0, &lagrange_tau_1);
 
-            (scalars, lagrange_scalars)
+            (lagrange_tau_0, lagrange_scalars)
         };
 
         let g1_prog = g1.to_curve();
-        let coeff_bases = {
-            let mut proj_bases = vec![E::G1::identity(); supported_n * supported_m];
+        let lagrange_x_bases = {
+            let mut proj_bases = vec![E::G1::identity(); supported_n];
             parallelize(&mut proj_bases, |g, start| {
                 for (idx, g) in g.iter_mut().enumerate() {
                     let offset = start + idx;
-                    *g = g1_prog * scalars[offset];
+                    *g = g1_prog * lagrange_tau_0[offset];
                 }
             });
 
-            let mut g_bases = vec![E::G1Affine::identity(); supported_n * supported_m];
-            parallelize(&mut g_bases, |g, starts| {
-                E::G1::batch_normalize(&proj_bases[starts..(starts + g.len())], g);
+            let mut g_bases = vec![E::G1Affine::identity(); supported_n];
+            parallelize(&mut g_bases, |g, start| {
+                E::G1::batch_normalize(&proj_bases[start..start + g.len()], g);
             });
             drop(proj_bases);
             g_bases
@@ -114,18 +96,16 @@ where
             });
 
             let mut affine_bases = vec![E::G1Affine::identity(); supported_n * supported_m];
-            parallelize(&mut affine_bases, |affine_bases, starts| {
-                E::G1::batch_normalize(
-                    &proj_bases[starts..(starts + affine_bases.len())],
-                    affine_bases,
-                );
+            parallelize(&mut affine_bases, |g, start| {
+                E::G1::batch_normalize(&proj_bases[start..start + g.len()], g);
             });
             drop(proj_bases);
             affine_bases
         };
 
-        BiKZGSRS {
-            powers_of_g: coeff_bases,
+        LagrangeFormBiKZGSRS {
+            g: g1,
+            powers_of_g_lagrange_over_x: lagrange_x_bases,
             powers_of_g_lagrange_over_both_roots: lagrange_bases,
             h: E::G2Affine::generator(),
             tau_0_h: (E::G2Affine::generator() * tau_0).into(),
@@ -179,55 +159,52 @@ where
 
         let timer2 = start_timer!(|| "Computing the proof pi0");
         let (pi_0, f_x_b) = {
-            let f_x_b = polynomial.evaluate_y(&point.1);
-            let mut q_0_x_b = f_x_b.clone();
-            q_0_x_b[0] -= u;
-            let q_0_x_b = univariate_quotient(&q_0_x_b, &point.0);
+            let f_x_b = polynomial.evaluate_at_y(&point.1);
+
+            let omega_0 = primitive_root_of_unity(polynomial.degree_0);
+            let powers_of_omega_0 =
+                powers_of_field_elements::<E::Fr>(&omega_0, polynomial.degree_0);
+            // todo use batch inversion
+            let powers_of_omega_0_minus_x_inv = powers_of_omega_0
+                .iter()
+                .map(|w| (*w - point.0).invert().unwrap())
+                .collect::<Vec<_>>();
+
+            let q_0_x_b = f_x_b
+                .iter()
+                .zip(powers_of_omega_0_minus_x_inv)
+                .map(|(v0, v1)| (*v0 - u) * v1)
+                .collect::<Vec<_>>();
 
             let pi_0 = best_multiexp(
                 &q_0_x_b,
-                prover_param.borrow().powers_of_g[..polynomial.degree_0].as_ref(),
+                prover_param.borrow().powers_of_g_lagrange_over_x.as_ref(),
             )
             .to_affine();
             (pi_0, f_x_b)
         };
         end_timer!(timer2);
 
+        // f(X, Y) = qx(X)(X - a) + qy(X, Y)(Y - b) + u
         let timer2 = start_timer!(|| "Computing the proof pi1");
         let pi_1 = {
-            let mut t = polynomial.clone();
-            t.coefficients
-                .iter_mut()
-                .take(polynomial.degree_0)
-                .zip_eq(f_x_b.iter())
-                .for_each(|(c, f)| *c -= f);
-            let coeffs = t.lagrange_coeffs();
-
-            let mut divisor = vec![E::Fr::from(0); polynomial.degree_0 * polynomial.degree_1];
-            divisor[0] = -point.1;
-            divisor[polynomial.degree_0] = E::Fr::ONE;
-            let divisor =
-                BivariatePolynomial::new(divisor, polynomial.degree_0, polynomial.degree_1);
-
-            let divisor = divisor.lagrange_coeffs();
-
-            // todo: batch invert
-            let y_minus_a_inv_lag = divisor
-                .iter()
-                .map(|o| {
-                    if o.is_zero_vartime() {
-                        panic!("not invertible")
-                    } else {
-                        o.invert().unwrap()
-                    }
+            let omega_1 = primitive_root_of_unity(polynomial.degree_1);
+            let powers_of_omega_1 =
+                powers_of_field_elements::<E::Fr>(&omega_1, polynomial.degree_1);
+
+            // todo use batch inversion
+            let q_1_x_y = polynomial
+                .coefficients
+                .chunks_exact(polynomial.degree_0)
+                .zip_eq(powers_of_omega_1)
+                .flat_map(|(coeffs_i, w_y_i)| {
+                    coeffs_i
+                        .iter()
+                        .zip(f_x_b.iter())
+                        .map(|(coeff, v)| (*coeff - v) * (w_y_i - point.1).invert().unwrap())
+                        .collect::<Vec<E::Fr>>()
                 })
-                .collect::<Vec<_>>();
-
-            let q_1_x_y = coeffs
-                .iter()
-                .zip_eq(y_minus_a_inv_lag.iter())
-                .map(|(c, y)| (*c) * *y)
-                .collect::<Vec<_>>();
+                .collect::<Vec<E::Fr>>();
 
             best_multiexp(
                 &q_1_x_y,
@@ -260,12 +237,7 @@ where
     {
         let pi0_a_pi1_b_g1_cmu = best_multiexp(
             &[point.0, point.1, E::Fr::ONE, -*value],
-            &[
-                proof.pi0,
-                proof.pi1,
-                commitment.com.into(),
-                verifier_param.g.into(),
-            ],
+            &[proof.pi0, proof.pi1, commitment.com, verifier_param.g],
         );
         let pi0_a_pi1_b_g1_cmu = (-pi0_a_pi1_b_g1_cmu).to_affine();
         let res = E::multi_miller_loop(&[
@@ -273,11 +245,9 @@ where
             (&proof.pi1, &verifier_param.tau_1_h.into()),
             (&pi0_a_pi1_b_g1_cmu, &verifier_param.h.into()),
         ]);
-        let res = res.final_exponentiation().is_identity().into();
 
-        res
+        res.final_exponentiation().is_identity().into()
     }
 
-    
     // TODO: implement multi-opening and batch verification
 }
diff --git a/bi-kzg/src/lib.rs b/bi-kzg/src/lib.rs
index 3d4f5d8d..4bade254 100644
--- a/bi-kzg/src/lib.rs
+++ b/bi-kzg/src/lib.rs
@@ -1,18 +1,23 @@
 mod bi_fft;
+pub use bi_fft::*;
+
 mod coeff_form_bi_kzg;
+pub use coeff_form_bi_kzg::*;
+
+mod lagrange_form_bi_kzg;
+pub use lagrange_form_bi_kzg::*;
+
 mod pcs;
+pub use pcs::*;
+
 mod poly;
+pub use poly::*;
+
 mod structs;
-mod util;
+pub use structs::*;
 
-// mod lagrange_form_bi_kzg;
+mod util;
+pub use util::*;
 
 #[cfg(test)]
 mod tests;
-
-pub use coeff_form_bi_kzg::CoeffFormBiKZG;
-pub use pcs::PolynomialCommitmentScheme;
-pub use structs::BivariatePolynomial;
-pub use structs::{BiKZGCommitment, BiKZGProof, BiKZGSRS, BiKZGVerifierParam};
-
-// pub use lagrange_form_bi_kzg::LagrangeFormBiKZG;
diff --git a/bi-kzg/src/poly.rs b/bi-kzg/src/poly.rs
index 47df6675..1c4fcb79 100644
--- a/bi-kzg/src/poly.rs
+++ b/bi-kzg/src/poly.rs
@@ -1,206 +1,19 @@
-use ark_std::{end_timer, start_timer};
-use halo2curves::ff::{Field, PrimeField};
-use itertools::Itertools;
-use rand::RngCore;
-use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
+pub mod coef_form;
+pub mod lagrange_form;
 
-use crate::bi_fft::bi_fft_in_place;
-use crate::structs::{BivariateLagrangePolynomial, BivariatePolynomial};
-use crate::util::powers_of_field_elements;
+pub mod utils;
+pub use utils::*;
 
-impl<F: PrimeField> BivariatePolynomial<F> {
-    #[inline]
-    pub fn new(coefficients: Vec<F>, degree_0: usize, degree_1: usize) -> Self {
-        assert_eq!(coefficients.len(), degree_0 * degree_1);
-        Self {
-            coefficients,
-            degree_0,
-            degree_1,
-        }
-    }
-
-    pub fn random(mut rng: impl RngCore, degree_0: usize, degree_1: usize) -> Self {
-        let coefficients = (0..degree_0 * degree_1)
-            .map(|_| F::random(&mut rng))
-            .collect();
-        Self::new(coefficients, degree_0, degree_1)
-    }
-
-    /// evaluate the polynomial at (x, y)
-    pub fn evaluate(&self, x: &F, y: &F) -> F {
-        let x_power = powers_of_field_elements(x, self.degree_0);
-        let y_power = powers_of_field_elements(y, self.degree_1);
-
-        self.coefficients
-            .chunks_exact(self.degree_0)
-            .zip(y_power.iter())
-            .fold(F::ZERO, |acc, (chunk, y_i)| {
-                acc + chunk
-                    .iter()
-                    .zip(x_power.iter())
-                    .fold(F::ZERO, |acc, (c, x_i)| acc + *c * *x_i)
-                    * y_i
-            })
-    }
-
-    /// evaluate the polynomial at y, return a univariate polynomial in x
-    pub fn evaluate_at_y(&self, y: &F) -> Vec<F> {
-        let mut f_x_b = self.coefficients[0..self.degree_0].to_vec();
-        let powers_of_b = powers_of_field_elements(y, self.degree_1);
-        powers_of_b
-            .iter()
-            .zip_eq(self.coefficients.chunks_exact(self.degree_0))
-            .skip(1)
-            .for_each(|(bi, chunk_i)| {
-                f_x_b
-                    .iter_mut()
-                    .zip(chunk_i.iter())
-                    .for_each(|(f, c)| *f += *c * *bi)
-            });
-
-        f_x_b
-    }
-
-    /// same as interpolate but slower.
-    pub fn evaluate_at_roots(&self) -> Vec<F> {
-        let timer = start_timer!(|| format!(
-            "Lagrange coefficients of degree {} {}",
-            self.degree_0, self.degree_1
-        ));
-
-        // roots of unity for supported_n and supported_m
-        let (omega_0, omega_1) = {
-            let omega = F::ROOT_OF_UNITY;
-            let omega_0 = omega.pow_vartime([(1 << F::S) / self.degree_0 as u64]);
-            let omega_1 = omega.pow_vartime([(1 << F::S) / self.degree_1 as u64]);
-
-            assert!(
-                omega_0.pow_vartime([self.degree_0 as u64]) == F::ONE,
-                "omega_0 is not root of unity for supported_n"
-            );
-            assert!(
-                omega_1.pow_vartime([self.degree_1 as u64]) == F::ONE,
-                "omega_1 is not root of unity for supported_m"
-            );
-            (omega_0, omega_1)
-        };
-        let powers_of_omega_0 = powers_of_field_elements(&omega_0, self.degree_0);
-        let powers_of_omega_1 = powers_of_field_elements(&omega_1, self.degree_1);
-
-        let mut res = vec![];
-        for omega_1_power in powers_of_omega_1.iter() {
-            for omega_0_power in powers_of_omega_0.iter() {
-                res.push(self.evaluate(omega_0_power, omega_1_power));
-            }
-        }
-        end_timer!(timer);
-        res
-    }
-
-    /// interpolate the polynomial over the roots via bi-variate FFT
-    pub fn interpolate(&self) -> Vec<F> {
-        let timer = start_timer!(|| format!(
-            "Lagrange coefficients of degree {} {}",
-            self.degree_0, self.degree_1
-        ));
-
-        let mut coeff = self.coefficients.clone();
-        bi_fft_in_place(&mut coeff, self.degree_0, self.degree_1);
-        end_timer!(timer);
-        coeff
-    }
-}
-
-/// For a point x, compute the coefficients of Lagrange polynomial L_{i}(x) at x, given the roots.
-/// `L_{i}(x) = \prod_{j \neq i} \frac{x - r_j}{r_i - r_j}`
-pub(crate) fn lagrange_coefficients<F: Field + Send + Sync>(roots: &[F], points: &F) -> Vec<F> {
-    roots
-        .par_iter()
-        .enumerate()
-        .map(|(i, _)| {
-            let mut numerator = F::ONE;
-            let mut denominator = F::ONE;
-            for j in 0..roots.len() {
-                if i == j {
-                    continue;
-                }
-                numerator *= roots[j] - points;
-                denominator *= roots[j] - roots[i];
-            }
-            numerator * denominator.invert().unwrap()
-        })
-        .collect()
-}
-
-/// Compute poly / (x-point) using univariate division
-pub(crate) fn univariate_quotient<F: PrimeField>(poly: &[F], point: &F) -> Vec<F> {
-    let timer = start_timer!(|| format!("Univariate quotient of degree {}", poly.len()));
-    let mut dividend_coeff = poly.to_vec();
-    let divisor = [-*point, F::from(1u64)];
-    let mut coefficients = vec![];
-
-    let mut dividend_pos = dividend_coeff.len() - 1;
-    let divisor_pos = divisor.len() - 1;
-    let mut difference = dividend_pos as isize - divisor_pos as isize;
-
-    while difference >= 0 {
-        let term_quotient = dividend_coeff[dividend_pos] * divisor[divisor_pos].invert().unwrap();
-        coefficients.push(term_quotient);
-
-        for i in (0..=divisor_pos).rev() {
-            let difference = difference as usize;
-            let x = divisor[i];
-            let y = x * term_quotient;
-            let z = dividend_coeff[difference + i];
-            dividend_coeff[difference + i] = z - y;
-        }
-
-        dividend_pos -= 1;
-        difference -= 1;
-    }
-    coefficients.reverse();
-    coefficients.push(F::from(0u64));
-    end_timer!(timer);
-    coefficients
-}
-
-impl<F: Field> BivariateLagrangePolynomial<F> {
-    fn new(coeffs: Vec<F>, degree_0: usize, degree_1: usize) -> Self {
-        assert_eq!(coeffs.len(), degree_0 * degree_1);
-        Self {
-            coefficients: coeffs,
-            degree_0,
-            degree_1,
-        }
-    }
-}
-
-impl<F: PrimeField> From<BivariatePolynomial<F>> for BivariateLagrangePolynomial<F> {
-    fn from(poly: BivariatePolynomial<F>) -> Self {
-        Self::from(&poly)
-    }
-}
-
-impl<F: PrimeField> From<&BivariatePolynomial<F>> for BivariateLagrangePolynomial<F> {
-    fn from(poly: &BivariatePolynomial<F>) -> Self {
-        let coeffs = poly.interpolate();
-        BivariateLagrangePolynomial::new(coeffs, poly.degree_0, poly.degree_1)
-    }
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
+pub struct BivariatePolynomial<F> {
+    pub coefficients: Vec<F>, // flat table; `new` asserts len == degree_0 * degree_1
+    pub degree_0: usize, // row width: `evaluate` reads chunks of this many entries
+    pub degree_1: usize, // number of rows implied by the length check in `new`
 }
 
-impl<F: PrimeField> BivariateLagrangePolynomial<F> {
-    /// construct a bivariate lagrange polynomial from a monomial f(y) = y - b
-    pub(crate) fn from_y_monomial(b: &F, n: usize, m: usize) -> Self {
-        // roots of unity for supported_n and supported_m
-        let omega_1 = {
-            let omega = F::ROOT_OF_UNITY;
-            omega.pow_vartime([(1 << F::S) / m as u64])
-        };
-        let mut coeffs = vec![F::ZERO; n * m];
-        for i in 0..m {
-            let element = omega_1.pow_vartime([i as u64]) - *b;
-            coeffs[i * n..(i + 1) * n].copy_from_slice(vec![element; n].as_slice());
-        }
-        BivariateLagrangePolynomial::new(coeffs, n, m)
-    }
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
+pub struct BivariateLagrangePolynomial<F> {
+    pub coefficients: Vec<F>, // flat table; `new` asserts len == degree_0 * degree_1
+    pub degree_0: usize, // row width: `evaluate` reads chunks of this many entries
+    pub degree_1: usize, // number of rows implied by the length check in `new`
 }
diff --git a/bi-kzg/src/poly/coef_form.rs b/bi-kzg/src/poly/coef_form.rs
new file mode 100644
index 00000000..aae4276a
--- /dev/null
+++ b/bi-kzg/src/poly/coef_form.rs
@@ -0,0 +1,117 @@
+use ark_std::{end_timer, start_timer};
+use halo2curves::ff::PrimeField;
+use itertools::Itertools;
+use rand::RngCore;
+
+use crate::bi_fft::bi_fft_in_place;
+use crate::util::powers_of_field_elements;
+
+use super::BivariatePolynomial;
+
+impl<F: PrimeField> BivariatePolynomial<F> {
+    #[inline]
+    pub fn new(coefficients: Vec<F>, degree_0: usize, degree_1: usize) -> Self {
+        assert_eq!(coefficients.len(), degree_0 * degree_1); // panics on a size mismatch
+        Self {
+            coefficients,
+            degree_0,
+            degree_1,
+        }
+    }
+
+    #[inline]
+    pub fn random(mut rng: impl RngCore, degree_0: usize, degree_1: usize) -> Self {
+        // One `F::random` draw per slot, in index order; `new` re-checks the length.
+        let mut sampled = Vec::new();
+        sampled.resize_with(degree_0 * degree_1, || F::random(&mut rng));
+        Self::new(sampled, degree_0, degree_1)
+    }
+
+    /// Evaluates the polynomial at the point `(x, y)`.
+    #[inline]
+    pub fn evaluate(&self, x: &F, y: &F) -> F {
+        let x_pows = powers_of_field_elements(x, self.degree_0);
+        let y_pows = powers_of_field_elements(y, self.degree_1);
+        let rows = self.coefficients.chunks_exact(self.degree_0);
+        // Each row is summed against the x powers, then scaled by its y power.
+        let mut total = F::ZERO;
+        for (row, y_pow) in rows.zip(y_pows.iter()) {
+            let mut row_sum = F::ZERO;
+            for (coeff, x_pow) in row.iter().zip(x_pows.iter()) {
+                row_sum += *coeff * *x_pow;
+            }
+            total += row_sum * y_pow;
+        }
+        total
+    }
+
+    /// Fixes `Y = y` and returns the resulting univariate polynomial in `X`.
+    #[inline]
+    pub fn evaluate_at_y(&self, y: &F) -> Vec<F> {
+        // The first row is copied unscaled; its power of y is skipped below.
+        let mut partial = self.coefficients[0..self.degree_0].to_vec();
+        let y_pows = powers_of_field_elements(y, self.degree_1);
+        let rows = self.coefficients.chunks_exact(self.degree_0);
+
+        // `zip_eq` panics if the number of rows and of y powers differ.
+        for (y_pow, row) in y_pows.iter().zip_eq(rows).skip(1) {
+            // partial[i] += row[i] * y_pow
+            for (acc, coeff) in partial.iter_mut().zip(row.iter()) {
+                *acc += *coeff * *y_pow;
+            }
+        }
+
+        partial
+    }
+
+    /// same as interpolate but slower: evaluates the polynomial point by point on the
+    /// grid spanned by the powers of `omega_0` (x direction) and `omega_1` (y direction).
+    #[inline]
+    pub fn evaluate_at_roots(&self) -> Vec<F> {
+        let timer = start_timer!(|| format!(
+            "Lagrange coefficients of degree {} {}",
+            self.degree_0, self.degree_1
+        ));
+
+        // Scale the exponent of `ROOT_OF_UNITY` down to each dimension.
+        let base_root = F::ROOT_OF_UNITY;
+        let omega_0 = base_root.pow_vartime([(1 << F::S) / self.degree_0 as u64]);
+        let omega_1 = base_root.pow_vartime([(1 << F::S) / self.degree_1 as u64]);
+
+        // Sanity checks: raising each root to its dimension must give one.
+        assert!(
+            omega_0.pow_vartime([self.degree_0 as u64]) == F::ONE,
+            "omega_0 is not root of unity for supported_n"
+        );
+        assert!(
+            omega_1.pow_vartime([self.degree_1 as u64]) == F::ONE,
+            "omega_1 is not root of unity for supported_m"
+        );
+
+        let x_points = powers_of_field_elements(&omega_0, self.degree_0);
+        let y_points = powers_of_field_elements(&omega_1, self.degree_1);
+
+        // Row-major output: y is the outer index, x the inner one.
+        let mut evals = vec![];
+        for y_i in y_points.iter() {
+            evals.extend(x_points.iter().map(|x_i| self.evaluate(x_i, y_i)));
+        }
+
+        end_timer!(timer);
+        evals
+    }
+
+    /// Interpolate the polynomial over the roots via bi-variate FFT; returns a new vector.
+    #[inline]
+    pub fn interpolate(&self) -> Vec<F> {
+        let timer = start_timer!(|| format!(
+            "Lagrange coefficients of degree {} {}",
+            self.degree_0, self.degree_1
+        ));
+
+        let mut coeff = self.coefficients.clone(); // work on a copy; `self` is not modified
+        bi_fft_in_place(&mut coeff, self.degree_0, self.degree_1); // transforms `coeff` in place
+        end_timer!(timer);
+        coeff
+    }
+}
diff --git a/bi-kzg/src/poly/lagrange_form.rs b/bi-kzg/src/poly/lagrange_form.rs
new file mode 100644
index 00000000..5e0dd155
--- /dev/null
+++ b/bi-kzg/src/poly/lagrange_form.rs
@@ -0,0 +1,102 @@
+use crate::{powers_of_field_elements, primitive_root_of_unity};
+
+use super::{lagrange_coefficients, BivariateLagrangePolynomial, BivariatePolynomial};
+use halo2curves::ff::{Field, PrimeField};
+use rand::RngCore;
+
+impl<F: Field> BivariateLagrangePolynomial<F> {
+    #[inline]
+    pub fn new(coeffs: Vec<F>, degree_0: usize, degree_1: usize) -> Self {
+        assert_eq!(coeffs.len(), degree_0 * degree_1); // panics on a size mismatch
+        Self {
+            coefficients: coeffs,
+            degree_0,
+            degree_1,
+        }
+    }
+
+    #[inline]
+    pub fn random(mut rng: impl RngCore, degree_0: usize, degree_1: usize) -> Self {
+        // One `F::random` draw per slot, in index order; `new` re-checks the length.
+        let mut sampled = Vec::new();
+        sampled.resize_with(degree_0 * degree_1, || F::random(&mut rng));
+        Self::new(sampled, degree_0, degree_1)
+    }
+}
+
+impl<F: PrimeField> From<BivariatePolynomial<F>> for BivariateLagrangePolynomial<F> {
+    #[inline]
+    fn from(poly: BivariatePolynomial<F>) -> Self {
+        Self::from(&poly) // delegate to the by-reference conversion
+    }
+}
+
+impl<F: PrimeField> From<&BivariatePolynomial<F>> for BivariateLagrangePolynomial<F> {
+    #[inline]
+    fn from(poly: &BivariatePolynomial<F>) -> Self {
+        let coeffs = poly.interpolate(); // bi-variate FFT of the coefficients
+        BivariateLagrangePolynomial::new(coeffs, poly.degree_0, poly.degree_1) // same dimensions
+    }
+}
+
+impl<F: PrimeField> BivariateLagrangePolynomial<F> {
+    /// Builds the `n` x `m` Lagrange-form table of `f(y) = y - b`: every entry of row `i`
+    /// is `omega_1^i - b`, with `omega_1 = ROOT_OF_UNITY^(2^S / m)`. Panics if `m == 0`.
+    #[inline]
+    pub fn from_y_monomial(b: &F, n: usize, m: usize) -> Self {
+        let omega_1 = F::ROOT_OF_UNITY.pow_vartime([(1 << F::S) / m as u64]);
+        let mut coeffs = vec![F::ZERO; n * m];
+        // Running power of omega_1, so no exponentiation is repeated per row.
+        let mut omega_1_pow = F::ONE;
+        for i in 0..m {
+            // `fill` writes the row in place without a temporary per-row vector.
+            coeffs[i * n..(i + 1) * n].fill(omega_1_pow - *b);
+            omega_1_pow *= omega_1;
+        }
+        BivariateLagrangePolynomial::new(coeffs, n, m)
+    }
+}
+
+impl<F: PrimeField> BivariateLagrangePolynomial<F> {
+    /// Evaluates the polynomial at `(x, y)` as the sum of every stored value times
+    /// the product of its x- and y-direction Lagrange weights.
+    #[inline]
+    pub fn evaluate(&self, x: &F, y: &F) -> F {
+        let omega_0 = primitive_root_of_unity::<F>(self.degree_0);
+        let omega_1 = primitive_root_of_unity::<F>(self.degree_1);
+        let x_nodes = powers_of_field_elements(&omega_0, self.degree_0);
+        let x_weights = lagrange_coefficients(&x_nodes, x);
+        let y_nodes = powers_of_field_elements(&omega_1, self.degree_1);
+        let y_weights = lagrange_coefficients(&y_nodes, y);
+
+        // Rows are `degree_0` wide; row `j` is paired with the j-th y weight.
+        let rows = self.coefficients.chunks_exact(self.degree_0);
+        let mut total = F::ZERO;
+        for (row, y_weight) in rows.zip(y_weights.iter()) {
+            let mut row_sum = F::ZERO;
+            for (value, x_weight) in row.iter().zip(x_weights.iter()) {
+                row_sum += *value * *x_weight;
+            }
+            total += row_sum * y_weight;
+        }
+        total
+    }
+
+    /// Fixes `Y = y`: sums the rows of the table, each scaled by its Lagrange weight at `y`.
+    #[inline]
+    pub fn evaluate_at_y(&self, y: &F) -> Vec<F> {
+        let omega_1 = primitive_root_of_unity::<F>(self.degree_1);
+        let powers_of_omega_1 = powers_of_field_elements(&omega_1, self.degree_1);
+        let lagrange_y = lagrange_coefficients(&powers_of_omega_1, y);
+
+        // One accumulator updated in place, rather than a fresh copy per row.
+        let rows = self.coefficients.chunks_exact(self.degree_0);
+        let mut acc = vec![F::ZERO; self.degree_0];
+        for (row, y_i) in rows.zip(lagrange_y.iter()) {
+            for (sum, value) in acc.iter_mut().zip(row.iter()) {
+                *sum += *value * y_i;
+            }
+        }
+        acc
+    }
+}
diff --git a/bi-kzg/src/poly/utils.rs b/bi-kzg/src/poly/utils.rs
new file mode 100644
index 00000000..a26c0565
--- /dev/null
+++ b/bi-kzg/src/poly/utils.rs
@@ -0,0 +1,55 @@
+use ark_std::{end_timer, start_timer};
+use halo2curves::ff::{Field, PrimeField};
+
+/// For a point x, compute the values L_{i}(x) of the Lagrange basis polynomials over `roots`.
+/// `L_{i}(x) = \prod_{j \neq i} \frac{x - r_j}{r_i - r_j}`
+pub fn lagrange_coefficients<F: Field + Send + Sync>(roots: &[F], points: &F) -> Vec<F> {
+    let mut weights = Vec::with_capacity(roots.len());
+    for (i, root_i) in roots.iter().enumerate() {
+        // Both products skip j == i; each factor has its sign flipped relative
+        // to the formula above, which cancels in the quotient.
+        let mut top = F::ONE;
+        let mut bottom = F::ONE;
+        for (j, root_j) in roots.iter().enumerate() {
+            if j != i {
+                top *= *root_j - points;
+                bottom *= *root_j - *root_i;
+            }
+        }
+        // `unwrap` panics if `bottom` is not invertible, e.g. for repeated roots.
+        weights.push(top * bottom.invert().unwrap());
+    }
+    weights
+}
+
+/// Compute poly / (x-point) using univariate division
+///
+/// `poly` lists coefficients from the constant term upwards (`poly[k]` multiplies `x^k`).
+/// The divisor `x - point` is monic, so the quotient is obtained by synthetic (Horner
+/// style) division without any field inversion: walking from the leading coefficient
+/// down, `carry = poly[k] + point * carry` is the quotient coefficient of `x^(k - 1)`.
+/// The remainder is discarded.
+///
+/// # Returns
+///
+/// The quotient coefficients in ascending order followed by a single zero, so the
+/// result has exactly `poly.len()` entries for a non-empty input.
+///
+/// A constant `poly` gives `[0]`. An empty `poly` also gives `[0]`; the length is clamped
+/// with `max(1)` so that no `len() - 1` arithmetic can underflow.
+pub fn univariate_quotient<F: PrimeField>(poly: &[F], point: &F) -> Vec<F> {
+    let timer = start_timer!(|| format!("Univariate quotient of degree {}", poly.len()));
+
+    // Same length as the input, but never empty; slots not written below stay zero.
+    let mut coefficients = vec![F::from(0u64); poly.len().max(1)];
+
+    // `skip(1)` leaves out the constant term: it only feeds the discarded remainder.
+    let mut carry = F::from(0u64);
+    for (k, coeff) in poly.iter().enumerate().skip(1).rev() {
+        carry = *coeff + carry * point;
+        coefficients[k - 1] = carry;
+    }
+
+    end_timer!(timer);
+    coefficients
+}
diff --git a/bi-kzg/src/structs.rs b/bi-kzg/src/structs.rs
index 0bc3eeaf..91932272 100644
--- a/bi-kzg/src/structs.rs
+++ b/bi-kzg/src/structs.rs
@@ -1,22 +1,8 @@
 use halo2curves::pairing::Engine;
 
-#[derive(Clone, Debug, Default, PartialEq, Eq)]
-pub struct BivariatePolynomial<F> {
-    pub coefficients: Vec<F>,
-    pub degree_0: usize,
-    pub degree_1: usize,
-}
-
-#[derive(Clone, Debug, Default, PartialEq, Eq)]
-pub struct BivariateLagrangePolynomial<F> {
-    pub coefficients: Vec<F>,
-    pub degree_0: usize,
-    pub degree_1: usize,
-}
-
 /// Structured reference string for Bi-KZG polynomial commitment scheme.
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
-pub struct BiKZGSRS<E: Engine> {
+pub struct CoefFormBiKZGSRS<E: Engine> {
     /// (g_1^{\tau_0^i\tau_1^j})_{i\in [0,N], j\in [0, M]} = \\
     /// (
     ///  g_1, g_1^{\tau_0}, g_1^{\tau_0^2}, ..., g_1^{\tau_0^N},
@@ -34,6 +20,23 @@ pub struct BiKZGSRS<E: Engine> {
     pub tau_1_h: E::G2Affine,
 }
 
+/// Structured reference string for the Lagrange-form Bi-KZG polynomial commitment scheme.
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
+pub struct LagrangeFormBiKZGSRS<E: Engine> {
+    /// The generator of G1; the verifier parameter copies it from here.
+    pub g: E::G1Affine,
+    /// g in lagrange form over omega_0: g scaled by each Lagrange coefficient taken at tau_0
+    pub powers_of_g_lagrange_over_x: Vec<E::G1Affine>,
+    /// g in lagrange form over omega_0 and omega_1 (presumably one entry per (i, j) pair)
+    pub powers_of_g_lagrange_over_both_roots: Vec<E::G1Affine>,
+    /// The generator of G2.
+    pub h: E::G2Affine,
+    /// tau_0 times the above generator of G2.
+    pub tau_0_h: E::G2Affine,
+    /// tau_1 times the above generator of G2.
+    pub tau_1_h: E::G2Affine,
+}
+
 /// `UnivariateVerifierParam` is used to check evaluation proofs for a given
 /// commitment.
 #[derive(Clone, Debug, Eq, PartialEq, Default)]
@@ -63,8 +66,8 @@ pub struct BiKZGProof<E: Engine> {
     pub pi1: E::G1Affine,
 }
 
-impl<E: Engine> From<&BiKZGSRS<E>> for BiKZGVerifierParam<E> {
-    fn from(srs: &BiKZGSRS<E>) -> Self {
+impl<E: Engine> From<&CoefFormBiKZGSRS<E>> for BiKZGVerifierParam<E> {
+    fn from(srs: &CoefFormBiKZGSRS<E>) -> Self {
         Self {
             g: srs.powers_of_g[0],
             h: srs.h,
@@ -73,3 +76,14 @@ impl<E: Engine> From<&BiKZGSRS<E>> for BiKZGVerifierParam<E> {
         }
     }
 }
+
+impl<E: Engine> From<&LagrangeFormBiKZGSRS<E>> for BiKZGVerifierParam<E> {
+    fn from(srs: &LagrangeFormBiKZGSRS<E>) -> Self {
+        Self {
+            g: srs.g,
+            h: srs.h,
+            tau_0_h: srs.tau_0_h,
+            tau_1_h: srs.tau_1_h,
+        }
+    }
+}
diff --git a/bi-kzg/src/tests.rs b/bi-kzg/src/tests.rs
index 4b20c041..23e61518 100644
--- a/bi-kzg/src/tests.rs
+++ b/bi-kzg/src/tests.rs
@@ -8,14 +8,16 @@ use crate::{
     bi_fft::bi_fft_in_place,
     coeff_form_bi_kzg::CoeffFormBiKZG,
     pcs::PolynomialCommitmentScheme,
-    poly::{lagrange_coefficients, univariate_quotient},
-    structs::BivariateLagrangePolynomial,
+    poly::{
+        lagrange_coefficients, univariate_quotient, BivariateLagrangePolynomial,
+        BivariatePolynomial,
+    },
     util::tensor_product_parallel,
-    BiKZGVerifierParam, BivariatePolynomial,
+    BiKZGVerifierParam, LagrangeFormBiKZG,
 };
 
 #[test]
-fn test_bi_kzg_single_pass() {
+fn test_coef_form_bi_kzg_single_pass() {
     let mut rng = test_rng();
     let n = 16;
     let m = 32;
@@ -40,7 +42,32 @@ fn test_bi_kzg_single_pass() {
 }
 
 #[test]
-fn test_bi_kzg_e2e() {
+fn test_lagrange_form_bi_kzg_single_pass() {
+    let mut rng = test_rng();
+    let n = 16;
+    let m = 32;
+
+    let srs = LagrangeFormBiKZG::<Bn256>::gen_srs_for_testing(&mut rng, n, m);
+    let vk = BiKZGVerifierParam::<Bn256>::from(&srs);
+
+    let poly = BivariateLagrangePolynomial::<Fr>::random(&mut rng, n, m);
+
+    let x = Fr::random(&mut rng);
+    let y = Fr::random(&mut rng);
+
+    let commit = LagrangeFormBiKZG::<Bn256>::commit(&srs, &poly);
+    let (proof, eval) = LagrangeFormBiKZG::<Bn256>::open(&srs, &poly, &(x, y));
+    assert!(CoeffFormBiKZG::<Bn256>::verify(
+        &vk,
+        &commit,
+        &(x, y),
+        &eval,
+        &proof
+    ));
+}
+
+#[test]
+fn test_coeff_form_bi_kzg_e2e() {
     let mut rng = test_rng();
     let n = 2;
     let m = 4;
@@ -60,7 +87,6 @@ fn test_bi_kzg_e2e() {
         n,
         m,
     );
-    // let poly = BivariatePolynomial::random(&mut rng, n, m);
 
     let x = Fr::from(9u64);
     let y = Fr::from(10u64);
@@ -102,6 +128,66 @@ fn test_bi_kzg_e2e() {
     }
 }
 
+#[test]
+fn test_lagrange_form_bi_kzg_e2e() {
+    let mut rng = test_rng();
+    let n = 2;
+    let m = 4;
+    let srs = LagrangeFormBiKZG::<Bn256>::gen_srs_for_testing(&mut rng, n, m);
+    let vk = BiKZGVerifierParam::<Bn256>::from(&srs);
+    let poly = BivariateLagrangePolynomial::new(
+        vec![
+            Fr::from(1u64),
+            Fr::from(2u64),
+            Fr::from(3u64),
+            Fr::from(4u64),
+            Fr::from(5u64),
+            Fr::from(6u64),
+            Fr::from(7u64),
+            Fr::from(8u64),
+        ],
+        n,
+        m,
+    );
+
+    let x = Fr::from(9u64);
+    let y = Fr::from(10u64);
+
+    let commit = LagrangeFormBiKZG::<Bn256>::commit(&srs, &poly);
+    let (proof, eval) = LagrangeFormBiKZG::<Bn256>::open(&srs, &poly, &(x, y));
+
+    assert!(LagrangeFormBiKZG::<Bn256>::verify(
+        &vk,
+        &commit,
+        &(x, y),
+        &eval,
+        &proof
+    ));
+
+    for n in [2, 4, 8, 16] {
+        for m in [2, 4, 8, 16] {
+            let srs = LagrangeFormBiKZG::<Bn256>::gen_srs_for_testing(&mut rng, n, m);
+            let vk = BiKZGVerifierParam::<Bn256>::from(&srs);
+            for _ in 0..10 {
+                let poly = BivariateLagrangePolynomial::<Fr>::random(&mut rng, n, m);
+
+                let x = Fr::random(&mut rng);
+                let y = Fr::random(&mut rng);
+
+                let commit = LagrangeFormBiKZG::<Bn256>::commit(&srs, &poly);
+                let (proof, eval) = LagrangeFormBiKZG::<Bn256>::open(&srs, &poly, &(x, y));
+                assert!(LagrangeFormBiKZG::<Bn256>::verify(
+                    &vk,
+                    &commit,
+                    &(x, y),
+                    &eval,
+                    &proof
+                ));
+            }
+        }
+    }
+}
+
 #[test]
 fn test_tensor_product() {
     let vec1 = vec![Fr::from(1u64), Fr::from(2u64), Fr::from(3u64)];
diff --git a/bi-kzg/src/util.rs b/bi-kzg/src/util.rs
index 49084e90..4d7d6b14 100644
--- a/bi-kzg/src/util.rs
+++ b/bi-kzg/src/util.rs
@@ -1,6 +1,16 @@
-use halo2curves::ff::Field;
+use halo2curves::ff::{Field, PrimeField};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 
+/// Returns a primitive `group_size`-th root of unity of `F`.
+/// Panics unless `group_size` is a power of two no larger than `2^F::S`.
+pub fn primitive_root_of_unity<F: PrimeField>(group_size: usize) -> F {
+    assert!(group_size.is_power_of_two(), "group_size must be a power of two");
+    let log_size = group_size.trailing_zeros();
+    assert!(log_size <= F::S, "group_size exceeds 2^F::S");
+    // ROOT_OF_UNITY has order 2^S, so its 2^(S - log_size)-th power has order group_size.
+    F::ROOT_OF_UNITY.pow_vartime([1u64 << (F::S - log_size)])
+}
+
 pub(crate) fn powers_of_field_elements<F: Field>(x: &F, n: usize) -> Vec<F> {
     let mut powers = vec![F::ONE];
     let mut cur = *x;
@@ -48,5 +58,10 @@ pub(crate) fn parallelize_internal<T: Send, F: Fn(&mut [T], usize) + Send + Sync
 }
 
 pub fn parallelize<T: Send, F: Fn(&mut [T], usize) + Send + Sync + Clone>(v: &mut [T], f: F) {
-    parallelize_internal(v, f);
+    if rayon::current_num_threads() == 1 {
+        // do not spawn a new thread
+        f(v, 0)
+    } else {
+        parallelize_internal(v, f);
+    }
 }
diff --git a/circuit/src/expander_circuit.rs b/circuit/src/expander_circuit.rs
index 0fcee1c1..9ab20ecf 100644
--- a/circuit/src/expander_circuit.rs
+++ b/circuit/src/expander_circuit.rs
@@ -1,11 +1,10 @@
-use std::fs;
 use std::io::Cursor;
+use std::{any::TypeId, fs};
 
-use arith::{Field, SimdField};
+use arith::{Field, FieldSerde, SimdField};
 use ark_std::test_rng;
 use config::GKRConfig;
 use transcript::Transcript;
-use transcript::TranscriptInstance;
 
 use crate::*;
 
@@ -279,11 +278,25 @@ impl<C: GKRConfig> Circuit<C> {
         self.rnd_coefs_identified = true;
     }
 
-    pub fn fill_rnd_coefs(&mut self, transcript: &mut TranscriptInstance<C::FiatShamirHashType>) {
+    pub fn fill_rnd_coefs<T: Transcript<C::ChallengeField>>(&mut self, transcript: &mut T) {
         assert!(self.rnd_coefs_identified);
-        for &rnd_coef_ptr in &self.rnd_coefs {
-            unsafe {
-                *rnd_coef_ptr = transcript.generate_challenge::<C::CircuitField>();
+
+        if TypeId::of::<C::ChallengeField>() == TypeId::of::<C::CircuitField>() {
+            for &rnd_coef_ptr in &self.rnd_coefs {
+                unsafe {
+                    *(rnd_coef_ptr as *mut C::ChallengeField) =
+                        transcript.generate_challenge_field_element();
+                }
+            }
+        } else {
+            let n_bytes_required = C::CircuitField::SIZE * self.rnd_coefs.len();
+            let challenge_bytes = transcript.generate_challenge_u8_slice(n_bytes_required);
+            let mut cursor = Cursor::new(challenge_bytes);
+
+            for &rnd_coef_ptr in &self.rnd_coefs {
+                unsafe {
+                    *rnd_coef_ptr = C::CircuitField::deserialize_from(&mut cursor).unwrap();
+                }
             }
         }
     }
diff --git a/circuit/src/serde.rs b/circuit/src/serde.rs
index d2e754dd..b43d2919 100644
--- a/circuit/src/serde.rs
+++ b/circuit/src/serde.rs
@@ -92,7 +92,7 @@ pub struct CustomGateWrapper<C: GKRConfig, const INPUT_NUM: usize> {
 impl<C: GKRConfig, const INPUT_NUM: usize> FromEccSerde for CustomGateWrapper<C, INPUT_NUM> {
     fn deserialize_from<R: Read>(mut reader: R) -> Self {
         let gate_type = <usize as FieldSerde>::deserialize_from(&mut reader).unwrap();
-        let i_ids: [usize; INPUT_NUM] = Vec::<usize>::deserialize_from(&mut reader)
+        let i_ids: [usize; INPUT_NUM] = <Vec<usize> as FromEccSerde>::deserialize_from(&mut reader)
             .try_into()
             .unwrap();
 
@@ -191,7 +191,7 @@ impl<C: GKRConfig> FromEccSerde for RecursiveCircuit<C> {
                 .unwrap(),
 
             segments: Vec::<Segment<C>>::deserialize_from(&mut reader),
-            layers: Vec::<usize>::deserialize_from(&mut reader),
+            layers: <Vec<usize> as FromEccSerde>::deserialize_from(&mut reader),
         }
     }
 }
diff --git a/config/src/gkr_config.rs b/config/src/gkr_config.rs
index 97edd3b0..6bac5a3e 100644
--- a/config/src/gkr_config.rs
+++ b/config/src/gkr_config.rs
@@ -1,4 +1,5 @@
 mod bn254_keccak;
+mod bn254_mimc;
 mod bn254_sha2;
 mod gf2_ext_keccak;
 mod gf2_ext_sha2;
@@ -9,9 +10,9 @@ use std::fmt::Debug;
 
 use arith::{ExtensionField, Field, FieldForECC, FieldSerde, SimdField};
 use ark_std::{end_timer, start_timer};
-use transcript::FiatShamirHash;
 
 pub use bn254_keccak::BN254ConfigKeccak;
+pub use bn254_mimc::BN254ConfigMIMC5;
 pub use bn254_sha2::BN254ConfigSha2;
 pub use gf2_ext_keccak::GF2ExtConfigKeccak;
 pub use gf2_ext_sha2::GF2ExtConfigSha2;
@@ -32,7 +33,7 @@ pub enum FiatShamirHashType {
     Keccak256,
     Poseidon,
     Animoe,
-    MIMC7,
+    MIMC5, // Note: use MIMC5 for bn254 ONLY
 }
 
 pub trait GKRConfig: Default + Debug + Clone + Send + Sync + 'static {
@@ -51,7 +52,7 @@ pub trait GKRConfig: Default + Debug + Clone + Send + Sync + 'static {
     type SimdCircuitField: SimdField<Scalar = Self::CircuitField> + FieldSerde + Send;
 
     /// Fiat Shamir hash type
-    type FiatShamirHashType: FiatShamirHash;
+    const FIAT_SHAMIR_HASH: FiatShamirHashType;
 
     /// Enum type for Self::Field
     const FIELD_TYPE: FieldType;
diff --git a/config/src/gkr_config/bn254_keccak.rs b/config/src/gkr_config/bn254_keccak.rs
index 04c9b1fa..d503f6cf 100644
--- a/config/src/gkr_config/bn254_keccak.rs
+++ b/config/src/gkr_config/bn254_keccak.rs
@@ -1,7 +1,6 @@
 use halo2curves::bn256::Fr;
-use transcript::Keccak256hasher;
 
-use super::{FieldType, GKRConfig};
+use super::{FiatShamirHashType, FieldType, GKRConfig};
 
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct BN254ConfigKeccak;
@@ -15,7 +14,7 @@ impl GKRConfig for BN254ConfigKeccak {
 
     type SimdCircuitField = Fr;
 
-    type FiatShamirHashType = Keccak256hasher;
+    const FIAT_SHAMIR_HASH: FiatShamirHashType = FiatShamirHashType::Keccak256;
 
     const FIELD_TYPE: FieldType = FieldType::BN254;
 
diff --git a/config/src/gkr_config/bn254_mimc.rs b/config/src/gkr_config/bn254_mimc.rs
new file mode 100644
index 00000000..e55586a5
--- /dev/null
+++ b/config/src/gkr_config/bn254_mimc.rs
@@ -0,0 +1,83 @@
+use halo2curves::bn256::Fr;
+
+use super::{FiatShamirHashType, FieldType, GKRConfig};
+
+#[derive(Debug, Clone, PartialEq, Default)]
+pub struct BN254ConfigMIMC5;
+
+impl GKRConfig for BN254ConfigMIMC5 {
+    type CircuitField = Fr;
+
+    type ChallengeField = Fr;
+
+    type Field = Fr;
+
+    type SimdCircuitField = Fr;
+
+    const FIAT_SHAMIR_HASH: FiatShamirHashType = FiatShamirHashType::MIMC5;
+
+    const FIELD_TYPE: FieldType = FieldType::BN254;
+
+    #[inline(always)]
+    fn challenge_mul_circuit_field(
+        a: &Self::ChallengeField,
+        b: &Self::CircuitField,
+    ) -> Self::ChallengeField {
+        a * b
+    }
+
+    #[inline(always)]
+    fn field_mul_circuit_field(a: &Self::Field, b: &Self::CircuitField) -> Self::Field {
+        a * b
+    }
+
+    #[inline(always)]
+    fn field_add_circuit_field(a: &Self::Field, b: &Self::CircuitField) -> Self::Field {
+        *a + *b
+    }
+
+    #[inline(always)]
+    fn field_add_simd_circuit_field(a: &Self::Field, b: &Self::SimdCircuitField) -> Self::Field {
+        a + b
+    }
+
+    #[inline(always)]
+    fn field_mul_simd_circuit_field(a: &Self::Field, b: &Self::SimdCircuitField) -> Self::Field {
+        a * b
+    }
+
+    #[inline(always)]
+    fn challenge_mul_field(a: &Self::ChallengeField, b: &Self::Field) -> Self::Field {
+        a * b
+    }
+
+    #[inline(always)]
+    fn circuit_field_into_field(a: &Self::CircuitField) -> Self::Field {
+        *a
+    }
+
+    #[inline(always)]
+    fn circuit_field_mul_simd_circuit_field(
+        a: &Self::CircuitField,
+        b: &Self::SimdCircuitField,
+    ) -> Self::SimdCircuitField {
+        *a * *b
+    }
+
+    #[inline(always)]
+    fn circuit_field_to_simd_circuit_field(a: &Self::CircuitField) -> Self::SimdCircuitField {
+        *a
+    }
+    #[inline(always)]
+    fn simd_circuit_field_into_field(a: &Self::SimdCircuitField) -> Self::Field {
+        *a
+    }
+
+    #[inline(always)]
+    fn simd_circuit_field_mul_challenge_field(
+        a: &Self::SimdCircuitField,
+        b: &Self::ChallengeField,
+    ) -> Self::Field {
+        *a * b
+    }
+}
diff --git a/config/src/gkr_config/bn254_sha2.rs b/config/src/gkr_config/bn254_sha2.rs
index 7684f86c..6b19e612 100644
--- a/config/src/gkr_config/bn254_sha2.rs
+++ b/config/src/gkr_config/bn254_sha2.rs
@@ -1,7 +1,6 @@
 use halo2curves::bn256::Fr;
-use transcript::SHA256hasher;
 
-use super::{FieldType, GKRConfig};
+use super::{FiatShamirHashType, FieldType, GKRConfig};
 
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct BN254ConfigSha2;
@@ -15,7 +14,7 @@ impl GKRConfig for BN254ConfigSha2 {
 
     type SimdCircuitField = Fr;
 
-    type FiatShamirHashType = SHA256hasher;
+    const FIAT_SHAMIR_HASH: FiatShamirHashType = FiatShamirHashType::SHA256;
 
     const FIELD_TYPE: FieldType = FieldType::BN254;
 
diff --git a/config/src/gkr_config/gf2_ext_keccak.rs b/config/src/gkr_config/gf2_ext_keccak.rs
index cecc4610..63ad06c7 100644
--- a/config/src/gkr_config/gf2_ext_keccak.rs
+++ b/config/src/gkr_config/gf2_ext_keccak.rs
@@ -1,9 +1,8 @@
 use arith::ExtensionField;
 use gf2::{GF2x8, GF2};
 use gf2_128::{GF2_128x8, GF2_128};
-use transcript::Keccak256hasher;
 
-use super::{FieldType, GKRConfig};
+use super::{FiatShamirHashType, FieldType, GKRConfig};
 
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct GF2ExtConfigKeccak;
@@ -17,7 +16,7 @@ impl GKRConfig for GF2ExtConfigKeccak {
 
     type Field = GF2_128x8;
 
-    type FiatShamirHashType = Keccak256hasher;
+    const FIAT_SHAMIR_HASH: FiatShamirHashType = FiatShamirHashType::Keccak256;
 
     const FIELD_TYPE: FieldType = FieldType::GF2;
 
diff --git a/config/src/gkr_config/gf2_ext_sha2.rs b/config/src/gkr_config/gf2_ext_sha2.rs
index fc7dfd6c..5fcdba37 100644
--- a/config/src/gkr_config/gf2_ext_sha2.rs
+++ b/config/src/gkr_config/gf2_ext_sha2.rs
@@ -1,9 +1,8 @@
 use arith::ExtensionField;
 use gf2::{GF2x8, GF2};
 use gf2_128::{GF2_128x8, GF2_128};
-use transcript::SHA256hasher;
 
-use super::{FieldType, GKRConfig};
+use super::{FiatShamirHashType, FieldType, GKRConfig};
 
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct GF2ExtConfigSha2;
@@ -17,7 +16,7 @@ impl GKRConfig for GF2ExtConfigSha2 {
 
     type Field = GF2_128x8;
 
-    type FiatShamirHashType = SHA256hasher;
+    const FIAT_SHAMIR_HASH: FiatShamirHashType = FiatShamirHashType::SHA256;
 
     const FIELD_TYPE: FieldType = FieldType::GF2;
 
diff --git a/config/src/gkr_config/m31_ext_keccak.rs b/config/src/gkr_config/m31_ext_keccak.rs
index 0b698df1..59979833 100644
--- a/config/src/gkr_config/m31_ext_keccak.rs
+++ b/config/src/gkr_config/m31_ext_keccak.rs
@@ -1,8 +1,7 @@
 use arith::ExtensionField;
 use mersenne31::{M31Ext3, M31Ext3x16, M31x16, M31};
-use transcript::Keccak256hasher;
 
-use super::{FieldType, GKRConfig};
+use super::{FiatShamirHashType, FieldType, GKRConfig};
 
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct M31ExtConfigKeccak;
@@ -16,7 +15,7 @@ impl GKRConfig for M31ExtConfigKeccak {
 
     type Field = M31Ext3x16;
 
-    type FiatShamirHashType = Keccak256hasher;
+    const FIAT_SHAMIR_HASH: FiatShamirHashType = FiatShamirHashType::Keccak256;
 
     const FIELD_TYPE: FieldType = FieldType::M31;
 
diff --git a/config/src/gkr_config/m31_ext_sha2.rs b/config/src/gkr_config/m31_ext_sha2.rs
index b17da02a..a38b5772 100644
--- a/config/src/gkr_config/m31_ext_sha2.rs
+++ b/config/src/gkr_config/m31_ext_sha2.rs
@@ -1,8 +1,7 @@
 use arith::ExtensionField;
 use mersenne31::{M31Ext3, M31Ext3x16, M31x16, M31};
-use transcript::SHA256hasher;
 
-use super::{FieldType, GKRConfig};
+use super::{FiatShamirHashType, FieldType, GKRConfig};
 
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct M31ExtConfigSha2;
@@ -16,7 +15,7 @@ impl GKRConfig for M31ExtConfigSha2 {
 
     type Field = M31Ext3x16;
 
-    type FiatShamirHashType = SHA256hasher;
+    const FIAT_SHAMIR_HASH: FiatShamirHashType = FiatShamirHashType::SHA256;
 
     const FIELD_TYPE: FieldType = FieldType::M31;
 
diff --git a/config/src/mpi_config.rs b/config/src/mpi_config.rs
index d7345b23..5eb21912 100644
--- a/config/src/mpi_config.rs
+++ b/config/src/mpi_config.rs
@@ -7,7 +7,7 @@ use mpi::{
     topology::{Process, SimpleCommunicator},
     traits::*,
 };
-use transcript::{FiatShamirHash, Transcript, TranscriptInstance};
+use transcript::Transcript;
 
 #[macro_export]
 macro_rules! root_println {
@@ -284,27 +284,31 @@ impl MPIConfig {
     }
 
     /// broadcast root transcript state. incurs an additional hash if self.world_size > 1
-    #[inline]
-    pub fn transcript_sync_up<H: FiatShamirHash>(&self, transcript: &mut TranscriptInstance<H>) {
+    pub fn transcript_sync_up<F, T>(&self, transcript: &mut T)
+    where
+        F: Field + FieldSerde,
+        T: Transcript<F>,
+    {
         if self.world_size == 1 {
         } else {
-            transcript.hash_to_digest();
-            self.root_process().broadcast_into(&mut transcript.digest);
+            let mut state = transcript.hash_and_return_state();
+            self.root_process().broadcast_into(&mut state);
+            transcript.set_state(&state);
         }
     }
 
     /// Transcript IO for MPI
     #[inline]
-    pub fn transcript_io<F, H>(&self, ps: &[F], transcript: &mut TranscriptInstance<H>) -> F
+    pub fn transcript_io<F, T>(&self, ps: &[F], transcript: &mut T) -> F
     where
         F: Field + FieldSerde,
-        H: FiatShamirHash,
+        T: Transcript<F>,
     {
         assert!(ps.len() == 3 || ps.len() == 4); // 3 for x, y; 4 for simd var
         for p in ps {
-            transcript.append_field_element::<F>(p);
+            transcript.append_field_element(p);
         }
-        let mut r = transcript.generate_challenge::<F>();
+        let mut r = transcript.generate_challenge_field_element();
         self.root_broadcast(&mut r);
         r
     }
diff --git a/gkr/benches/gkr_hashes.rs b/gkr/benches/gkr_hashes.rs
index dc365921..16714f37 100644
--- a/gkr/benches/gkr_hashes.rs
+++ b/gkr/benches/gkr_hashes.rs
@@ -23,11 +23,13 @@ fn benchmark_setup<C: GKRConfig>(
 ) -> (Config<C>, Circuit<C>) {
     let config = Config::<C>::new(scheme, MPIConfig::new());
     let mut circuit = Circuit::<C>::load_circuit(circuit_file);
-    if witness_file.is_some() {
-        circuit.load_witness_file(witness_file.unwrap());
+
+    if let Some(witness_file) = witness_file {
+        circuit.load_witness_file(witness_file);
     } else {
         circuit.set_random_input_for_test();
     }
+
     (config, circuit)
 }
 
diff --git a/gkr/data/circuit_bn254.txt b/gkr/data/circuit_bn254.txt
deleted file mode 100644
index a0079dbf..00000000
Binary files a/gkr/data/circuit_bn254.txt and /dev/null differ
diff --git a/gkr/data/circuit_gf2.txt b/gkr/data/circuit_gf2.txt
deleted file mode 100644
index 1dcb7a0b..00000000
Binary files a/gkr/data/circuit_gf2.txt and /dev/null differ
diff --git a/gkr/data/circuit_m31.txt b/gkr/data/circuit_m31.txt
deleted file mode 100644
index ea5d9403..00000000
Binary files a/gkr/data/circuit_m31.txt and /dev/null differ
diff --git a/gkr/data/poseidon_120_circuit_bn254.txt b/gkr/data/poseidon_120_circuit_bn254.txt
deleted file mode 100644
index 5cb75176..00000000
Binary files a/gkr/data/poseidon_120_circuit_bn254.txt and /dev/null differ
diff --git a/gkr/data/poseidon_120_circuit_m31.txt b/gkr/data/poseidon_120_circuit_m31.txt
deleted file mode 100644
index 0e12b10a..00000000
Binary files a/gkr/data/poseidon_120_circuit_m31.txt and /dev/null differ
diff --git a/gkr/data/witness_bn254.txt b/gkr/data/witness_bn254.txt
deleted file mode 100644
index 5383a4e1..00000000
Binary files a/gkr/data/witness_bn254.txt and /dev/null differ
diff --git a/gkr/data/witness_gf2.txt b/gkr/data/witness_gf2.txt
deleted file mode 100644
index 6a0566e8..00000000
Binary files a/gkr/data/witness_gf2.txt and /dev/null differ
diff --git a/gkr/data/witness_m31.txt b/gkr/data/witness_m31.txt
deleted file mode 100644
index 2dc6d6a1..00000000
Binary files a/gkr/data/witness_m31.txt and /dev/null differ
diff --git a/gkr/src/exec.rs b/gkr/src/exec.rs
index e40a4ecb..0008e88d 100644
--- a/gkr/src/exec.rs
+++ b/gkr/src/exec.rs
@@ -8,7 +8,7 @@ use std::{
 use arith::{Field, FieldSerde, FieldSerdeError};
 use circuit::Circuit;
 use config::{
-    BN254ConfigSha2, Config, FieldType, GF2ExtConfigSha2, GKRConfig, GKRScheme, M31ExtConfigSha2,
+    BN254ConfigMIMC5, Config, FieldType, GF2ExtConfigSha2, GKRConfig, GKRScheme, M31ExtConfigSha2,
     MPIConfig, SENTINEL_BN254, SENTINEL_GF2, SENTINEL_M31,
 };
 use log::{debug, info};
@@ -65,19 +65,33 @@ async fn run_command<'a, C: GKRConfig>(
             let output_file = &args[4];
             let mut circuit = Circuit::<C>::load_circuit(circuit_file);
             circuit.load_witness_file(witness_file);
-            circuit.evaluate();
             let mut prover = gkr::Prover::new(&config);
             prover.prepare_mem(&circuit);
             let (claimed_v, proof) = prover.prove(&mut circuit);
-            let bytes =
-                dump_proof_and_claimed_v(&proof, &claimed_v).expect("Unable to serialize proof.");
-            fs::write(output_file, bytes).expect("Unable to write proof to file.");
+
+            if config.mpi_config.is_root() {
+                let bytes = dump_proof_and_claimed_v(&proof, &claimed_v)
+                    .expect("Unable to serialize proof.");
+                fs::write(output_file, bytes).expect("Unable to write proof to file.");
+            }
         }
         "verify" => {
             let witness_file = &args[3];
             let output_file = &args[4];
             let mut circuit = Circuit::<C>::load_circuit(circuit_file);
             circuit.load_witness_file(witness_file);
+
+            // Repeating the same public input for mpi_size times
+            // TODO: Fix this, use real input
+            if args.len() > 5 {
+                let mpi_size = args[5].parse::<i32>().unwrap();
+                let n_public_input_per_mpi = circuit.public_input.len();
+                for _ in 1..mpi_size {
+                    circuit
+                        .public_input
+                        .append(&mut circuit.public_input[..n_public_input_per_mpi].to_owned());
+                }
+            }
             let bytes = fs::read(output_file).expect("Unable to read proof from file.");
             let (proof, claimed_v) =
                 load_proof_and_claimed_v(&bytes).expect("Unable to deserialize proof.");
@@ -116,7 +130,6 @@ async fn run_command<'a, C: GKRConfig>(
                         let mut circuit = circuit.lock().unwrap();
                         let mut prover = prover.lock().unwrap();
                         circuit.load_witness_bytes(&witness_bytes, true);
-                        circuit.evaluate();
                         let (claimed_v, proof) = prover.prove(&mut circuit);
                         reply::with_status(
                             dump_proof_and_claimed_v(&proof, &claimed_v).unwrap(),
@@ -169,17 +182,17 @@ async fn run_command<'a, C: GKRConfig>(
 async fn main() {
     // examples:
     // expander-exec prove <input:circuit_file> <input:witness_file> <output:proof>
-    // expander-exec verify <input:circuit_file> <input:witness_file> <input:proof>
+    // expander-exec verify <input:circuit_file> <input:witness_file> <input:proof> <input:mpi_size>
     // expander-exec serve <input:circuit_file> <input:ip> <input:port>
-    let mpi_config = MPIConfig::new();
+    let mut mpi_config = MPIConfig::new();
 
     let args = std::env::args().collect::<Vec<String>>();
-    if args.len() < 4 {
+    if args.len() < 5 {
         println!(
             "Usage: expander-exec prove <input:circuit_file> <input:witness_file> <output:proof>"
         );
         println!(
-            "Usage: expander-exec verify <input:circuit_file> <input:witness_file> <input:proof>"
+            "Usage: expander-exec verify <input:circuit_file> <input:witness_file> <input:proof> <input:mpi_size>"
         );
         println!("Usage: expander-exec serve <input:circuit_file> <input:host> <input:port>");
         return;
@@ -189,6 +202,12 @@ async fn main() {
         println!("Invalid command.");
         return;
     }
+
+    if command == "verify" && args.len() > 5 {
+        assert!(mpi_config.world_size == 1); // verifier should not be run with mpiexec
+        mpi_config.world_size = args[5].parse::<i32>().expect("Parsing mpi size fails");
+    }
+
     let circuit_file = &args[2];
     let field_type = detect_field_type_from_circuit_file(circuit_file);
     debug!("field type: {:?}", field_type);
@@ -203,10 +222,10 @@ async fn main() {
             .await;
         }
         FieldType::BN254 => {
-            run_command::<BN254ConfigSha2>(
+            run_command::<BN254ConfigMIMC5>(
                 command,
                 circuit_file,
-                Config::<BN254ConfigSha2>::new(GKRScheme::Vanilla, mpi_config.clone()),
+                Config::<BN254ConfigMIMC5>::new(GKRScheme::Vanilla, mpi_config.clone()),
                 &args,
             )
             .await;
diff --git a/gkr/src/prover/gkr.rs b/gkr/src/prover/gkr.rs
index d1d6cfdb..92b0ba9d 100644
--- a/gkr/src/prover/gkr.rs
+++ b/gkr/src/prover/gkr.rs
@@ -6,14 +6,14 @@ use circuit::Circuit;
 use config::{GKRConfig, MPIConfig};
 use polynomials::MultiLinearPoly;
 use sumcheck::{sumcheck_prove_gkr_layer, ProverScratchPad};
-use transcript::{Transcript, TranscriptInstance};
+use transcript::Transcript;
 
 // FIXME
 #[allow(clippy::type_complexity)]
-pub fn gkr_prove<C: GKRConfig>(
+pub fn gkr_prove<C: GKRConfig, T: Transcript<C::ChallengeField>>(
     circuit: &Circuit<C>,
     sp: &mut ProverScratchPad<C>,
-    transcript: &mut TranscriptInstance<C::FiatShamirHashType>,
+    transcript: &mut T,
     mpi_config: &MPIConfig,
 ) -> (
     C::ChallengeField,
@@ -30,19 +30,18 @@ pub fn gkr_prove<C: GKRConfig>(
     let mut r_simd = vec![];
     let mut r_mpi = vec![];
     for _ in 0..circuit.layers.last().unwrap().output_var_num {
-        rz0.push(transcript.generate_challenge::<C::ChallengeField>());
+        rz0.push(transcript.generate_challenge_field_element());
     }
 
     for _ in 0..C::get_field_pack_size().trailing_zeros() {
-        r_simd.push(transcript.generate_challenge::<C::ChallengeField>());
+        r_simd.push(transcript.generate_challenge_field_element());
     }
 
     for _ in 0..mpi_config.world_size().trailing_zeros() {
-        r_mpi.push(transcript.generate_challenge::<C::ChallengeField>());
+        r_mpi.push(transcript.generate_challenge_field_element());
     }
 
-    let mut alpha = C::ChallengeField::one();
-    let mut beta = None;
+    let mut alpha = None;
 
     let output_vals = &circuit.layers.last().unwrap().output_vals;
 
@@ -75,22 +74,19 @@ pub fn gkr_prove<C: GKRConfig>(
             &r_simd,
             &r_mpi,
             alpha,
-            beta,
             transcript,
             sp,
             mpi_config,
+            i == layer_num - 1,
         );
-        alpha = transcript.generate_challenge::<C::ChallengeField>();
-
-        mpi_config.root_broadcast(&mut alpha);
 
         if rz1.is_some() {
             // TODO: try broadcast beta.unwrap directly
-            let mut tmp = transcript.generate_challenge::<C::ChallengeField>();
+            let mut tmp = transcript.generate_challenge_field_element();
             mpi_config.root_broadcast(&mut tmp);
-            beta = Some(tmp)
+            alpha = Some(tmp)
         } else {
-            beta = None;
+            alpha = None;
         }
     }
 
diff --git a/gkr/src/prover/gkr_square.rs b/gkr/src/prover/gkr_square.rs
index 0a8a41f2..a289fd07 100644
--- a/gkr/src/prover/gkr_square.rs
+++ b/gkr/src/prover/gkr_square.rs
@@ -5,19 +5,19 @@ use ark_std::{end_timer, start_timer};
 use circuit::Circuit;
 use config::GKRConfig;
 use sumcheck::{sumcheck_prove_gkr_square_layer, ProverScratchPad};
-use transcript::{Transcript, TranscriptInstance};
+use transcript::Transcript;
 
-pub fn gkr_square_prove<C: GKRConfig>(
+pub fn gkr_square_prove<C: GKRConfig, T: Transcript<C::ChallengeField>>(
     circuit: &Circuit<C>,
     sp: &mut ProverScratchPad<C>,
-    transcript: &mut TranscriptInstance<C::FiatShamirHashType>,
+    transcript: &mut T,
 ) -> (C::Field, Vec<C::ChallengeField>) {
     let timer = start_timer!(|| "gkr^2 prove");
     let layer_num = circuit.layers.len();
 
     let mut rz0 = vec![];
     for _i in 0..circuit.layers.last().unwrap().output_var_num {
-        rz0.push(transcript.generate_challenge::<C::ChallengeField>());
+        rz0.push(transcript.generate_challenge_field_element());
     }
 
     let circuit_output = &circuit.layers.last().unwrap().output_vals;
diff --git a/gkr/src/prover/linear_gkr.rs b/gkr/src/prover/linear_gkr.rs
index 5d4ebf29..286f5e9c 100644
--- a/gkr/src/prover/linear_gkr.rs
+++ b/gkr/src/prover/linear_gkr.rs
@@ -2,19 +2,21 @@
 
 use ark_std::{end_timer, start_timer};
 use circuit::Circuit;
-use config::{Config, GKRConfig, GKRScheme, PolynomialCommitmentType};
+use config::{Config, FiatShamirHashType, GKRConfig, GKRScheme, PolynomialCommitmentType};
 use sumcheck::ProverScratchPad;
-use transcript::{Proof, Transcript, TranscriptInstance};
+use transcript::{
+    BytesHashTranscript, FieldHashTranscript, Keccak256hasher, MIMCHasher, Proof, SHA256hasher,
+    Transcript,
+};
 
 use crate::{gkr_prove, gkr_square_prove, RawCommitment};
 
 #[cfg(feature = "grinding")]
-pub(crate) fn grind<C: GKRConfig>(
-    transcript: &mut transcript::TranscriptInstance<C::FiatShamirHashType>,
+pub(crate) fn grind<C: GKRConfig, T: Transcript<C::ChallengeField>>(
+    transcript: &mut T,
     config: &Config<C>,
 ) {
     use arith::{Field, FieldSerde};
-    use transcript::FiatShamirHash;
 
     let timer = start_timer!(|| format!("grind {} bits", config.grinding_bits));
 
@@ -23,8 +25,7 @@ pub(crate) fn grind<C: GKRConfig>(
     // ceil(32/field_size)
     let num_field_elements = (31 + C::ChallengeField::SIZE) / C::ChallengeField::SIZE;
 
-    let initial_hash =
-        transcript.generate_challenge_vector::<C::ChallengeField>(num_field_elements);
+    let initial_hash = transcript.generate_challenge_field_elements(num_field_elements);
     initial_hash
         .iter()
         .for_each(|h| h.serialize_into(&mut hash_bytes).unwrap()); // TODO: error propagation
@@ -32,10 +33,13 @@ pub(crate) fn grind<C: GKRConfig>(
     assert!(hash_bytes.len() >= 32, "hash len: {}", hash_bytes.len());
     hash_bytes.truncate(32);
 
+    transcript.lock_proof();
     for _ in 0..(1 << config.grinding_bits) {
-        C::FiatShamirHashType::hash_inplace(&mut hash_bytes);
+        transcript.append_u8_slice(&hash_bytes);
+        hash_bytes = transcript.generate_challenge_u8_slice(32);
     }
     transcript.append_u8_slice(&hash_bytes[..32]);
+    transcript.unlock_proof();
     end_timer!(timer);
 }
 
@@ -77,7 +81,14 @@ impl<C: GKRConfig> Prover<C> {
         );
     }
 
-    pub fn prove(&mut self, c: &mut Circuit<C>) -> (C::ChallengeField, Proof) {
+    fn prove_internal<T>(
+        &mut self,
+        c: &mut Circuit<C>,
+        transcript: &mut T,
+    ) -> (C::ChallengeField, Proof)
+    where
+        T: Transcript<C::ChallengeField>,
+    {
         let timer = start_timer!(|| "prove");
 
         // PC commit
@@ -86,15 +97,14 @@ impl<C: GKRConfig> Prover<C> {
 
         let mut buffer = vec![];
         commitment.serialize_into(&mut buffer).unwrap(); // TODO: error propagation
-        let mut transcript = TranscriptInstance::new();
         transcript.append_u8_slice(&buffer);
 
-        self.config.mpi_config.transcript_sync_up(&mut transcript);
+        self.config.mpi_config.transcript_sync_up(transcript);
 
         #[cfg(feature = "grinding")]
-        grind::<C>(&mut transcript, &self.config);
+        grind::<C, T>(transcript, &self.config);
 
-        c.fill_rnd_coefs(&mut transcript);
+        c.fill_rnd_coefs(transcript);
         c.evaluate();
 
         let mut claimed_v = C::ChallengeField::default();
@@ -104,10 +114,10 @@ impl<C: GKRConfig> Prover<C> {
         let mut _rmpi = vec![];
 
         if self.config.gkr_scheme == GKRScheme::GkrSquare {
-            (_, _rx) = gkr_square_prove(c, &mut self.sp, &mut transcript);
+            (_, _rx) = gkr_square_prove(c, &mut self.sp, transcript);
         } else {
             (claimed_v, _rx, _ry, _rsimd, _rmpi) =
-                gkr_prove(c, &mut self.sp, &mut transcript, &self.config.mpi_config);
+                gkr_prove(c, &mut self.sp, transcript, &self.config.mpi_config);
         }
 
         // open
@@ -117,7 +127,36 @@ impl<C: GKRConfig> Prover<C> {
             }
             _ => todo!(),
         }
+
         end_timer!(timer);
-        (claimed_v, transcript.proof)
+
+        (claimed_v, transcript.finalize_and_get_proof())
+    }
+
+    /// Proves circuit `c` and returns the `(claimed_v, proof)` pair.
+    ///
+    /// Instantiates the transcript selected by `C::FIAT_SHAMIR_HASH` and returns
+    /// the result of `prove_internal` run with it.
+    ///
+    /// # Panics
+    /// Panics if `C::FIAT_SHAMIR_HASH` is not `Keccak256`, `SHA256` or `MIMC5`.
+    pub fn prove(&mut self, c: &mut Circuit<C>) -> (C::ChallengeField, Proof) {
+        match C::FIAT_SHAMIR_HASH {
+            FiatShamirHashType::Keccak256 => {
+                let mut transcript =
+                    BytesHashTranscript::<C::ChallengeField, Keccak256hasher>::new();
+                self.prove_internal(c, &mut transcript)
+            }
+            FiatShamirHashType::SHA256 => {
+                let mut transcript = BytesHashTranscript::<C::ChallengeField, SHA256hasher>::new();
+                self.prove_internal(c, &mut transcript)
+            }
+            FiatShamirHashType::MIMC5 => {
+                let mut transcript =
+                    FieldHashTranscript::<C::ChallengeField, MIMCHasher<C::ChallengeField>>::new();
+                self.prove_internal(c, &mut transcript)
+            }
+            _ => unreachable!(),
+        }
     }
 }
diff --git a/gkr/src/tests/gkr_correctness.rs b/gkr/src/tests/gkr_correctness.rs
index 6d157107..5f7007d5 100644
--- a/gkr/src/tests/gkr_correctness.rs
+++ b/gkr/src/tests/gkr_correctness.rs
@@ -1,12 +1,14 @@
-use std::panic;
+use std::io::Write;
 use std::panic::AssertUnwindSafe;
 use std::time::Instant;
+use std::{fs, panic};
 
-use arith::Field;
+use arith::{Field, FieldSerde};
 use circuit::Circuit;
 use config::{
-    root_println, BN254ConfigKeccak, BN254ConfigSha2, Config, FieldType, GF2ExtConfigKeccak,
-    GF2ExtConfigSha2, GKRConfig, GKRScheme, M31ExtConfigKeccak, M31ExtConfigSha2, MPIConfig,
+    root_println, BN254ConfigKeccak, BN254ConfigMIMC5, BN254ConfigSha2, Config, FieldType,
+    GF2ExtConfigKeccak, GF2ExtConfigSha2, GKRConfig, GKRScheme, M31ExtConfigKeccak,
+    M31ExtConfigSha2, MPIConfig,
 };
 use rand::Rng;
 use sha2::Digest;
@@ -17,36 +19,40 @@ use crate::{utils::*, Prover, Verifier};
 fn test_gkr_correctness() {
     let mpi_config = MPIConfig::new();
 
-    test_gkr_correctness_helper::<GF2ExtConfigSha2>(&Config::<GF2ExtConfigSha2>::new(
-        GKRScheme::Vanilla,
-        mpi_config.clone(),
-    ));
-    test_gkr_correctness_helper::<GF2ExtConfigKeccak>(&Config::<GF2ExtConfigKeccak>::new(
-        GKRScheme::Vanilla,
-        mpi_config.clone(),
-    ));
-    test_gkr_correctness_helper::<M31ExtConfigSha2>(&Config::<M31ExtConfigSha2>::new(
-        GKRScheme::Vanilla,
-        mpi_config.clone(),
-    ));
-    test_gkr_correctness_helper::<M31ExtConfigKeccak>(&Config::<M31ExtConfigKeccak>::new(
-        GKRScheme::Vanilla,
-        mpi_config.clone(),
-    ));
-    test_gkr_correctness_helper::<BN254ConfigSha2>(&Config::<BN254ConfigSha2>::new(
-        GKRScheme::Vanilla,
-        mpi_config.clone(),
-    ));
-    test_gkr_correctness_helper::<BN254ConfigKeccak>(&Config::<BN254ConfigKeccak>::new(
-        GKRScheme::Vanilla,
-        mpi_config.clone(),
-    ));
+    test_gkr_correctness_helper::<GF2ExtConfigSha2>(
+        &Config::<GF2ExtConfigSha2>::new(GKRScheme::Vanilla, mpi_config.clone()),
+        None,
+    );
+    test_gkr_correctness_helper::<GF2ExtConfigKeccak>(
+        &Config::<GF2ExtConfigKeccak>::new(GKRScheme::Vanilla, mpi_config.clone()),
+        None,
+    );
+    test_gkr_correctness_helper::<M31ExtConfigSha2>(
+        &Config::<M31ExtConfigSha2>::new(GKRScheme::Vanilla, mpi_config.clone()),
+        None,
+    );
+    test_gkr_correctness_helper::<M31ExtConfigKeccak>(
+        &Config::<M31ExtConfigKeccak>::new(GKRScheme::Vanilla, mpi_config.clone()),
+        None,
+    );
+    test_gkr_correctness_helper::<BN254ConfigSha2>(
+        &Config::<BN254ConfigSha2>::new(GKRScheme::Vanilla, mpi_config.clone()),
+        None,
+    );
+    test_gkr_correctness_helper::<BN254ConfigKeccak>(
+        &Config::<BN254ConfigKeccak>::new(GKRScheme::Vanilla, mpi_config.clone()),
+        None,
+    );
+    test_gkr_correctness_helper::<BN254ConfigMIMC5>(
+        &Config::<BN254ConfigMIMC5>::new(GKRScheme::Vanilla, mpi_config.clone()),
+        Some("../data/gkr_proof.txt"),
+    );
 
     MPIConfig::finalize();
 }
 
 #[allow(unreachable_patterns)]
-fn test_gkr_correctness_helper<C: GKRConfig>(config: &Config<C>) {
+fn test_gkr_correctness_helper<C: GKRConfig>(config: &Config<C>, write_proof_to: Option<&str>) {
     root_println!(config.mpi_config, "============== start ===============");
     root_println!(config.mpi_config, "Field Type: {:?}", C::FIELD_TYPE);
     let circuit_copy_size: usize = match C::FIELD_TYPE {
@@ -63,21 +69,21 @@ fn test_gkr_correctness_helper<C: GKRConfig>(config: &Config<C>) {
     root_println!(config.mpi_config, "Config created.");
 
     let circuit_path = match C::FIELD_TYPE {
-        FieldType::GF2 => KECCAK_GF2_CIRCUIT,
-        FieldType::M31 => KECCAK_M31_CIRCUIT,
-        FieldType::BN254 => KECCAK_BN254_CIRCUIT,
+        FieldType::GF2 => "../".to_owned() + KECCAK_GF2_CIRCUIT,
+        FieldType::M31 => "../".to_owned() + KECCAK_M31_CIRCUIT,
+        FieldType::BN254 => "../".to_owned() + KECCAK_BN254_CIRCUIT,
         _ => unreachable!(),
     };
-    let mut circuit = Circuit::<C>::load_circuit(circuit_path);
+    let mut circuit = Circuit::<C>::load_circuit(&circuit_path);
     root_println!(config.mpi_config, "Circuit loaded.");
 
     let witness_path = match C::FIELD_TYPE {
-        FieldType::GF2 => KECCAK_GF2_WITNESS,
-        FieldType::M31 => KECCAK_M31_WITNESS,
-        FieldType::BN254 => KECCAK_BN254_WITNESS,
+        FieldType::GF2 => "../".to_owned() + KECCAK_GF2_WITNESS,
+        FieldType::M31 => "../".to_owned() + KECCAK_M31_WITNESS,
+        FieldType::BN254 => "../".to_owned() + KECCAK_BN254_WITNESS,
         _ => unreachable!(),
     };
-    circuit.load_witness_file(witness_path);
+    circuit.load_witness_file(&witness_path);
     root_println!(config.mpi_config, "Witness loaded.");
 
     circuit.evaluate();
@@ -102,17 +108,6 @@ fn test_gkr_correctness_helper<C: GKRConfig>(config: &Config<C>) {
         "Proof generated. Size: {} bytes",
         proof.bytes.len()
     );
-    // first and last 16 proof u8
-    root_println!(config.mpi_config, "Proof bytes: ");
-    proof.bytes.iter().take(16).for_each(|b| print!("{} ", b));
-    print!("... ");
-    proof
-        .bytes
-        .iter()
-        .rev()
-        .take(16)
-        .rev()
-        .for_each(|b| print!("{} ", b));
     root_println!(config.mpi_config,);
 
     root_println!(config.mpi_config, "Proof hash: ");
@@ -132,6 +127,18 @@ fn test_gkr_correctness_helper<C: GKRConfig>(config: &Config<C>) {
 
     // Verify
     if config.mpi_config.is_root() {
+        if let Some(str) = write_proof_to {
+            let mut file = fs::OpenOptions::new()
+                .write(true)
+                .create(true)
+                .truncate(true)
+                .open(str)
+                .unwrap();
+
+            let mut buf = vec![];
+            proof.serialize_into(&mut buf).unwrap();
+            file.write_all(&buf).unwrap();
+        }
         let verifier = Verifier::new(config);
         println!("Verifier created.");
         let verification_start = Instant::now();
diff --git a/gkr/src/verifier.rs b/gkr/src/verifier.rs
index 1a0d619a..2be3c090 100644
--- a/gkr/src/verifier.rs
+++ b/gkr/src/verifier.rs
@@ -6,19 +6,22 @@ use std::{
 use arith::{Field, FieldSerde};
 use ark_std::{end_timer, start_timer};
 use circuit::{Circuit, CircuitLayer};
-use config::{Config, GKRConfig, PolynomialCommitmentType};
+use config::{Config, FiatShamirHashType, GKRConfig, PolynomialCommitmentType};
 use sumcheck::{GKRVerifierHelper, VerifierScratchPad};
-use transcript::{Proof, Transcript, TranscriptInstance};
+use transcript::{
+    BytesHashTranscript, FieldHashTranscript, Keccak256hasher, MIMCHasher, Proof, SHA256hasher,
+    Transcript,
+};
 
 #[cfg(feature = "grinding")]
 use crate::grind;
 use crate::RawCommitment;
 
 #[inline(always)]
-fn verify_sumcheck_step<C: GKRConfig>(
+fn verify_sumcheck_step<C: GKRConfig, T: Transcript<C::ChallengeField>>(
     mut proof_reader: impl Read,
     degree: usize,
-    transcript: &mut TranscriptInstance<C::FiatShamirHashType>,
+    transcript: &mut T,
     claimed_sum: &mut C::ChallengeField,
     randomness_vec: &mut Vec<C::ChallengeField>,
     sp: &VerifierScratchPad<C>,
@@ -26,10 +29,10 @@ fn verify_sumcheck_step<C: GKRConfig>(
     let mut ps = vec![];
     for i in 0..(degree + 1) {
         ps.push(C::ChallengeField::deserialize_from(&mut proof_reader).unwrap());
-        transcript.append_field_element::<C::ChallengeField>(&ps[i]);
+        transcript.append_field_element(&ps[i]);
     }
 
-    let r = transcript.generate_challenge::<C::ChallengeField>();
+    let r = transcript.generate_challenge_field_element();
     randomness_vec.push(r);
 
     let verified = (ps[0] + ps[1]) == *claimed_sum;
@@ -47,7 +50,7 @@ fn verify_sumcheck_step<C: GKRConfig>(
 #[allow(clippy::too_many_arguments)]
 #[allow(clippy::type_complexity)]
 #[allow(clippy::unnecessary_unwrap)]
-fn sumcheck_verify_gkr_layer<C: GKRConfig>(
+fn sumcheck_verify_gkr_layer<C: GKRConfig, T: Transcript<C::ChallengeField>>(
     config: &Config<C>,
     layer: &CircuitLayer<C>,
     public_input: &[C::SimdCircuitField],
@@ -57,11 +60,11 @@ fn sumcheck_verify_gkr_layer<C: GKRConfig>(
     r_mpi: &Vec<C::ChallengeField>,
     claimed_v0: C::ChallengeField,
     claimed_v1: Option<C::ChallengeField>,
-    alpha: C::ChallengeField,
-    beta: Option<C::ChallengeField>,
+    alpha: Option<C::ChallengeField>,
     mut proof_reader: impl Read,
-    transcript: &mut TranscriptInstance<C::FiatShamirHashType>,
+    transcript: &mut T,
     sp: &mut VerifierScratchPad<C>,
+    is_output_layer: bool,
 ) -> (
     bool,
     Vec<C::ChallengeField>,
@@ -72,16 +75,17 @@ fn sumcheck_verify_gkr_layer<C: GKRConfig>(
     Option<C::ChallengeField>,
 ) {
     assert_eq!(rz1.is_none(), claimed_v1.is_none());
-    assert_eq!(rz1.is_none(), beta.is_none());
+    assert_eq!(rz1.is_none(), alpha.is_none());
 
-    GKRVerifierHelper::prepare_layer(layer, &alpha, &beta, rz0, rz1, r_simd, r_mpi, sp);
+    GKRVerifierHelper::prepare_layer(layer, &alpha, rz0, rz1, r_simd, r_mpi, sp, is_output_layer);
 
     let var_num = layer.input_var_num;
     let simd_var_num = C::get_field_pack_size().trailing_zeros() as usize;
-    let mut sum = claimed_v0 * alpha;
-    if claimed_v1.is_some() && beta.is_some() {
-        sum += claimed_v1.unwrap() * beta.unwrap();
+    let mut sum = claimed_v0;
+    if claimed_v1.is_some() && alpha.is_some() {
+        sum += claimed_v1.unwrap() * alpha.unwrap();
     }
+
     sum -= GKRVerifierHelper::eval_cst(&layer.const_, public_input, sp);
 
     let mut rx = vec![];
@@ -92,13 +96,13 @@ fn sumcheck_verify_gkr_layer<C: GKRConfig>(
 
     for _i_var in 0..var_num {
         verified &=
-            verify_sumcheck_step::<C>(&mut proof_reader, 2, transcript, &mut sum, &mut rx, sp);
+            verify_sumcheck_step::<C, T>(&mut proof_reader, 2, transcript, &mut sum, &mut rx, sp);
         // println!("x {} var, verified? {}", _i_var, verified);
     }
     GKRVerifierHelper::set_rx(&rx, sp);
 
     for _i_var in 0..simd_var_num {
-        verified &= verify_sumcheck_step::<C>(
+        verified &= verify_sumcheck_step::<C, T>(
             &mut proof_reader,
             3,
             transcript,
@@ -111,7 +115,7 @@ fn sumcheck_verify_gkr_layer<C: GKRConfig>(
     GKRVerifierHelper::set_r_simd_xy(&r_simd_xy, sp);
 
     for _i_var in 0..config.mpi_config.world_size().trailing_zeros() {
-        verified &= verify_sumcheck_step::<C>(
+        verified &= verify_sumcheck_step::<C, T>(
             &mut proof_reader,
             3,
             transcript,
@@ -126,12 +130,12 @@ fn sumcheck_verify_gkr_layer<C: GKRConfig>(
     let vx_claim = C::ChallengeField::deserialize_from(&mut proof_reader).unwrap();
 
     sum -= vx_claim * GKRVerifierHelper::eval_add(&layer.add, sp);
-    transcript.append_field_element::<C::ChallengeField>(&vx_claim);
+    transcript.append_field_element(&vx_claim);
 
     let vy_claim = if !layer.structure_info.max_degree_one {
         ry = Some(vec![]);
         for _i_var in 0..var_num {
-            verified &= verify_sumcheck_step::<C>(
+            verified &= verify_sumcheck_step::<C, T>(
                 &mut proof_reader,
                 2,
                 transcript,
@@ -144,7 +148,7 @@ fn sumcheck_verify_gkr_layer<C: GKRConfig>(
         GKRVerifierHelper::set_ry(ry.as_ref().unwrap(), sp);
 
         let vy_claim = C::ChallengeField::deserialize_from(&mut proof_reader).unwrap();
-        transcript.append_field_element::<C::ChallengeField>(&vy_claim);
+        transcript.append_field_element(&vy_claim);
         verified &= sum == vx_claim * vy_claim * GKRVerifierHelper::eval_mul(&layer.mul, sp);
         Some(vy_claim)
     } else {
@@ -157,12 +161,12 @@ fn sumcheck_verify_gkr_layer<C: GKRConfig>(
 
 // todo: FIXME
 #[allow(clippy::type_complexity)]
-pub fn gkr_verify<C: GKRConfig>(
+pub fn gkr_verify<C: GKRConfig, T: Transcript<C::ChallengeField>>(
     config: &Config<C>,
     circuit: &Circuit<C>,
     public_input: &[C::SimdCircuitField],
     claimed_v: &C::ChallengeField,
-    transcript: &mut TranscriptInstance<C::FiatShamirHashType>,
+    transcript: &mut T,
     mut proof_reader: impl Read,
 ) -> (
     bool,
@@ -183,19 +187,18 @@ pub fn gkr_verify<C: GKRConfig>(
     let mut r_mpi = vec![];
 
     for _ in 0..circuit.layers.last().unwrap().output_var_num {
-        rz0.push(transcript.generate_challenge::<C::ChallengeField>());
+        rz0.push(transcript.generate_challenge_field_element());
     }
 
     for _ in 0..C::get_field_pack_size().trailing_zeros() {
-        r_simd.push(transcript.generate_challenge::<C::ChallengeField>());
+        r_simd.push(transcript.generate_challenge_field_element());
     }
 
     for _ in 0..config.mpi_config.world_size().trailing_zeros() {
-        r_mpi.push(transcript.generate_challenge::<C::ChallengeField>());
+        r_mpi.push(transcript.generate_challenge_field_element());
     }
 
-    let mut alpha = C::ChallengeField::one();
-    let mut beta = None;
+    let mut alpha = None;
     let mut claimed_v0 = *claimed_v;
     let mut claimed_v1 = None;
 
@@ -221,26 +224,17 @@ pub fn gkr_verify<C: GKRConfig>(
             claimed_v0,
             claimed_v1,
             alpha,
-            beta,
             &mut proof_reader,
             transcript,
             &mut sp,
+            i == layer_num - 1,
         );
         verified &= cur_verified;
-        alpha = transcript.generate_challenge::<C::ChallengeField>();
-        beta = if rz1.is_some() {
-            Some(transcript.generate_challenge::<C::ChallengeField>())
+        alpha = if rz1.is_some() {
+            Some(transcript.generate_challenge_field_element())
         } else {
             None
         };
-        log::trace!(
-            "Layer {} verified with alpha={:?} and beta={:?}, claimed_v0={:?}, claimed_v1={:?}",
-            i,
-            alpha,
-            beta,
-            claimed_v0,
-            claimed_v1
-        );
     }
     end_timer!(timer);
     (verified, rz0, rz1, r_simd, r_mpi, claimed_v0, claimed_v1)
@@ -265,12 +259,13 @@ impl<C: GKRConfig> Verifier<C> {
         }
     }
 
-    pub fn verify(
+    fn verify_internal<T: Transcript<C::ChallengeField>>(
         &self,
         circuit: &mut Circuit<C>,
         public_input: &[C::SimdCircuitField],
         claimed_v: &C::ChallengeField,
         proof: &Proof,
+        transcript: &mut T,
     ) -> bool {
         let timer = start_timer!(|| "verify");
 
@@ -279,37 +274,25 @@ impl<C: GKRConfig> Verifier<C> {
         let mut cursor = Cursor::new(&proof.bytes);
 
         let commitment = RawCommitment::<C>::deserialize_from(&mut cursor, poly_size);
-
-        let mut transcript = TranscriptInstance::new();
         transcript.append_u8_slice(&proof.bytes[..commitment.size()]);
 
         if self.config.mpi_config.world_size() > 1 {
-            transcript.hash_to_digest(); // In prover, we call hash_to_digest before sync up the transcript state
+            let _ = transcript.hash_and_return_state(); // Trigger an additional hash
         }
 
         // ZZ: shall we use probabilistic grinding so the verifier can avoid this cost?
         // (and also be recursion friendly)
         #[cfg(feature = "grinding")]
-        grind::<C>(&mut transcript, &self.config);
-
-        circuit.fill_rnd_coefs(&mut transcript);
+        grind::<C, T>(transcript, &self.config);
 
-        // FIXME
-        // We don't really need to put the grinding result into the proof.
-        // The verifier already recomputed it -- and if it doesn't match, the proof is invalid.
-        #[cfg(feature = "grinding")]
-        {
-            // skip 32 bytes which is the grinding result
-            let mut buf = [0u8; 32];
-            cursor.read_exact(&mut buf).unwrap()
-        }
+        circuit.fill_rnd_coefs(transcript);
 
         let (mut verified, rz0, rz1, r_simd, r_mpi, claimed_v0, claimed_v1) = gkr_verify(
             &self.config,
             circuit,
             public_input,
             claimed_v,
-            &mut transcript,
+            transcript,
             &mut cursor,
         );
 
@@ -341,4 +324,37 @@ impl<C: GKRConfig> Verifier<C> {
 
         verified
     }
+
+    /// Verifies `proof` against `circuit`, `public_input` and `claimed_v`.
+    ///
+    /// Instantiates the transcript selected by `C::FIAT_SHAMIR_HASH` and returns
+    /// the result of `verify_internal` run with it.
+    ///
+    /// # Panics
+    /// Panics if `C::FIAT_SHAMIR_HASH` is not `Keccak256`, `SHA256` or `MIMC5`.
+    pub fn verify(
+        &self,
+        circuit: &mut Circuit<C>,
+        public_input: &[C::SimdCircuitField],
+        claimed_v: &C::ChallengeField,
+        proof: &Proof,
+    ) -> bool {
+        match C::FIAT_SHAMIR_HASH {
+            FiatShamirHashType::Keccak256 => {
+                let mut transcript =
+                    BytesHashTranscript::<C::ChallengeField, Keccak256hasher>::new();
+                self.verify_internal(circuit, public_input, claimed_v, proof, &mut transcript)
+            }
+            FiatShamirHashType::SHA256 => {
+                let mut transcript = BytesHashTranscript::<C::ChallengeField, SHA256hasher>::new();
+                self.verify_internal(circuit, public_input, claimed_v, proof, &mut transcript)
+            }
+            FiatShamirHashType::MIMC5 => {
+                let mut transcript =
+                    FieldHashTranscript::<C::ChallengeField, MIMCHasher<C::ChallengeField>>::new();
+                self.verify_internal(circuit, public_input, claimed_v, proof, &mut transcript)
+            }
+            _ => unreachable!(),
+        }
+    }
 }
diff --git a/pcs/Cargo.toml b/pcs/Cargo.toml
new file mode 100644
index 00000000..1c5ab0e6
--- /dev/null
+++ b/pcs/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "pcs"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+arith = { path = "../arith" }
+polynomials = { path = "../arith/polynomials"}
+
+rand.workspace = true
diff --git a/pcs/src/lib.rs b/pcs/src/lib.rs
new file mode 100644
index 00000000..759aecf0
--- /dev/null
+++ b/pcs/src/lib.rs
@@ -0,0 +1,4 @@
+mod traits;
+pub use traits::{EmptyType, PCS};
+
+pub mod raw;
diff --git a/pcs/src/raw.rs b/pcs/src/raw.rs
new file mode 100644
index 00000000..9097d030
--- /dev/null
+++ b/pcs/src/raw.rs
@@ -0,0 +1,106 @@
+use crate::{EmptyType, PCS};
+use arith::{Field, FieldSerde};
+use polynomials::MultiLinearPoly;
+use rand::RngCore;
+
+/// Parameters of the raw multi-linear commitment scheme.
+#[derive(Clone, Debug)]
+pub struct RawMLParams {
+    /// Number of variables; polynomials and commitments must hold `2^n_vars` entries.
+    pub n_vars: usize,
+}
+
+/// Returns `2^n_vars`, or `None` when that value does not fit in a `usize`.
+///
+/// A bare `1 << n_vars` panics in debug builds and masks the shift amount in
+/// release builds once `n_vars` reaches the bit width of `usize`.
+fn expected_len(n_vars: usize) -> Option<usize> {
+    u32::try_from(n_vars)
+        .ok()
+        .and_then(|shift| 1usize.checked_shl(shift))
+}
+
+/// Raw commitment for multi-linear polynomials: the commitment is a copy of the
+/// coefficient vector and the opening carries no data.
+pub struct RawML {}
+
+impl<F: Field + FieldSerde> PCS<F> for RawML {
+    type Params = RawMLParams;
+
+    type Poly = MultiLinearPoly<F>;
+
+    type EvalPoint = Vec<F>;
+
+    type SRS = EmptyType;
+
+    type PKey = EmptyType;
+
+    type VKey = EmptyType;
+
+    type Commitment = Vec<F>;
+
+    type Opening = EmptyType;
+
+    /// No setup is required; returns the default (empty) SRS.
+    fn gen_srs_for_testing(&mut self, _rng: impl RngCore, _params: &Self::Params) -> Self::SRS {
+        Self::SRS::default()
+    }
+
+    /// Commits to `poly` by cloning its coefficients.
+    ///
+    /// # Panics
+    /// Panics if `poly` does not have exactly `2^n_vars` coefficients.
+    fn commit(
+        &mut self,
+        params: &Self::Params,
+        _proving_key: &Self::PKey,
+        poly: &Self::Poly,
+    ) -> Self::Commitment {
+        assert_eq!(
+            expected_len(params.n_vars),
+            Some(poly.coeffs.len()),
+            "polynomial must have 2^n_vars coefficients"
+        );
+        poly.coeffs.clone()
+    }
+
+    /// Evaluates `poly` at `x` and returns the value with an empty opening.
+    ///
+    /// # Panics
+    /// Panics if `poly` does not have exactly `2^n_vars` coefficients.
+    fn open(
+        &mut self,
+        params: &Self::Params,
+        _proving_key: &Self::PKey,
+        poly: &Self::Poly,
+        x: &Self::EvalPoint,
+    ) -> (F, Self::Opening) {
+        assert_eq!(
+            expected_len(params.n_vars),
+            Some(poly.coeffs.len()),
+            "polynomial must have 2^n_vars coefficients"
+        );
+        (poly.evaluate_jolt(x), Self::Opening::default())
+    }
+
+    /// Re-evaluates the committed coefficients at `x` and compares with `v`.
+    ///
+    /// Returns `false` when `commitment` does not hold exactly `2^n_vars`
+    /// entries, so a malformed commitment is rejected instead of aborting.
+    fn verify(
+        params: &Self::Params,
+        _verifying_key: &Self::VKey,
+        commitment: &Self::Commitment,
+        x: &Self::EvalPoint,
+        v: F,
+        _opening: &Self::Opening,
+    ) -> bool {
+        if expected_len(params.n_vars) != Some(commitment.len()) {
+            return false;
+        }
+        let ml_poly = MultiLinearPoly::<F> {
+            coeffs: commitment.clone(),
+        };
+        ml_poly.evaluate_jolt(x) == v
+    }
+}
diff --git a/pcs/src/traits.rs b/pcs/src/traits.rs
new file mode 100644
index 00000000..121205dd
--- /dev/null
+++ b/pcs/src/traits.rs
@@ -0,0 +1,79 @@
+use arith::{Field, FieldSerde};
+use rand::RngCore;
+use std::fmt::Debug;
+
+/// Interface of a polynomial commitment scheme over the field `F`.
+pub trait PCS<F: Field + FieldSerde> {
+    /// Scheme parameters passed to every operation.
+    type Params: Clone + Debug;
+    /// Polynomial type that is committed to and opened.
+    type Poly: Clone + Debug;
+    /// Point at which a polynomial is opened.
+    type EvalPoint: Clone + Debug;
+
+    /// Reference string produced by `gen_srs_for_testing`.
+    type SRS: Clone + Debug + FieldSerde;
+    /// Prover-side key, convertible from the SRS.
+    type PKey: Clone + Debug + From<Self::SRS> + FieldSerde;
+    /// Verifier-side key, convertible from the SRS.
+    type VKey: Clone + Debug + From<Self::SRS> + FieldSerde;
+    /// Output of `commit`.
+    type Commitment: Clone + Debug + FieldSerde;
+    /// Opening data returned by `open`.
+    type Opening: Clone + Debug + FieldSerde;
+
+    /// Generates an SRS from `rng` and `params`; intended for tests, per its name.
+    fn gen_srs_for_testing(&mut self, rng: impl RngCore, params: &Self::Params) -> Self::SRS;
+
+    /// Commits to `poly`.
+    fn commit(
+        &mut self,
+        params: &Self::Params,
+        proving_key: &Self::PKey,
+        poly: &Self::Poly,
+    ) -> Self::Commitment;
+
+    /// Opens `poly` at `x`, returning a field element and the opening.
+    fn open(
+        &mut self,
+        params: &Self::Params,
+        proving_key: &Self::PKey,
+        poly: &Self::Poly,
+        x: &Self::EvalPoint,
+    ) -> (F, Self::Opening);
+
+    /// Checks that `opening` supports the claim that the polynomial behind
+    /// `commitment` evaluates to `v` at `x`; returns `true` on success.
+    fn verify(
+        params: &Self::Params,
+        verifying_key: &Self::VKey,
+        commitment: &Self::Commitment,
+        x: &Self::EvalPoint,
+        v: F,
+        opening: &Self::Opening,
+    ) -> bool;
+}
+
+/// Zero-sized placeholder for associated types a scheme does not need.
+#[derive(Clone, Debug, Default)]
+pub struct EmptyType {}
+
+/// Serializes to zero bytes and deserializes without reading anything.
+impl FieldSerde for EmptyType {
+    const SERIALIZED_SIZE: usize = 0;
+
+    fn serialize_into<W: std::io::Write>(&self, _writer: W) -> arith::FieldSerdeResult<()> {
+        Ok(())
+    }
+
+    fn deserialize_from<R: std::io::Read>(_reader: R) -> arith::FieldSerdeResult<Self> {
+        Ok(Self {})
+    }
+
+    // The ECC format is not implemented for this type; calling this panics.
+    fn try_deserialize_from_ecc_format<R: std::io::Read>(
+        _reader: R,
+    ) -> arith::FieldSerdeResult<Self> {
+        unimplemented!()
+    }
+}
diff --git a/pcs/tests/common.rs b/pcs/tests/common.rs
new file mode 100644
index 00000000..69ca6d0c
--- /dev/null
+++ b/pcs/tests/common.rs
@@ -0,0 +1,31 @@
+use arith::{Field, FieldSerde};
+use pcs::PCS;
+use rand::thread_rng;
+
+/// Commits to `poly` once, then opens and verifies it at every point in `xs`.
+///
+/// # Panics
+/// Panics if verification fails for any point.
+pub fn test_pcs<F: Field + FieldSerde, P: PCS<F>>(
+    pcs: &mut P,
+    params: &P::Params,
+    poly: &P::Poly,
+    xs: &[P::EvalPoint],
+) {
+    let mut rng = thread_rng();
+    let srs = pcs.gen_srs_for_testing(&mut rng, params);
+    let proving_key: P::PKey = srs.clone().into();
+    // Last use of `srs`: move it instead of cloning a second time.
+    let verification_key: P::VKey = srs.into();
+
+    let commitment = pcs.commit(params, &proving_key, poly);
+
+    for x in xs {
+        let (v, opening) = pcs.open(params, &proving_key, poly, x);
+        assert!(
+            P::verify(params, &verification_key, &commitment, x, v, &opening),
+            "PCS verification failed at evaluation point {:?}",
+            x
+        );
+    }
+}
diff --git a/pcs/tests/test_raw.rs b/pcs/tests/test_raw.rs
new file mode 100644
index 00000000..054a877e
--- /dev/null
+++ b/pcs/tests/test_raw.rs
@@ -0,0 +1,28 @@
+mod common;
+
+use arith::{BN254Fr, Field};
+use pcs::raw::{RawML, RawMLParams};
+use polynomials::MultiLinearPoly;
+use rand::thread_rng;
+
+/// Runs the shared PCS round-trip check on `RawML` with a random 8-variable
+/// polynomial and 100 random BN254 evaluation points.
+#[test]
+fn test_raw() {
+    let mut rng = thread_rng();
+    let params = RawMLParams { n_vars: 8 };
+    let poly = MultiLinearPoly::random(params.n_vars, &mut rng);
+
+    // Draw the points one coordinate at a time, `n_vars` coordinates per point.
+    let mut points: Vec<Vec<BN254Fr>> = Vec::with_capacity(100);
+    for _ in 0..100 {
+        let mut point = Vec::with_capacity(params.n_vars);
+        for _ in 0..params.n_vars {
+            point.push(BN254Fr::random_unsafe(&mut rng));
+        }
+        points.push(point);
+    }
+
+    let mut scheme = RawML {};
+    common::test_pcs::<BN254Fr, RawML>(&mut scheme, &params, &poly, &points);
+}
diff --git a/recursion/go.mod b/recursion/go.mod
new file mode 100644
index 00000000..e3425947
--- /dev/null
+++ b/recursion/go.mod
@@ -0,0 +1,108 @@
+module ExpanderVerifierCircuit
+
+go 1.22
+
+toolchain go1.22.5
+
+require (
+	github.com/consensys/gnark v0.11.0
+	github.com/consensys/gnark-crypto v0.14.0
+)
+
+require (
+	github.com/DataDog/zstd v1.4.5 // indirect
+	github.com/Microsoft/go-winio v0.6.1 // indirect
+	github.com/StackExchange/wmi v1.2.1 // indirect
+	github.com/VictoriaMetrics/fastcache v1.12.1 // indirect
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/btcsuite/btcd/btcec/v2 v2.2.0 // indirect
+	github.com/cespare/xxhash/v2 v2.2.0 // indirect
+	github.com/cockroachdb/errors v1.8.1 // indirect
+	github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f // indirect
+	github.com/cockroachdb/pebble v0.0.0-20230928194634-aa077af62593 // indirect
+	github.com/cockroachdb/redact v1.0.8 // indirect
+	github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 // indirect
+	github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect
+	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
+	github.com/crate-crypto/go-ipa v0.0.0-20231025140028-3c0104f4b233 // indirect
+	github.com/crate-crypto/go-kzg-4844 v0.7.0 // indirect
+	github.com/deckarep/golang-set/v2 v2.1.0 // indirect
+	github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect
+	github.com/ethereum/c-kzg-4844 v0.4.0 // indirect
+	github.com/fjl/memsize v0.0.0-20190710130421-bcb5799ab5e5 // indirect
+	github.com/fsnotify/fsnotify v1.6.0 // indirect
+	github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff // indirect
+	github.com/gballet/go-verkle v0.1.1-0.20231031103413-a67434b50f46 // indirect
+	github.com/go-ole/go-ole v1.2.5 // indirect
+	github.com/gofrs/flock v0.8.1 // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
+	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect
+	github.com/google/uuid v1.3.0 // indirect
+	github.com/gorilla/websocket v1.4.2 // indirect
+	github.com/hashicorp/go-bexpr v0.1.10 // indirect
+	github.com/holiman/billy v0.0.0-20230718173358-1c7e68d277a7 // indirect
+	github.com/holiman/bloomfilter/v2 v2.0.3 // indirect
+	github.com/holiman/uint256 v1.2.4 // indirect
+	github.com/huin/goupnp v1.3.0 // indirect
+	github.com/ingonyama-zk/icicle v1.1.0 // indirect
+	github.com/ingonyama-zk/iciclegnark v0.1.0 // indirect
+	github.com/jackpal/go-nat-pmp v1.0.2 // indirect
+	github.com/klauspost/compress v1.15.15 // indirect
+	github.com/kr/pretty v0.3.1 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/mattn/go-runewidth v0.0.13 // indirect
+	github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
+	github.com/mitchellh/mapstructure v1.4.1 // indirect
+	github.com/mitchellh/pointerstructure v1.2.0 // indirect
+	github.com/olekukonko/tablewriter v0.0.5 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
+	github.com/prometheus/client_golang v1.12.0 // indirect
+	github.com/prometheus/client_model v0.2.1-0.20210607210712-147c58e9608a // indirect
+	github.com/prometheus/common v0.32.1 // indirect
+	github.com/prometheus/procfs v0.7.3 // indirect
+	github.com/rivo/uniseg v0.2.0 // indirect
+	github.com/rogpeppe/go-internal v1.12.0 // indirect
+	github.com/ronanh/intcomp v1.1.0 // indirect
+	github.com/rs/cors v1.7.0 // indirect
+	github.com/russross/blackfriday/v2 v2.1.0 // indirect
+	github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible // indirect
+	github.com/status-im/keycard-go v0.2.0 // indirect
+	github.com/supranational/blst v0.3.11 // indirect
+	github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 // indirect
+	github.com/tklauser/go-sysconf v0.3.12 // indirect
+	github.com/tklauser/numcpus v0.6.1 // indirect
+	github.com/tyler-smith/go-bip39 v1.1.0 // indirect
+	github.com/urfave/cli/v2 v2.25.7 // indirect
+	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
+	golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 // indirect
+	golang.org/x/mod v0.20.0 // indirect
+	golang.org/x/text v0.17.0 // indirect
+	golang.org/x/time v0.3.0 // indirect
+	golang.org/x/tools v0.24.0 // indirect
+	google.golang.org/protobuf v1.27.1 // indirect
+	gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect
+)
+
+require (
+	github.com/bits-and-blooms/bitset v1.14.2 // indirect
+	github.com/blang/semver/v4 v4.0.0 // indirect
+	github.com/consensys/bavard v0.1.13 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/ethereum/go-ethereum v1.13.10
+	github.com/fxamacker/cbor/v2 v2.7.0 // indirect
+	github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/mmcloughlin/addchain v0.4.0 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/rs/zerolog v1.33.0 // indirect
+	github.com/stretchr/testify v1.9.0 // indirect
+	github.com/x448/float16 v0.8.4 // indirect
+	golang.org/x/crypto v0.26.0 // indirect
+	golang.org/x/sync v0.8.0 // indirect
+	golang.org/x/sys v0.24.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+	rsc.io/tmplfunc v0.0.3 // indirect
+)
diff --git a/recursion/go.sum b/recursion/go.sum
new file mode 100644
index 00000000..8f40238b
--- /dev/null
+++ b/recursion/go.sum
@@ -0,0 +1,860 @@
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
+cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU=
+cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
+cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
+cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
+cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
+cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
+cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
+cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc=
+cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=
+cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs=
+cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc=
+cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY=
+cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
+cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
+cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=
+cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg=
+cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc=
+cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ=
+cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
+cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
+cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
+cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=
+cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA=
+cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU=
+cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
+cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
+cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
+cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
+cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
+dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
+github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/CloudyKit/fastprinter v0.0.0-20170127035650-74b38d55f37a/go.mod h1:EFZQ978U7x8IRnstaskI3IysnWY5Ao3QgZUKOXlsAdw=
+github.com/CloudyKit/jet v2.1.3-0.20180809161101-62edd43e4f88+incompatible/go.mod h1:HPYO+50pSWkPoj9Q/eq0aRGByCL6ScRlUmiEX5Zgm+w=
+github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ=
+github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
+github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY=
+github.com/Joker/jade v1.0.1-0.20190614124447-d475f43051e7/go.mod h1:6E6s8o2AE4KhCrqr6GRJjdC/gNfTdxkIXvuGZZda2VM=
+github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
+github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
+github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0=
+github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
+github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
+github.com/VictoriaMetrics/fastcache v1.12.1 h1:i0mICQuojGDL3KblA7wUNlY5lOK6a4bwt3uRKnkZU40=
+github.com/VictoriaMetrics/fastcache v1.12.1/go.mod h1:tX04vaqcNoQeGLD+ra5pU5sWkuxnzWhEzLwhP9w653o=
+github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY=
+github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
+github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
+github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
+github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
+github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
+github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
+github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
+github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g=
+github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
+github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
+github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
+github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
+github.com/bits-and-blooms/bitset v1.8.0 h1:FD+XqgOZDUxxZ8hzoBFuV9+cGWY9CslN6d5MS5JVb4c=
+github.com/bits-and-blooms/bitset v1.8.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
+github.com/bits-and-blooms/bitset v1.10.0 h1:ePXTeiPEazB5+opbv5fr8umg2R/1NlzgDsyepwsSr88=
+github.com/bits-and-blooms/bitset v1.10.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
+github.com/bits-and-blooms/bitset v1.14.2 h1:YXVoyPndbdvcEVcseEovVfp0qjJp7S+i5+xgp/Nfbdc=
+github.com/bits-and-blooms/bitset v1.14.2/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
+github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
+github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
+github.com/btcsuite/btcd/btcec/v2 v2.2.0 h1:fzn1qaOt32TuLjFlkzYSsBC35Q3KUjT1SwPxiMSCF5k=
+github.com/btcsuite/btcd/btcec/v2 v2.2.0/go.mod h1:U7MHm051Al6XmscBQ0BoNydpOTsFAn707034b5nY8zU=
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
+github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
+github.com/cockroachdb/datadriven v1.0.0/go.mod h1:5Ib8Meh+jk1RlHIXej6Pzevx/NLlNvQB9pmSBZErGA4=
+github.com/cockroachdb/errors v1.6.1/go.mod h1:tm6FTP5G81vwJ5lC0SizQo374JNCOPrHyXGitRJoDqM=
+github.com/cockroachdb/errors v1.8.1 h1:A5+txlVZfOqFBDa4mGz2bUWSp0aHElvHX2bKkdbQu+Y=
+github.com/cockroachdb/errors v1.8.1/go.mod h1:qGwQn6JmZ+oMjuLwjWzUNqblqk0xl4CVV3SQbGwK7Ac=
+github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f h1:o/kfcElHqOiXqcou5a3rIlMc7oJbMQkeLk0VQJ7zgqY=
+github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI=
+github.com/cockroachdb/pebble v0.0.0-20230928194634-aa077af62593 h1:aPEJyR4rPBvDmeyi+l/FS/VtA00IWvjeFvjen1m1l1A=
+github.com/cockroachdb/pebble v0.0.0-20230928194634-aa077af62593/go.mod h1:6hk1eMY/u5t+Cf18q5lFMUA1Rc+Sm5I6Ra1QuPyxXCo=
+github.com/cockroachdb/redact v1.0.8 h1:8QG/764wK+vmEYoOlfobpe12EQcS81ukx/a4hdVMxNw=
+github.com/cockroachdb/redact v1.0.8/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=
+github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 h1:IKgmqgMQlVJIZj19CdocBeSfSaiCbEBZGKODaixqtHM=
+github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2/go.mod h1:8BT+cPK6xvFOcRlk0R8eg+OTkcqI6baNH4xAkpiYVvQ=
+github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo=
+github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06/go.mod h1:7nc4anLGjupUW/PeY5qiNYsdNXj7zopG+eqsS7To5IQ=
+github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM=
+github.com/consensys/bavard v0.1.13 h1:oLhMLOFGTLdlda/kma4VOJazblc7IM5y5QPd2A/YjhQ=
+github.com/consensys/bavard v0.1.13/go.mod h1:9ItSMtA/dXMAiL7BG6bqW2m3NdSEObYWoH223nGHukI=
+github.com/consensys/gnark v0.9.1 h1:aTwBp5469MY/2jNrf4ABrqHRW3+JytfkADdw4ZBY7T0=
+github.com/consensys/gnark v0.9.1/go.mod h1:udWvWGXnfBE7mn7BsNoGAvZDnUhcONBEtNijvVjfY80=
+github.com/consensys/gnark v0.11.0 h1:YlndnlbRAoIEA+aIIHzNIW4P0dCIOM9/jCVzsXf356c=
+github.com/consensys/gnark v0.11.0/go.mod h1:2LbheIOxsBI1a9Ck1XxUoy6PRnH28mSI9qrvtN2HwDY=
+github.com/consensys/gnark-crypto v0.12.2-0.20231013160410-1f65e75b6dfb h1:f0BMgIjhZy4lSRHCXFbQst85f5agZAjtDMixQqBWNpc=
+github.com/consensys/gnark-crypto v0.12.2-0.20231013160410-1f65e75b6dfb/go.mod h1:v2Gy7L/4ZRosZ7Ivs+9SfUDr0f5UlG+EM5t7MPHiLuY=
+github.com/consensys/gnark-crypto v0.14.0 h1:DDBdl4HaBtdQsq/wfMwJvZNE80sHidrK3Nfrefatm0E=
+github.com/consensys/gnark-crypto v0.14.0/go.mod h1:CU4UijNPsHawiVGNxe9co07FkzCeWHHrb1li/n1XoU0=
+github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
+github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
+github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
+github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
+github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/crate-crypto/go-ipa v0.0.0-20231025140028-3c0104f4b233 h1:d28BXYi+wUpz1KBmiF9bWrjEMacUEREV6MBi2ODnrfQ=
+github.com/crate-crypto/go-ipa v0.0.0-20231025140028-3c0104f4b233/go.mod h1:geZJZH3SzKCqnz5VT0q/DyIG/tvu/dZk+VIfXicupJs=
+github.com/crate-crypto/go-kzg-4844 v0.7.0 h1:C0vgZRk4q4EZ/JgPfzuSoxdCq3C3mOZMBShovmncxvA=
+github.com/crate-crypto/go-kzg-4844 v0.7.0/go.mod h1:1kMhvPgI0Ky3yIa+9lFySEBUBXkYxeOi8ZF1sYioxhc=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/deckarep/golang-set/v2 v2.1.0 h1:g47V4Or+DUdzbs8FxCCmgb6VYd+ptPAngjM6dtGktsI=
+github.com/deckarep/golang-set/v2 v2.1.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
+github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc=
+github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc=
+github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs=
+github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
+github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
+github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
+github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM=
+github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
+github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
+github.com/ethereum/c-kzg-4844 v0.4.0 h1:3MS1s4JtA868KpJxroZoepdV0ZKBp3u/O5HcZ7R3nlY=
+github.com/ethereum/c-kzg-4844 v0.4.0/go.mod h1:VewdlzQmpT5QSrVhbBuGoCdFJkpaJlO1aQputP83wc0=
+github.com/ethereum/go-ethereum v1.13.10 h1:Ppdil79nN+Vc+mXfge0AuUgmKWuVv4eMqzoIVSdqZek=
+github.com/ethereum/go-ethereum v1.13.10/go.mod h1:sc48XYQxCzH3fG9BcrXCOOgQk2JfZzNAmIKnceogzsA=
+github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8=
+github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
+github.com/fjl/memsize v0.0.0-20190710130421-bcb5799ab5e5 h1:FtmdgXiUlNeRsoNMFlKLDt+S+6hbjVMEW6RGQ7aUf7c=
+github.com/fjl/memsize v0.0.0-20190710130421-bcb5799ab5e5/go.mod h1:VvhXpOYNQvB+uIk2RvXzuaQtkQJzzIx6lSBe1xv7hi0=
+github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA=
+github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
+github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
+github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
+github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
+github.com/fxamacker/cbor/v2 v2.5.0 h1:oHsG0V/Q6E/wqTS2O1Cozzsy69nqCiguo5Q1a1ADivE=
+github.com/fxamacker/cbor/v2 v2.5.0/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo=
+github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
+github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
+github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc=
+github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff h1:tY80oXqGNY4FhTFhk+o9oFHGINQ/+vhlm8HFzi6znCI=
+github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff/go.mod h1:x7DCsMOv1taUwEWCzT4cmDeAkigA5/QCwUodaVOe8Ww=
+github.com/gballet/go-verkle v0.1.1-0.20231031103413-a67434b50f46 h1:BAIP2GihuqhwdILrV+7GJel5lyPV3u1+PgzrWLc0TkE=
+github.com/gballet/go-verkle v0.1.1-0.20231031103413-a67434b50f46/go.mod h1:QNpY22eby74jVhqH4WhDLDwxc/vqsern6pW+u2kbkpc=
+github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s=
+github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM=
+github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98=
+github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q=
+github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
+github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
+github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
+github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
+github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
+github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=
+github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
+github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
+github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
+github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8=
+github.com/go-ole/go-ole v1.2.5 h1:t4MGB5xEDZvXI+0rMjjsfBsD7yAgp/s9ZDkL1JndXwY=
+github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
+github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
+github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo=
+github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
+github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw=
+github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
+github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
+github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
+github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
+github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY1WM=
+github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg=
+github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
+github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
+github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
+github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
+github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
+github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
+github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
+github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
+github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
+github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
+github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
+github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk=
+github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4=
+github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
+github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
+github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
+github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
+github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20230817174616-7a8ec2ada47b h1:h9U78+dx9a4BKdQkBBos92HalKpaGKHrp+3Uo6yTodo=
+github.com/google/pprof v0.0.0-20230817174616-7a8ec2ada47b/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik=
+github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k=
+github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
+github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
+github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
+github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
+github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
+github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
+github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
+github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
+github.com/hashicorp/go-bexpr v0.1.10 h1:9kuI5PFotCboP3dkDYFr/wi0gg0QVbSNz5oFRpxn4uE=
+github.com/hashicorp/go-bexpr v0.1.10/go.mod h1:oxlubA2vC/gFVfX1A6JGp7ls7uCDlfJn732ehYYg+g0=
+github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
+github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
+github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
+github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
+github.com/holiman/billy v0.0.0-20230718173358-1c7e68d277a7 h1:3JQNjnMRil1yD0IfZKHF9GxxWKDJGj8I0IqOUol//sw=
+github.com/holiman/billy v0.0.0-20230718173358-1c7e68d277a7/go.mod h1:5GuXa7vkL8u9FkFuWdVvfR5ix8hRB7DbOAaYULamFpc=
+github.com/holiman/bloomfilter/v2 v2.0.3 h1:73e0e/V0tCydx14a0SCYS/EWCxgwLZ18CZcZKVu0fao=
+github.com/holiman/bloomfilter/v2 v2.0.3/go.mod h1:zpoh+gs7qcpqrHr3dB55AMiJwo0iURXE7ZOP9L9hSkA=
+github.com/holiman/uint256 v1.2.4 h1:jUc4Nk8fm9jZabQuqr2JzednajVmBpC+oiTiXZJEApU=
+github.com/holiman/uint256 v1.2.4/go.mod h1:EOMSn4q6Nyt9P6efbI3bueV4e1b3dGlUCXeiRV4ng7E=
+github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
+github.com/huin/goupnp v1.3.0 h1:UvLUlWDNpoUdYzb2TCn+MuTWtcjXKSza2n6CBdQ0xXc=
+github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8=
+github.com/hydrogen18/memlistener v0.0.0-20141126152155-54553eb933fb/go.mod h1:qEIFzExnS6016fRpRfxrExeVn2gbClQA99gQhnIcdhE=
+github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
+github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA=
+github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
+github.com/ingonyama-zk/icicle v1.1.0 h1:a2MUIaF+1i4JY2Lnb961ZMvaC8GFs9GqZgSnd9e95C8=
+github.com/ingonyama-zk/icicle v1.1.0/go.mod h1:kAK8/EoN7fUEmakzgZIYdWy1a2rBnpCaZLqSHwZWxEk=
+github.com/ingonyama-zk/iciclegnark v0.1.0 h1:88MkEghzjQBMjrYRJFxZ9oR9CTIpB8NG2zLeCJSvXKQ=
+github.com/ingonyama-zk/iciclegnark v0.1.0/go.mod h1:wz6+IpyHKs6UhMMoQpNqz1VY+ddfKqC/gRwR/64W6WU=
+github.com/iris-contrib/blackfriday v2.0.0+incompatible/go.mod h1:UzZ2bDEoaSGPbkg6SAB4att1aAwTmVIx/5gCVqeyUdI=
+github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/+fafWORmlnuysV2EMP8MW+qe0=
+github.com/iris-contrib/i18n v0.0.0-20171121225848-987a633949d0/go.mod h1:pMCz62A0xJL6I+umB2YTlFRwWXaDFA0jy+5HzGiJjqI=
+github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw=
+github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus=
+github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc=
+github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
+github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
+github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
+github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
+github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
+github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
+github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q=
+github.com/juju/loggo v0.0.0-20180524022052-584905176618/go.mod h1:vgyd7OREkbtVEN/8IXZe5Ooef3LQePvuBm9UWj6ZL8U=
+github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073/go.mod h1:63prj8cnj0tU0S9OHjGJn+b1h0ZghCndfnbQolrYTwA=
+github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
+github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
+github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k=
+github.com/kataras/golog v0.0.9/go.mod h1:12HJgwBIZFNGL0EJnMRhmvGA0PQGx8VFwrZtM4CqbAk=
+github.com/kataras/iris/v12 v12.0.1/go.mod h1:udK4vLQKkdDqMGJJVd/msuMtN6hpYJhg/lSzuxjhO+U=
+github.com/kataras/neffos v0.0.10/go.mod h1:ZYmJC07hQPW67eKuzlfY7SO3bC0mw83A3j6im82hfqw=
+github.com/kataras/pio v0.0.0-20190103105442-ea782b38602d/go.mod h1:NV88laa9UiiDuX9AhMbDPkGYSPugBOV6yTZB1l2K9Z0=
+github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.9.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw=
+github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4=
+github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/labstack/echo/v4 v4.1.11/go.mod h1:i541M3Fj6f76NZtHSj7TXnyM8n2gaodfvfxNnFqi74g=
+github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
+github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
+github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
+github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
+github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
+github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
+github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ=
+github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
+github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
+github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw=
+github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
+github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI=
+github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
+github.com/mediocregopher/mediocre-go-lib v0.0.0-20181029021733-cb65787f37ed/go.mod h1:dSsfyI2zABAdhcbvkXqgxOxrCsbYeHCPgrZkku60dSg=
+github.com/mediocregopher/radix/v3 v3.3.0/go.mod h1:EmfVyvspXz1uZEyPBMyGK+kjWiKQGvsUt6O3Pj+LDCQ=
+github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc=
+github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
+github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
+github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag=
+github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
+github.com/mitchellh/pointerstructure v1.2.0 h1:O+i9nHnXS3l/9Wu7r4NrEdwA2VFTicjUEN1uBnDo34A=
+github.com/mitchellh/pointerstructure v1.2.0/go.mod h1:BRAsLI5zgXmw97Lf6s25bs8ohIXc3tViBH44KcwB2g4=
+github.com/mmcloughlin/addchain v0.4.0 h1:SobOdjm2xLj1KkXN5/n0xTIWyZA2+s99UCY1iPfkHRY=
+github.com/mmcloughlin/addchain v0.4.0/go.mod h1:A86O+tHqZLMNO4w6ZZ4FlVQEadcoqkyU72HC5wJ4RlU=
+github.com/mmcloughlin/profile v0.1.1/go.mod h1:IhHD7q1ooxgwTgjxQYkACGA77oFTDdFVejUS1/tS/qU=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
+github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=
+github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
+github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
+github.com/nats-io/nats.go v1.8.1/go.mod h1:BrFz9vVn0fU3AcH9Vn4Kd7W0NpJ651tD5omQ3M8LwxM=
+github.com/nats-io/nkeys v0.0.2/go.mod h1:dab7URMsZm6Z/jp9Z5UGa87Uutgc2mVpXLC4B7TDb/4=
+github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
+github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
+github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
+github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
+github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
+github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
+github.com/onsi/ginkgo v1.13.0/go.mod h1:+REjRxOmWfHCjfv9TTWB1jD1Frx4XydAD3zm1lskyM0=
+github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
+github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
+github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
+github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
+github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
+github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
+github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
+github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
+github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
+github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
+github.com/prometheus/client_golang v1.12.0 h1:C+UIj/QWtmqY13Arb8kwMt5j34/0Z2iKamrJ+ryC0Gg=
+github.com/prometheus/client_golang v1.12.0/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY=
+github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
+github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/prometheus/client_model v0.2.1-0.20210607210712-147c58e9608a h1:CmF68hwI0XsOQ5UwlBopMi2Ow4Pbg32akc4KIVCOm+Y=
+github.com/prometheus/client_model v0.2.1-0.20210607210712-147c58e9608a/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w=
+github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
+github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
+github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
+github.com/prometheus/common v0.32.1 h1:hWIdL3N2HoUx3B8j3YN9mWor0qhY/NlEKZEaXxuIRh4=
+github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls=
+github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
+github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
+github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
+github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
+github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU=
+github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
+github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
+github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
+github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
+github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
+github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
+github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
+github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
+github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
+github.com/ronanh/intcomp v1.1.0 h1:i54kxmpmSoOZFcWPMWryuakN0vLxLswASsGa07zkvLU=
+github.com/ronanh/intcomp v1.1.0/go.mod h1:7FOLy3P3Zj3er/kVrU/pl+Ql7JFZj7bwliMGketo0IU=
+github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik=
+github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU=
+github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.30.0 h1:SymVODrcRsaRaSInD9yQtKbtWqwsfoPcRff/oRXLj4c=
+github.com/rs/zerolog v1.30.0/go.mod h1:/tk+P47gFdPXq4QYjvCmT5/Gsug2nagsFWBWhAiSi1w=
+github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
+github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
+github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
+github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw=
+github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
+github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible h1:Bn1aCHHRnjv4Bl16T8rcaFjYSrGrIZvpiGO6P3Q4GpU=
+github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
+github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
+github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
+github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
+github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
+github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
+github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
+github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
+github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
+github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
+github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
+github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
+github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
+github.com/status-im/keycard-go v0.2.0 h1:QDLFswOQu1r5jsycloeQh3bVU8n/NatHHaZobtDnDzA=
+github.com/status-im/keycard-go v0.2.0/go.mod h1:wlp8ZLbsmrF6g6WjugPAx+IzoLrkdf9+mHxBEeo3Hbg=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/supranational/blst v0.3.11 h1:LyU6FolezeWAhvQk0k6O/d49jqgO52MSDDfYgbeoEm4=
+github.com/supranational/blst v0.3.11/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw=
+github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70Z7CTTCmYQn2CKbY8j86K7/FAIr141uY=
+github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc=
+github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
+github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
+github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
+github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
+github.com/tyler-smith/go-bip39 v1.1.0 h1:5eUemwrMargf3BSLRRCalXT93Ns6pQJIjYQN2nyfOP8=
+github.com/tyler-smith/go-bip39 v1.1.0/go.mod h1:gUYDtqQw1JS3ZJ8UWVcGTGqqr6YIN3CWg+kkNaLt55U=
+github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
+github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
+github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
+github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
+github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4=
+github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
+github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
+github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
+github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
+github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
+github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
+github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
+github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
+github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
+github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0/go.mod h1:/LWChgwKmvncFJFHJ7Gvn9wZArjbV5/FppcK2fKk/tI=
+github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg=
+github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM=
+github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc=
+github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
+go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
+go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
+go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
+go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
+golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
+golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.12.0 h1:tFM/ta59kqch6LlvYnPa0yx5a83cL2nHflFhYKvv9Yk=
+golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw=
+golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
+golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
+golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
+golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
+golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=
+golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
+golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
+golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
+golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
+golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
+golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 h1:kx6Ds3MlpiUHKj7syVnbp57++8WpuKPcR5yjLBjvLEA=
+golang.org/x/exp v0.0.0-20240823005443-9b4947da3948/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
+golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
+golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
+golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
+golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
+golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
+golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
+golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0=
+golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
+golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190327091125-710a502c58a2/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
+golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
+golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
+golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
+golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
+golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
+golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
+golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
+golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
+golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190327201419-c70d86f8b7cf/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
+golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
+golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
+golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
+golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
+golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
+golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
+golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
+golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.15.0 h1:zdAyfUGbYmuVokhzVmghFl2ZJh5QhcfebBgmVPFYA+8=
+golang.org/x/tools v0.15.0/go.mod h1:hpksKq4dtpQWS1uQ61JkdqWM3LscIS6Slf+VVkm+wQk=
+golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
+golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
+google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
+google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
+google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
+google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
+google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
+google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
+google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
+google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
+google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM=
+google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
+google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
+google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
+google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
+google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
+google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
+google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
+google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
+google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA=
+google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U=
+google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
+google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA=
+google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
+google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
+google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
+google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
+google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
+google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
+google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
+google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
+google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
+google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
+google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
+google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
+google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ=
+google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
+gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE=
+gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y=
+gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
+gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8=
+gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
+gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
+gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
+honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
+honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
+rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
+rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
+rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
+rsc.io/tmplfunc v0.0.3 h1:53XFQh69AfOa8Tw0Jm7t+GV7KZhOi6jzsCzTtKbMvzU=
+rsc.io/tmplfunc v0.0.3/go.mod h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA=
diff --git a/recursion/main.go b/recursion/main.go
new file mode 100644
index 00000000..8a497a01
--- /dev/null
+++ b/recursion/main.go
@@ -0,0 +1,137 @@
+package main
+
+import (
+	"errors"
+	"flag"
+	"os"
+
+	"ExpanderVerifierCircuit/modules/circuit"
+	"ExpanderVerifierCircuit/modules/verifier"
+
+	"github.com/consensys/gnark-crypto/ecc"
+
+	"github.com/consensys/gnark/backend/groth16"
+	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/frontend/cs/r1cs"
+)
+
+type VerifierCircuit struct { // gnark circuit whose Define hands every field below to verifier.Verify
+	MpiSize         uint            // passed to verifier.Verify; taken from the -mpi_size flag
+	SimdSize        uint            // passed to verifier.Verify; taken from the -simd_size flag
+	OriginalCircuit circuit.Circuit // circuit under verification; its PublicInput is passed to verifier.Verify as well
+	Proof           circuit.Proof   // private input
+}
+
+// Define declares the circuit constraints by delegating to verifier.Verify.
+// It always returns nil.
+func (vc *VerifierCircuit) Define(api frontend.API) error {
+	original := &vc.OriginalCircuit
+	verifier.Verify(
+		api,
+		original,
+		original.PublicInput,
+		0,
+		vc.SimdSize,
+		vc.MpiSize,
+		&vc.Proof,
+	)
+	return nil
+}
+
+// checkFileExists reports false only when os.Stat fails with a "does not exist"
+// error; success and every other stat error are reported as true.
+func checkFileExists(filePath string) bool {
+	if _, statErr := os.Stat(filePath); errors.Is(statErr, os.ErrNotExist) {
+		return false
+	}
+	return true
+}
+
+// testGroth16 drives the recursive verification end to end:
+//  1. parse the command-line flags,
+//  2. compile VerifierCircuit to an R1CS over the BN254 scalar field,
+//  3. build the witness from the circuit/witness/proof files and check that it
+//     solves the R1CS,
+//  4. when -with_groth16 is set, load or generate the Groth16 keys, prove,
+//     verify, and write the proof to -recursive_proof.
+//
+// Every failing step panics with a message that includes the underlying error.
+func testGroth16() {
+	circuit_file := flag.String("circuit", "../data/circuit_bn254.txt", "circuit file")
+	witness_file := flag.String("witness", "../data/witness_bn254.txt", "witness file")
+	gkr_proof_file := flag.String("gkr_proof", "../data/gkr_proof.txt", "gkr proof file")
+
+	with_groth16 := flag.Bool("with_groth16", false, "set true to do groth16 proof")
+	groth16_pk_file := flag.String("groth16_pk", "", "where to put the proving key, will create a new one and write to this file if it does not exist")
+	groth16_vk_file := flag.String("groth16_vk", "", "where to put the verifying key, will create a new one and write to this file if it does not exist")
+	recursive_proof_file := flag.String("recursive_proof", "../data/recursive_proof.txt", "where to output the groth16 recursive proof")
+
+	mpi_size := flag.Uint("mpi_size", 1, "mpi size of gkr proof")
+	simd_size := flag.Uint("simd_size", 1, "simd size of gkr proof")
+	flag.Parse()
+
+	// must aborts with a descriptive message as soon as a step reports an error.
+	must := func(what string, err error) {
+		if err != nil {
+			panic(what + ": " + err.Error())
+		}
+	}
+
+	if *simd_size != 1 {
+		panic("For bn254, Expander only implements simd size 1, so it must be 1 here")
+	}
+
+	// The second result of ReadCircuit is not needed here.
+	original_circuit, _ := circuit.ReadCircuit(*circuit_file, *witness_file, *mpi_size)
+	proof := circuit.ReadProof(*gkr_proof_file)
+	original_circuit.PrintStats()
+
+	// The circuit is compiled against proof.PlaceHolder() rather than the real proof
+	// (presumably a value-free proof of the same shape; confirm in circuit.Proof).
+	verifier_circuit := VerifierCircuit{
+		MpiSize:         *mpi_size,
+		SimdSize:        *simd_size,
+		OriginalCircuit: *original_circuit,
+		Proof:           *proof.PlaceHolder(),
+	}
+	r1cs, compile_err := frontend.Compile(ecc.BN254.ScalarField(), r1cs.NewBuilder, &verifier_circuit)
+	must("Unable to compile the verifier circuit", compile_err)
+
+	println("Nb Constraints: ", r1cs.GetNbConstraints())
+	println("Nb Internal Witnesss: ", r1cs.GetNbInternalVariables())
+	println("Nb Private Witness: ", r1cs.GetNbSecretVariables())
+	println("Nb Public Witness:", r1cs.GetNbPublicVariables())
+
+	// witness definition: the circuit is read again so the assignment starts from
+	// a fresh copy (presumably because compilation modifies the struct it is given).
+	original_circuit, _ = circuit.ReadCircuit(*circuit_file, *witness_file, *mpi_size)
+	assignment := VerifierCircuit{
+		MpiSize:         *mpi_size,
+		SimdSize:        *simd_size,
+		OriginalCircuit: *original_circuit,
+		Proof:           *proof,
+	}
+
+	println("Solving witness...")
+	witness, witness_err := frontend.NewWitness(&assignment, ecc.BN254.ScalarField())
+	must("Unable to build the witness", witness_err)
+
+	println("Checking satisfiability...")
+	must("R1CS not satisfied", r1cs.IsSolved(witness))
+	println("R1CS satisfied.")
+
+	if *with_groth16 {
+		pk := groth16.NewProvingKey(ecc.BN254)
+		vk := groth16.NewVerifyingKey(ecc.BN254)
+		// groth16 zkSNARK: Setup
+		// Keys are reused only when both paths are given and both files exist.
+		keys_on_disk := *groth16_pk_file != "" && *groth16_vk_file != "" &&
+			checkFileExists(*groth16_pk_file) && checkFileExists(*groth16_vk_file)
+		if keys_on_disk {
+			// Dereference the flags: printing the *string would show addresses.
+			println("Groth16 reading pk vk from file...", *groth16_pk_file, " ", *groth16_vk_file)
+
+			pk_file, pk_open_err := os.Open(*groth16_pk_file)
+			must("Unable to open the proving key file", pk_open_err)
+			_, pk_read_err := pk.ReadFrom(pk_file)
+			pk_file.Close()
+			must("Unable to read the proving key", pk_read_err)
+
+			vk_file, vk_open_err := os.Open(*groth16_vk_file)
+			must("Unable to open the verifying key file", vk_open_err)
+			_, vk_read_err := vk.ReadFrom(vk_file)
+			vk_file.Close()
+			must("Unable to read the verifying key", vk_read_err)
+		} else {
+			println("Groth16 generating setup from scratch...")
+			var setup_err error
+			pk, vk, setup_err = groth16.Setup(r1cs)
+			// Check before touching pk/vk: they are not usable if setup failed.
+			must("Groth16 setup fails", setup_err)
+
+			// A key is persisted only when a destination was given. O_TRUNC keeps
+			// stale trailing bytes out when a longer file is being overwritten.
+			if *groth16_pk_file != "" {
+				pk_file, pk_open_err := os.OpenFile(*groth16_pk_file, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+				must("Unable to create the proving key file", pk_open_err)
+				_, pk_write_err := pk.WriteTo(pk_file)
+				must("Unable to write the proving key", pk_write_err)
+				must("Unable to close the proving key file", pk_file.Close())
+			}
+			if *groth16_vk_file != "" {
+				vk_file, vk_open_err := os.OpenFile(*groth16_vk_file, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+				must("Unable to create the verifying key file", vk_open_err)
+				_, vk_write_err := vk.WriteTo(vk_file)
+				must("Unable to write the verifying key", vk_write_err)
+				must("Unable to close the verifying key file", vk_file.Close())
+			}
+		}
+		println("Setup done.")
+
+		println("Groth16 prove-verify ing...")
+		// Each step is checked before the next one consumes its result.
+		publicWitness, public_err := witness.Public()
+		must("Groth16 fails (public witness)", public_err)
+		groth16_proof, prove_err := groth16.Prove(r1cs, pk, witness)
+		must("Groth16 fails (prove)", prove_err)
+		must("Groth16 fails (verify)", groth16.Verify(groth16_proof, vk, publicWitness))
+
+		file, open_err := os.OpenFile(*recursive_proof_file, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+		must("Unable to create the recursive proof file", open_err)
+		_, write_err := groth16_proof.WriteTo(file)
+		must("Unable to write the recursive proof", write_err)
+		must("Unable to close the recursive proof file", file.Close())
+	} else {
+		println("Groth16 proof skipped, set '-with_groth16=1' to produce a proof")
+	}
+
+	println("Done.")
+}
+
+func main() { // program entry point; all work happens in testGroth16
+	testGroth16()
+}
diff --git a/recursion/modules/circuit/ecc_circuit.go b/recursion/modules/circuit/ecc_circuit.go
new file mode 100644
index 00000000..4e08c826
--- /dev/null
+++ b/recursion/modules/circuit/ecc_circuit.go
@@ -0,0 +1,138 @@
+package circuit
+
+type Allocation struct { // one placement of a child segment inside its parent segment
+	IOffset uint // added to the parent's input offset when the child is flattened
+	OOffset uint // added to the parent's output offset when the child is flattened
+}
+
+type ChildSegInfo struct { // reference from a segment to one of its child segments
+	Id         uint         // index of the child in the segment slice handed to FlattenInto
+	Allocation []Allocation // placements of the child; FlattenInto expands it once per entry
+}
+
+type Segment struct { // sub-circuit made of its own gates plus nested child segments
+	IVarNum    uint           // used by Flatten as Layer.InputLenLog, raised to at least 1
+	OVarNum    uint           // used by Flatten as Layer.OutputLenLog, raised to at least 1
+	ChildSegs  []ChildSegInfo // children expanded recursively by FlattenInto
+	GateMuls   []Gate         // multiplication gates; insert_gates reads IIds[0] and IIds[1]
+	GateAdds   []Gate         // addition gates; insert_gates reads IIds[0]
+	GateConsts []Gate         // constant gates; insert_gates reads no input id
+	// TODO: Support custom gate
+}
+
+type ECCCircuit struct { // segment-based circuit that Flatten converts into a layered Circuit
+	NumPublicInputs        uint // not read by Flatten
+	NumOutputs             uint // not read by Flatten
+	ExpectedNumOutputZeros uint // copied unchanged into the flattened Circuit
+
+	Segments []Segment // all segments, addressed by index
+	LayerIds []uint    // per layer, in order: index into Segments of that layer's top segment
+}
+
+// insert_gates appends a shifted copy of every gate owned directly by this
+// segment (child segments are not visited) to muls, adds and csts. Input ids are
+// shifted by i_offset, output ids by o_offset; coefficients are copied as is.
+func (segment *Segment) insert_gates(muls *[]Gate, adds *[]Gate, csts *[]Gate, i_offset uint, o_offset uint) {
+	// Multiplication gates read two inputs.
+	for _, g := range segment.GateMuls {
+		lhs, rhs := g.IIds[0]+i_offset, g.IIds[1]+i_offset
+		*muls = append(*muls, Gate{IIds: []uint{lhs, rhs}, OId: g.OId + o_offset, Coef: g.Coef})
+	}
+
+	// Addition gates read a single input.
+	for _, g := range segment.GateAdds {
+		src := g.IIds[0] + i_offset
+		*adds = append(*adds, Gate{IIds: []uint{src}, OId: g.OId + o_offset, Coef: g.Coef})
+	}
+
+	// Constant gates read no input, so only the output id moves.
+	for _, g := range segment.GateConsts {
+		*csts = append(*csts, Gate{IIds: []uint{}, OId: g.OId + o_offset, Coef: g.Coef})
+	}
+}
+
+// FlattenInto appends this segment's gates, followed by the gates of every
+// placement of every child segment (recursively, depth first), to muls, adds and
+// csts. i_offset and o_offset are the input/output offsets accumulated so far;
+// each child placement adds its own Allocation offsets on top of them.
+// Nothing is returned: the results are written through the three slice pointers.
+func (segment *Segment) FlattenInto(
+	all_segments []Segment,
+	i_offset uint,
+	o_offset uint,
+	muls *[]Gate,
+	adds *[]Gate,
+	csts *[]Gate,
+) {
+	// Own gates first, then the children in declaration order.
+	segment.insert_gates(muls, adds, csts, i_offset, o_offset)
+
+	for _, child_ref := range segment.ChildSegs {
+		child := &all_segments[child_ref.Id]
+		for _, placement := range child_ref.Allocation {
+			child_i := placement.IOffset + i_offset
+			child_o := placement.OOffset + o_offset
+			child.FlattenInto(all_segments, child_i, child_o, muls, adds, csts)
+		}
+	}
+}
+
+// Flatten builds a layered Circuit from the segment hierarchy: one Layer per
+// entry of LayerIds, in order, holding the fully expanded gates of the segment
+// that entry points to. PublicInput of the result is left unset.
+func (ecc_circuit *ECCCircuit) Flatten() *Circuit {
+	flat := Circuit{ExpectedNumOutputZeros: ecc_circuit.ExpectedNumOutputZeros}
+
+	for _, top_id := range ecc_circuit.LayerIds {
+		top := &ecc_circuit.Segments[top_id]
+
+		var (
+			mul_gates []Gate
+			add_gates []Gate
+			cst_gates []Gate
+		)
+		// A layer's top segment starts at offset zero on both sides.
+		top.FlattenInto(ecc_circuit.Segments, 0, 0, &mul_gates, &add_gates, &cst_gates)
+
+		layer := Layer{
+			// Both lengths are raised to at least 1.
+			InputLenLog:  max(top.IVarNum, 1),
+			OutputLenLog: max(top.OVarNum, 1),
+
+			Cst: cst_gates,
+			Add: add_gates,
+			Mul: mul_gates,
+
+			// Without multiplication gates the layer is flagged as degree one.
+			StructureInfo: StructureInfo{MaxDegreeOne: len(mul_gates) == 0},
+		}
+		flat.Layers = append(flat.Layers, layer)
+	}
+
+	return &flat
+}
diff --git a/recursion/modules/circuit/eval_test.go b/recursion/modules/circuit/eval_test.go
new file mode 100644
index 00000000..fbc84c3c
--- /dev/null
+++ b/recursion/modules/circuit/eval_test.go
@@ -0,0 +1,141 @@
+package circuit
+
+import (
+	"math/big"
+	"testing"
+
+	"github.com/consensys/gnark-crypto/ecc"
+	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/frontend/cs/r1cs"
+	"golang.org/x/exp/rand"
+)
+
+type Evaluation struct { // test circuit whose Define evaluates Circuit gate by gate on each private input
+	Circuit      Circuit               // public input is part of circuit, see the definition
+	PrivateInput [][]frontend.Variable // one input vector per witness; entry i is evaluated together with Circuit.PublicInput[i]
+}
+
+// Define evaluates the layered circuit inside the constraint system, once per
+// private input vector, and asserts that the first ExpectedNumOutputZeros values
+// produced by the last layer are zero. It always returns nil.
+func (e *Evaluation) Define(api frontend.API) error {
+	api.Println("Definition start")
+	for wit_idx, values := range e.PrivateInput {
+		// values holds the current layer's input and is replaced by its output.
+		for layer_idx := range e.Circuit.Layers {
+			layer := &e.Circuit.Layers[layer_idx]
+
+			// Every output slot starts at zero and accumulates gate contributions.
+			next := make([]frontend.Variable, uint(1)<<layer.OutputLenLog)
+			for slot := range next {
+				next[slot] = 0
+			}
+
+			// out += in0 * in1 * coef
+			for _, gate := range layer.Mul {
+				product := api.Mul(values[gate.IIds[0]], values[gate.IIds[1]], gate.Coef.GetActualLocalValue())
+				next[gate.OId] = api.Add(next[gate.OId], product)
+			}
+
+			// out += in0 * coef
+			for _, gate := range layer.Add {
+				scaled := api.Mul(values[gate.IIds[0]], gate.Coef.GetActualLocalValue())
+				next[gate.OId] = api.Add(next[gate.OId], scaled)
+			}
+
+			// out += value, taken from this witness's public input, the stored
+			// constant, or (for any other coefficient type) the random value.
+			for _, gate := range layer.Cst {
+				var term frontend.Variable
+				switch gate.Coef.CoefType {
+				case PublicInput:
+					term = e.Circuit.PublicInput[wit_idx][gate.Coef.InputIdx]
+				case Constant:
+					term = gate.Coef.Value
+				default:
+					term = gate.Coef.RandomValue
+				}
+				next[gate.OId] = api.Add(next[gate.OId], term)
+			}
+
+			values = next
+		}
+
+		api.Println("wit: #", wit_idx)
+		for j := uint(0); j < e.Circuit.ExpectedNumOutputZeros; j++ {
+			api.Println(values[j])
+			api.AssertIsEqual(values[j], 0)
+		}
+	}
+
+	return nil
+}
+
+// TestCircuitEvaluation compiles the Evaluation circuit from the data files,
+// then checks that the witness stored next to them solves the resulting R1CS
+// and that the same witness with one private value overwritten does not.
+// Failures are reported through t with the underlying error instead of panicking.
+func TestCircuitEvaluation(t *testing.T) {
+	circuit, private_input := ReadCircuit("../../../data/circuit.txt", "../../../data/witness.txt", 1)
+	// The rest of the test indexes element 0 of both and draws random indices
+	// into private_input, so empty inputs must stop the test here.
+	if len(circuit.PublicInput) == 0 || len(private_input) == 0 || len(private_input[0]) == 0 {
+		t.Fatalf("circuit data is empty: %d public input vectors, %d private input vectors",
+			len(circuit.PublicInput), len(private_input))
+	}
+
+	println(circuit.ExpectedNumOutputZeros)
+	for i := 0; i < len(circuit.PublicInput[0]); i++ {
+		v, _ := circuit.PublicInput[0][i].(big.Int)
+		println("Public Input", v.String())
+	}
+
+	// Compile against inputs of the right shape but without values.
+	public_input_empty := make([][]frontend.Variable, len(circuit.PublicInput))
+	for i := 0; i < len(public_input_empty); i++ {
+		public_input_empty[i] = make([]frontend.Variable, len(circuit.PublicInput[0]))
+	}
+	circuit.PublicInput = public_input_empty
+
+	private_input_empty := make([][]frontend.Variable, len(private_input))
+	for i := 0; i < len(private_input_empty); i++ {
+		private_input_empty[i] = make([]frontend.Variable, len(private_input[0]))
+	}
+
+	evaluation := Evaluation{
+		Circuit:      *circuit,
+		PrivateInput: private_input_empty,
+	}
+	r1cs, r1cs_err := frontend.Compile(ecc.BN254.ScalarField(), r1cs.NewBuilder, &evaluation)
+	if r1cs_err != nil {
+		t.Fatalf("Unable to generate r1cs: %v", r1cs_err)
+	}
+
+	println("Nb Constraints: ", r1cs.GetNbConstraints())
+	println("Nb Internal Witnesss: ", r1cs.GetNbInternalVariables())
+	println("Nb Private Witness: ", r1cs.GetNbSecretVariables())
+	println("Nb Public Witness:", r1cs.GetNbPublicVariables())
+
+	// Correct Witness
+	circuit, private_input = ReadCircuit("../../../data/circuit.txt", "../../../data/witness.txt", 1)
+
+	assignment := Evaluation{
+		Circuit:      *circuit,
+		PrivateInput: private_input,
+	}
+	witness, witness_err := frontend.NewWitness(&assignment, ecc.BN254.ScalarField())
+	if witness_err != nil {
+		t.Fatalf("Unable to solve witness: %v", witness_err)
+	}
+
+	err := r1cs.IsSolved(witness)
+	if err != nil {
+		t.Fatalf("R1CS not satisfied: %v", err)
+	}
+
+	// Incorrect witness
+	circuit, private_input = ReadCircuit("../../../data/circuit.txt", "../../../data/witness.txt", 1)
+	ri := rand.Intn(len(private_input))
+	rj := rand.Intn(len(private_input[0]))
+	private_input[ri][rj] = 147258369 // this should make the evaluation incorrect
+
+	assignment = Evaluation{
+		Circuit:      *circuit,
+		PrivateInput: private_input,
+	}
+	witness, witness_err = frontend.NewWitness(&assignment, ecc.BN254.ScalarField())
+	if witness_err != nil {
+		t.Fatalf("Unable to solve witness: %v", witness_err)
+	}
+	err = r1cs.IsSolved(witness)
+	if err == nil {
+		t.Fatalf("Incorrect witness should not be marked as solved (tampered private_input[%d][%d])", ri, rj)
+	}
+}
diff --git a/recursion/modules/circuit/expander_circuit.go b/recursion/modules/circuit/expander_circuit.go
new file mode 100644
index 00000000..693f7e17
--- /dev/null
+++ b/recursion/modules/circuit/expander_circuit.go
@@ -0,0 +1,112 @@
+package circuit
+
+import (
+	"ExpanderVerifierCircuit/modules/transcript"
+	"log"
+	"math/big"
+
+	"github.com/consensys/gnark/frontend"
+)
+
+// CoefType tags which field of a Coef holds the gate coefficient.
+type CoefType uint
+
+// Coefficient kinds, numbered by iota in declaration order
+// (Constant = 0, Random = 1, PublicInput = 2). They are untyped constants.
+const (
+	Constant = iota
+	Random
+	PublicInput
+)
+
+// Coef is a gate coefficient. CoefType selects which of the remaining
+// fields is meaningful; the other fields are ignored for that kind.
+type Coef struct {
+	CoefType    CoefType
+	Value       big.Int           // CoefType == Constant
+	RandomValue frontend.Variable // CoefType == Random
+	InputIdx    uint              // CoefType == PublicInput
+}
+
+// GetActualLocalValue returns the coefficient that is stored on the gate
+// itself: Value for a Constant coefficient, RandomValue for a Random one.
+// Any other kind (e.g. PublicInput) is not held locally and causes a panic.
+func (c *Coef) GetActualLocalValue() frontend.Variable {
+	if c.CoefType == Constant {
+		return c.Value
+	}
+	if c.CoefType == Random {
+		return c.RandomValue
+	}
+	panic("Do not use this function for public input")
+}
+
+// Gate is a single gate of a layer: its input ids, its output id and the
+// coefficient attached to it.
+type Gate struct {
+	IIds []uint
+	OId  uint
+
+	Coef Coef
+}
+
+// StructureInfo carries structural hints about a layer.
+// NOTE(review): MaxDegreeOne presumably means the layer has no mul gates;
+// confirm where the flag is set.
+type StructureInfo struct {
+	MaxDegreeOne bool
+}
+
+// Layer is one layer of the layered circuit, with its gates grouped by kind.
+type Layer struct {
+	// Base-2 logarithms of the layer's input and output sizes.
+	InputLenLog  uint
+	OutputLenLog uint
+
+	Cst []Gate
+	Add []Gate
+	Mul []Gate
+
+	StructureInfo StructureInfo
+}
+
+// Circuit is the layered circuit together with its public inputs.
+// PublicInput holds one row of public values per witness instance.
+type Circuit struct {
+	Layers      []Layer
+	PublicInput [][]frontend.Variable `gnark:",public"`
+
+	ExpectedNumOutputZeros uint
+}
+
+// FillRndCoef replaces the RandomValue of every gate whose coefficient is of
+// kind Random with a fresh challenge drawn from the transcript.
+func (l *Layer) FillRndCoef(transcript *transcript.Transcript) {
+	// Each ChallengeF call advances the transcript, so the visiting order is
+	// part of the protocol: Mul gates first, then Add, then Cst.
+	for _, gates := range [][]Gate{l.Mul, l.Add, l.Cst} {
+		for idx := range gates {
+			coef := &gates[idx].Coef
+			if coef.CoefType == Random {
+				coef.RandomValue = transcript.ChallengeF()
+			}
+		}
+	}
+}
+
+// FillRndCoef fills the random coefficients of every layer, walking the
+// layers in slice order.
+func (c *Circuit) FillRndCoef(transcript *transcript.Transcript) {
+	for idx := range c.Layers {
+		layer := &c.Layers[idx]
+		layer.FillRndCoef(transcript)
+	}
+}
+
+// PrintStats logs the number of layers and the gate counts of the circuit.
+//
+// "#Cst Input" is the length of the first public-input row (0 when no public
+// input is attached), and "#Cst Circuit" is the total number of Cst gates
+// minus that amount.
+func (c *Circuit) PrintStats() {
+	n_mul, n_add, n_cst_circuit := 0, 0, 0
+	for i := range c.Layers {
+		layer := &c.Layers[i]
+		n_mul += len(layer.Mul)
+		n_add += len(layer.Add)
+		n_cst_circuit += len(layer.Cst)
+	}
+
+	// A circuit without public input must not crash a diagnostics helper.
+	n_cst_input := 0
+	if len(c.PublicInput) > 0 {
+		n_cst_input = len(c.PublicInput[0])
+	}
+	n_cst_circuit -= n_cst_input
+
+	log.Println("#Layers: ", len(c.Layers))
+	log.Println("#Mul Gates: ", n_mul)
+	log.Println("#Add Gates: ", n_add)
+	log.Println("#Cst Circuit: ", n_cst_circuit)
+	log.Println("#Cst Input: ", n_cst_input)
+}
diff --git a/recursion/modules/circuit/proof.go b/recursion/modules/circuit/proof.go
new file mode 100644
index 00000000..0eb77283
--- /dev/null
+++ b/recursion/modules/circuit/proof.go
@@ -0,0 +1,37 @@
+package circuit
+
+import "github.com/consensys/gnark/frontend"
+
+// Proof is a flat list of proof elements read sequentially through a cursor.
+type Proof struct {
+	Idx   uint                // position of the next element returned by Next
+	Elems []frontend.Variable // the proof elements, in consumption order
+}
+
+func (p *Proof) Next() frontend.Variable {
+	var e = p.Elems[p.Idx]
+	p.Idx++
+
+	return e
+}
+
+// Reset rewinds the cursor to the first proof element.
+func (p *Proof) Reset() { p.Idx = 0 }
+
+// PlaceHolder returns a new proof with the same number of elements as p,
+// all unset, and the cursor at zero.
+func (p *Proof) PlaceHolder() *Proof {
+	blank := new(Proof)
+	blank.Elems = make([]frontend.Variable, len(p.Elems))
+	return blank
+}
+
+func NewRandomProof(n_elems uint) *Proof {
+	var proof = Proof{}
+
+	proof.Idx = 0
+	for i := uint(0); i < n_elems; i++ {
+		proof.Elems = append(proof.Elems, uint(123456789))
+	}
+
+	return &proof
+}
diff --git a/recursion/modules/circuit/random.go b/recursion/modules/circuit/random.go
new file mode 100644
index 00000000..728fe328
--- /dev/null
+++ b/recursion/modules/circuit/random.go
@@ -0,0 +1,70 @@
+package circuit
+
+import (
+	"math/big"
+
+	"github.com/consensys/gnark/frontend"
+)
+
+func NewRandomLayer(input_len_log uint, output_len_log uint, public_input_start_idx *uint) *Layer {
+	var layer = Layer{}
+
+	layer.InputLenLog = input_len_log
+	layer.OutputLenLog = output_len_log
+
+	var input_size = uint(1) << input_len_log
+	var output_size = uint(1) << output_len_log
+	for i := uint(0); i < output_size; i++ {
+		layer.Add = append(layer.Add,
+			Gate{
+				IIds: []uint{i % input_size},
+				OId:  i,
+				Coef: Coef{Constant, *big.NewInt(1), 0, 0},
+			},
+		)
+
+		layer.Mul = append(layer.Mul,
+			Gate{
+				IIds: []uint{i % input_size, (i * 2) % input_size},
+				OId:  i,
+				Coef: Coef{Constant, *big.NewInt(1), 0, 0},
+			},
+		)
+
+		layer.Cst = append(layer.Cst,
+			Gate{
+				IIds: make([]uint, 0),
+				OId:  i,
+				Coef: Coef{PublicInput, *big.NewInt(0), 0, *public_input_start_idx},
+			},
+		)
+		(*public_input_start_idx)++
+	}
+
+	return &layer
+}
+
+func NewRandomCircuit(n_layers uint, simd_size uint, mpi_size uint, set_public_input bool) *Circuit {
+	var circuit = Circuit{}
+
+	var n_public_input uint = 0
+	for i := uint(0); i < n_layers; i++ {
+		circuit.Layers = append(circuit.Layers, *NewRandomLayer(
+			n_layers-i+1,
+			n_layers-i,
+			&n_public_input,
+		))
+	}
+
+	for i := uint(0); i < mpi_size*simd_size; i++ {
+		circuit.PublicInput = append(circuit.PublicInput, make([]frontend.Variable, n_public_input))
+		if set_public_input {
+			for j := uint(0); j < n_public_input; j++ {
+				circuit.PublicInput[i][j] = 0
+			}
+		}
+	}
+
+	circuit.ExpectedNumOutputZeros = uint(1) << circuit.Layers[len(circuit.Layers)-1].OutputLenLog
+	return &circuit
+}
diff --git a/recursion/modules/circuit/serde.go b/recursion/modules/circuit/serde.go
new file mode 100644
index 00000000..bd701d40
--- /dev/null
+++ b/recursion/modules/circuit/serde.go
@@ -0,0 +1,275 @@
+package circuit
+
+import (
+	"encoding/binary"
+	"math/big"
+	"math/bits"
+	"os"
+
+	"github.com/consensys/gnark/frontend"
+)
+
+// InputBuf is a forward-only reader over a byte slice: data always holds the
+// bytes that have not been consumed yet.
+type InputBuf struct {
+	data []byte
+}
+
+// NewInputBuf loads the whole file into memory and wraps it in an InputBuf.
+// It panics if the file cannot be read; the panic message carries the
+// underlying error (which names the path and the cause).
+func NewInputBuf(file_name string) *InputBuf {
+	data, err := os.ReadFile(file_name)
+	if err != nil {
+		panic("Unable to open file: " + err.Error())
+	}
+	return &InputBuf{data: data}
+}
+
+// Step discards the next n_bytes bytes of the buffer.
+func (buf *InputBuf) Step(n_bytes uint) {
+	remaining := buf.data[n_bytes:]
+	buf.data = remaining
+}
+
+// Len reports how many bytes are still unread.
+func (buf *InputBuf) Len() uint {
+	unread := len(buf.data)
+	return uint(unread)
+}
+
+// ReadUint64 consumes 8 bytes and decodes them as a little-endian uint64.
+func (buf *InputBuf) ReadUint64() uint64 {
+	head := buf.data[:8]
+	buf.Step(8)
+	return binary.LittleEndian.Uint64(head)
+}
+
+// ReadUint consumes a little-endian uint64 and converts it to uint.
+func (buf *InputBuf) ReadUint() uint {
+	wide := buf.ReadUint64()
+	return uint(wide)
+}
+
+// ReadUint8 consumes and returns a single byte.
+func (buf *InputBuf) ReadUint8() uint8 {
+	first := buf.data[0]
+	buf.data = buf.data[1:]
+	return first
+}
+
+// N_FIELD_BYTES is the serialized size, in bytes, of one field element.
+const N_FIELD_BYTES uint = 32
+
+// ReadField consumes field_size_in_bytes bytes holding a little-endian
+// integer and returns it as a big.Int.
+//
+// big.Int.SetBytes expects big-endian input, so the bytes are reversed into a
+// private copy; the underlying input buffer is left untouched.
+func (buf *InputBuf) ReadField(field_size_in_bytes uint) *big.Int {
+	raw := buf.data[:field_size_in_bytes]
+
+	// little endian to big endian
+	be := make([]byte, len(raw))
+	for i, b := range raw {
+		be[len(raw)-1-i] = b
+	}
+
+	buf.Step(field_size_in_bytes)
+	return new(big.Int).SetBytes(be)
+}
+
+// ReadGate decodes one gate with input_num inputs. The wire layout is:
+// input_num input ids, the output id, a one-byte coefficient tag, then the
+// tag-specific payload (a field element for tag 1, nothing for tag 2, a
+// public-input index for tag 3). Any other tag panics, and so does a
+// public-input coefficient on a gate that has inputs.
+func (buf *InputBuf) ReadGate(input_num uint) Gate {
+	var gate Gate
+	for k := uint(0); k < input_num; k++ {
+		gate.IIds = append(gate.IIds, buf.ReadUint())
+	}
+	gate.OId = buf.ReadUint()
+
+	coef := &gate.Coef
+	tag := buf.ReadUint8()
+	if tag == 1 {
+		coef.CoefType = Constant
+		coef.Value = *buf.ReadField(N_FIELD_BYTES)
+		// Unused for constants, but gnark rejects a nil variable.
+		coef.RandomValue = 0
+	} else if tag == 2 {
+		coef.CoefType = Random
+		// Default only; the actual value is generated by the transcript.
+		coef.RandomValue = 1
+	} else if tag == 3 {
+		coef.CoefType = PublicInput
+		// Unused for public inputs, but gnark rejects a nil variable.
+		coef.RandomValue = 0
+		coef.InputIdx = buf.ReadUint()
+		if input_num != 0 {
+			panic("Public input can only appear in the form of cst gate")
+		}
+	} else {
+		panic("Unrecognized coef type")
+	}
+
+	return gate
+}
+
+// ReadAllocation decodes an Allocation: the input offset followed by the
+// output offset.
+func (buf *InputBuf) ReadAllocation() Allocation {
+	i_offset := buf.ReadUint()
+	o_offset := buf.ReadUint()
+	return Allocation{IOffset: i_offset, OOffset: o_offset}
+}
+
+func (buf *InputBuf) ReadChildSegInfo() ChildSegInfo {
+	id := buf.ReadUint()
+
+	n_allocations := buf.ReadUint()
+	var allocation []Allocation
+	for i := uint(0); i < n_allocations; i++ {
+		allocation = append(allocation, buf.ReadAllocation())
+	}
+
+	return ChildSegInfo{
+		Id:         id,
+		Allocation: allocation,
+	}
+}
+
+// ReadSegment decodes one circuit segment: input and output lengths (each
+// must be a power of two), the child segments, then the mul, add and cst
+// gate lists, each prefixed by its count. A non-zero custom-gate count is
+// rejected with a panic.
+func (buf *InputBuf) ReadSegment() Segment {
+	i_len := buf.ReadUint()
+	o_len := buf.ReadUint()
+	if !(bits.OnesCount(i_len) == 1 && bits.OnesCount(o_len) == 1) {
+		panic("Incorrect input or output length")
+	}
+
+	// Lengths are stored on the wire; the segment keeps their log2.
+	seg := Segment{
+		IVarNum: uint(bits.TrailingZeros(i_len)),
+		OVarNum: uint(bits.TrailingZeros(o_len)),
+	}
+
+	for remaining := buf.ReadUint(); remaining > 0; remaining-- {
+		seg.ChildSegs = append(seg.ChildSegs, buf.ReadChildSegInfo())
+	}
+
+	// readGates consumes a count followed by that many gates of one fan-in.
+	readGates := func(fan_in uint) []Gate {
+		var gates []Gate
+		for remaining := buf.ReadUint(); remaining > 0; remaining-- {
+			gates = append(gates, buf.ReadGate(fan_in))
+		}
+		return gates
+	}
+	seg.GateMuls = readGates(2)
+	seg.GateAdds = readGates(1)
+	seg.GateConsts = readGates(0)
+
+	if n_customs := buf.ReadUint(); n_customs != 0 {
+		panic("Custom gate not supported yet.")
+	}
+
+	return seg
+}
+
+// VERSION_NUM is the magic number expected at the start of a serialized
+// circuit; ReadECCCircuit rejects any other value.
+const VERSION_NUM uint = 3914834606642317635 // b'CIRCUIT6'
+
+func (buf *InputBuf) ReadECCCircuit() *ECCCircuit {
+	version_num := buf.ReadUint()
+	if version_num != VERSION_NUM {
+		panic("Incorrect version of circuit serialization")
+	}
+
+	field_mod := buf.ReadField(N_FIELD_BYTES)
+	bn254_fr_mod, _ := big.NewInt(0).SetString("21888242871839275222246405745257275088548364400416034343698204186575808495617", 10)
+	if field_mod.Cmp(bn254_fr_mod) != 0 {
+		panic("Support bn254 fr only, incorrect field mod detected")
+	}
+
+	num_public_inputs := buf.ReadUint()
+	num_outputs := buf.ReadUint()
+	expected_num_output_zeros := buf.ReadUint()
+
+	n_segments := buf.ReadUint()
+	var segments []Segment
+	for i := uint(0); i < n_segments; i++ {
+		segments = append(segments, buf.ReadSegment())
+	}
+
+	n_layers := buf.ReadUint()
+	var layer_ids []uint
+	for i := uint(0); i < n_layers; i++ {
+		layer_ids = append(layer_ids, buf.ReadUint())
+	}
+
+	return &ECCCircuit{
+		NumPublicInputs:        num_public_inputs,
+		NumOutputs:             num_outputs,
+		ExpectedNumOutputZeros: expected_num_output_zeros,
+
+		Segments: segments,
+		LayerIds: layer_ids,
+	}
+}
+
+func (buf *InputBuf) ReadWitness() *Witness {
+	num_witnesses := buf.ReadUint()
+	num_private_inputs_per_witness := buf.ReadUint()
+	num_public_inputs_per_witness := buf.ReadUint()
+	modulus := buf.ReadField(N_FIELD_BYTES)
+	bn254_fr_modulus, _ := big.NewInt(0).SetString("21888242871839275222246405745257275088548364400416034343698204186575808495617", 10)
+	if modulus.Cmp(bn254_fr_modulus) != 0 {
+		panic("Support bn254 fr only, incorrect field mod detected")
+	}
+
+	var values []big.Int
+	for i := 0; i < int(num_witnesses*(num_private_inputs_per_witness+num_public_inputs_per_witness)); i++ {
+		values = append(values, *buf.ReadField(N_FIELD_BYTES))
+	}
+
+	return &Witness{
+		NumWitnesses:               num_witnesses,
+		NumPrivateInputsPerWitness: num_private_inputs_per_witness,
+		NumPublicInputsPerWitness:  num_public_inputs_per_witness,
+		Values:                     values,
+	}
+}
+
+// ReadProof skips the leading 8-byte word (presumably a length prefix; its
+// value is ignored) and then decodes the rest of the buffer as consecutive
+// field elements. It panics if the remainder is not a whole number of
+// elements.
+func (buf *InputBuf) ReadProof() *Proof {
+	buf.ReadUint64()
+
+	elems := []frontend.Variable{}
+	for buf.Len() != 0 {
+		if buf.Len() < N_FIELD_BYTES {
+			panic("Trailing bytes, proof parsing fails")
+		}
+		elems = append(elems, buf.ReadField(N_FIELD_BYTES))
+	}
+
+	return &Proof{Idx: 0, Elems: elems}
+}
+
+// ReadCircuit loads a circuit file and a witness file. It returns the
+// flattened circuit with its public inputs attached, plus the private inputs
+// as one row per witness instance.
+//
+// TODO:
+// Verifier should not have access to the private part of witness, consider separating the witness
+func ReadCircuit(circuit_filename string, witness_filename string, mpi_size uint) (*Circuit, [][]frontend.Variable) {
+	expander_circuit := NewInputBuf(circuit_filename).ReadECCCircuit().Flatten()
+	witness := NewInputBuf(witness_filename).ReadWitness()
+
+	// Now the witness only takes into account the simd size
+	// We're repeating the witness for each mpi
+	// TODO: fix this later
+	witness.NumWitnesses *= mpi_size
+	per_node := witness.Values[:len(witness.Values)]
+	for node := 1; node < int(mpi_size); node++ {
+		witness.Values = append(witness.Values, per_node...)
+	}
+
+	public_input, private_input := witness.ToPubPri()
+	expander_circuit.PublicInput = public_input
+
+	return expander_circuit, private_input
+}
+
+// ReadProof loads the given file and decodes it as a proof.
+func ReadProof(proof_filename string) *Proof {
+	return NewInputBuf(proof_filename).ReadProof()
+}
diff --git a/recursion/modules/circuit/witness.go b/recursion/modules/circuit/witness.go
new file mode 100644
index 00000000..c9774f36
--- /dev/null
+++ b/recursion/modules/circuit/witness.go
@@ -0,0 +1,36 @@
+package circuit
+
+import (
+	"math/big"
+
+	"github.com/consensys/gnark/frontend"
+)
+
+// Witness is the deserialized witness file. Values stores the witnesses back
+// to back; within one witness the private inputs come first, followed by the
+// public inputs (this is the layout ToPubPri reads).
+type Witness struct {
+	NumWitnesses               uint
+	NumPrivateInputsPerWitness uint
+	NumPublicInputsPerWitness  uint
+	Values                     []big.Int
+}
+
+// ToPubPri splits the flat value list into per-witness rows and returns
+// (public inputs, private inputs). Each witness occupies a contiguous run of
+// Values: its private inputs first, then its public inputs.
+func (w *Witness) ToPubPri() ([][]frontend.Variable, [][]frontend.Variable) {
+	public_input := make([][]frontend.Variable, w.NumWitnesses)
+	private_input := make([][]frontend.Variable, w.NumWitnesses)
+
+	stride := w.NumPrivateInputsPerWitness + w.NumPublicInputsPerWitness
+	for row := range private_input {
+		cursor := uint(row) * stride
+
+		private_row := []frontend.Variable{}
+		for end := cursor + w.NumPrivateInputsPerWitness; cursor < end; cursor++ {
+			private_row = append(private_row, w.Values[cursor])
+		}
+		private_input[row] = private_row
+
+		public_row := []frontend.Variable{}
+		for end := cursor + w.NumPublicInputsPerWitness; cursor < end; cursor++ {
+			public_row = append(public_row, w.Values[cursor])
+		}
+		public_input[row] = public_row
+	}
+
+	return public_input, private_input
+}
diff --git a/recursion/modules/polycommit/raw.go b/recursion/modules/polycommit/raw.go
new file mode 100644
index 00000000..915fbdac
--- /dev/null
+++ b/recursion/modules/polycommit/raw.go
@@ -0,0 +1,37 @@
+package polycommit
+
+import "github.com/consensys/gnark/frontend"
+
+// RawCommitment is the trivial commitment that carries the committed values
+// themselves; Verify opens it by evaluating their multilinear extension.
+type RawCommitment struct {
+	Vals []frontend.Variable
+}
+
+// EvalMultilinear evaluates, inside the circuit, the multilinear extension of
+// the table vs at the point r. len(vs) must equal 2^len(r), otherwise it
+// panics. r[0] binds the lowest index bit: the first round folds the pairs
+// (vs[2j], vs[2j+1]).
+//
+// vs is not modified; folding happens in a scratch copy.
+func EvalMultilinear(api frontend.API, vs []frontend.Variable, r []frontend.Variable) frontend.Variable {
+	if 1<<len(r) != len(vs) {
+		panic("Inconsistent length of vals and randomness in eval multi-linear")
+	}
+
+	scratch := make([]frontend.Variable, len(vs))
+	copy(scratch, vs)
+
+	cur_eval_size := len(vs) >> 1
+	for i := 0; i < len(r); i++ {
+		for j := 0; j < cur_eval_size; j++ {
+			scratch[j] = api.Add(scratch[2*j], api.Mul(
+				api.Sub(scratch[2*j+1], scratch[2*j]),
+				r[i],
+			))
+		}
+		// Each round halves the live table. Without this every round would
+		// fold the full half-table again and emit constraints whose results
+		// never reach scratch[0].
+		cur_eval_size >>= 1
+	}
+	return scratch[0]
+}
+
+// Verify constrains the multilinear extension of the committed values,
+// evaluated at r, to equal y.
+func (c *RawCommitment) Verify(api frontend.API, r []frontend.Variable, y frontend.Variable) {
+	eval := EvalMultilinear(api, c.Vals, r)
+	api.AssertIsEqual(eval, y)
+}
+
+// NewRawCommitment wraps vals in a RawCommitment. The slice is stored as
+// given, not copied.
+func NewRawCommitment(vals []frontend.Variable) *RawCommitment {
+	commitment := new(RawCommitment)
+	commitment.Vals = vals
+	return commitment
+}
diff --git a/recursion/modules/transcript/hash_test.go b/recursion/modules/transcript/hash_test.go
new file mode 100644
index 00000000..a0d0a98b
--- /dev/null
+++ b/recursion/modules/transcript/hash_test.go
@@ -0,0 +1,67 @@
+package transcript
+
+import (
+	"math/big"
+	"testing"
+
+	"github.com/consensys/gnark-crypto/ecc"
+	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/frontend/cs/r1cs"
+	"github.com/consensys/gnark/std/hash/mimc"
+)
+
+// Hash is a test circuit asserting that the MiMC digest of Input equals Output.
+type Hash struct {
+	Input  frontend.Variable
+	Output frontend.Variable
+}
+
+// Define hashes Input with gnark's MiMC gadget and constrains the digest to
+// equal Output. A failure to construct the hasher is returned to the caller
+// (and so surfaces as a compile error) instead of panicking.
+func (h *Hash) Define(api frontend.API) error {
+	hasher, err := mimc.NewMiMC(api)
+	if err != nil {
+		return err
+	}
+	hasher.Write(h.Input)
+	out := hasher.Sum()
+	api.Println("Out", out)
+	api.AssertIsEqual(out, h.Output)
+	return nil
+}
+
+// TestHash checks that gnark's MiMC gadget, fed the single input 123,
+// reproduces a digest generated by the MIMCHasher in the Expander repo.
+// Failures are reported through t.Fatalf so the test runner records them
+// (with the underlying error) instead of the process aborting on a panic.
+func TestHash(t *testing.T) {
+	evaluation := Hash{}
+	r1cs, r1cs_err := frontend.Compile(ecc.BN254.ScalarField(), r1cs.NewBuilder, &evaluation)
+	if r1cs_err != nil {
+		t.Fatalf("Unable to generate r1cs: %v", r1cs_err)
+	}
+	println("Nb Constraints: ", r1cs.GetNbConstraints())
+	println("Nb Internal Witnesss: ", r1cs.GetNbInternalVariables())
+	println("Nb Private Witness: ", r1cs.GetNbSecretVariables())
+	println("Nb Public Witness:", r1cs.GetNbPublicVariables())
+
+	// The hash output is generated by the MIMCHasher in the Expander repo
+	nb_bytes := 32
+	expected_le_output_bytes := []uint8{23, 0, 30, 22, 99, 236, 217, 86, 113, 255, 221, 106, 184, 226, 45, 109, 67, 123, 85, 88, 103, 54, 177, 150, 88, 18, 208, 172, 76, 143, 30, 5}
+	if nb_bytes != len(expected_le_output_bytes) {
+		t.Fatalf("Incorrect length of expected output: got %d, want %d", len(expected_le_output_bytes), nb_bytes)
+	}
+
+	// The reference digest is little-endian; big.Int.SetBytes wants big-endian.
+	expected_be_output_bytes := make([]uint8, nb_bytes)
+	for i := 0; i < nb_bytes; i++ {
+		expected_be_output_bytes[i] = expected_le_output_bytes[nb_bytes-1-i]
+	}
+
+	assignment := Hash{
+		Input:  123,
+		Output: *big.NewInt(0).SetBytes(expected_be_output_bytes),
+	}
+
+	witness, witness_err := frontend.NewWitness(&assignment, ecc.BN254.ScalarField())
+	if witness_err != nil {
+		t.Fatalf("Unable to solve witness: %v", witness_err)
+	}
+
+	if err := r1cs.IsSolved(witness); err != nil {
+		t.Fatalf("R1CS not satisfied: %v", err)
+	}
+}
diff --git a/recursion/modules/transcript/transcript.go b/recursion/modules/transcript/transcript.go
new file mode 100644
index 00000000..1663b4cc
--- /dev/null
+++ b/recursion/modules/transcript/transcript.go
@@ -0,0 +1,76 @@
+package transcript
+
+import (
+	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/std/hash"
+	"github.com/consensys/gnark/std/hash/mimc"
+)
+
+// Transcript is the in-circuit Fiat-Shamir transcript: values are queued with
+// AppendF and turned into challenges by hashing them in ChallengeF.
+type Transcript struct {
+	api frontend.API
+
+	// The hash function
+	hasher hash.FieldHasher
+
+	// The values to feed the hash function
+	t []frontend.Variable
+
+	// The state
+	state frontend.Variable
+
+	// helper field: counting, irrelevant to circuit
+	count uint
+}
+
+func NewTranscript(api frontend.API) (Transcript, error) {
+	mimc, err := mimc.NewMiMC(api)
+	T := Transcript{
+		api:    api,
+		t:      []frontend.Variable{},
+		hasher: &mimc,
+		state:  0,
+		count:  0,
+	}
+
+	return T, err
+}
+
+// AppendF queues f to be absorbed by the next ChallengeF call and bumps the
+// statistics counter.
+func (T *Transcript) AppendF(f frontend.Variable) {
+	T.t = append(T.t, f)
+	T.count++
+}
+
+// ChallengeF derives the next challenge. If values were queued since the last
+// challenge, it hashes exactly those values and clears the queue; otherwise
+// it hashes the previous state (and counts that as one more hashed element).
+// The digest becomes the new state and is returned.
+func (T *Transcript) ChallengeF() frontend.Variable {
+	T.hasher.Reset()
+	if pending := T.t; len(pending) == 0 {
+		T.hasher.Write(T.state)
+		T.count++
+	} else {
+		for _, f := range pending {
+			T.hasher.Write(f)
+		}
+		T.t = pending[:0]
+	}
+	T.state = T.hasher.Sum()
+	return T.state
+}
+
+// ChallengeFs draws n consecutive challenges, in order.
+func (T *Transcript) ChallengeFs(n uint) []frontend.Variable {
+	cs := make([]frontend.Variable, n)
+	for i := range cs {
+		cs[i] = T.ChallengeF()
+	}
+	return cs
+}
+
+// GetState returns the current state, i.e. the most recent challenge
+// (or the initial 0 if none was drawn yet).
+func (T *Transcript) GetState() frontend.Variable { return T.state }
+
+// GetCount returns the statistics counter accumulated since the last
+// ResetCount (or since construction).
+func (T *Transcript) GetCount() uint { return T.count }
+
+// ResetCount zeroes the statistics counter; the hash state is untouched.
+func (T *Transcript) ResetCount() { T.count = 0 }
diff --git a/recursion/modules/verifier/scratch_pad.go b/recursion/modules/verifier/scratch_pad.go
new file mode 100644
index 00000000..23a262c4
--- /dev/null
+++ b/recursion/modules/verifier/scratch_pad.go
@@ -0,0 +1,72 @@
+package verifier
+
+import (
+	"ExpanderVerifierCircuit/modules/circuit"
+
+	"github.com/consensys/gnark/frontend"
+)
+
+// ScratchPad holds the reusable buffers and precomputed constants shared by
+// the verifier across layers. It is allocated once by NewScratchPad, sized
+// for the widest layer of the circuit.
+type ScratchPad struct {
+	// ====== for evaluating cst, add and mul ======
+	EqEvalsAtRz0   []frontend.Variable
+	EqEvalsAtRz1   []frontend.Variable
+	EqEvalsAtRSimd []frontend.Variable
+	EqEvalsAtRMpi  []frontend.Variable
+
+	EqEvalsAtRx []frontend.Variable
+	EqEvalsAtRy []frontend.Variable
+
+	EqEvalsFirstPart  []frontend.Variable
+	EqEvalsSecondPart []frontend.Variable
+
+	RSimd          *[]frontend.Variable
+	RMpi           *[]frontend.Variable
+	EqRSimdRSimdXY frontend.Variable
+	EqRMpiRMpiXY   frontend.Variable
+
+	// ====== for deg2, deg3 eval ======
+	Inv2             frontend.Variable
+	Deg3EvalAt       [4]frontend.Variable
+	Deg3LagDenomsInv [4]frontend.Variable
+
+	// ====== helper field to get the statistics of the circuit =====
+	EqEvalsCount map[uint]uint
+}
+
+// NewScratchPad allocates the verifier's scratch buffers and precomputes the
+// interpolation constants.
+//
+// The per-layer buffers are sized 2^m, where m is the largest InputLenLog or
+// OutputLenLog over all layers; the simd and mpi buffers are sized simd_size
+// and mpi_size. Inv2 is the in-circuit inverse of 2, and Deg3LagDenomsInv[i]
+// is the inverse of prod_{j != i} (i - j) over the nodes 0..3. The returned
+// error is always nil.
+func NewScratchPad(api frontend.API, circuit *circuit.Circuit, simd_size uint, mpi_size uint) (*ScratchPad, error) {
+	var max_num_var uint
+	for idx := range circuit.Layers {
+		layer := &circuit.Layers[idx]
+		if layer.InputLenLog > max_num_var {
+			max_num_var = layer.InputLenLog
+		}
+		if layer.OutputLenLog > max_num_var {
+			max_num_var = layer.OutputLenLog
+		}
+	}
+	max_io_size := uint(1) << max_num_var
+
+	alloc := func(n uint) []frontend.Variable { return make([]frontend.Variable, n) }
+	sp := &ScratchPad{
+		EqEvalsAtRz0:      alloc(max_io_size),
+		EqEvalsAtRz1:      alloc(max_io_size),
+		EqEvalsAtRSimd:    alloc(simd_size),
+		EqEvalsAtRMpi:     alloc(mpi_size),
+		EqEvalsAtRx:       alloc(max_io_size),
+		EqEvalsAtRy:       alloc(max_io_size),
+		EqEvalsFirstPart:  alloc(max_io_size),
+		EqEvalsSecondPart: alloc(max_io_size),
+		EqEvalsCount:      make(map[uint]uint),
+	}
+
+	sp.Inv2 = api.Inverse(2)
+	sp.Deg3EvalAt = [4]frontend.Variable{0, 1, 2, 3}
+	for i, xi := range sp.Deg3EvalAt {
+		var denominator frontend.Variable = 1
+		for j, xj := range sp.Deg3EvalAt {
+			if j != i {
+				denominator = api.Mul(denominator, api.Sub(xi, xj))
+			}
+		}
+		sp.Deg3LagDenomsInv[i] = api.Inverse(denominator)
+	}
+
+	return sp, nil
+}
diff --git a/recursion/modules/verifier/utils.go b/recursion/modules/verifier/utils.go
new file mode 100644
index 00000000..ef74f838
--- /dev/null
+++ b/recursion/modules/verifier/utils.go
@@ -0,0 +1,84 @@
+package verifier
+
+import (
+	"github.com/consensys/gnark/frontend"
+)
+
+func EqEvalsAtPrimitive(
+	api frontend.API,
+	r []frontend.Variable,
+	mul_factor frontend.Variable,
+	ret_evals []frontend.Variable,
+) {
+	ret_evals[0] = mul_factor
+	var cur_eval_num = 1
+
+	for i := 0; i < len(r); i++ {
+		for j := 0; j < cur_eval_num; j++ {
+			ret_evals[j+cur_eval_num] = api.Mul(ret_evals[j], r[i])
+			ret_evals[j] = api.Sub(ret_evals[j], ret_evals[j+cur_eval_num])
+		}
+		cur_eval_num <<= 1
+	}
+}
+
+func EqEvalsAtEfficient(
+	api frontend.API,
+	r []frontend.Variable,
+	mul_factor frontend.Variable,
+	ret_evals []frontend.Variable,
+	tmp_1st_half []frontend.Variable,
+	tmp_2nd_half []frontend.Variable,
+	eq_evals_count map[uint]uint,
+) {
+	ret_len := uint(1) << len(r)
+	if val, ok := eq_evals_count[ret_len]; ok {
+		eq_evals_count[ret_len] = val + 1
+	} else {
+		eq_evals_count[ret_len] = 1
+	}
+
+	var first_half_bits uint = uint(len(r) >> 1)
+	var first_half_mask uint = (1 << first_half_bits) - 1
+
+	EqEvalsAtPrimitive(api, r[:first_half_bits], mul_factor, tmp_1st_half)
+	EqEvalsAtPrimitive(api, r[first_half_bits:], 1, tmp_2nd_half)
+
+	for i := uint(0); i < (1 << len(r)); i++ {
+		var first_half = i & first_half_mask
+		var second_half = i >> first_half_bits
+		ret_evals[i] = api.Mul(tmp_1st_half[first_half], tmp_2nd_half[second_half])
+	}
+}
+
+// CombineWithSimdMpi returns the weighted sum
+// sum_{i,j} values[i*simd_size+j] * eq_evals_at_simd[j] * eq_evals_at_mpi[i],
+// with i ranging over the mpi weights and j over the simd weights.
+func CombineWithSimdMpi(
+	api frontend.API,
+	values []frontend.Variable,
+	eq_evals_at_simd []frontend.Variable,
+	eq_evals_at_mpi []frontend.Variable,
+) frontend.Variable {
+	simd_size := len(eq_evals_at_simd)
+
+	var acc frontend.Variable = 0
+	for i, eq_mpi := range eq_evals_at_mpi {
+		for j, eq_simd := range eq_evals_at_simd {
+			acc = api.Add(acc, api.Mul(values[i*simd_size+j], eq_simd, eq_mpi))
+		}
+	}
+	return acc
+}
+
+func Eq(api frontend.API, x frontend.Variable, y frontend.Variable) frontend.Variable {
+	var xy = api.Mul(x, y)
+	return api.Sub(api.Add(xy, xy, 1), x, y)
+}
+
+// EqVec returns the product of Eq(x[i], y[i]) over every index of x
+// (1 for an empty x). y must be at least as long as x.
+func EqVec(api frontend.API, x []frontend.Variable, y []frontend.Variable) frontend.Variable {
+	var prod frontend.Variable = 1
+	for i := range x {
+		prod = api.Mul(prod, Eq(api, x[i], y[i]))
+	}
+	return prod
+}
diff --git a/recursion/modules/verifier/verifier.go b/recursion/modules/verifier/verifier.go
new file mode 100644
index 00000000..6223978c
--- /dev/null
+++ b/recursion/modules/verifier/verifier.go
@@ -0,0 +1,293 @@
+package verifier
+
+import (
+	"ExpanderVerifierCircuit/modules/circuit"
+	"ExpanderVerifierCircuit/modules/polycommit"
+	"ExpanderVerifierCircuit/modules/transcript"
+	"log"
+	"math/bits"
+
+	"github.com/consensys/gnark/frontend"
+)
+
+func SumcheckStepVerify(
+	api frontend.API,
+	proof *circuit.Proof,
+	degree uint,
+	transcript *transcript.Transcript,
+	claimed_sum frontend.Variable,
+	randomness_vec []frontend.Variable,
+	sp *ScratchPad,
+) (frontend.Variable, []frontend.Variable) {
+	var ps = make([]frontend.Variable, 0)
+	for i := uint(0); i < (degree + 1); i++ {
+		ps = append(ps, proof.Next())
+		transcript.AppendF(ps[i])
+	}
+
+	var r = transcript.ChallengeF()
+	randomness_vec = append(randomness_vec, r)
+	api.AssertIsEqual(api.Add(ps[0], ps[1]), claimed_sum)
+
+	if degree == 2 {
+		return Degree2Eval(api, ps, r, sp), randomness_vec
+	} else if degree == 3 {
+		return Degree3Eval(api, ps, r, sp), randomness_vec
+	} else {
+		panic("Incorrect Degree")
+	}
+}
+
+func SumcheckLayerVerify(
+	api frontend.API,
+	layer *circuit.Layer,
+	public_input [][]frontend.Variable,
+	rz0 []frontend.Variable,
+	rz1 []frontend.Variable,
+	r_simd []frontend.Variable,
+	r_mpi []frontend.Variable,
+	claimed_v0 frontend.Variable,
+	claimed_v1 frontend.Variable,
+	alpha frontend.Variable,
+	proof *circuit.Proof,
+	transcript *transcript.Transcript,
+	sp *ScratchPad,
+	is_output_layer bool,
+) (
+	[]frontend.Variable,
+	[]frontend.Variable,
+	[]frontend.Variable,
+	[]frontend.Variable,
+	frontend.Variable,
+	frontend.Variable,
+) {
+	PrepareLayer(
+		api,
+		layer,
+		alpha,
+		rz0,
+		rz1,
+		r_simd,
+		r_mpi,
+		sp,
+		is_output_layer,
+	)
+
+	var var_num = layer.InputLenLog
+	var simd_var_num = len(r_simd)
+	var mpi_var_num = len(r_mpi)
+	var sum = claimed_v0
+	if alpha != nil && claimed_v1 != nil {
+		sum = api.Add(sum, api.Mul(alpha, claimed_v1))
+	}
+	sum = api.Sub(sum, EvalCst(api, layer.Cst, public_input, sp))
+
+	var rx = make([]frontend.Variable, 0)
+	var ry []frontend.Variable = nil
+	var r_simd_xy = make([]frontend.Variable, 0)
+	var r_mpi_xy = make([]frontend.Variable, 0)
+
+	for i := uint(0); i < var_num; i++ {
+		sum, rx = SumcheckStepVerify(
+			api,
+			proof,
+			2,
+			transcript,
+			sum,
+			rx,
+			sp,
+		)
+	}
+	SetRx(api, rx, sp)
+
+	for i := 0; i < simd_var_num; i++ {
+		sum, r_simd_xy = SumcheckStepVerify(
+			api,
+			proof,
+			3,
+			transcript,
+			sum,
+			r_simd_xy,
+			sp,
+		)
+	}
+	SetRSimdXY(api, r_simd_xy, sp)
+
+	for i := 0; i < mpi_var_num; i++ {
+		sum, r_mpi_xy = SumcheckStepVerify(
+			api,
+			proof,
+			3,
+			transcript,
+			sum,
+			r_mpi_xy,
+			sp,
+		)
+	}
+	SetRMPIXY(api, r_mpi_xy, sp)
+
+	var vx_claim = proof.Next()
+	sum = api.Sub(sum, api.Mul(
+		vx_claim,
+		EvalAdd(api, layer.Add, sp),
+	))
+	transcript.AppendF(vx_claim)
+
+	var vy_claim frontend.Variable = nil
+	if layer.StructureInfo.MaxDegreeOne {
+		api.AssertIsEqual(sum, 0)
+	} else {
+		ry = make([]frontend.Variable, 0)
+		for i := uint(0); i < var_num; i++ {
+			sum, ry = SumcheckStepVerify(
+				api,
+				proof,
+				2,
+				transcript,
+				sum,
+				ry,
+				sp,
+			)
+		}
+		SetRY(api, ry, sp)
+
+		vy_claim = proof.Next()
+		transcript.AppendF(vy_claim)
+		api.AssertIsEqual(sum, api.Mul(
+			vx_claim,
+			vy_claim,
+			EvalMul(api, layer.Mul, sp),
+		))
+	}
+
+	return rx, ry, r_simd_xy, r_mpi_xy, vx_claim, vy_claim
+}
+
+// GKRVerify runs the layer-by-layer GKR verification.
+//
+// It draws the initial challenges from the transcript in this order: one per
+// output variable of the last layer, then log2(simd_size), then
+// log2(mpi_size). It then verifies the layers from the last one down to
+// layer 0, feeding each layer's returned points and claims into the next.
+// After a layer that produced a y point and claim, a combining challenge
+// alpha is drawn; otherwise alpha is nil for the next layer.
+//
+// Returns the final (rz0, rz1, r_simd, r_mpi, claimed_v0, claimed_v1), which
+// refer to the circuit input; rz1 and claimed_v1 may be nil. The per-size
+// counts of eq-table evaluations are logged before returning.
+func GKRVerify(
+	api frontend.API,
+	circuit *circuit.Circuit,
+	public_input [][]frontend.Variable,
+	claimed_v frontend.Variable,
+	simd_size uint,
+	mpi_size uint,
+	transcript *transcript.Transcript,
+	proof *circuit.Proof,
+) (
+	[]frontend.Variable,
+	[]frontend.Variable,
+	[]frontend.Variable,
+	[]frontend.Variable,
+	frontend.Variable,
+	frontend.Variable,
+) {
+	sp, err := NewScratchPad(api, circuit, simd_size, mpi_size)
+	if err != nil {
+		panic("Error init scratch pad")
+	}
+
+	// draw pulls n challenges from the transcript, in order.
+	draw := func(n int) []frontend.Variable {
+		out := make([]frontend.Variable, 0)
+		for k := 0; k < n; k++ {
+			out = append(out, transcript.ChallengeF())
+		}
+		return out
+	}
+
+	n_layers := len(circuit.Layers)
+	rz0 := draw(int(circuit.Layers[n_layers-1].OutputLenLog))
+	r_simd := draw(bits.TrailingZeros(simd_size))
+	r_mpi := draw(bits.TrailingZeros(mpi_size))
+
+	var (
+		rz1        []frontend.Variable
+		alpha      frontend.Variable
+		claimed_v0 = claimed_v
+		claimed_v1 frontend.Variable
+	)
+
+	for i := n_layers - 1; i >= 0; i-- {
+		rz0, rz1, r_simd, r_mpi, claimed_v0, claimed_v1 = SumcheckLayerVerify(
+			api,
+			&circuit.Layers[i],
+			public_input,
+			rz0,
+			rz1,
+			r_simd,
+			r_mpi,
+			claimed_v0,
+			claimed_v1,
+			alpha,
+			proof,
+			transcript,
+			sp,
+			i == n_layers-1,
+		)
+
+		alpha = nil
+		if rz1 != nil && claimed_v1 != nil {
+			alpha = transcript.ChallengeF()
+		}
+	}
+
+	for size, count := range sp.EqEvalsCount {
+		log.Println("Eq Evals Size", size, " Count: ", count)
+	}
+
+	return rz0, rz1, r_simd, r_mpi, claimed_v0, claimed_v1
+}
+
+// Verify is the top-level in-circuit verifier.
+//
+// It reads the raw commitment (2^InputLenLog of layer 0, times mpi_size,
+// values) from the proof and absorbs it into a fresh transcript, fills the
+// circuit's random coefficients, runs GKRVerify, and finally opens the
+// commitment at the returned input points. Hash counts for each phase are
+// logged. Only the raw commitment is supported, and any simd challenge
+// (simd_size > 1) panics.
+//
+// The opening at ry is performed only when GKR actually produced a y point
+// and claim: for an input layer flagged MaxDegreeOne both are nil and there
+// is nothing to open.
+func Verify(
+	api frontend.API,
+	circuit *circuit.Circuit,
+	public_input [][]frontend.Variable,
+	claimed_v frontend.Variable,
+	simd_size uint,
+	mpi_size uint,
+	proof *circuit.Proof,
+) {
+	var transcript, err = transcript.NewTranscript(api)
+	if err != nil {
+		panic("Err in transcript init")
+	}
+
+	// Only supports RawCommitment now
+	circuit_input_size := uint(1) << circuit.Layers[0].InputLenLog
+	vals := make([]frontend.Variable, 0)
+	for i := uint(0); i < circuit_input_size*mpi_size; i++ {
+		vals = append(vals, proof.Next())
+		transcript.AppendF(vals[i])
+	}
+
+	raw_commitment := polycommit.NewRawCommitment(vals)
+
+	// Trigger an additional hash
+	if mpi_size > 1 {
+		_ = transcript.ChallengeF()
+	}
+
+	log.Println("#Hashes for input: ", transcript.GetCount())
+	transcript.ResetCount()
+
+	circuit.FillRndCoef(&transcript)
+
+	log.Println("#Hashes for random gate: ", transcript.GetCount())
+	transcript.ResetCount()
+
+	var rx, ry, r_simd, r_mpi, claimed_v0, claimed_v1 = GKRVerify(api, circuit, public_input, claimed_v, simd_size, mpi_size, &transcript, proof)
+
+	log.Println("#Hashes for gkr challenge: ", transcript.GetCount())
+	transcript.ResetCount()
+
+	if len(r_simd) > 0 {
+		panic("Simd not supported yet.")
+	}
+
+	rx = append(rx, r_mpi...)
+	raw_commitment.Verify(api, rx, claimed_v0)
+
+	// ry / claimed_v1 are nil when the input layer has no y phase.
+	if ry != nil && claimed_v1 != nil {
+		ry = append(ry, r_mpi...)
+		raw_commitment.Verify(api, ry, claimed_v1)
+	}
+}
diff --git a/recursion/modules/verifier/verifier_helper.go b/recursion/modules/verifier/verifier_helper.go
new file mode 100644
index 00000000..5cbbeefc
--- /dev/null
+++ b/recursion/modules/verifier/verifier_helper.go
@@ -0,0 +1,248 @@
+package verifier
+
+import (
+	"ExpanderVerifierCircuit/modules/circuit"
+
+	"github.com/consensys/gnark/frontend"
+)
+
+// PrepareLayer fills the scratch pad tables needed before one layer is verified.
+//
+// For the output layer the eq table sp.EqEvalsAtRz0 is computed directly from
+// rz0. For every other layer it is copied from sp.EqEvalsAtRx and, when both rz1
+// and alpha are non-nil, alpha*sp.EqEvalsAtRy[i] is added to each entry. The eq
+// tables for r_simd and r_mpi are rebuilt on every call, and sp.RSimd / sp.RMpi
+// are pointed at the challenge slices for later use by SetRSimdXY / SetRMPIXY.
+// The literal 1 passed to EqEvalsAtEfficient is presumably the initial scaling
+// factor of the table — TODO confirm against EqEvalsAtEfficient.
+func PrepareLayer(
+	api frontend.API,
+	layer *circuit.Layer,
+	alpha frontend.Variable,
+	rz0 []frontend.Variable,
+	rz1 []frontend.Variable,
+	r_simd []frontend.Variable,
+	r_mpi []frontend.Variable,
+	sp *ScratchPad,
+	is_output_layer bool,
+) {
+	if !is_output_layer {
+		// Start from the table already stored in sp.EqEvalsAtRx.
+		output_len := 1 << len(rz0)
+		copy(sp.EqEvalsAtRz0[:output_len], sp.EqEvalsAtRx[:output_len])
+		if rz1 != nil && alpha != nil {
+			// NOTE(review): this bound uses layer.OutputLenLog while the copy above
+			// uses len(rz0); presumably the two always agree — confirm.
+			for i := 0; i < 1<<layer.OutputLenLog; i++ {
+				sp.EqEvalsAtRz0[i] = api.Add(
+					sp.EqEvalsAtRz0[i],
+					api.Mul(alpha, sp.EqEvalsAtRy[i]),
+				)
+			}
+		}
+	} else {
+		EqEvalsAtEfficient(
+			api, rz0, 1,
+			sp.EqEvalsAtRz0, sp.EqEvalsFirstPart, sp.EqEvalsSecondPart, sp.EqEvalsCount,
+		)
+	}
+
+	// SIMD challenge table.
+	EqEvalsAtEfficient(
+		api, r_simd, 1,
+		sp.EqEvalsAtRSimd, sp.EqEvalsFirstPart, sp.EqEvalsSecondPart, sp.EqEvalsCount,
+	)
+
+	// MPI challenge table.
+	EqEvalsAtEfficient(
+		api, r_mpi, 1,
+		sp.EqEvalsAtRMpi, sp.EqEvalsFirstPart, sp.EqEvalsSecondPart, sp.EqEvalsCount,
+	)
+
+	// Remember the challenge slices; SetRSimdXY and SetRMPIXY dereference them.
+	sp.RSimd = &r_simd
+	sp.RMpi = &r_mpi
+}
+
+// EvalCst returns the sum, over all constant gates, of sp.EqEvalsAtRz0[OId]
+// times the gate value. The value is the gate's coefficient, except for
+// PublicInput coefficients, where the public inputs at InputIdx are combined
+// across all witnesses by CombineWithSimdMpi. Panics unless the SIMD size is 1.
+func EvalCst(
+	api frontend.API,
+	cst_gates []circuit.Gate,
+	public_input [][]frontend.Variable,
+	sp *ScratchPad,
+) frontend.Variable {
+	mpi_size := len(sp.EqEvalsAtRMpi)
+	simd_size := len(sp.EqEvalsAtRSimd)
+	if simd_size != 1 {
+		// Only the SIMD width is restricted; any MPI size is combined below.
+		panic("Only support simd size 1 for now")
+	}
+
+	var v frontend.Variable = 0
+	for _, cst_gate := range cst_gates {
+		var tmp frontend.Variable = 0
+		switch cst_gate.Coef.CoefType {
+		case circuit.PublicInput:
+			// One public-input vector is expected per (mpi, simd) witness.
+			n_witnesses := len(public_input)
+			if n_witnesses != mpi_size*simd_size {
+				panic("Incompatible n_witnesses with mpi and simd size")
+			}
+			input_idx := cst_gate.Coef.InputIdx
+			vals := make([]frontend.Variable, n_witnesses)
+			for j := 0; j < n_witnesses; j++ {
+				vals[j] = public_input[j][input_idx]
+			}
+			tmp = CombineWithSimdMpi(api, vals, sp.EqEvalsAtRSimd, sp.EqEvalsAtRMpi)
+			tmp = api.Mul(tmp, sp.EqEvalsAtRz0[cst_gate.OId])
+		default:
+			coef_value := cst_gate.Coef.GetActualLocalValue()
+			tmp = api.Mul(sp.EqEvalsAtRz0[cst_gate.OId], coef_value)
+		}
+		v = api.Add(v, tmp)
+	}
+	return v
+}
+
+// EvalAdd sums, over all add gates, the product of sp.EqEvalsAtRz0[OId],
+// sp.EqEvalsAtRx[IIds[0]] and the gate coefficient, then multiplies the total
+// by sp.EqRSimdRSimdXY and sp.EqRMpiRMpiXY.
+func EvalAdd(
+	api frontend.API,
+	add_gates []circuit.Gate,
+	sp *ScratchPad,
+) frontend.Variable {
+	var acc frontend.Variable = 0
+	for _, gate := range add_gates {
+		term := api.Mul(sp.EqEvalsAtRz0[gate.OId], sp.EqEvalsAtRx[gate.IIds[0]], gate.Coef.GetActualLocalValue())
+		acc = api.Add(acc, term)
+	}
+	return api.Mul(acc, sp.EqRSimdRSimdXY, sp.EqRMpiRMpiXY)
+}
+
+// EvalMul sums, over all mul gates, the product of sp.EqEvalsAtRz0[OId],
+// sp.EqEvalsAtRx[IIds[0]], sp.EqEvalsAtRy[IIds[1]] and the gate coefficient,
+// then multiplies the total by sp.EqRSimdRSimdXY and sp.EqRMpiRMpiXY.
+func EvalMul(
+	api frontend.API,
+	mul_gates []circuit.Gate,
+	sp *ScratchPad,
+) frontend.Variable {
+	var acc frontend.Variable = 0
+	for _, gate := range mul_gates {
+		term := api.Mul(
+			sp.EqEvalsAtRz0[gate.OId],
+			sp.EqEvalsAtRx[gate.IIds[0]],
+			sp.EqEvalsAtRy[gate.IIds[1]],
+			gate.Coef.GetActualLocalValue(),
+		)
+		acc = api.Add(acc, term)
+	}
+	return api.Mul(acc, sp.EqRSimdRSimdXY, sp.EqRMpiRMpiXY)
+}
+
+// SetRx rebuilds sp.EqEvalsAtRx from the challenge vector rx by calling
+// EqEvalsAtEfficient, passing sp.EqEvalsFirstPart, sp.EqEvalsSecondPart and
+// sp.EqEvalsCount through unchanged.
+func SetRx(
+	api frontend.API,
+	rx []frontend.Variable,
+	sp *ScratchPad,
+) {
+	EqEvalsAtEfficient(
+		api, rx, 1,
+		sp.EqEvalsAtRx,
+		sp.EqEvalsFirstPart, sp.EqEvalsSecondPart,
+		sp.EqEvalsCount,
+	)
+}
+
+// SetRSimdXY stores EqVec of the slice behind sp.RSimd and r_simd_xy in
+// sp.EqRSimdRSimdXY. sp.RSimd is dereferenced, so it must already be set
+// (PrepareLayer assigns it).
+func SetRSimdXY(api frontend.API, r_simd_xy []frontend.Variable, sp *ScratchPad) {
+	r_simd := *sp.RSimd
+	sp.EqRSimdRSimdXY = EqVec(api, r_simd, r_simd_xy)
+}
+
+// SetRMPIXY stores EqVec of the slice behind sp.RMpi and r_mpi_xy in
+// sp.EqRMpiRMpiXY. sp.RMpi is dereferenced, so it must already be set
+// (PrepareLayer assigns it).
+func SetRMPIXY(api frontend.API, r_mpi_xy []frontend.Variable, sp *ScratchPad) {
+	r_mpi := *sp.RMpi
+	sp.EqRMpiRMpiXY = EqVec(api, r_mpi, r_mpi_xy)
+}
+
+// SetRY rebuilds sp.EqEvalsAtRy from the challenge vector r_y by calling
+// EqEvalsAtEfficient, passing sp.EqEvalsFirstPart, sp.EqEvalsSecondPart and
+// sp.EqEvalsCount through unchanged.
+func SetRY(
+	api frontend.API,
+	r_y []frontend.Variable,
+	sp *ScratchPad,
+) {
+	EqEvalsAtEfficient(
+		api, r_y, 1,
+		sp.EqEvalsAtRy,
+		sp.EqEvalsFirstPart, sp.EqEvalsSecondPart,
+		sp.EqEvalsCount,
+	)
+}
+
+// Degree2Eval evaluates at x the polynomial of degree at most two that takes
+// the values p[0], p[1], p[2] at 0, 1, 2. It recovers the coefficients of
+// c0 + c1*x + c2*x^2 and evaluates them in Horner form. sp.Inv2 must hold the
+// field inverse of 2 for the recovered c2 to be correct.
+func Degree2Eval(
+	api frontend.API,
+	p []frontend.Variable, // lag vals at x=0, 1, 2
+	x frontend.Variable,
+	sp *ScratchPad,
+) frontend.Variable {
+	c0 := p[0]
+	// p[2] + p[0] - 2*p[1] equals 2*c2.
+	twice_c2 := api.Sub(api.Add(p[2], p[0]), p[1], p[1])
+	c2 := api.Mul(sp.Inv2, twice_c2)
+	// p[1] - p[0] equals c1 + c2.
+	c1 := api.Sub(p[1], p[0], c2)
+	// Horner form: (c2*x + c1)*x + c0.
+	horner := api.Add(api.Mul(c2, x), c1)
+	return api.Add(api.Mul(horner, x), c0)
+}
+
+// Degree3Eval evaluates at x the polynomial given by p, its values at
+// x=0, 1, 2, 3. It simply delegates to LagEval, which interpolates over the
+// nodes stored in sp.Deg3EvalAt.
+func Degree3Eval(
+	api frontend.API, p []frontend.Variable, x frontend.Variable, sp *ScratchPad,
+) frontend.Variable {
+	return LagEval(api, p, x, sp)
+}
+
+// LagEval evaluates at x the Lagrange interpolant of vals over the nodes in
+// sp.Deg3EvalAt; sp.Deg3LagDenomsInv[i] is presumably the precomputed inverse
+// denominator of basis i (TODO confirm). Panics if len(vals) != number of nodes.
+func LagEval(
+	api frontend.API,
+	vals []frontend.Variable,
+	x frontend.Variable,
+	sp *ScratchPad,
+) frontend.Variable {
+	if len(vals) != len(sp.Deg3EvalAt) {
+		panic("Incorrect length in LagEval")
+	}
+	var v frontend.Variable = 0
+	for i := range vals {
+		var numerator frontend.Variable = 1
+		for j := 0; j < len(vals); j++ {
+			if j != i {
+				numerator = api.Mul(numerator, api.Sub(x, sp.Deg3EvalAt[j]))
+			}
+		}
+		v = api.Add(v, api.Mul(numerator, sp.Deg3LagDenomsInv[i], vals[i]))
+	}
+	return v
+}
diff --git a/scripts/run_multiple_mpi.py b/scripts/run_multiple_mpi.py
deleted file mode 100644
index 70052144..00000000
--- a/scripts/run_multiple_mpi.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import sys
-import json
-import subprocess
-
-MPI_CONFIG_JSON = '''
-{
-    "field": "gf2ext128",
-    "n_groups": 2,
-    "mpi_size_each_group": 8,
-    "cpu_ids":
-        [
-            [0, 1, 2, 3, 4, 5, 6, 7],
-            [8, 9, 10, 11, 12, 13, 14, 15]
-        ]
-}
-'''
-
-def parse_config(mpi_config):
-    field = mpi_config["field"]
-    n_groups = mpi_config["n_groups"]
-    mpi_size_each_group = mpi_config["mpi_size_each_group"]
-    cpu_ids = mpi_config["cpu_ids"]
-    
-    if field not in ["gf2ext128", "m31ext3", "fr"]:
-        sys.exit("Unrecognized field, now only supports gf2ext128, m31ext3 and fr")
-
-    if n_groups != len(cpu_ids):
-        sys.exit("Lack/Too much cpu specifications.")
-
-    for i in range(n_groups):
-        if len(cpu_ids[i]) != mpi_size_each_group:
-            sys.exit(f"Cpu ids are not correct for group {i}")
-
-    return field, n_groups, mpi_size_each_group, cpu_ids
-
-
-# Run two mpi process
-if __name__ == "__main__":
-    mpi_config = json.loads(MPI_CONFIG_JSON)
-    field, n_groups, mpi_size_each_group, cpu_ids = parse_config(mpi_config)
-
-    for i in range(n_groups):
-        cpu_id = ",".join(map(str, cpu_ids[i]))
-        subprocess.Popen(["mpiexec", "-cpu-set", cpu_id, "-n", str(mpi_size_each_group), "./target/release/expander-rs-mpi", "-f", field])
diff --git a/scripts/test_recursion.py b/scripts/test_recursion.py
new file mode 100755
index 00000000..a5c650d9
--- /dev/null
+++ b/scripts/test_recursion.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python3
+
+# Run this script from the root of the Expander repository
+
+import os
+import sys
+import json
+import subprocess
+
+MPI_CONFIG = '''
+{
+    "n_groups": 2,
+    "mpi_size_each_group": 2,
+    "cpu_ids":
+        [
+            [0, 1],
+            [2, 3]
+        ]
+}
+'''
+
+PROOF_CONFIG = '''
+{
+    "field": "fr",
+    "circuit": "data/circuit_bn254.txt",
+    "witness": "data/witness_bn254.txt",
+    "gkr_proof": "data/gkr_proof.txt",
+    "recursive_proof": "data/recursive_proof.txt"
+}
+'''
+
+def change_working_dir():
+    """Switch to the repository root, i.e. the parent of the scripts/ directory holding this file."""
+    # Derived from __file__ so it works from any cwd and for any checkout directory name.
+    os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+def parse_mpi_config(mpi_config):
+    """Validate the MPI config dict and return (n_groups, mpi_size_each_group, cpu_ids); exits on mismatch."""
+    group_count = mpi_config["n_groups"]
+    ranks_per_group = mpi_config["mpi_size_each_group"]
+    cpu_ids = mpi_config["cpu_ids"]
+    if len(cpu_ids) != group_count:
+        sys.exit("Lack/Too much cpu specifications.")
+
+    # TODO: Check there are enough cpus on the machine
+    for group_idx in range(group_count):
+        if ranks_per_group != len(cpu_ids[group_idx]):
+            sys.exit(f"Cpu ids are not correct for group {group_idx}")
+
+    return group_count, ranks_per_group, cpu_ids
+
+def parse_proof_config(proof_config):
+    """Check the configured field and return (circuit, witness, gkr_proof, recursive_proof) paths."""
+    field = proof_config["field"]
+    if field not in ("gf2ext128", "m31ext3", "fr"):
+        sys.exit("Unrecognized field, gkr now only supports gf2ext128, m31ext3 and fr")
+    if field != "fr":
+        sys.exit("Recursive proof only supports fr now")
+
+    path_keys = ("circuit", "witness", "gkr_proof", "recursive_proof")
+    return tuple(proof_config[key] for key in path_keys)
+
+DEBUG = True
+
+# Run the MPI prover group(s), then generate the recursive proof for each group
+if __name__ == "__main__":
+    change_working_dir()
+
+    n_groups, mpi_size_each_group, cpu_ids = parse_mpi_config(json.loads(MPI_CONFIG))
+    circuit, witness, gkr_proof, recursive_proof = parse_proof_config(json.loads(PROOF_CONFIG))
+    if DEBUG:
+        n_groups = 1
+
+    # Stop on a failed build rather than proving with a stale or missing binary.
+    subprocess.run("RUSTFLAGS='-C target-feature=+avx512f' cargo build --release --bin expander-exec ", shell=True, check=True)
+
+    # Start one prover per group, each pinned to its own cpu set; they run concurrently.
+    ps = []
+    for i in range(n_groups):
+        cpu_id = ",".join(map(str, cpu_ids[i]))
+        p = subprocess.Popen(["mpiexec", "-cpu-set", cpu_id, "-n", str(mpi_size_each_group), "./target/release/expander-exec", "prove", circuit, witness, gkr_proof + "." + str(i)])
+        ps.append(p)
+
+    # Wait for every prover before checking results so none is left running.
+    exit_codes = [p.wait() for p in ps]
+    if any(exit_codes):
+        sys.exit(f"gkr prove failed, exit codes: {exit_codes}")
+    print("gkr prove done.")
+
+    # Build one recursive proof per group; go runs inside recursion/, hence the "../" paths.
+    for i in range(n_groups):
+        subprocess.run(
+            ["go", "run", "main.go", f"-circuit=../{circuit}", f"-witness=../{witness}",
+             f"-gkr_proof=../{gkr_proof}.{i}", f"-recursive_proof=../{recursive_proof}.{i}",
+             f"-mpi_size={mpi_size_each_group}"],
+            cwd="recursion",
+            check=True,
+        )
\ No newline at end of file
diff --git a/sumcheck/cuda/.gitignore b/sumcheck/cuda/.gitignore
new file mode 100644
index 00000000..3cdbb4d7
--- /dev/null
+++ b/sumcheck/cuda/.gitignore
@@ -0,0 +1,15 @@
+# IDE
+.idea
+.vscode
+
+# CMake
+.cmake
+CMakeCache.txt
+CMakeFiles
+cmake_install.cmake
+*.bin
+Testing
+
+# Artifact
+*.log
+*.csv
diff --git a/sumcheck/cuda/LICENSE b/sumcheck/cuda/LICENSE
new file mode 100644
index 00000000..0ad25db4
--- /dev/null
+++ b/sumcheck/cuda/LICENSE
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU Affero General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.
diff --git a/sumcheck/cuda/Makefile b/sumcheck/cuda/Makefile
new file mode 100644
index 00000000..b456e44d
--- /dev/null
+++ b/sumcheck/cuda/Makefile
@@ -0,0 +1,47 @@
+# Makefile for the CUDA sumcheck binary.
+#
+# Targets:
+#   all (default)  verify the nvcc version, then build $(TARGET)
+#   check_nvcc     verify that nvcc is installed and is >= $(NVCC_MIN_VERSION)
+#   clean          remove $(TARGET)
+
+# Compiler and flags
+NVCC := nvcc
+NVCC_FLAGS := -O3 -arch=native -std=c++17
+INCLUDE_FLAGS := -Iinclude -Iicicle
+
+# Target executable
+TARGET := sumcheck.bin
+
+# Source file
+SRC := src/sumcheck_cuda.cu
+
+# Default field to use; passed to nvcc as a -D define. Override it on the
+# command line, e.g. `make USE_FIELD=useBN254`. $(TARGET) depends only on
+# $(SRC), so run `make clean` first when switching fields.
+USE_FIELD ?= useM31ext3
+
+# Set the minimum required version (major.minor). The major/minor parts used by
+# check_nvcc are derived from this single value so they cannot drift apart.
+NVCC_MIN_VERSION := 12.5
+NVCC_MIN_MAJOR := $(word 1,$(subst ., ,$(NVCC_MIN_VERSION)))
+NVCC_MIN_MINOR := $(word 2,$(subst ., ,$(NVCC_MIN_VERSION)))
+
+# Get NVCC version. stderr is discarded so that a missing nvcc simply yields an
+# empty version here and is reported by check_nvcc rather than at parse time.
+NVCC_VERSION := $(shell $(NVCC) --version 2>/dev/null | grep -oP 'release \K[0-9]+\.[0-9]+' | head -n 1)
+NVCC_MAJOR := $(word 1,$(subst ., ,$(NVCC_VERSION)))
+NVCC_MINOR := $(word 2,$(subst ., ,$(NVCC_VERSION)))
+
+# Phony targets
+.PHONY: all check_nvcc clean
+
+# Default target
+all: check_nvcc $(TARGET)
+
+# Check NVCC version.
+# Fails when nvcc is missing, when its version could not be parsed (otherwise
+# the numeric tests below would error out and fall through to the success
+# branch), or when the version is older than NVCC_MIN_VERSION.
+check_nvcc:
+	@if ! command -v $(NVCC) > /dev/null 2>&1; then \
+		echo "Error: $(NVCC) is not installed."; \
+		exit 1; \
+	elif [ -z "$(NVCC_MAJOR)" ] || [ -z "$(NVCC_MINOR)" ]; then \
+		echo "Error: could not determine the version of $(NVCC)."; \
+		exit 1; \
+	elif [ "$(NVCC_MAJOR)" -lt "$(NVCC_MIN_MAJOR)" ] || { [ "$(NVCC_MAJOR)" -eq "$(NVCC_MIN_MAJOR)" ] && [ "$(NVCC_MINOR)" -lt "$(NVCC_MIN_MINOR)" ]; }; then \
+		echo "Error: $(NVCC) version must be >= $(NVCC_MIN_VERSION). Current version: $(NVCC_VERSION)"; \
+		exit 1; \
+	else \
+		echo "$(NVCC) version $(NVCC_VERSION) is sufficient."; \
+	fi
+
+# Build target.
+# check_nvcc is an order-only prerequisite: it always runs before compilation
+# (also under `make -j` or `make $(TARGET)`) without forcing a rebuild.
+$(TARGET): $(SRC) | check_nvcc
+	$(NVCC) $(NVCC_FLAGS) $(INCLUDE_FLAGS) -D$(USE_FIELD) -o $@ $<
+
+# Clean build
+clean:
+	rm -f $(TARGET)
diff --git a/sumcheck/cuda/README.md b/sumcheck/cuda/README.md
new file mode 100644
index 00000000..a3332eec
--- /dev/null
+++ b/sumcheck/cuda/README.md
@@ -0,0 +1,52 @@
+# Sumcheck GPU Acceleration
+
+This project implements GPU acceleration for the sumcheck protocol. The core computation leverages CUDA, and users can choose between CPU and GPU modes when running the program. Two fields are supported: BN254 and the M31 extension field `M31ext3`, which is the default.
+
+## Installation
+
+Make sure you have CUDA installed on your system. The `Makefile` checks that `nvcc` is available and that its version is 12.5 or newer.
+
+### Compile the Project
+
+To compile the project, simply run:
+
+```bash
+make clean && make
+```
+
+This will clean any existing binaries and generate a new one: `sumcheck.bin`.
+
+## Usage
+
+To run the program, use the following syntax:
+
+```bash
+./sumcheck.bin -m [cpu|gpu] -p [log2 of circuit size] [-v]
+```
+
+For example, to run a sumcheck of size 2^23 on the GPU, use:
+
+```bash
+./sumcheck.bin -m gpu -p 23
+```
+
+### Options:
+- `-m [cpu|gpu]`: Choose the computation mode. Default is `cpu`.
+- `-p [circuit size]`: Specify the circuit size as a power of 2, i.e. `-p 23` means a circuit of size 2^23. Default is 20.
+- `-v`: Enable verbose mode for detailed output.
+
+## Field Support
+
+The project supports different field operations based on compile-time flags:
+- **BN254**: We use Ingonyama's Icicle as the underlying implementation for BN254 field operations.
+- **M31ext3**: Default mode uses M31ext3 extension field.
+
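+To switch between fields, override the `USE_FIELD` variable defined in the `Makefile` on the `make` command line (or edit its default there). Run `make clean` first so that the binary is rebuilt. For example, to use BN254: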
+
+```bash
+make clean && make USE_FIELD=useBN254
+```
+
+## Acknowledgments
+
+We would like to express our sincere thanks to Ingonyama for providing the [Icicle framework](https://github.com/ingonyama-zk/icicle), which is used as the underlying implementation for BN254 field operations.
diff --git a/sumcheck/cuda/icicle/curves/affine.cuh b/sumcheck/cuda/icicle/curves/affine.cuh
new file mode 100644
index 00000000..2b1253e4
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/affine.cuh
@@ -0,0 +1,47 @@
+#pragma once
+
+#include "gpu-utils/sharedmem.cuh"
+#include "gpu-utils/modifiers.cuh"
+#include <iostream>
+
+/**
+ * Point in affine coordinates: a pair (x, y) whose coordinates are elements of `FF`.
+ *
+ * Every operation is a static or friend function and is applied coordinate-wise by delegating to the
+ * corresponding `FF` operation.
+ */
+template <class FF>
+class Affine
+{
+public:
+  FF x;
+  FF y;
+
+  /** Returns the point whose two coordinates are both `FF::zero()`. */
+  static HOST_DEVICE_INLINE Affine zero()
+  {
+    return Affine{FF::zero(), FF::zero()};
+  }
+
+  /** Returns `point` with its y coordinate replaced by `FF::neg(point.y)`; x is kept as is. */
+  static HOST_DEVICE_INLINE Affine neg(const Affine& point)
+  {
+    return Affine{point.x, FF::neg(point.y)};
+  }
+
+  /** Applies `FF::to_montgomery` to each coordinate of `point`. */
+  static HOST_DEVICE_INLINE Affine to_montgomery(const Affine& point)
+  {
+    return Affine{FF::to_montgomery(point.x), FF::to_montgomery(point.y)};
+  }
+
+  /** Applies `FF::from_montgomery` to each coordinate of `point`. */
+  static HOST_DEVICE_INLINE Affine from_montgomery(const Affine& point)
+  {
+    return Affine{FF::from_montgomery(point.x), FF::from_montgomery(point.y)};
+  }
+
+  /** Two points are equal when their x coordinates and their y coordinates both compare equal. */
+  friend HOST_DEVICE_INLINE bool operator==(const Affine& xs, const Affine& ys)
+  {
+    // y is only compared once x is known to match, as with a short-circuit `&&`.
+    if (!(xs.x == ys.x)) { return false; }
+    if (!(xs.y == ys.y)) { return false; }
+    return true;
+  }
+
+  /** Writes the point to `os` in the form `x: <x>; y: <y>` (host only). */
+  friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Affine& point)
+  {
+    os << "x: " << point.x;
+    os << "; y: " << point.y;
+    return os;
+  }
+};
+
+/**
+ * `SharedMemory` specialization for `Affine<FF>` elements.
+ * NOTE(review): the primary `SharedMemory` template is presumably declared in "gpu-utils/sharedmem.cuh" — confirm.
+ */
+template <class FF>
+struct SharedMemory<Affine<FF>> {
+  /** Device-only accessor returning a pointer to the start of the `extern __shared__` array of `Affine<FF>`. */
+  __device__ Affine<FF>* getPointer()
+  {
+    // Unsized `extern __shared__` array: its size is not fixed here but supplied by the kernel launch.
+    extern __shared__ Affine<FF> s_affine_[];
+    return s_affine_;
+  }
+};
diff --git a/sumcheck/cuda/icicle/curves/curve_config.cuh b/sumcheck/cuda/icicle/curves/curve_config.cuh
new file mode 100644
index 00000000..c9fe109b
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/curve_config.cuh
@@ -0,0 +1,34 @@
+#pragma once
+#ifndef CURVE_CONFIG_H
+#define CURVE_CONFIG_H
+
+#include "fields/id.h"
+#include "curves/projective.cuh"
+
+/**
+ * @namespace curve_config
+ * Namespace with type definitions for short Weierstrass pairing-friendly [elliptic
+ * curves](https://hyperelliptic.org/EFD/g1p/auto-shortw.html). Here, concrete types are created in accordance
+ * with the `CURVE_ID` preprocessor macro: the first branch below whose identifier equals `CURVE_ID` includes
+ * that curve's parameter header and aliases its namespace as `curve_config`.
+ *
+ * NOTE(review): the curve identifiers (`BN254`, `BLS12_381`, ...) are presumably defined in "fields/id.h" —
+ * confirm. There is no `#else` branch, so when no comparison succeeds `curve_config` is left undeclared.
+ */
+#if CURVE_ID == BN254
+#include "curves/params/bn254.cuh"
+namespace curve_config = bn254;
+
+#elif CURVE_ID == BLS12_381
+#include "curves/params/bls12_381.cuh"
+namespace curve_config = bls12_381;
+
+#elif CURVE_ID == BLS12_377
+#include "curves/params/bls12_377.cuh"
+namespace curve_config = bls12_377;
+
+#elif CURVE_ID == BW6_761
+#include "curves/params/bw6_761.cuh"
+namespace curve_config = bw6_761;
+
+#elif CURVE_ID == GRUMPKIN
+#include "curves/params/grumpkin.cuh"
+namespace curve_config = grumpkin;
+#endif
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/curves/macro.h b/sumcheck/cuda/icicle/curves/macro.h
new file mode 100644
index 00000000..6ce3cb66
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/macro.h
@@ -0,0 +1,42 @@
+#pragma once
+#ifndef CURVE_MACRO_H
+#define CURVE_MACRO_H
+
+// Expands to the G1 definitions of a curve: `point_field_t`, the constants `generator_x`, `generator_y` and `b`,
+// and the point types `projective_t` and `affine_t`.
+// The expansion refers to `fq_config`, `g1_gen_x`, `g1_gen_y`, `weierstrass_b` and `scalar_t`, as well as the
+// `Field`, `Projective` and `Affine` templates, so all of these must be visible where the macro is used.
+#define CURVE_DEFINITIONS \
+  /** \
+   * Base field of G1 curve. Is always a prime field. \
+   */ \
+  typedef Field<fq_config> point_field_t; \
+  \
+  static constexpr point_field_t generator_x = point_field_t{g1_gen_x}; \
+  static constexpr point_field_t generator_y = point_field_t{g1_gen_y}; \
+  static constexpr point_field_t b = point_field_t{weierstrass_b}; \
+  /** \
+   * [Projective representation](https://hyperelliptic.org/EFD/g1p/auto-shortw-projective.html) \
+   * of G1 curve consisting of three coordinates of type [point_field_t](point_field_t). \
+   */ \
+  typedef Projective<point_field_t, scalar_t, b, generator_x, generator_y> projective_t; \
+  /** \
+   * Affine representation of G1 curve consisting of two coordinates of type [point_field_t](point_field_t). \
+   */ \
+  typedef Affine<point_field_t> affine_t;
+
+// Expands to the G2 definitions of a curve: `g2_point_field_t`, the constants `g2_generator_x`, `g2_generator_y`
+// and `g2_b`, and the point types `g2_projective_t` and `g2_affine_t`.
+// Each G2 constant is built from a pair of base-field values (`*_re`, `*_im`). The expansion refers to
+// `point_field_t` (which `CURVE_DEFINITIONS` defines), `fq_config`, `scalar_t`, `g2_gen_x_re`, `g2_gen_x_im`,
+// `g2_gen_y_re`, `g2_gen_y_im`, `weierstrass_b_g2_re` and `weierstrass_b_g2_im`, as well as the `ExtensionField`,
+// `Projective` and `Affine` templates.
+#define G2_CURVE_DEFINITIONS \
+  typedef ExtensionField<fq_config, point_field_t> g2_point_field_t; \
+  static constexpr g2_point_field_t g2_generator_x = \
+    g2_point_field_t{point_field_t{g2_gen_x_re}, point_field_t{g2_gen_x_im}}; \
+  static constexpr g2_point_field_t g2_generator_y = \
+    g2_point_field_t{point_field_t{g2_gen_y_re}, point_field_t{g2_gen_y_im}}; \
+  static constexpr g2_point_field_t g2_b = \
+    g2_point_field_t{point_field_t{weierstrass_b_g2_re}, point_field_t{weierstrass_b_g2_im}}; \
+  \
+  /** \
+   * [Projective representation](https://hyperelliptic.org/EFD/g1p/auto-shortw-projective.html) of G2 curve. \
+   */ \
+  typedef Projective<g2_point_field_t, scalar_t, g2_b, g2_generator_x, g2_generator_y> g2_projective_t; \
+  /** \
+   * Affine representation of G2 curve. \
+   */ \
+  typedef Affine<g2_point_field_t> g2_affine_t;
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/curves/params/bls12_377.cuh b/sumcheck/cuda/icicle/curves/params/bls12_377.cuh
new file mode 100644
index 00000000..3e81fb17
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/params/bls12_377.cuh
@@ -0,0 +1,48 @@
+#pragma once
+#ifndef BLS12_377_PARAMS_H
+#define BLS12_377_PARAMS_H
+
+#include "fields/storage.cuh"
+
+#include "curves/macro.h"
+#include "curves/projective.cuh"
+#include "fields/snark_fields/bls12_377_base.cuh"
+#include "fields/snark_fields/bls12_377_scalar.cuh"
+#include "fields/quadratic_extension.cuh"
+
+/**
+ * Curve parameters for BLS12-377: generator coordinates for G1 and G2 and the Weierstrass `b` coefficients,
+ * each stored as a `storage` array of `fq_config::limbs_count` limbs. The `CURVE_DEFINITIONS` and
+ * `G2_CURVE_DEFINITIONS` macros at the end of the namespace turn these raw constants into the curve's field,
+ * projective and affine types.
+ *
+ * NOTE(review): limbs appear to be 32-bit words listed least-significant first (under that reading
+ * `weierstrass_b` encodes 1) — confirm against "fields/storage.cuh".
+ */
+namespace bls12_377 {
+  // G1 and G2 generators
+  // The `_re` / `_im` constants are the two base-field components from which `G2_CURVE_DEFINITIONS` builds
+  // each G2 extension-field coordinate.
+  static constexpr storage<fq_config::limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512,
+                                                               0xbd37cb5c, 0x188282c8, 0xaa9d41bb, 0x85951e2c,
+                                                               0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
+  static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36,
+                                                               0x4fb82305, 0x6d182ad4, 0xca3e52d9, 0xbd7fb348,
+                                                               0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
+  static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52,
+                                                                  0x57db6b9b, 0x7ea501f5, 0x203e5031, 0xc565f071,
+                                                                  0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
+  static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984,
+                                                                  0x0799c9de, 0xe7223ece, 0x6651cecb, 0x532777ee,
+                                                                  0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
+  static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888,
+                                                                  0xf832d204, 0xe458c282, 0x74b49a58, 0xde03ed72,
+                                                                  0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
+  static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f,
+                                                                  0xcee304c2, 0x2463b01a, 0x3d591bf1, 0x61ef11ac,
+                                                                  0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
+
+  // Weierstrass `b` coefficient for G1, followed by the two components of the G2 coefficient.
+  static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
+                                                                    0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                                                                    0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
+    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
+    0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
+    0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
+
+  // Instantiate the G1 and G2 types and constants from the parameters above.
+  CURVE_DEFINITIONS
+  G2_CURVE_DEFINITIONS
+} // namespace bls12_377
+
+#endif
diff --git a/sumcheck/cuda/icicle/curves/params/bls12_381.cuh b/sumcheck/cuda/icicle/curves/params/bls12_381.cuh
new file mode 100644
index 00000000..86c7ef6a
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/params/bls12_381.cuh
@@ -0,0 +1,48 @@
+#pragma once
+#ifndef BLS12_381_PARAMS_H
+#define BLS12_381_PARAMS_H
+
+#include "fields/storage.cuh"
+
+#include "curves/macro.h"
+#include "curves/projective.cuh"
+#include "fields/snark_fields/bls12_381_base.cuh"
+#include "fields/snark_fields/bls12_381_scalar.cuh"
+#include "fields/quadratic_extension.cuh"
+
+/**
+ * Curve parameters for BLS12-381: generator coordinates for G1 and G2 and the Weierstrass `b` coefficients,
+ * each stored as a `storage` array of `fq_config::limbs_count` limbs. The `CURVE_DEFINITIONS` and
+ * `G2_CURVE_DEFINITIONS` macros at the end of the namespace turn these raw constants into the curve's field,
+ * projective and affine types.
+ *
+ * NOTE(review): limbs appear to be 32-bit words listed least-significant first (under that reading
+ * `weierstrass_b` encodes 4) — confirm against "fields/storage.cuh".
+ */
+namespace bls12_381 {
+  // G1 and G2 generators
+  // The `_re` / `_im` constants are the two base-field components from which `G2_CURVE_DEFINITIONS` builds
+  // each G2 extension-field coordinate.
+  static constexpr storage<fq_config::limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f,
+                                                               0x171bac58, 0xa14e3a3f, 0x9774b905, 0xc3688c4f,
+                                                               0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
+  static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744,
+                                                               0x2c04b3ed, 0x00db18cb, 0xd5d00af6, 0xfcf5e095,
+                                                               0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
+  static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326,
+                                                                  0x7ae3d177, 0xb4510b64, 0xfa403b02, 0xc6e47ad4,
+                                                                  0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
+  static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112,
+                                                                  0xdc7f5049, 0xb5da61bb, 0x9920b61a, 0x596bd0d0,
+                                                                  0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
+  static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc,
+                                                                  0x5160d12c, 0x6d429a69, 0x8cbdd3a7, 0xadfd9baa,
+                                                                  0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
+  static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27,
+                                                                  0x572e99ab, 0x267492ab, 0x85a763af, 0xcb3e287e,
+                                                                  0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
+
+  // Weierstrass `b` coefficient for G1, followed by the two components of the G2 coefficient.
+  static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000,
+                                                                    0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                                                                    0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
+    0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
+    0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
+
+  // Instantiate the G1 and G2 types and constants from the parameters above.
+  CURVE_DEFINITIONS
+  G2_CURVE_DEFINITIONS
+} // namespace bls12_381
+
+#endif
diff --git a/sumcheck/cuda/icicle/curves/params/bn254.cuh b/sumcheck/cuda/icicle/curves/params/bn254.cuh
new file mode 100644
index 00000000..4ae43760
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/params/bn254.cuh
@@ -0,0 +1,39 @@
+#pragma once
+#ifndef BN254_PARAMS_H
+#define BN254_PARAMS_H
+
+#include "fields/storage.cuh"
+
+#include "curves/macro.h"
+#include "curves/projective.cuh"
+#include "fields/snark_fields/bn254_base.cuh"
+#include "fields/snark_fields/bn254_scalar.cuh"
+#include "fields/quadratic_extension.cuh"
+
+/**
+ * Curve parameters for BN254: generator coordinates for G1 and G2 and the Weierstrass `b` coefficients,
+ * each stored as a `storage` array of `fq_config::limbs_count` limbs. The `CURVE_DEFINITIONS` and
+ * `G2_CURVE_DEFINITIONS` macros at the end of the namespace turn these raw constants into the curve's field,
+ * projective and affine types.
+ *
+ * NOTE(review): limbs appear to be 32-bit words listed least-significant first (under that reading the G1
+ * generator is (1, 2) and `weierstrass_b` is 3) — confirm against "fields/storage.cuh".
+ */
+namespace bn254 {
+  // G1 and G2 generators
+  // The `_re` / `_im` constants are the two base-field components from which `G2_CURVE_DEFINITIONS` builds
+  // each G2 extension-field coordinate.
+  static constexpr storage<fq_config::limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
+                                                               0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
+                                                               0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4,
+                                                                  0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
+  static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933,
+                                                                  0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
+  static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769,
+                                                                  0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
+  static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133,
+                                                                  0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
+
+  // Weierstrass `b` coefficient for G1, followed by the two components of the G2 coefficient.
+  static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000,
+                                                                    0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
+    0x24a138e5, 0x3267e6dc, 0x59dbefa3, 0xb5b4c5e5, 0x1be06ac3, 0x81be1899, 0xceb8aaae, 0x2b149d40};
+  static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
+    0x85c315d2, 0xe4a2bd06, 0xe52d1852, 0xa74fa084, 0xeed8fdf4, 0xcd2cafad, 0x3af0fed4, 0x009713b0};
+
+  // Instantiate the G1 and G2 types and constants from the parameters above.
+  CURVE_DEFINITIONS
+  G2_CURVE_DEFINITIONS
+} // namespace bn254
+
+#endif
diff --git a/sumcheck/cuda/icicle/curves/params/bw6_761.cuh b/sumcheck/cuda/icicle/curves/params/bw6_761.cuh
new file mode 100644
index 00000000..ce649f5e
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/params/bw6_761.cuh
@@ -0,0 +1,58 @@
+#pragma once
+#ifndef BW6_761_PARAMS_H
+#define BW6_761_PARAMS_H
+
+#include "fields/storage.cuh"
+
+#include "curves/macro.h"
+#include "curves/projective.cuh"
+#include "fields/snark_fields/bw6_761_base.cuh"
+#include "fields/snark_fields/bw6_761_scalar.cuh"
+#include "fields/quadratic_extension.cuh"
+
+namespace bw6_761 {
+  // G1 and G2 generators
+  static constexpr storage<fq_config::limbs_count> g1_gen_x = {
+    0x66e5b43d, 0x4088f3af, 0xa6af603f, 0x055928ac, 0x56133e82, 0x6750dd03, 0x280ca27f, 0x03758f9a,
+    0xc9ea0971, 0x5bd71fa0, 0x47729b90, 0xa17a54ce, 0x94c2e746, 0x11dbfcd2, 0xc15520ac, 0x79017ffa,
+    0x85f56fc7, 0xee05c54b, 0x551b27f0, 0xe6a0cfb7, 0xa477beae, 0xb277ce98, 0x0ea190c8, 0x01075b02};
+  static constexpr storage<fq_config::limbs_count> g1_gen_y = {
+    0xb4e95363, 0xbafc8f2d, 0x0b20d2a1, 0xad1cb2be, 0xcad0fb93, 0xb2b08119, 0xb3053253, 0x9f9df141,
+    0x6fc2cdd4, 0xbe3fb90b, 0x717a4c55, 0xcc685d31, 0x71b5b806, 0xc5b8fa17, 0xaf7e0dba, 0x265909f1,
+    0xa2e573a3, 0x1a7348d2, 0x884c9ec6, 0x0f952589, 0x45cc2a42, 0xe6fd637b, 0x0a6fc574, 0x0058b84e};
+  static constexpr storage<fq_config::limbs_count> g2_gen_x = {
+    0xcd025f1c, 0xa830c194, 0xe1bf995b, 0x6410cf4f, 0xc2ad54b0, 0x00e96efb, 0x3cd208d7, 0xce6948cb,
+    0x00e1b6ba, 0x963317a3, 0xac70e7c7, 0xc5bbcae9, 0xf09feb58, 0x734ec3f1, 0xab3da268, 0x26b41c5d,
+    0x13890f6d, 0x4c062010, 0xc5a7115f, 0xd61053aa, 0x69d660f9, 0xc852a82e, 0x41d9b816, 0x01101332};
+  static constexpr storage<fq_config::limbs_count> g2_gen_y = {
+    0x28c73b61, 0xeb70a167, 0xf9eac689, 0x91ec0594, 0x3c5a02a5, 0x58aa2d3a, 0x504affc7, 0x3ea96fcd,
+    0xffa82300, 0x8906c170, 0xd2c712b8, 0x64f293db, 0x33293fef, 0x94c97eb7, 0x0b95a59c, 0x0a1d86c8,
+    0x53ffe316, 0x81a78e27, 0xcec2181c, 0x26b7cf9a, 0xe4b6d2dc, 0x8179eb10, 0x7761369f, 0x0017c335};
+
+  static constexpr storage<fq_config::limbs_count> weierstrass_b = {
+    0x0000008a, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
+    0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
+    0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
+  static constexpr storage<fq_config::limbs_count> g2_weierstrass_b = {
+    0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
+
+  CURVE_DEFINITIONS
+
+  typedef point_field_t g2_point_field_t;
+  static constexpr g2_point_field_t g2_generator_x = g2_point_field_t{g2_gen_x};
+  static constexpr g2_point_field_t g2_generator_y = g2_point_field_t{g2_gen_y};
+  static constexpr g2_point_field_t g2_b = g2_point_field_t{g2_weierstrass_b};
+
+  /**
+   * [Projective representation](https://hyperelliptic.org/EFD/g1p/auto-shortw-projective.html) of G2 curve.
+   */
+  typedef Projective<g2_point_field_t, scalar_t, g2_b, g2_generator_x, g2_generator_y> g2_projective_t;
+  /**
+   * Affine representation of G2 curve.
+   */
+  typedef Affine<g2_point_field_t> g2_affine_t;
+} // namespace bw6_761
+
+#endif
diff --git a/sumcheck/cuda/icicle/curves/params/grumpkin.cuh b/sumcheck/cuda/icicle/curves/params/grumpkin.cuh
new file mode 100644
index 00000000..855897a1
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/params/grumpkin.cuh
@@ -0,0 +1,26 @@
+#pragma once
+#ifndef GRUMPKIN_PARAMS_H
+#define GRUMPKIN_PARAMS_H
+
+#include "fields/storage.cuh"
+
+#include "curves/macro.h"
+#include "curves/projective.cuh"
+#include "fields/snark_fields/grumpkin_base.cuh"
+#include "fields/snark_fields/grumpkin_scalar.cuh"
+
+namespace grumpkin {
+  typedef bn254::fp_config fq_config;
+  // G1 generator
+  static constexpr storage<fq_config::limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
+                                                               0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x823f272c, 0x833fc48d, 0xf1181294, 0x2d270d45,
+                                                               0x6a45d63,  0xcf135e75, 0x00000002, 0x00000000};
+
+  static constexpr storage<fq_config::limbs_count> weierstrass_b = {0xeffffff0, 0x43e1f593, 0x79b97091, 0x2833e848,
+                                                                    0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
+
+  CURVE_DEFINITIONS
+} // namespace grumpkin
+
+#endif
diff --git a/sumcheck/cuda/icicle/curves/projective.cuh b/sumcheck/cuda/icicle/curves/projective.cuh
new file mode 100644
index 00000000..47ff27a8
--- /dev/null
+++ b/sumcheck/cuda/icicle/curves/projective.cuh
@@ -0,0 +1,239 @@
+#pragma once
+
+#include "affine.cuh"
+#include "gpu-utils/sharedmem.cuh"
+
+template <typename FF, class SCALAR_FF, const FF& B_VALUE, const FF& GENERATOR_X, const FF& GENERATOR_Y>
+class Projective
+{
+  friend Affine<FF>;
+
+public:
+  typedef Affine<FF> Aff;
+  typedef SCALAR_FF Scalar;
+
+  static constexpr unsigned SCALAR_FF_NBITS = SCALAR_FF::NBITS;
+  static constexpr unsigned FF_NBITS = FF::NBITS;
+
+  FF x;
+  FF y;
+  FF z;
+
+  static HOST_DEVICE_INLINE Projective zero() { return {FF::zero(), FF::one(), FF::zero()}; }
+
+  static HOST_DEVICE_INLINE Affine<FF> to_affine(const Projective& point)
+  {
+    FF denom = FF::inverse(point.z);
+    return {point.x * denom, point.y * denom};
+  }
+
+  static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF>& point)
+  {
+    return point == Affine<FF>::zero() ? zero() : Projective{point.x, point.y, FF::one()};
+  }
+
+  static HOST_DEVICE_INLINE Projective to_montgomery(const Projective& point)
+  {
+    return {FF::to_montgomery(point.x), FF::to_montgomery(point.y), FF::to_montgomery(point.z)};
+  }
+
+  static HOST_DEVICE_INLINE Projective from_montgomery(const Projective& point)
+  {
+    return {FF::from_montgomery(point.x), FF::from_montgomery(point.y), FF::from_montgomery(point.z)};
+  }
+
+  static HOST_DEVICE_INLINE Projective generator() { return {GENERATOR_X, GENERATOR_Y, FF::one()}; }
+
+  static HOST_DEVICE_INLINE Projective neg(const Projective& point) { return {point.x, FF::neg(point.y), point.z}; }
+
+  static HOST_DEVICE_INLINE Projective dbl(const Projective& point)
+  {
+    const FF X = point.x;
+    const FF Y = point.y;
+    const FF Z = point.z;
+
+    // TODO: Change to efficient dbl once implemented for field.cuh
+    FF t0 = FF::sqr(Y);                                                     // 1. t0 ← Y · Y
+    FF Z3 = t0 + t0;                                                        // 2. Z3 ← t0 + t0
+    Z3 = Z3 + Z3;                                                           // 3. Z3 ← Z3 + Z3
+    Z3 = Z3 + Z3;                                                           // 4. Z3 ← Z3 + Z3
+    FF t1 = Y * Z;                                                          // 5. t1 ← Y · Z
+    FF t2 = FF::sqr(Z);                                                     // 6. t2 ← Z · Z
+    t2 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t2)); // 7. t2 ← b3 · t2
+    FF X3 = t2 * Z3;                                                        // 8. X3 ← t2 · Z3
+    FF Y3 = t0 + t2;                                                        // 9. Y3 ← t0 + t2
+    Z3 = t1 * Z3;                                                           // 10. Z3 ← t1 · Z3
+    t1 = t2 + t2;                                                           // 11. t1 ← t2 + t2
+    t2 = t1 + t2;                                                           // 12. t2 ← t1 + t2
+    t0 = t0 - t2;                                                           // 13. t0 ← t0 − t2
+    Y3 = t0 * Y3;                                                           // 14. Y3 ← t0 · Y3
+    Y3 = X3 + Y3;                                                           // 15. Y3 ← X3 + Y3
+    t1 = X * Y;                                                             // 16. t1 ← X · Y
+    X3 = t0 * t1;                                                           // 17. X3 ← t0 · t1
+    X3 = X3 + X3;                                                           // 18. X3 ← X3 + X3
+    return {X3, Y3, Z3};
+  }
+
+  friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Projective& p2)
+  { // Projective + projective addition; each step comment names the temporary assigned on that line.
+    const FF X1 = p1.x;                                                                //                   < 2
+    const FF Y1 = p1.y;                                                                //                   < 2
+    const FF Z1 = p1.z;                                                                //                   < 2
+    const FF X2 = p2.x;                                                                //                   < 2
+    const FF Y2 = p2.y;                                                                //                   < 2
+    const FF Z2 = p2.z;                                                                //                   < 2
+    const FF t00 = X1 * X2;                                                            // t00 ← X1 · X2     < 2
+    const FF t01 = Y1 * Y2;                                                            // t01 ← Y1 · Y2     < 2
+    const FF t02 = Z1 * Z2;                                                            // t02 ← Z1 · Z2     < 2
+    const FF t03 = X1 + Y1;                                                            // t03 ← X1 + Y1     < 4
+    const FF t04 = X2 + Y2;                                                            // t04 ← X2 + Y2     < 4
+    const FF t05 = t03 * t04;                                                          // t05 ← t03 · t04   < 3
+    const FF t06 = t00 + t01;                                                          // t06 ← t00 + t01   < 4
+    const FF t07 = t05 - t06;                                                          // t07 ← t05 − t06   < 2
+    const FF t08 = Y1 + Z1;                                                            // t08 ← Y1 + Z1     < 4
+    const FF t09 = Y2 + Z2;                                                            // t09 ← Y2 + Z2     < 4
+    const FF t10 = t08 * t09;                                                          // t10 ← t08 · t09   < 3
+    const FF t11 = t01 + t02;                                                          // t11 ← t01 + t02   < 4
+    const FF t12 = t10 - t11;                                                          // t12 ← t10 − t11   < 2
+    const FF t13 = X1 + Z1;                                                            // t13 ← X1 + Z1     < 4
+    const FF t14 = X2 + Z2;                                                            // t14 ← X2 + Z2     < 4
+    const FF t15 = t13 * t14;                                                          // t15 ← t13 · t14   < 3
+    const FF t16 = t00 + t02;                                                          // t16 ← t00 + t02   < 4
+    const FF t17 = t15 - t16;                                                          // t17 ← t15 − t16   < 2
+    const FF t18 = t00 + t00;                                                          // t18 ← t00 + t00   < 2
+    const FF t19 = t18 + t00;                                                          // t19 ← t18 + t00   < 2
+    const FF t20 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02    < 2
+    const FF t21 = t01 + t20;                                                          // t21 ← t01 + t20   < 2
+    const FF t22 = t01 - t20;                                                          // t22 ← t01 − t20   < 2
+    const FF t23 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17    < 2
+    const auto t24 = FF::mul_wide(t12, t23);                                           // t24 ← t12 · t23   < 2
+    const auto t25 = FF::mul_wide(t07, t22);                                           // t25 ← t07 · t22   < 2
+    const FF X3 = FF::reduce(t25 - t24);                                               // X3 ← t25 − t24    < 2
+    const auto t27 = FF::mul_wide(t23, t19);                                           // t27 ← t23 · t19   < 2
+    const auto t28 = FF::mul_wide(t22, t21);                                           // t28 ← t22 · t21   < 2
+    const FF Y3 = FF::reduce(t28 + t27);                                               // Y3 ← t28 + t27    < 2
+    const auto t30 = FF::mul_wide(t19, t07);                                           // t30 ← t19 · t07   < 2
+    const auto t31 = FF::mul_wide(t21, t12);                                           // t31 ← t21 · t12   < 2
+    const FF Z3 = FF::reduce(t31 + t30);                                               // Z3 ← t31 + t30    < 2
+    return {X3, Y3, Z3};
+  }
+
+  friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Projective& p2) { return p1 + neg(p2); }
+
+  friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Affine<FF>& p2)
+  { // Mixed addition: p2 has no z, so FF::one() stands in wherever the projective overload reads Z2.
+    const FF X1 = p1.x;                                                                //                   < 2
+    const FF Y1 = p1.y;                                                                //                   < 2
+    const FF Z1 = p1.z;                                                                //                   < 2
+    const FF X2 = p2.x;                                                                //                   < 2
+    const FF Y2 = p2.y;                                                                //                   < 2
+    const FF t00 = X1 * X2;                                                            // t00 ← X1 · X2     < 2
+    const FF t01 = Y1 * Y2;                                                            // t01 ← Y1 · Y2     < 2
+    const FF t02 = Z1;                                                                 // t02 ← Z1          < 2
+    const FF t03 = X1 + Y1;                                                            // t03 ← X1 + Y1     < 4
+    const FF t04 = X2 + Y2;                                                            // t04 ← X2 + Y2     < 4
+    const FF t05 = t03 * t04;                                                          // t05 ← t03 · t04   < 3
+    const FF t06 = t00 + t01;                                                          // t06 ← t00 + t01   < 4
+    const FF t07 = t05 - t06;                                                          // t07 ← t05 − t06   < 2
+    const FF t08 = Y1 + Z1;                                                            // t08 ← Y1 + Z1     < 4
+    const FF t09 = Y2 + FF::one();                                                     // t09 ← Y2 + 1      < 4
+    const FF t10 = t08 * t09;                                                          // t10 ← t08 · t09   < 3
+    const FF t11 = t01 + t02;                                                          // t11 ← t01 + t02   < 4
+    const FF t12 = t10 - t11;                                                          // t12 ← t10 − t11   < 2
+    const FF t13 = X1 + Z1;                                                            // t13 ← X1 + Z1     < 4
+    const FF t14 = X2 + FF::one();                                                     // t14 ← X2 + 1      < 4
+    const FF t15 = t13 * t14;                                                          // t15 ← t13 · t14   < 3
+    const FF t16 = t00 + t02;                                                          // t16 ← t00 + t02   < 4
+    const FF t17 = t15 - t16;                                                          // t17 ← t15 − t16   < 2
+    const FF t18 = t00 + t00;                                                          // t18 ← t00 + t00   < 2
+    const FF t19 = t18 + t00;                                                          // t19 ← t18 + t00   < 2
+    const FF t20 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02    < 2
+    const FF t21 = t01 + t20;                                                          // t21 ← t01 + t20   < 2
+    const FF t22 = t01 - t20;                                                          // t22 ← t01 − t20   < 2
+    const FF t23 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17    < 2
+    const auto t24 = FF::mul_wide(t12, t23);                                           // t24 ← t12 · t23   < 2
+    const auto t25 = FF::mul_wide(t07, t22);                                           // t25 ← t07 · t22   < 2
+    const FF X3 = FF::reduce(t25 - t24);                                               // X3 ← t25 − t24    < 2
+    const auto t27 = FF::mul_wide(t23, t19);                                           // t27 ← t23 · t19   < 2
+    const auto t28 = FF::mul_wide(t22, t21);                                           // t28 ← t22 · t21   < 2
+    const FF Y3 = FF::reduce(t28 + t27);                                               // Y3 ← t28 + t27    < 2
+    const auto t30 = FF::mul_wide(t19, t07);                                           // t30 ← t19 · t07   < 2
+    const auto t31 = FF::mul_wide(t21, t12);                                           // t31 ← t21 · t12   < 2
+    const FF Z3 = FF::reduce(t31 + t30);                                               // Z3 ← t31 + t30    < 2
+    return {X3, Y3, Z3};
+  }
+
+  friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Affine<FF>& p2)
+  {
+    return p1 + Affine<FF>::neg(p2);
+  }
+
+  friend HOST_DEVICE_INLINE Projective operator*(SCALAR_FF scalar, const Projective& point)
+  { // Double-and-add over digit indices NBITS-1 down to 0 (get_scalar_digit(idx, 1) presumably reads one bit).
+    Projective res = zero();
+#ifdef __CUDA_ARCH__
+    UNROLL
+#endif
+    for (int i = 0; i < SCALAR_FF::NBITS; i++) {
+      if (i > 0) { res = dbl(res); } // no doubling on the first pass, where res is still zero()
+      if (scalar.get_scalar_digit(SCALAR_FF::NBITS - i - 1, 1)) { res = res + point; }
+    }
+    return res;
+  }
+
+  friend HOST_DEVICE_INLINE Projective operator*(const Projective& point, SCALAR_FF scalar) { return scalar * point; }
+
+  friend HOST_DEVICE_INLINE bool operator==(const Projective& p1, const Projective& p2)
+  {
+    return (p1.x * p2.z == p2.x * p1.z) && (p1.y * p2.z == p2.y * p1.z);
+  }
+
+  friend HOST_DEVICE_INLINE bool operator!=(const Projective& p1, const Projective& p2) { return !(p1 == p2); }
+
+  friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Projective& point)
+  {
+    os << "Point { x: " << point.x << "; y: " << point.y << "; z: " << point.z << " }";
+    return os;
+  }
+
+  static HOST_DEVICE_INLINE bool is_zero(const Projective& point)
+  {
+    return point.x == FF::zero() && point.y != FF::zero() && point.z == FF::zero();
+  }
+
+  static HOST_DEVICE_INLINE bool is_on_curve(const Projective& point)
+  {
+    if (is_zero(point)) return true; // whatever is_zero() accepts counts as on-curve
+    bool eq_holds = // B_VALUE * z^3 + x^3 == z * y^2
+      (FF::template mul_const<B_VALUE>(FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x ==
+       point.z * FF::sqr(point.y));
+    return point.z != FF::zero() && eq_holds; // any other point with z == 0 is rejected
+  }
+
+  static HOST_INLINE Projective rand_host()
+  {
+    SCALAR_FF rand_scalar = SCALAR_FF::rand_host();
+    return rand_scalar * generator();
+  }
+
+  static void rand_host_many(Projective* out, int size)
+  {
+    // Fills out[0..size): the first 100 entries come from rand_host(), each later entry copies out[i - 100].
+    for (int i = 0; i < size; i++)
+      out[i] = (i < 100) ? rand_host() : out[i - 100]; // i < size here, so the old `i % size` was just i
+  }
+
+  static void rand_host_many_affine(Affine<FF>* out, int size)
+  {
+    // Fills out[0..size): the first 100 entries are to_affine(rand_host()), each later entry copies out[i - 100].
+    for (int i = 0; i < size; i++)
+      out[i] = (i < 100) ? to_affine(rand_host()) : out[i - 100]; // i < size here, so the old `i % size` was just i
+  }
+};
+
+template <typename FF, class SCALAR_FF, const FF& B_VALUE, const FF& GENERATOR_X, const FF& GENERATOR_Y>
+struct SharedMemory<Projective<FF, SCALAR_FF, B_VALUE, GENERATOR_X, GENERATOR_Y>> {
+  __device__ Projective<FF, SCALAR_FF, B_VALUE, GENERATOR_X, GENERATOR_Y>* getPointer()
+  { // Returns the unsized `extern __shared__` array declared below, typed as Projective; its size is not set here.
+    extern __shared__ Projective<FF, SCALAR_FF, B_VALUE, GENERATOR_X, GENERATOR_Y> s_projective_[];
+    return s_projective_;
+  }
+};
diff --git a/sumcheck/cuda/icicle/fields/field.cuh b/sumcheck/cuda/icicle/fields/field.cuh
new file mode 100644
index 00000000..78e4879e
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/field.cuh
@@ -0,0 +1,1007 @@
+/**
+ * This file contains methods for working with elements of a prime field. It is based on and evolved from Matter Labs'
+ * [Zprize
+ * submission](https://github.com/matter-labs/z-prize-msm-gpu/blob/main/bellman-cuda-rust/bellman-cuda-sys/native/ff_dispatch_st.cuh).
+ *
+ * TODO: DmytroTym: current version needs refactoring (e.g. there's no reason to have different classes Field and
+ * ff_storage among other issues). But because this is an internal file and correctness and performance are unaffected,
+ * refactoring it is low in the priority list.
+ *
+ * Documentation of methods is intended to explain inner workings to developers working on icicle. In its current state
+ * it mostly explains modular multiplication and related methods. One important quirk of modern CUDA that's affecting
+ * most methods is explained by [Niall Emmart](https://youtu.be/KAWlySN7Hm8?si=h7nzDujnvubWXeDX&t=4039). In short, when
+ * 64-bit MAD (`r = a * b + c`) instructions get compiled down to SASS (CUDA assembly) they require two-register values
+ * `r` and `c` to start from even register (e.g. `r` can live in registers 20 and 21, or 14 and 15, but not 15 and 16).
+ * This complicates implementations forcing us to segregate terms into two categories depending on their alignment.
+ * Which is where `even` and `odd` arrays across the codebase come from.
+ */
+
+#pragma once
+
+#include "gpu-utils/error_handler.cuh"
+#include "gpu-utils/modifiers.cuh"
+#include "gpu-utils/sharedmem.cuh"
+#include "host_math.cuh"
+#include "ptx.cuh"
+#include "storage.cuh"
+
+#include <iomanip>
+#include <iostream>
+#include <random>
+#include <sstream>
+#include <string>
+
+template <class CONFIG>
+class Field
+{
+public:
+  static constexpr unsigned TLC = CONFIG::limbs_count;
+  static constexpr unsigned NBITS = CONFIG::modulus_bit_count;
+
+  static constexpr HOST_DEVICE_INLINE Field zero() { return Field{CONFIG::zero}; }
+
+  static constexpr HOST_DEVICE_INLINE Field one() { return Field{CONFIG::one}; }
+
+  static constexpr HOST_DEVICE_INLINE Field from(uint32_t value)
+  {
+    storage<TLC> scalar{};
+    scalar.limbs[0] = value;
+    for (int i = 1; i < TLC; i++) {
+      scalar.limbs[i] = 0;
+    }
+    return Field{scalar};
+  }
+
+  static constexpr HOST_DEVICE_INLINE Field from_u128(__uint128_t value)
+  {
+    storage<TLC> scalar{};
+    scalar.limbs[0] = value;
+    scalar.limbs[1] = value >> 32;
+    scalar.limbs[2] = value >> 64;
+    scalar.limbs[3] = value >> 3 * 32;
+    for (int i = 4; i < TLC; i++) {
+      scalar.limbs[i] = 0;
+    }
+    return Field{scalar};
+  }
+
+  static HOST_INLINE Field omega(uint32_t logn)
+  {
+    // logn == 0 yields CONFIG::one; otherwise CONFIG::rou is squared (omegas_count - logn) times.
+    if (logn == 0) return Field{CONFIG::one};
+    if (logn > CONFIG::omegas_count) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid omega index"); }
+
+    Field root = Field{CONFIG::rou};
+    for (int squarings = 0; squarings < CONFIG::omegas_count - logn; squarings++)
+      root = sqr(root);
+    return root;
+  }
+
+  static HOST_INLINE Field omega_inv(uint32_t logn)
+  {
+    // logn == 0 yields CONFIG::one; otherwise inverse(CONFIG::rou) is squared (omegas_count - logn) times.
+    if (logn == 0) return Field{CONFIG::one};
+    if (logn > CONFIG::omegas_count) {
+      THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid omega_inv index");
+    }
+
+    Field root_inv = inverse(Field{CONFIG::rou});
+    for (int squarings = 0; squarings < CONFIG::omegas_count - logn; squarings++)
+      root_inv = sqr(root_inv);
+    return root_inv;
+  }
+
+  static HOST_DEVICE_INLINE Field inv_log_size(uint32_t logn)
+  {
+    if (logn == 0) { return Field{CONFIG::one}; }
+#ifndef __CUDA_ARCH__
+    if (logn > CONFIG::omegas_count) THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid inv index");
+#else
+    if (logn > CONFIG::omegas_count) {
+      printf(
+        "CUDA ERROR: field.cuh: error on inv_log_size(logn): logn(=%u) > omegas_count (=%u)", logn,
+        CONFIG::omegas_count);
+      assert(false);
+    }
+#endif // __CUDA_ARCH__
+    storage_array<CONFIG::omegas_count, TLC> const inv = CONFIG::inv;
+    return Field{inv.storages[logn - 1]};
+  }
+
+  static constexpr HOST_INLINE unsigned get_omegas_count()
+  {
+    if constexpr (has_member_omegas_count<CONFIG>()) {
+      return CONFIG::omegas_count;
+    } else {
+      return 0;
+    }
+  }
+
+  template <typename T>
+  static constexpr bool has_member_omegas_count()
+  { // NOTE(review): no SFINAE here; a T lacking omegas_count likely hard-errors instead of giving false - confirm
+    return sizeof(T::omegas_count) > 0;
+  }
+
+  // private:
+  typedef storage<TLC> ff_storage;
+  typedef storage<2 * TLC> ff_wide_storage;
+
+  /**
+   * A new addition to the config file - \f$ 2^{32 \cdot num\_limbs} - p \f$.
+   */
+  static constexpr HOST_DEVICE_INLINE ff_storage get_neg_modulus() { return CONFIG::neg_modulus; }
+
+  /**
+   * A new addition to the config file - the number of times to reduce in [reduce](@ref reduce) function.
+   */
+  static constexpr HOST_DEVICE_INLINE unsigned num_of_reductions() { return CONFIG::num_of_reductions; }
+
+  static constexpr unsigned slack_bits = 32 * TLC - NBITS;
+
+  struct Wide {
+    ff_wide_storage limbs_storage;
+
+    static constexpr Wide HOST_DEVICE_INLINE from_field(const Field& xs)
+    {
+      Wide out{};
+#ifdef __CUDA_ARCH__
+      UNROLL
+#endif
+      for (unsigned i = 0; i < TLC; i++)
+        out.limbs_storage.limbs[i] = xs.limbs_storage.limbs[i];
+      return out;
+    }
+
+    static constexpr Field HOST_DEVICE_INLINE get_lower(const Wide& xs)
+    {
+      Field out{};
+#ifdef __CUDA_ARCH__
+      UNROLL
+#endif
+      for (unsigned i = 0; i < TLC; i++)
+        out.limbs_storage.limbs[i] = xs.limbs_storage.limbs[i];
+      return out;
+    }
+
+    static constexpr Field HOST_DEVICE_INLINE get_higher(const Wide& xs)
+    {
+      Field out{};
+#ifdef __CUDA_ARCH__
+      UNROLL
+#endif
+      for (unsigned i = 0; i < TLC; i++)
+        out.limbs_storage.limbs[i] = xs.limbs_storage.limbs[i + TLC];
+      return out;
+    }
+
+    static constexpr Field HOST_DEVICE_INLINE get_higher_with_slack(const Wide& xs)
+    {
+      Field out{};
+#ifdef __CUDA_ARCH__
+      UNROLL
+#endif
+      for (unsigned i = 0; i < TLC; i++) {
+#ifdef __CUDA_ARCH__
+        out.limbs_storage.limbs[i] =
+          __funnelshift_lc(xs.limbs_storage.limbs[i + TLC - 1], xs.limbs_storage.limbs[i + TLC], 2 * slack_bits);
+#else
+        out.limbs_storage.limbs[i] = (xs.limbs_storage.limbs[i + TLC] << 2 * slack_bits) +
+                                     (xs.limbs_storage.limbs[i + TLC - 1] >> (32 - 2 * slack_bits));
+#endif
+      }
+      return out;
+    }
+
+    template <unsigned REDUCTION_SIZE = 1>
+    static constexpr HOST_DEVICE_INLINE Wide sub_modulus_squared(const Wide& xs)
+    {
+      if (REDUCTION_SIZE == 0) return xs;
+      const ff_wide_storage modulus = get_modulus_squared<REDUCTION_SIZE>();
+      Wide rs = {};
+      return sub_limbs<2 * TLC, true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
+    }
+
+    template <unsigned MODULUS_MULTIPLE = 1>
+    static constexpr HOST_DEVICE_INLINE Wide neg(const Wide& xs)
+    {
+      const ff_wide_storage modulus = get_modulus_squared<MODULUS_MULTIPLE>();
+      Wide rs = {};
+      sub_limbs<2 * TLC, false>(modulus, xs.limbs_storage, rs.limbs_storage);
+      return rs;
+    }
+
+    friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys)
+    {
+      Wide rs = {};
+      add_limbs<2 * TLC, false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
+      return sub_modulus_squared<1>(rs);
+    }
+
+    friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys)
+    {
+      Wide rs = {};
+      uint32_t carry = sub_limbs<2 * TLC, true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
+      if (carry == 0) return rs;
+      const ff_wide_storage modulus = get_modulus_squared<1>();
+      add_limbs<2 * TLC, false>(rs.limbs_storage, modulus, rs.limbs_storage);
+      return rs;
+    }
+  };
+
+  // return modulus multiplied by 1, 2 or 4
+  template <unsigned MULTIPLIER = 1>
+  static constexpr HOST_DEVICE_INLINE ff_storage get_modulus()
+  {
+    switch (MULTIPLIER) {
+    case 1:
+      return CONFIG::modulus;
+    case 2:
+      return CONFIG::modulus_2;
+    case 4:
+      return CONFIG::modulus_4;
+    default:
+      return {};
+    }
+  }
+
+  // return m
+  static constexpr HOST_DEVICE_INLINE ff_storage get_m() { return CONFIG::m; }
+
+  // return modulus^2, helpful for ab +/- cd
+  template <unsigned MULTIPLIER = 1>
+  static constexpr HOST_DEVICE_INLINE ff_wide_storage get_modulus_squared()
+  {
+    switch (MULTIPLIER) {
+    case 1:
+      return CONFIG::modulus_squared;
+    case 2:
+      return CONFIG::modulus_squared_2;
+    case 4:
+      return CONFIG::modulus_squared_4;
+    default:
+      return {};
+    }
+  }
+
+  template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
+  static constexpr DEVICE_INLINE uint32_t add_sub_u32_device(const uint32_t* x, const uint32_t* y, uint32_t* r)
+  {
+    r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
+    for (unsigned i = 1; i < NLIMBS; i++)
+      r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
+    if (!CARRY_OUT) {
+      ptx::addc(0, 0);
+      return 0;
+    }
+    return SUBTRACT ? ptx::subc(0, 0) : ptx::addc(0, 0);
+  }
+
+  template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
+  static constexpr DEVICE_INLINE uint32_t
+  add_sub_limbs_device(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
+  {
+    const uint32_t* x = xs.limbs;
+    const uint32_t* y = ys.limbs;
+    uint32_t* r = rs.limbs;
+    return add_sub_u32_device<NLIMBS, SUBTRACT, CARRY_OUT>(x, y, r);
+  }
+
+  template <unsigned NLIMBS, bool CARRY_OUT>
+  static constexpr HOST_DEVICE_INLINE uint32_t
+  add_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
+  {
+#ifdef __CUDA_ARCH__
+    return add_sub_limbs_device<NLIMBS, false, CARRY_OUT>(xs, ys, rs);
+#else
+    return host_math::template add_sub_limbs<NLIMBS, false, CARRY_OUT>(xs, ys, rs);
+#endif
+  }
+
+  template <unsigned NLIMBS, bool CARRY_OUT>
+  static constexpr HOST_DEVICE_INLINE uint32_t
+  sub_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
+  {
+#ifdef __CUDA_ARCH__
+    return add_sub_limbs_device<NLIMBS, true, CARRY_OUT>(xs, ys, rs);
+#else
+    return host_math::template add_sub_limbs<NLIMBS, true, CARRY_OUT>(xs, ys, rs);
+#endif
+  }
+
+  static DEVICE_INLINE void mul_n(uint32_t* acc, const uint32_t* a, uint32_t bi, size_t n = TLC)
+  {
+    UNROLL
+    for (size_t i = 0; i < n; i += 2) {
+      acc[i] = ptx::mul_lo(a[i], bi);
+      acc[i + 1] = ptx::mul_hi(a[i], bi);
+    }
+  }
+
+  static DEVICE_INLINE void mul_n_msb(uint32_t* acc, const uint32_t* a, uint32_t bi, size_t n = TLC, size_t start_i = 0)
+  {
+    UNROLL
+    for (size_t i = start_i; i < n; i += 2) {
+      acc[i] = ptx::mul_lo(a[i], bi);
+      acc[i + 1] = ptx::mul_hi(a[i], bi);
+    }
+  }
+
+  template <bool CARRY_IN = false>
+  static DEVICE_INLINE void
+  cmad_n(uint32_t* acc, const uint32_t* a, uint32_t bi, size_t n = TLC, uint32_t optional_carry = 0)
+  {
+    if (CARRY_IN) ptx::add_cc(UINT32_MAX, optional_carry);
+    acc[0] = CARRY_IN ? ptx::madc_lo_cc(a[0], bi, acc[0]) : ptx::mad_lo_cc(a[0], bi, acc[0]);
+    acc[1] = ptx::madc_hi_cc(a[0], bi, acc[1]);
+
+    UNROLL
+    for (size_t i = 2; i < n; i += 2) {
+      acc[i] = ptx::madc_lo_cc(a[i], bi, acc[i]);
+      acc[i + 1] = ptx::madc_hi_cc(a[i], bi, acc[i + 1]);
+    }
+  }
+
+  /**
+   * Variant of `cmad_n` used by the MSB-only multiplication. It differs only in the first limb pair.
+   *
+   * @tparam EVEN_PHASE when true, both halves of `a[0] * bi` are accumulated (into `acc[0]` and `acc[1]`); when
+   *         false, only `ptx::mad_hi_cc` is issued, so the low half is skipped and `acc[0]` is left untouched.
+   */
+  template <bool EVEN_PHASE>
+  static DEVICE_INLINE void cmad_n_msb(uint32_t* acc, const uint32_t* a, uint32_t bi, size_t n = TLC)
+  {
+    if (EVEN_PHASE) {
+      acc[0] = ptx::mad_lo_cc(a[0], bi, acc[0]);
+      acc[1] = ptx::madc_hi_cc(a[0], bi, acc[1]);
+    } else {
+      // Starts a fresh carry chain with the high half only.
+      acc[1] = ptx::mad_hi_cc(a[0], bi, acc[1]);
+    }
+
+    // From the second pair on the chain is identical to `cmad_n`.
+    UNROLL
+    for (size_t i = 2; i < n; i += 2) {
+      acc[i] = ptx::madc_lo_cc(a[i], bi, acc[i]);
+      acc[i + 1] = ptx::madc_hi_cc(a[i], bi, acc[i + 1]);
+    }
+  }
+
+  /**
+   * Variant of `cmad_n` used by the LSB-only multiplication: accumulates into exactly `acc[0] .. acc[n - 1]`.
+   * Unlike `cmad_n`, `n` may be odd; the final instruction of the chain is always a non-`_cc` form
+   * (`mad_lo` / `madc_lo` / `madc_hi`).
+   */
+  static DEVICE_INLINE void cmad_n_lsb(uint32_t* acc, const uint32_t* a, uint32_t bi, size_t n = TLC)
+  {
+    // With a single limb there is no chain to start, so the plain `mad_lo` is used.
+    if (n > 1)
+      acc[0] = ptx::mad_lo_cc(a[0], bi, acc[0]);
+    else
+      acc[0] = ptx::mad_lo(a[0], bi, acc[0]);
+
+    size_t i;
+    UNROLL
+    for (i = 1; i < n - 1; i += 2) {
+      acc[i] = ptx::madc_hi_cc(a[i - 1], bi, acc[i]);
+      // `i == n - 2` means `acc[i + 1]` is the last limb (even `n`): end the chain here.
+      if (i == n - 2)
+        acc[i + 1] = ptx::madc_lo(a[i + 1], bi, acc[i + 1]);
+      else
+        acc[i + 1] = ptx::madc_lo_cc(a[i + 1], bi, acc[i + 1]);
+    }
+    // Odd `n`: the loop stopped one limb short, so the chain ends with the high half of the previous product.
+    if (i == n - 1) acc[i] = ptx::madc_hi(a[i - 1], bi, acc[i]);
+  }
+
+  /**
+   * Accumulates one row `a * bi` of a schoolbook product into the two interleaved accumulators `odd` and `even`.
+   *
+   * The odd-indexed limbs of `a` (`a + 1`, stepping by 2 inside `cmad_n`) go into `odd`, followed by the product
+   * with `a[n - 1]`, which is added to `ci` / `di` and written to `odd[n - 2]` / `odd[n - 1]`. The even-indexed limbs
+   * then go into `even`, and `carry_for_high` (plus the carry left by that second chain) is folded into
+   * `odd[n - 1]`.
+   *
+   * @tparam CARRY_OUT when set, the carries out of `odd[n - 1]` are collected and returned; otherwise 0 is returned.
+   * @tparam CARRY_IN  forwarded to the first `cmad_n` call together with `carry_for_low`.
+   * @return collected carry (`CARRY_OUT`) or 0.
+   */
+  template <bool CARRY_OUT = false, bool CARRY_IN = false>
+  static DEVICE_INLINE uint32_t mad_row(
+    uint32_t* odd,
+    uint32_t* even,
+    const uint32_t* a,
+    uint32_t bi,
+    size_t n = TLC,
+    uint32_t ci = 0,
+    uint32_t di = 0,
+    uint32_t carry_for_high = 0,
+    uint32_t carry_for_low = 0)
+  {
+    cmad_n<CARRY_IN>(odd, a + 1, bi, n - 2, carry_for_low);
+    odd[n - 2] = ptx::madc_lo_cc(a[n - 1], bi, ci);
+    odd[n - 1] = CARRY_OUT ? ptx::madc_hi_cc(a[n - 1], bi, di) : ptx::madc_hi(a[n - 1], bi, di);
+    // Capture the carry of the first chain before the second chain overwrites the flag.
+    uint32_t cr = CARRY_OUT ? ptx::addc(0, 0) : 0;
+    cmad_n(even, a, bi, n);
+    if (CARRY_OUT) {
+      odd[n - 1] = ptx::addc_cc(odd[n - 1], carry_for_high);
+      cr = ptx::addc(cr, 0);
+    } else
+      odd[n - 1] = ptx::addc(odd[n - 1], carry_for_high);
+    return cr;
+  }
+
+  /**
+   * Row accumulation for the MSB-only product (see `multiply_msb_raw_device`). Same idea as `mad_row`, built on
+   * `cmad_n_msb`; `EVEN_PHASE` selects which of the two accumulators starts one limb earlier, which shifts the
+   * input offsets and the indices of the two top limbs written to `odd` by one.
+   */
+  template <bool EVEN_PHASE>
+  static DEVICE_INLINE void mad_row_msb(uint32_t* odd, uint32_t* even, const uint32_t* a, uint32_t bi, size_t n = TLC)
+  {
+    cmad_n_msb<!EVEN_PHASE>(odd, EVEN_PHASE ? a : (a + 1), bi, n - 2);
+    // The top product `a[n - 1] * bi` initializes (rather than accumulates into) the two highest limbs of `odd`.
+    odd[EVEN_PHASE ? (n - 1) : (n - 2)] = ptx::madc_lo_cc(a[n - 1], bi, 0);
+    odd[EVEN_PHASE ? n : (n - 1)] = ptx::madc_hi(a[n - 1], bi, 0);
+    cmad_n_msb<EVEN_PHASE>(even, EVEN_PHASE ? (a + 1) : a, bi, n - 1);
+    // Fold the carry left by the `even` chain into the highest limb of `odd`.
+    odd[EVEN_PHASE ? n : (n - 1)] = ptx::addc(odd[EVEN_PHASE ? n : (n - 1)], 0);
+  }
+
+  /**
+   * Row accumulation for the LSB-only product: adds the `a * bi` row into `odd` (from `a + 1`, `n - 1` limbs, only
+   * when `n > 1`) and into `even` (from `a`, `n` limbs) via `cmad_n_lsb`. Does nothing when `bi` is zero.
+   */
+  static DEVICE_INLINE void mad_row_lsb(uint32_t* odd, uint32_t* even, const uint32_t* a, uint32_t bi, size_t n = TLC)
+  {
+    // `bi` is a constant at the call sites, so this zero test can be resolved at compile time (a zero limb does
+    // occur once in the bls12-381 scalar field modulus).
+    if (bi == 0) return;
+
+    if (n > 1) cmad_n_lsb(odd, a + 1, bi, n - 1);
+    cmad_n_lsb(even, a, bi, n);
+  }
+
+  /**
+   * Computes `acc[0 .. n - 1] = (even-indexed limbs of a) * bi + extra[0 .. n - 1]` as a single carry chain and
+   * returns the carry out of the top limb (read back with `ptx::addc(0, 0)`).
+   * NOTE(review): the loop pairs limbs, so `n` is presumably expected to be even - confirm with callers.
+   */
+  static DEVICE_INLINE uint32_t
+  mul_n_and_add(uint32_t* acc, const uint32_t* a, uint32_t bi, uint32_t* extra, size_t n = (TLC >> 1))
+  {
+    acc[0] = ptx::mad_lo_cc(a[0], bi, extra[0]);
+
+    UNROLL
+    for (size_t i = 1; i < n - 1; i += 2) {
+      acc[i] = ptx::madc_hi_cc(a[i - 1], bi, extra[i]);
+      acc[i + 1] = ptx::madc_lo_cc(a[i + 1], bi, extra[i + 1]);
+    }
+
+    acc[n - 1] = ptx::madc_hi_cc(a[n - 2], bi, extra[n - 1]);
+    // Materialize the final carry flag as a 0/1 value.
+    return ptx::addc(0, 0);
+  }
+
+  /**
+   * A function that computes wide product \f$ rs = as \cdot bs \f$ that's correct for the higher TLC + 1 limbs with a
+   * small maximum error.
+   *
+   * The way this function saves computations (as compared to regular school-book multiplication) is by not including
+   * terms that are too small. Namely, limb product \f$ a_i \cdot b_j \f$ is excluded if \f$ i + j < TLC - 2 \f$ and
+   * only the higher half is included if \f$ i + j = TLC - 2 \f$. All other limb products are included. So, the error
+   * i.e. difference between true product and the result of this function written to `rs` is exactly the sum of all
+   * dropped limbs products, which we can bound: \f$ a_0 \cdot b_0 + 2^{32}(a_0 \cdot b_1 + a_1 \cdot b_0) + \dots +
+   * 2^{32(TLC - 3)}(a_{TLC - 3} \cdot b_0 + \dots + a_0 \cdot b_{TLC - 3}) + 2^{32(TLC - 2)}(\floor{\frac{a_{TLC - 2}
+   * \cdot b_0}{2^{32}}} + \dots + \floor{\frac{a_0 \cdot b_{TLC - 2}}{2^{32}}}) \leq 2^{64} + 2\cdot 2^{96} + \dots +
+   * (TLC - 2) \cdot 2^{32(TLC - 1)} + (TLC - 1) \cdot 2^{32(TLC - 1)} \leq 2(TLC - 1) \cdot 2^{32(TLC - 1)}\f$.
+   */
+  static DEVICE_INLINE void multiply_msb_raw_device(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
+  {
+    if constexpr (TLC > 1) {
+      const uint32_t* a = as.limbs;
+      const uint32_t* b = bs.limbs;
+      // `even` accumulates directly in the output; `odd` is a scratch accumulator merged in at the end.
+      uint32_t* even = rs.limbs;
+      __align__(16) uint32_t odd[2 * TLC - 2];
+
+      // First row (`b[0]`): only the products that reach limb TLC - 1 and above are formed.
+      even[TLC - 1] = ptx::mul_hi(a[TLC - 2], b[0]);
+      odd[TLC - 2] = ptx::mul_lo(a[TLC - 1], b[0]);
+      odd[TLC - 1] = ptx::mul_hi(a[TLC - 1], b[0]);
+      size_t i;
+      // Each pair of rows uses two more limbs of `a` than the previous pair, alternating accumulator roles.
+      UNROLL
+      for (i = 2; i < TLC - 1; i += 2) {
+        mad_row_msb<true>(&even[TLC - 2], &odd[TLC - 2], &a[TLC - i - 1], b[i - 1], i + 1);
+        mad_row_msb<false>(&odd[TLC - 2], &even[TLC - 2], &a[TLC - i - 2], b[i], i + 2);
+      }
+      // Last row (`b[TLC - 1]`) uses every limb of `a`.
+      mad_row(&even[TLC], &odd[TLC - 2], a, b[TLC - 1]);
+
+      // merge |even| and |odd|
+      // The first addition's result is discarded; it is issued only for its carry.
+      ptx::add_cc(even[TLC - 1], odd[TLC - 2]);
+      for (i = TLC - 1; i < 2 * TLC - 2; i++)
+        even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
+      even[i + 1] = ptx::addc(even[i + 1], 0);
+    } else {
+      // Single-limb fields: nothing to truncate, so the full product is computed.
+      multiply_raw_device(as, bs, rs);
+    }
+  }
+
+  /**
+   * A function that computes the low half of the fused multiply-and-add \f$ rs = as \cdot bs + cs \f$ where
+   * \f$ bs = 2^{32 \cdot TLC} - p \f$ is the negated modulus (as returned by `get_neg_modulus`).
+   *
+   * For efficiency, this method does not include terms that are too large. Namely, limb product \f$ a_i \cdot b_j \f$
+   * is excluded if \f$ i + j > TLC - 1 \f$ and only the lower half is included if \f$ i + j = TLC - 1 \f$. All other
+   * limb products are included.
+   */
+  static DEVICE_INLINE void
+  multiply_and_add_lsb_neg_modulus_raw_device(const ff_storage& as, ff_storage& cs, ff_storage& rs)
+  {
+    // `bs` is the negated modulus; only the low TLC limbs of `as * bs + cs` are produced in `rs`.
+    ff_storage bs = get_neg_modulus();
+    const uint32_t* a = as.limbs;
+    const uint32_t* b = bs.limbs;
+    uint32_t* c = cs.limbs;
+    uint32_t* even = rs.limbs;
+
+    if constexpr (TLC > 2) {
+      // Scratch accumulator for the interleaved rows; merged into `even` at the end.
+      __align__(16) uint32_t odd[TLC - 1];
+      size_t i;
+      // `b[0]` is \f$ 2^{32} \f$ minus the lowest limb of the prime modulus. Because most scalar (and some base)
+      // primes are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
+      // less efficient SASS generated by nvcc, so this case needed separate handling.
+      if (b[0] == UINT32_MAX) {
+        // a * (2^32 - 1) = (a << 32) - a: subtract `a` from `c` and put `a` itself into the shifted accumulator.
+        add_sub_u32_device<TLC, true, false>(c, a, even);
+        for (i = 0; i < TLC - 1; i++)
+          odd[i] = a[i];
+      } else {
+        mul_n_and_add(even, a, b[0], c, TLC);
+        mul_n(odd, a + 1, b[0], TLC - 1);
+      }
+      mad_row_lsb(&even[2], &odd[0], a, b[1], TLC - 1);
+      // Each further row contributes one limb fewer, since limbs at index TLC and above are discarded.
+      UNROLL
+      for (i = 2; i < TLC - 1; i += 2) {
+        mad_row_lsb(&odd[i], &even[i], a, b[i], TLC - i);
+        mad_row_lsb(&even[i + 2], &odd[i], a, b[i + 1], TLC - i - 1);
+      }
+
+      // merge |even| and |odd|
+      even[1] = ptx::add_cc(even[1], odd[0]);
+      for (i = 1; i < TLC - 2; i++)
+        even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
+      even[i + 1] = ptx::addc(even[i + 1], odd[i]);
+    } else if (TLC == 2) {
+      // Limb 0: lo(a0 * b0) + c0, keeping the carry for limb 1.
+      even[0] = ptx::mad_lo_cc(a[0], b[0], c[0]);
+      // Limb 1: hi(a0 * b0) + c1 + carry. (Previously `c[0]` was added here and the carry was dropped, which did
+      // not match the host implementation `lower(as * neg_modulus) + cs`.)
+      even[1] = ptx::madc_hi(a[0], b[0], c[1]);
+      // Cross terms only contribute their low halves to limb 1; anything above is discarded.
+      even[1] = ptx::mad_lo(a[0], b[1], even[1]);
+      even[1] = ptx::mad_lo(a[1], b[0], even[1]);
+    } else if (TLC == 1) {
+      even[0] = ptx::mad_lo(a[0], b[0], c[0]);
+    }
+  }
+
+  /**
+   * This method multiplies `a` and `b` (both assumed to have TLC / 2 limbs) and adds `in1` and `in2` (TLC limbs each)
+   * to the result which is written to `even`.
+   *
+   * It is used to compute the "middle" part of Karatsuba: \f$ a_{lo} \cdot b_{hi} + b_{lo} \cdot a_{hi} =
+   * (a_{hi} - a_{lo})(b_{lo} - b_{hi}) + a_{lo} \cdot b_{lo} + a_{hi} \cdot b_{hi} \f$. Currently this method assumes
+   * that the top bit of \f$ a_{hi} \f$ and \f$ b_{hi} \f$ are unset. This ensures correctness by allowing to keep the
+   * result inside TLC limbs and ignore the carries from the highest limb.
+   */
+  static DEVICE_INLINE void
+  multiply_and_add_short_raw_device(const uint32_t* a, const uint32_t* b, uint32_t* even, uint32_t* in1, uint32_t* in2)
+  {
+    // Scratch accumulator for the rows that are offset by one limb relative to `even`.
+    __align__(16) uint32_t odd[TLC - 2];
+    // First row (`b[0]`) is fused with the low halves of `in1` (into `even`) and `in2` shifted by one limb (into
+    // `odd`); both carries are kept for later rows.
+    uint32_t first_row_carry = mul_n_and_add(even, a, b[0], in1);
+    uint32_t carry = mul_n_and_add(odd, a + 1, b[0], &in2[1]);
+
+    size_t i;
+    // Middle rows: each call also feeds in the next two limbs of `in1` / `in2` and threads the carry along.
+    UNROLL
+    for (i = 2; i < ((TLC >> 1) - 1); i += 2) {
+      carry = mad_row<true, false>(
+        &even[i], &odd[i - 2], a, b[i - 1], TLC >> 1, in1[(TLC >> 1) + i - 2], in1[(TLC >> 1) + i - 1], carry);
+      carry =
+        mad_row<true, false>(&odd[i], &even[i], a, b[i], TLC >> 1, in2[(TLC >> 1) + i - 1], in2[(TLC >> 1) + i], carry);
+    }
+    // Last row: no carry out is collected (see the function's doc comment about unset top bits); the carry of the
+    // very first row is injected here.
+    mad_row<false, true>(
+      &even[TLC >> 1], &odd[(TLC >> 1) - 2], a, b[(TLC >> 1) - 1], TLC >> 1, in1[TLC - 2], in1[TLC - 1], carry,
+      first_row_carry);
+    // merge |even| and |odd| plus the parts of `in2` we haven't added yet (first and last limbs)
+    even[0] = ptx::add_cc(even[0], in2[0]);
+    for (i = 0; i < (TLC - 2); i++)
+      even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
+    even[i + 1] = ptx::addc(even[i + 1], in2[i + 1]);
+  }
+
+  /**
+   * This method multiplies `a` and `b` and writes the result into `even`. It assumes that `a` and `b` are TLC/2 limbs
+   * long. The usual schoolbook algorithm is used.
+   */
+  static DEVICE_INLINE void multiply_short_raw_device(const uint32_t* a, const uint32_t* b, uint32_t* even)
+  {
+    // Scratch accumulator for the rows that are offset by one limb relative to `even`.
+    __align__(16) uint32_t odd[TLC - 2];
+    // Row `b[0]`: plain products initialize both accumulators.
+    mul_n(even, a, b[0], TLC >> 1);
+    mul_n(odd, a + 1, b[0], TLC >> 1);
+    mad_row(&even[2], &odd[0], a, b[1], TLC >> 1);
+
+    size_t i;
+    // Remaining rows, two at a time, with the accumulators swapping roles on every row.
+    UNROLL
+    for (i = 2; i < ((TLC >> 1) - 1); i += 2) {
+      mad_row(&odd[i], &even[i], a, b[i], TLC >> 1);
+      mad_row(&even[i + 2], &odd[i], a, b[i + 1], TLC >> 1);
+    }
+    // merge |even| and |odd|
+    even[1] = ptx::add_cc(even[1], odd[0]);
+    for (i = 1; i < TLC - 2; i++)
+      even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
+    even[i + 1] = ptx::addc(even[i + 1], 0);
+  }
+
+  /**
+   * This method multiplies `as` and `bs` and writes the (wide) result into `rs`.
+   *
+   * It is assumed that the highest bits of `as` and `bs` are unset which is true for all the numbers icicle had to deal
+   * with so far. This method implements [subtractive
+   * Karatsuba](https://en.wikipedia.org/wiki/Karatsuba_algorithm#Implementation).
+   */
+  static DEVICE_INLINE void multiply_raw_device(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
+  {
+    const uint32_t* a = as.limbs;
+    const uint32_t* b = bs.limbs;
+    uint32_t* r = rs.limbs;
+    if constexpr (TLC > 2) {
+      // Next two lines multiply high and low halves of operands (\f$ a_{lo} \cdot b_{lo}; a_{hi} \cdot b_{hi} \f$)
+      // and write the results into `r`.
+      multiply_short_raw_device(a, b, r);
+      multiply_short_raw_device(&a[TLC >> 1], &b[TLC >> 1], &r[TLC]);
+      __align__(16) uint32_t middle_part[TLC];
+      __align__(16) uint32_t diffs[TLC];
+      // Differences of halves \f$ a_{hi} - a_{lo}; b_{lo} - b_{hi} \f$ are written into `diffs`, signs written to
+      // `carry1` and `carry2`.
+      uint32_t carry1 = add_sub_u32_device<(TLC >> 1), true, true>(&a[TLC >> 1], a, diffs);
+      uint32_t carry2 = add_sub_u32_device<(TLC >> 1), true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
+      // Compute the "middle part" of Karatsuba: \f$ a_{lo} \cdot b_{hi} + b_{lo} \cdot a_{hi} \f$.
+      // This is where the assumption about unset high bit of `a` and `b` is relevant.
+      multiply_and_add_short_raw_device(diffs, &diffs[TLC >> 1], middle_part, r, &r[TLC]);
+      // Corrections that need to be performed when differences are negative.
+      // Again, carry doesn't need to be propagated due to unset high bits of `a` and `b`.
+      if (carry1)
+        add_sub_u32_device<(TLC >> 1), true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
+      if (carry2) add_sub_u32_device<(TLC >> 1), true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
+      // Now that middle part is fully correct, it can be added to the result.
+      add_sub_u32_device<TLC, false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1]);
+
+      // Carry from adding middle part has to be propagated to the highest limb.
+      // NOTE(review): this relies on the carry flag left by the addition above still being live - keep these
+      // statements adjacent.
+      for (size_t i = TLC + (TLC >> 1); i < 2 * TLC; i++)
+        r[i] = ptx::addc_cc(r[i], 0);
+    } else if (TLC == 2) {
+      // Two-limb schoolbook product: diagonal terms go straight into `r`, cross terms are summed in `odd`.
+      __align__(8) uint32_t odd[2];
+      r[0] = ptx::mul_lo(a[0], b[0]);
+      r[1] = ptx::mul_hi(a[0], b[0]);
+      r[2] = ptx::mul_lo(a[1], b[1]);
+      r[3] = ptx::mul_hi(a[1], b[1]);
+      odd[0] = ptx::mul_lo(a[0], b[1]);
+      odd[1] = ptx::mul_hi(a[0], b[1]);
+      odd[0] = ptx::mad_lo(a[1], b[0], odd[0]);
+      odd[1] = ptx::mad_hi(a[1], b[0], odd[1]);
+      r[1] = ptx::add_cc(r[1], odd[0]);
+      r[2] = ptx::addc_cc(r[2], odd[1]);
+      r[3] = ptx::addc(r[3], 0);
+    } else if (TLC == 1) {
+      // Single limb: the product is just the low and high halves of one 32x32 multiplication.
+      r[0] = ptx::mul_lo(a[0], b[0]);
+      r[1] = ptx::mul_hi(a[0], b[0]);
+    }
+  }
+
+  /**
+   * Full (wide) product `rs = as * bs`: runs `multiply_raw_device` when compiled for the GPU and
+   * `host_math::multiply_raw` otherwise.
+   */
+  static HOST_DEVICE_INLINE void multiply_raw(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
+  {
+#ifdef __CUDA_ARCH__
+    multiply_raw_device(as, bs, rs);
+#else
+    host_math::template multiply_raw<TLC>(as, bs, rs);
+#endif
+  }
+
+  /**
+   * Low half of `as * neg_modulus + cs`, written to `rs`. On the GPU this is the fused device routine; on the host
+   * the full product is formed first and its lower half is then added to `cs`.
+   */
+  static HOST_DEVICE_INLINE void
+  multiply_and_add_lsb_neg_modulus_raw(const ff_storage& as, ff_storage& cs, ff_storage& rs)
+  {
+#ifdef __CUDA_ARCH__
+    multiply_and_add_lsb_neg_modulus_raw_device(as, cs, rs);
+#else
+    Wide product = {};
+    host_math::template multiply_raw<TLC>(as, get_neg_modulus(), product.limbs_storage);
+    Field product_lo = Wide::get_lower(product);
+    add_limbs<TLC, false>(cs, product_lo.limbs_storage, rs);
+#endif
+  }
+
+  /**
+   * Product `rs = as * bs` of which only the high limbs are needed: the GPU build uses the truncated
+   * `multiply_msb_raw_device`, the host build simply computes the full product with `host_math::multiply_raw`.
+   */
+  static HOST_DEVICE_INLINE void multiply_msb_raw(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
+  {
+#ifdef __CUDA_ARCH__
+    multiply_msb_raw_device(as, bs, rs);
+#else
+    host_math::template multiply_raw<TLC>(as, bs, rs);
+#endif
+  }
+
+public:
+  ff_storage limbs_storage;
+
+  /** Returns a mutable pointer to the first limb of this element's storage. */
+  HOST_DEVICE_INLINE uint32_t* export_limbs()
+  {
+    uint32_t* first_limb = (uint32_t*)limbs_storage.limbs;
+    return first_limb;
+  }
+
+  /**
+   * Extracts digit number `digit_num` when the element is viewed as a sequence of `digit_width`-bit digits, least
+   * significant digit first.
+   *
+   * The digit may straddle two limbs; bits that would come from beyond the last limb are treated as zero. The
+   * starting limb index itself is not range-checked, so callers must keep `digit_num * digit_width` inside the
+   * element.
+   *
+   * @param digit_num   zero-based index of the digit.
+   * @param digit_width width of each digit in bits; values of 32 and above return the unmasked 32-bit window.
+   * @return the digit value.
+   */
+  HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) const
+  {
+    const uint32_t bit_offset = digit_num * digit_width;
+    const uint32_t limb_lsb_idx = bit_offset / 32;
+    const uint32_t shift_bits = bit_offset % 32;
+    unsigned rv = limbs_storage.limbs[limb_lsb_idx] >> shift_bits;
+    // Pull in the upper part from the next limb when the digit crosses a limb boundary. `shift_bits != 0` keeps the
+    // shift count below 32 (a shift by 32 is undefined).
+    if ((shift_bits != 0) && (shift_bits + digit_width > 32) && (limb_lsb_idx + 1 < TLC)) {
+      rv |= limbs_storage.limbs[limb_lsb_idx + 1] << (32 - shift_bits);
+    }
+    // Unsigned literal and width guard: `1 << digit_width` on a signed int is undefined for widths of 31 and above.
+    if (digit_width < 32) rv &= ((1u << digit_width) - 1);
+    return rv;
+  }
+
+  /**
+   * Host-only: returns a pseudo-random field element in the range `[0, modulus - 1]`.
+   *
+   * Every limb is drawn from a `std::mt19937_64` generator seeded from `std::random_device`, and the modulus is then
+   * subtracted until the value is strictly below it. The result is therefore not perfectly uniform modulo `p`.
+   */
+  static HOST_INLINE Field rand_host()
+  {
+    std::random_device rd;
+    std::mt19937_64 generator(rd());
+    std::uniform_int_distribution<unsigned> distribution;
+    Field value{};
+    for (unsigned i = 0; i < TLC; i++)
+      value.limbs_storage.limbs[i] = distribution(generator);
+    const Field modulus = Field{get_modulus()};
+    // Reduce while value >= modulus. The previous `modulus < value` test let `value == modulus` escape unreduced.
+    while (!lt(value, modulus))
+      value = value - modulus;
+    return value;
+  }
+
+  /** Fills `out[0 .. size - 1]` with independent `rand_host()` values; does nothing when `size <= 0`. */
+  static void rand_host_many(Field* out, int size)
+  {
+    int idx = 0;
+    while (idx < size) {
+      out[idx] = rand_host();
+      ++idx;
+    }
+  }
+
+  /**
+   * Conditional subtraction of `get_modulus<REDUCTION_SIZE>()`: returns `xs - modulus` when that subtraction does
+   * not borrow and `xs` unchanged otherwise. `REDUCTION_SIZE == 0` is a no-op.
+   */
+  template <unsigned REDUCTION_SIZE = 1>
+  static constexpr HOST_DEVICE_INLINE Field sub_modulus(const Field& xs)
+  {
+    if (REDUCTION_SIZE == 0) return xs;
+    Field reduced = {};
+    const ff_storage modulus = get_modulus<REDUCTION_SIZE>();
+    const uint32_t borrow = sub_limbs<TLC, true>(xs.limbs_storage, modulus, reduced.limbs_storage);
+    // A borrow means xs was already below the modulus.
+    if (borrow) return xs;
+    return reduced;
+  }
+
+  /** Streams the element as `0x` followed by all limbs in hex, most significant limb first, 8 digits per limb. */
+  friend std::ostream& operator<<(std::ostream& os, const Field& xs)
+  {
+    std::stringstream digits;
+    digits << std::hex << std::setfill('0');
+
+    // Walk from the top limb down to limb 0.
+    for (unsigned limb = TLC; limb > 0; limb--)
+      digits << std::setw(8) << xs.limbs_storage.limbs[limb - 1];
+
+    return os << "0x" << digits.str();
+  }
+
+  /** Modular addition: limb-wise sum followed by one conditional subtraction of the modulus. */
+  friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys)
+  {
+    Field sum = {};
+    add_limbs<TLC, false>(xs.limbs_storage, ys.limbs_storage, sum.limbs_storage);
+    Field reduced = sub_modulus<1>(sum);
+    return reduced;
+  }
+
+  /** Modular subtraction: limb-wise difference, with the modulus added back when the subtraction borrowed. */
+  friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys)
+  {
+    Field diff = {};
+    const uint32_t borrow = sub_limbs<TLC, true>(xs.limbs_storage, ys.limbs_storage, diff.limbs_storage);
+    if (borrow != 0) {
+      const ff_storage modulus = get_modulus<1>();
+      add_limbs<TLC, false>(diff.limbs_storage, modulus, diff.limbs_storage);
+    }
+    return diff;
+  }
+
+  /** Unreduced product of two field elements as a `Wide` value. `MODULUS_MULTIPLE` is not used by the body. */
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE Wide mul_wide(const Field& xs, const Field& ys)
+  {
+    Wide product = {};
+    multiply_raw(xs.limbs_storage, ys.limbs_storage, product.limbs_storage);
+    return product;
+  }
+
+  /**
+   * This method reduces a Wide number `xs` modulo `p` and returns the result as a Field element.
+   *
+   * It is assumed that the high `2 * slack_bits` bits of `xs` are unset which is always the case for the product of 2
+   * numbers with their high `slack_bits` unset. Larger Wide numbers should be reduced by subtracting an appropriate
+   * factor of `modulus_squared` first.
+   *
+   * This function implements ["multi-precision Barrett"](https://github.com/ingonyama-zk/modular_multiplication). As
+   * opposed to Montgomery reduction, it doesn't require numbers to have a special representation but lets us work with
+   * them as-is. The general idea of Barrett reduction is to estimate the quotient \f$ l \approx \floor{\frac{xs}{p}}
+   * \f$ and return \f$ xs - l \cdot p \f$. But \f$ l \f$ is inevitably computed with an error (it's always less than
+   * or equal to the real quotient), so the modulus `p` might need to be subtracted several times before the result is
+   * in the desired range \f$ [0;p-1] \f$. The estimate of the error is as follows: \f[ \frac{xs}{p} - l = \frac{xs}{p}
+   * - \frac{xs \cdot m}{2^{2n}} + \frac{xs \cdot m}{2^{2n}} - \floor{\frac{xs}{2^k}}\frac{m}{2^{2n-k}}
+   *  + \floor{\frac{xs}{2^k}}\frac{m}{2^{2n-k}} - l \leq p^2(\frac{1}{p}-\frac{m}{2^{2n}}) + \frac{m}{2^{2n-k}} + 2(TLC
+   * - 1) \cdot 2^{-32} \f] Here \f$ l \f$ is the result of [multiply_msb_raw](@ref multiply_msb_raw) function and the
+   * last term in the error is due to its approximation. \f$ n \f$ is the number of bits in \f$ p \f$ and \f$ k = 2n -
+   * 32\cdot TLC \f$. Overall, the error is always less than 2 so at most 2 reductions are needed. However, in most
+   * cases it's less than 1, so setting the [num_of_reductions](@ref num_of_reductions) variable for a field equal to 1
+   * will cause only 1 reduction to be performed.
+   */
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE Field reduce(const Wide& xs)
+  {
+    // Higher half of `xs` after it has been left-shifted by `2 * slack_bits`.
+    Field high_part = Wide::get_higher_with_slack(xs);
+    // Quotient estimate: MSB multiplication by `m`, of which only the upper half is kept.
+    Wide quotient_wide = {};
+    multiply_msb_raw(high_part.limbs_storage, get_m(), quotient_wide.limbs_storage);
+    Field quotient = Wide::get_higher(quotient_wide);
+    // The low limbs of `xs - l \cdot p` are computed as `xs + l \cdot (2^{32 \cdot TLC} - p)` so the fused
+    // multiply-and-add can be used; the two only differ in higher limbs, which are not needed.
+    Field low_part = Wide::get_lower(xs);
+    Field result = {};
+    multiply_and_add_lsb_neg_modulus_raw(quotient.limbs_storage, low_part.limbs_storage, result.limbs_storage);
+
+    // Final correction: depending on the field, up to two conditional subtractions (2p first, then p).
+    ff_storage candidate = {};
+    if (num_of_reductions() == 2) {
+      const uint32_t borrow2 = sub_limbs<TLC, true>(result.limbs_storage, get_modulus<2>(), candidate);
+      if (borrow2 == 0) result = Field{candidate};
+    }
+    const uint32_t borrow1 = sub_limbs<TLC, true>(result.limbs_storage, get_modulus<1>(), candidate);
+    if (borrow1 == 0) result = Field{candidate};
+
+    return result;
+  }
+
+  /** Copy assignment: copies all `TLC` limbs from `other` and returns `*this`. */
+  HOST_DEVICE_INLINE Field& operator=(Field const& other)
+  {
+    for (unsigned limb = 0; limb < TLC; limb++)
+      limbs_storage.limbs[limb] = other.limbs_storage.limbs[limb];
+    return *this;
+  }
+
+  /** Modular multiplication: full wide product followed by reduction mod p. */
+  friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys)
+  {
+    return reduce(mul_wide(xs, ys));
+  }
+
+  /**
+   * Limb-wise equality. The device build ORs together the XOR of every limb pair and tests the result once; the
+   * host build stops at the first differing limb.
+   */
+  friend HOST_DEVICE_INLINE bool operator==(const Field& xs, const Field& ys)
+  {
+#ifdef __CUDA_ARCH__
+    const uint32_t* lhs = xs.limbs_storage.limbs;
+    const uint32_t* rhs = ys.limbs_storage.limbs;
+    uint32_t diff_bits = 0;
+    UNROLL
+    for (unsigned i = 0; i < TLC; i++)
+      diff_bits |= lhs[i] ^ rhs[i];
+    return diff_bits == 0;
+#else
+    unsigned i = 0;
+    while (i < TLC && xs.limbs_storage.limbs[i] == ys.limbs_storage.limbs[i])
+      i++;
+    // All limbs matched only if the scan ran to the end.
+    return i == TLC;
+#endif
+  }
+
+  /** Negation of `operator==`. */
+  friend HOST_DEVICE_INLINE bool operator!=(const Field& xs, const Field& ys)
+  {
+    const bool same = (xs == ys);
+    return !same;
+  }
+
+  /**
+   * Multiplies `xs` by the compile-time constant `multiplier`.
+   *
+   * When every limb of the multiplier above limb 0 is zero, the cheaper shift-and-add `mul_unsigned` is used with
+   * limb 0 as the 32-bit factor; otherwise a regular field multiplication is performed.
+   */
+  template <const Field& multiplier>
+  static HOST_DEVICE_INLINE Field mul_const(const Field& xs)
+  {
+    Field mul = multiplier;
+    // Plain local: a function-static flag would be state shared (and written) by every call and thread.
+    bool is_u32 = true;
+#ifdef __CUDA_ARCH__
+    UNROLL
+#endif
+    for (unsigned i = 1; i < TLC; i++)
+      is_u32 &= (mul.limbs_storage.limbs[i] == 0);
+
+    if (is_u32) return mul_unsigned<multiplier.limbs_storage.limbs[0], Field>(xs);
+    return mul * xs;
+  }
+
+  /**
+   * Multiplies `xs` by the compile-time 32-bit constant `multiplier` using double-and-add: `temp` runs through
+   * `xs, 2xs, 4xs, ...` and is added into the result for every set bit of `multiplier`.
+   *
+   * Only `operator+` of `T` is used. A zero multiplier yields a value-initialized `T`. `REDUCTION_SIZE` is not used
+   * by the body.
+   */
+  template <uint32_t multiplier, class T, unsigned REDUCTION_SIZE = 1>
+  static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T& xs)
+  {
+    T rs = {};
+    T temp = xs;
+    bool is_zero = true;
+#ifdef __CUDA_ARCH__
+    UNROLL
+#endif
+    for (unsigned i = 0; i < 32; i++) {
+      if (multiplier & (1u << i)) {
+        rs = is_zero ? temp : (rs + temp);
+        is_zero = false;
+      }
+      // Stop as soon as no set bits remain above bit `i`. The two-step shift keeps every shift count below 32.
+      // (The previous mask `1 << (31 - i) - 1` parsed as `1 << (30 - i)`, so it only ever tested bit 31 and cut the
+      // loop short for multipliers >= 2^31.)
+      if (((multiplier >> i) >> 1) == 0) break;
+      temp = temp + temp;
+    }
+    return rs;
+  }
+
+  /** Unreduced square of `xs`; currently just `mul_wide(xs, xs)`. */
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const Field& xs)
+  {
+    // TODO: change to a more efficient squaring
+    Wide squared = mul_wide<MODULUS_MULTIPLE>(xs, xs);
+    return squared;
+  }
+
+  /** Modular square of `xs`; currently just `xs * xs`. `MODULUS_MULTIPLE` is not used by the body. */
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE Field sqr(const Field& xs)
+  {
+    // TODO: change to a more efficient squaring
+    Field squared = xs * xs;
+    return squared;
+  }
+
+  /** Multiplies `xs` by `CONFIG::montgomery_r`. */
+  static constexpr HOST_DEVICE_INLINE Field to_montgomery(const Field& xs)
+  {
+    const Field r = Field{CONFIG::montgomery_r};
+    return xs * r;
+  }
+
+  /** Multiplies `xs` by `CONFIG::montgomery_r_inv`. */
+  static constexpr HOST_DEVICE_INLINE Field from_montgomery(const Field& xs)
+  {
+    const Field r_inv = Field{CONFIG::montgomery_r_inv};
+    return xs * r_inv;
+  }
+
+  /**
+   * Additive inverse computed as `get_modulus<MODULUS_MULTIPLE>() - xs`, with no further reduction (so `neg(0)`
+   * yields the modulus multiple itself).
+   */
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE Field neg(const Field& xs)
+  {
+    Field negated = {};
+    const ff_storage modulus = get_modulus<MODULUS_MULTIPLE>();
+    sub_limbs<TLC, false>(modulus, xs.limbs_storage, negated.limbs_storage);
+    return negated;
+  }
+
+  // Halves `xs` by shifting the whole limb array right by one bit, then applies
+  // `sub_modulus<MODULUS_MULTIPLE>` to the result.
+  // Assumes the number is even! (The lowest bit is simply shifted out.)
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE Field div2(const Field& xs)
+  {
+    const uint32_t* x = xs.limbs_storage.limbs;
+    Field rs = {};
+    uint32_t* r = rs.limbs_storage.limbs;
+    if constexpr (TLC > 1) {
+#ifdef __CUDA_ARCH__
+      UNROLL
+#endif
+      for (unsigned i = 0; i < TLC - 1; i++) {
+        // Each output limb takes its top bit from the bottom bit of the next-higher input limb.
+#ifdef __CUDA_ARCH__
+        r[i] = __funnelshift_rc(x[i], x[i + 1], 1);
+#else
+        r[i] = (x[i] >> 1) | (x[i + 1] << 31);
+#endif
+      }
+    }
+    // The top limb has nothing above it to shift in.
+    r[TLC - 1] = x[TLC - 1] >> 1;
+    return sub_modulus<MODULUS_MULTIPLE>(rs);
+  }
+
+  /** Returns true when `xs < ys` as plain multi-limb integers, i.e. when `xs - ys` borrows. */
+  static constexpr HOST_DEVICE_INLINE bool lt(const Field& xs, const Field& ys)
+  {
+    // Only the borrow matters; the difference itself is thrown away.
+    ff_storage scratch = {};
+    return sub_limbs<TLC, true>(xs.limbs_storage, ys.limbs_storage, scratch) != 0;
+  }
+
+  /** True when the lowest bit of limb 0 is set. */
+  static constexpr HOST_DEVICE_INLINE bool is_odd(const Field& xs)
+  {
+    return (xs.limbs_storage.limbs[0] & 1) != 0;
+  }
+
+  /** True when the lowest bit of limb 0 is clear. */
+  static constexpr HOST_DEVICE_INLINE bool is_even(const Field& xs)
+  {
+    return (xs.limbs_storage.limbs[0] & 1) == 0;
+  }
+
+  /**
+   * Modular inverse of `xs`, computed with a halving-and-subtracting GCD-style loop on the pair (`u`, `v`) that
+   * starts at (`xs`, modulus), while `b` and `c` track the matching coefficients starting at (1, 0).
+   *
+   * @return zero when `xs` is zero; otherwise the coefficient belonging to whichever of `u` / `v` reached one.
+   */
+  static constexpr HOST_DEVICE_INLINE Field inverse(const Field& xs)
+  {
+    if (xs == zero()) return zero();
+    constexpr Field one = Field{CONFIG::one};
+    constexpr ff_storage modulus = CONFIG::modulus;
+    Field u = xs;
+    Field v = Field{modulus};
+    Field b = one;
+    Field c = {};
+    while (!(u == one) && !(v == one)) {
+      // Halve `u` while it is even; `b` is halved alongside, after adding the modulus when `b` is odd so that the
+      // halving is exact.
+      while (is_even(u)) {
+        u = div2(u);
+        if (is_odd(b)) add_limbs<TLC, false>(b.limbs_storage, modulus, b.limbs_storage);
+        b = div2(b);
+      }
+      // Same treatment for the pair (`v`, `c`).
+      while (is_even(v)) {
+        v = div2(v);
+        if (is_odd(c)) add_limbs<TLC, false>(c.limbs_storage, modulus, c.limbs_storage);
+        c = div2(c);
+      }
+      // Subtract the smaller of `u` / `v` from the larger, mirroring the step on the coefficients.
+      if (lt(v, u)) {
+        u = u - v;
+        b = b - c;
+      } else {
+        v = v - u;
+        c = c - b;
+      }
+    }
+    return (u == one) ? b : c;
+  }
+
+  /**
+   * Raises `base` to the power `exp` by square-and-multiply, consuming `exp` from its lowest bit upwards. Returns
+   * `one()` for any `exp <= 0`.
+   */
+  static constexpr HOST_DEVICE_INLINE Field pow(Field base, int exp)
+  {
+    Field acc = one();
+    for (; exp > 0; exp >>= 1) {
+      if (exp & 1) acc = acc * base;
+      base = base * base;
+    }
+    return acc;
+  }
+};
+
+/**
+ * `std::hash` specialization for field elements: folds the hash of every limb into a running seed using the
+ * boost-style `hash_combine` mixing step.
+ */
+template <class CONFIG>
+struct std::hash<Field<CONFIG>> {
+  std::size_t operator()(const Field<CONFIG>& key) const
+  {
+    std::size_t seed = 0;
+    // boost hashing, see
+    // https://stackoverflow.com/questions/35985960/c-why-is-boosthash-combine-the-best-way-to-combine-hash-values/35991300#35991300
+    for (int limb = 0; limb < CONFIG::limbs_count; limb++) {
+      const std::size_t limb_hash = std::hash<uint32_t>()(key.limbs_storage.limbs[limb]);
+      seed ^= limb_hash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+    }
+    return seed;
+  }
+};
+
+/**
+ * `SharedMemory` specialization for field elements: gives device code a typed pointer to the `extern __shared__`
+ * array `s_scalar_`.
+ * NOTE(review): the array is declared unsized, so its size is presumably set by the kernel launch - confirm with
+ * callers.
+ */
+template <class CONFIG>
+struct SharedMemory<Field<CONFIG>> {
+  // Returns the base address of the shared array declared below.
+  __device__ Field<CONFIG>* getPointer()
+  {
+    extern __shared__ Field<CONFIG> s_scalar_[];
+    return s_scalar_;
+  }
+};
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/field_config.cuh b/sumcheck/cuda/icicle/fields/field_config.cuh
new file mode 100644
index 00000000..d9ec18c0
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/field_config.cuh
@@ -0,0 +1,41 @@
+#pragma once
+#ifndef FIELD_CONFIG_H
+#define FIELD_CONFIG_H
+
+#include "fields/id.h"
+#include "fields/field.cuh"
+
+/**
+ * @namespace field_config
+ * Namespace with type definitions for finite fields. Here, concrete types are created in accordance
+ * with the `-DFIELD` env variable passed during build.
+ */
+#if FIELD_ID == BN254
+#include "fields/snark_fields/bn254_scalar.cuh"
+namespace field_config = bn254;
+#elif FIELD_ID == BLS12_381
+#include "fields/snark_fields/bls12_381_scalar.cuh"
+using bls12_381::fp_config;
+namespace field_config = bls12_381;
+#elif FIELD_ID == BLS12_377
+#include "fields/snark_fields/bls12_377_scalar.cuh"
+namespace field_config = bls12_377;
+#elif FIELD_ID == BW6_761
+#include "fields/snark_fields/bw6_761_scalar.cuh"
+namespace field_config = bw6_761;
+#elif FIELD_ID == GRUMPKIN
+#include "fields/snark_fields/grumpkin_scalar.cuh"
+namespace field_config = grumpkin;
+
+#elif FIELD_ID == BABY_BEAR
+#include "fields/stark_fields/babybear.cuh"
+namespace field_config = babybear;
+#elif FIELD_ID == STARK_252
+#include "fields/stark_fields/stark252.cuh"
+namespace field_config = stark252;
+#elif FIELD_ID == M31
+#include "fields/stark_fields/m31.cuh"
+namespace field_config = m31;
+#endif
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/host_math.cuh b/sumcheck/cuda/icicle/fields/host_math.cuh
new file mode 100644
index 00000000..0cdde0ca
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/host_math.cuh
@@ -0,0 +1,188 @@
+#pragma once
+#ifndef HOST_MATH_H
+#define HOST_MATH_H
+
+#include <cstdint>
+#include <cuda_runtime.h>
+#include "gpu-utils/modifiers.cuh"
+#include "storage.cuh"
+
+namespace host_math {
+
+  // 32-bit addition x + y; the result wraps modulo 2^32 and no carry is reported
+  static constexpr __host__ uint32_t add(const uint32_t x, const uint32_t y)
+  {
+    const uint32_t sum = x + y;
+    return sum;
+  }
+
+  // 32-bit addition x + y + carry; the result wraps modulo 2^32 and no carry is reported
+  static constexpr __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry)
+  {
+    const uint32_t sum = x + y + carry;
+    return sum;
+  }
+
+  // 32-bit addition x + y; `carry` is overwritten with 1 if the sum wrapped and 0 otherwise
+  static constexpr __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
+  {
+    const uint32_t sum = x + y;
+    // The sum wrapped exactly when it ended up below an operand.
+    carry = sum < x;
+    return sum;
+  }
+
+  // 32-bit addition x + y + carry; `carry` is read as the carry in and overwritten with the carry out
+  static constexpr __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
+  {
+    const bool carry_in = (carry != 0);
+    const uint32_t result = x + y + carry;
+    // With a carry in, a result equal to x also means the addition wrapped (y + carry == 2^32).
+    carry = carry_in ? (x >= result) : (x > result);
+    return result;
+  }
+
+  // 32-bit subtraction x - y; the result wraps modulo 2^32 and no borrow is reported
+  static constexpr __host__ uint32_t sub(const uint32_t x, const uint32_t y)
+  {
+    const uint32_t diff = x - y;
+    return diff;
+  }
+
+  // 32-bit subtraction x - y - borrow; the result wraps modulo 2^32 and no borrow is reported
+  static constexpr __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow)
+  {
+    const uint32_t diff = x - y - borrow;
+    return diff;
+  }
+
+  // 32-bit subtraction x - y; `borrow` is overwritten with 1 if the difference wrapped and 0 otherwise
+  static constexpr __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
+  {
+    const uint32_t diff = x - y;
+    // The difference wrapped exactly when it ended up above the minuend.
+    borrow = diff > x;
+    return diff;
+  }
+
+  // 32-bit subtraction x - y - borrow; `borrow` is read as the borrow in and overwritten with the borrow out
+  static constexpr __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
+  {
+    const bool borrow_in = (borrow != 0);
+    const uint32_t result = x - y - borrow;
+    // With a borrow in, a result equal to x also means the subtraction wrapped (y + borrow == 2^32).
+    borrow = borrow_in ? (x <= result) : (x < result);
+    return result;
+  }
+
+  // 32x32 multiply-add x * y + z + carry evaluated in 64 bits: returns the low word and overwrites `carry` with
+  // the high word
+  static constexpr __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t& carry)
+  {
+    const uint64_t wide = static_cast<uint64_t>(x) * y + z + carry;
+    carry = static_cast<uint32_t>(wide >> 32);
+    return static_cast<uint32_t>(wide);
+  }
+
+  /**
+   * Host-side helper that picks the right add/sub primitive for each step of a multi-limb operation.
+   *
+   * It counts the calls made so far (`index`) and chooses between the plain, carry-out, carry-in/out and carry-in
+   * variants depending on whether the call is the first or the last of `OPS_COUNT`, and on whether the chain has a
+   * carry in (`CARRY_IN`) or must produce a carry out (`CARRY_OUT`).
+   */
+  template <unsigned OPS_COUNT = UINT32_MAX, bool CARRY_IN = false, bool CARRY_OUT = false>
+  struct carry_chain {
+    unsigned index;
+
+    constexpr HOST_INLINE carry_chain() : index(0) {}
+
+    // One addition step of the chain; `carry` is read and/or updated according to the position in the chain.
+    constexpr HOST_INLINE uint32_t add(const uint32_t x, const uint32_t y, uint32_t& carry)
+    {
+      ++index;
+      const bool starts_chain = (index == 1) && !CARRY_IN;
+      if (starts_chain) {
+        // A one-step chain that reports no carry needs no carry handling at all.
+        if (OPS_COUNT == 1 && !CARRY_OUT) return host_math::add(x, y);
+        return host_math::add_cc(x, y, carry);
+      }
+      if (index < OPS_COUNT || CARRY_OUT) return host_math::addc_cc(x, y, carry);
+      return host_math::addc(x, y, carry);
+    }
+
+    // One subtraction step of the chain; mirrors `add` with borrows.
+    constexpr HOST_INLINE uint32_t sub(const uint32_t x, const uint32_t y, uint32_t& carry)
+    {
+      ++index;
+      const bool starts_chain = (index == 1) && !CARRY_IN;
+      if (starts_chain) {
+        if (OPS_COUNT == 1 && !CARRY_OUT) return host_math::sub(x, y);
+        return host_math::sub_cc(x, y, carry);
+      }
+      if (index < OPS_COUNT || CARRY_OUT) return host_math::subc_cc(x, y, carry);
+      return host_math::subc(x, y, carry);
+    }
+  };
+
+  /**
+   * Schoolbook product of an `NLIMBS_A`-limb and an `NLIMBS_B`-limb number, accumulated row by row into `rs`.
+   * Each row reads the current contents of `rs`, so the caller is expected to pass a zero-initialized `rs`.
+   */
+  template <unsigned NLIMBS_A, unsigned NLIMBS_B = NLIMBS_A>
+  static constexpr HOST_INLINE void
+  multiply_raw(const storage<NLIMBS_A>& as, const storage<NLIMBS_B>& bs, storage<NLIMBS_A + NLIMBS_B>& rs)
+  {
+    const uint32_t* a = as.limbs;
+    const uint32_t* b = bs.limbs;
+    for (unsigned row = 0; row < NLIMBS_B; row++) {
+      // Row `row` lands `row` limbs up in the result.
+      uint32_t* out = rs.limbs + row;
+      uint32_t carry = 0;
+      for (unsigned col = 0; col < NLIMBS_A; col++)
+        out[col] = host_math::madc_cc(a[col], b[row], out[col], carry);
+      // The limb just above the row has not been touched by earlier rows, so the carry is stored, not added.
+      out[NLIMBS_A] = carry;
+    }
+  }
+
+  /**
+   * Limb-wise `rs = xs + ys` or, when `SUBTRACT` is set, `rs = xs - ys`, with the carry/borrow propagated through
+   * a `carry_chain`.
+   *
+   * @return the final carry/borrow when `CARRY_OUT` is set, 0 otherwise.
+   */
+  template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
+  static constexpr HOST_INLINE uint32_t
+  add_sub_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
+  {
+    uint32_t carry = 0;
+    carry_chain<NLIMBS, false, CARRY_OUT> chain;
+    for (unsigned limb = 0; limb < NLIMBS; limb++) {
+      if (SUBTRACT)
+        rs.limbs[limb] = chain.sub(xs.limbs[limb], ys.limbs[limb], carry);
+      else
+        rs.limbs[limb] = chain.add(xs.limbs[limb], ys.limbs[limb], carry);
+    }
+    if (!CARRY_OUT) return 0;
+    return carry;
+  }
+
+  /**
+   * Returns `xs` shifted left by the compile-time amount `BITS`; bits shifted past limb `NLIMBS - 1` are dropped
+   * and a shift of `32 * NLIMBS` bits or more yields zero.
+   */
+  template <unsigned NLIMBS, unsigned BITS>
+  static constexpr HOST_INLINE storage<NLIMBS> left_shift(const storage<NLIMBS>& xs)
+  {
+    if constexpr (BITS == 0)
+      return xs;
+    else {
+      constexpr unsigned BITS32 = BITS % 32;
+      constexpr unsigned LIMBS_GAP = BITS / 32;
+      storage<NLIMBS> out{};
+      if constexpr (LIMBS_GAP < NLIMBS) {
+        out.limbs[LIMBS_GAP] = xs.limbs[0] << BITS32;
+        for (unsigned i = 1; i < NLIMBS - LIMBS_GAP; i++) {
+          // A whole-limb shift must not touch the neighbouring limb: `>> (32 - 0)` would be a shift by the full
+          // width of the type, which is undefined.
+          if constexpr (BITS32 == 0)
+            out.limbs[i + LIMBS_GAP] = xs.limbs[i];
+          else
+            out.limbs[i + LIMBS_GAP] = (xs.limbs[i] << BITS32) + (xs.limbs[i - 1] >> (32 - BITS32));
+        }
+      }
+      return out;
+    }
+  }
+
+  /**
+   * Returns `xs` shifted right by the compile-time amount `BITS`; vacated high bits are zero and a shift of
+   * `32 * NLIMBS` bits or more yields zero.
+   */
+  template <unsigned NLIMBS, unsigned BITS>
+  static constexpr HOST_INLINE storage<NLIMBS> right_shift(const storage<NLIMBS>& xs)
+  {
+    if constexpr (BITS == 0)
+      return xs;
+    else {
+      constexpr unsigned BITS32 = BITS % 32;
+      constexpr unsigned LIMBS_GAP = BITS / 32;
+      storage<NLIMBS> out{};
+      if constexpr (LIMBS_GAP < NLIMBS - 1) {
+        for (unsigned i = 0; i < NLIMBS - LIMBS_GAP - 1; i++) {
+          // A whole-limb shift must not touch the neighbouring limb: `<< (32 - 0)` would be a shift by the full
+          // width of the type, which is undefined.
+          if constexpr (BITS32 == 0)
+            out.limbs[i] = xs.limbs[i + LIMBS_GAP];
+          else
+            out.limbs[i] = (xs.limbs[i + LIMBS_GAP] >> BITS32) + (xs.limbs[i + LIMBS_GAP + 1] << (32 - BITS32));
+        }
+      }
+      // The top surviving limb has nothing above it to shift in.
+      if constexpr (LIMBS_GAP < NLIMBS) out.limbs[NLIMBS - LIMBS_GAP - 1] = (xs.limbs[NLIMBS - 1] >> BITS32);
+      return out;
+    }
+  }
+
+  /**
+   * Bit-by-bit long division of `num` by `denom`: the numerator is fed into the running remainder `r` one bit at a
+   * time, most significant bit first, and `denom` is subtracted whenever it fits.
+   *
+   * `q` and `r` are updated in place (`|=` / shift), so the caller is expected to pass them zero-initialized.
+   * Quotient bits are only produced for limbs below `NLIMBS_Q`; for higher numerator limbs no subtraction is
+   * performed, i.e. the quotient is presumably assumed to fit into `NLIMBS_Q` limbs.
+   */
+  template <unsigned NLIMBS_NUM, unsigned NLIMBS_DENOM, unsigned NLIMBS_Q = (NLIMBS_NUM - NLIMBS_DENOM)>
+  static constexpr HOST_INLINE void integer_division(
+    const storage<NLIMBS_NUM>& num, const storage<NLIMBS_DENOM>& denom, storage<NLIMBS_Q>& q, storage<NLIMBS_DENOM>& r)
+  {
+    storage<NLIMBS_DENOM> temp = {};
+    for (int limb_idx = NLIMBS_NUM - 1; limb_idx >= 0; limb_idx--) {
+      for (int bit_idx = 31; bit_idx >= 0; bit_idx--) {
+        // Bring down the next numerator bit.
+        r = left_shift<NLIMBS_DENOM, 1>(r);
+        r.limbs[0] |= ((num.limbs[limb_idx] >> bit_idx) & 1);
+        // Trial subtraction; `c` is the borrow, so `!c` means `denom` fits into the remainder.
+        uint32_t c = add_sub_limbs<NLIMBS_DENOM, true, true>(r, denom, temp);
+        // Logical AND (the operands are conditions, not bit masks) and an unsigned shift for the quotient bit.
+        if ((limb_idx < NLIMBS_Q) && !c) {
+          r = temp;
+          q.limbs[limb_idx] |= 1u << bit_idx;
+        }
+      }
+    }
+  }
+} // namespace host_math
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/id.h b/sumcheck/cuda/icicle/fields/id.h
new file mode 100644
index 00000000..4c017c4d
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/id.h
@@ -0,0 +1,15 @@
+#pragma once
+#ifndef FIELD_ID_H
+#define FIELD_ID_H
+
+#define BN254     1
+#define BLS12_381 2
+#define BLS12_377 3
+#define BW6_761   4
+#define GRUMPKIN  5
+
+#define BABY_BEAR 1001
+#define STARK_252 1002
+#define M31       1003
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/params_gen.cuh b/sumcheck/cuda/icicle/fields/params_gen.cuh
new file mode 100644
index 00000000..6b21c5e0
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/params_gen.cuh
@@ -0,0 +1,129 @@
+#pragma once
+#ifndef PARAMS_GEN_H
+#define PARAMS_GEN_H
+
+#include "storage.cuh"
+#include "host_math.cuh"
+
+namespace params_gen {
+  template <unsigned NLIMBS, unsigned BIT_SHIFT> // squares xs into 2 * NLIMBS limbs, then shifts left by BIT_SHIFT
+  static constexpr HOST_INLINE storage<2 * NLIMBS> get_square(const storage<NLIMBS>& xs)
+  {
+    storage<2 * NLIMBS> rs = {};
+    host_math::template multiply_raw<NLIMBS>(xs, xs, rs); // rs receives xs * xs (per multiply_raw's name -- confirm)
+    return host_math::template left_shift<2 * NLIMBS, BIT_SHIFT>(rs);
+  }
+
+  template <unsigned NLIMBS> // returns the limb-wise result of xs and ys; add_sub_limbs' return value is ignored
+  static constexpr HOST_INLINE storage<NLIMBS>
+  get_difference_no_carry(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys)
+  {
+    storage<NLIMBS> rs = {};
+    host_math::template add_sub_limbs<NLIMBS, true, false>(xs, ys, rs); // presumably <.., SUBTRACT, CARRY_OUT> -- confirm
+    return rs;
+  }
+
+  template <unsigned NLIMBS, unsigned EXP> // quotient of (1 << EXP) divided by modulus, kept in NLIMBS limbs
+  static constexpr HOST_INLINE storage<NLIMBS> get_m(const storage<NLIMBS>& modulus)
+  {
+    storage<NLIMBS> rs = {}; // remainder: filled in by integer_division but not returned
+    storage<NLIMBS> qs = {}; // quotient: must start at zero because integer_division only ORs bits in
+    storage<2 * NLIMBS> wide_one = {1};
+    storage<2 * NLIMBS> pow_of_2 = host_math::template left_shift<2 * NLIMBS, EXP>(wide_one); // 1 << EXP
+    host_math::template integer_division<2 * NLIMBS, NLIMBS>(pow_of_2, modulus, qs, rs);
+    return qs;
+  }
+
+  template <unsigned NLIMBS, bool INV> // 1 doubled (or halved if INV) 32 * NLIMBS times, reduced by modulus
+  static constexpr HOST_INLINE storage<NLIMBS> get_montgomery_constant(const storage<NLIMBS>& modulus)
+  {
+    storage<NLIMBS> rs = {1};
+    for (unsigned i = 0; i < 32 * NLIMBS; i++) { // unsigned counter matches the unsigned bound
+      if constexpr (INV) { // halving step; an odd rs first has modulus combined in (presumably added -- confirm)
+        if (rs.limbs[0] & 1) host_math::template add_sub_limbs<NLIMBS, false, false>(rs, modulus, rs);
+        rs = host_math::template right_shift<NLIMBS, 1>(rs);
+      } else { // doubling step; temp replaces rs when add_sub_limbs returns 0 (presumably: no borrow)
+        rs = host_math::template left_shift<NLIMBS, 1>(rs);
+        storage<NLIMBS> temp = {};
+        rs = host_math::template add_sub_limbs<NLIMBS, true, true>(rs, modulus, temp) ? rs : temp;
+      }
+    }
+    return rs;
+  }
+
+  constexpr unsigned floorlog2(uint32_t x) { return x == 1 ? 0 : 1 + floorlog2(x >> 1); } // x must be nonzero
+
+  template <unsigned NLIMBS, unsigned NBITS> // NOTE: err divides by the top limb of (1 << NBITS); it must be nonzero
+  constexpr unsigned num_of_reductions(const storage<NLIMBS>& modulus, const storage<NLIMBS>& m)
+  {
+    storage<2 * NLIMBS> x1 = {};
+    storage<3 * NLIMBS> x2 = {};
+    storage<3 * NLIMBS> x3 = {};
+    host_math::template multiply_raw<NLIMBS>(modulus, m, x1); // x1 <- modulus * m (per multiply_raw's name)
+    host_math::template multiply_raw<NLIMBS, 2 * NLIMBS>(modulus, x1, x2); // x2 <- modulus * x1
+    storage<2 * NLIMBS> one = {1};
+    storage<2 * NLIMBS> pow_of_2 = host_math::template left_shift<2 * NLIMBS, NBITS>(one); // 1 << NBITS
+    host_math::template multiply_raw<NLIMBS, 2 * NLIMBS>(modulus, pow_of_2, x3); // x3 <- modulus * (1 << NBITS)
+    host_math::template add_sub_limbs<3 * NLIMBS, true, false>(x3, x2, x2); // presumably x2 <- x3 - x2 -- confirm
+    double err = (double)x2.limbs[2 * NLIMBS - 1] / pow_of_2.limbs[2 * NLIMBS - 1]; // ratio of limbs at 2 * NLIMBS - 1
+    err += (double)m.limbs[NLIMBS - 1] / 0xffffffff; // top limb of m relative to the largest 32-bit value
+    err += (double)NLIMBS / 0x80000000; // NLIMBS / 2^31
+    return unsigned(err) + 1; // truncated error estimate plus one
+  }
+
+  template <unsigned NLIMBS>
+  constexpr unsigned two_adicity(const storage<NLIMBS>& modulus)
+  {
+    // Returns 1 + the number of trailing zero bits of (modulus >> 1); for an odd modulus this is the exponent
+    // of the largest power of two dividing modulus - 1. Never terminates when (modulus >> 1) is zero.
+    storage<NLIMBS> rest = host_math::template right_shift<NLIMBS, 1>(modulus);
+    unsigned twos = 1;
+    for (; !(rest.limbs[0] & 1); twos++)
+      rest = host_math::template right_shift<NLIMBS, 1>(rest);
+    return twos;
+  }
+
+  template <unsigned NLIMBS, unsigned TWO_ADICITY> // table of TWO_ADICITY successive halvings, starting from 1
+  constexpr storage_array<TWO_ADICITY, NLIMBS> get_invs(const storage<NLIMBS>& modulus)
+  {
+    storage_array<TWO_ADICITY, NLIMBS> invs = {};
+    storage<NLIMBS> rs = {1};
+    for (int i = 0; i < TWO_ADICITY; i++) { // an odd rs first has modulus combined in (presumably added -- confirm)
+      if (rs.limbs[0] & 1) host_math::template add_sub_limbs<NLIMBS, false, false>(rs, modulus, rs);
+      rs = host_math::template right_shift<NLIMBS, 1>(rs);
+      invs.storages[i] = rs; // entry i holds the value after i + 1 halvings
+    }
+    return invs;
+  }
+} // namespace params_gen
+
+#define PARAMS(modulus)                                                                                                \
+  static constexpr unsigned limbs_count = modulus.LC;                                                                  \
+  static constexpr unsigned modulus_bit_count =                                                                        \
+    32 * (limbs_count - 1) + params_gen::floorlog2(modulus.limbs[limbs_count - 1]) + 1;                                \
+  static constexpr storage<limbs_count> zero = {};                                                                     \
+  static constexpr storage<limbs_count> one = {1};                                                                     \
+  static constexpr storage<limbs_count> modulus_2 = host_math::template left_shift<limbs_count, 1>(modulus);           \
+  static constexpr storage<limbs_count> modulus_4 = host_math::template left_shift<limbs_count, 1>(modulus_2);         \
+  static constexpr storage<limbs_count> neg_modulus =                                                                  \
+    params_gen::template get_difference_no_carry<limbs_count>(zero, modulus);                                          \
+  static constexpr storage<2 * limbs_count> modulus_squared =                                                          \
+    params_gen::template get_square<limbs_count, 0>(modulus);                                                          \
+  static constexpr storage<2 * limbs_count> modulus_squared_2 =                                                        \
+    host_math::template left_shift<2 * limbs_count, 1>(modulus_squared);                                               \
+  static constexpr storage<2 * limbs_count> modulus_squared_4 =                                                        \
+    host_math::template left_shift<2 * limbs_count, 1>(modulus_squared_2);                                             \
+  static constexpr storage<limbs_count> m = params_gen::template get_m<limbs_count, 2 * modulus_bit_count>(modulus);   \
+  static constexpr storage<limbs_count> montgomery_r =                                                                 \
+    params_gen::template get_montgomery_constant<limbs_count, false>(modulus);                                         \
+  static constexpr storage<limbs_count> montgomery_r_inv =                                                             \
+    params_gen::template get_montgomery_constant<limbs_count, true>(modulus);                                          \
+  static constexpr unsigned num_of_reductions =                                                                        \
+    params_gen::template num_of_reductions<limbs_count, 2 * modulus_bit_count>(modulus, m);
+
+#define TWIDDLES(modulus, rou)                                                                                         \
+  static constexpr unsigned omegas_count = params_gen::template two_adicity<limbs_count>(modulus);                     \
+  static constexpr storage_array<omegas_count, limbs_count> inv =                                                      \
+    params_gen::template get_invs<limbs_count, omegas_count>(modulus);
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/ptx.cuh b/sumcheck/cuda/icicle/fields/ptx.cuh
new file mode 100644
index 00000000..7625bd92
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/ptx.cuh
@@ -0,0 +1,282 @@
+#pragma once
+#include <cstdint>
+#include <cuda_runtime.h>
+
+namespace ptx {
+
+  __device__ __forceinline__ uint32_t add(const uint32_t x, const uint32_t y)
+  {
+    uint32_t result;
+    asm("add.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t add_cc(const uint32_t x, const uint32_t y)
+  { // add.cc.u32: returns x + y and also writes the carry-out to the condition-code register
+    uint32_t result;
+    asm volatile("add.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t addc(const uint32_t x, const uint32_t y)
+  { // addc.u32: returns x + y + carry-in from the condition-code register; no carry-out is written
+    uint32_t result;
+    asm volatile("addc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t addc_cc(const uint32_t x, const uint32_t y)
+  { // addc.cc.u32: returns x + y + carry-in and writes the new carry-out, so carry chains can continue
+    uint32_t result;
+    asm volatile("addc.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t sub(const uint32_t x, const uint32_t y)
+  {
+    uint32_t result;
+    asm("sub.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t sub_cc(const uint32_t x, const uint32_t y)
+  {
+    uint32_t result;
+    asm volatile("sub.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t subc(const uint32_t x, const uint32_t y)
+  {
+    uint32_t result;
+    asm volatile("subc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t subc_cc(const uint32_t x, const uint32_t y)
+  {
+    uint32_t result;
+    asm volatile("subc.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t mul_lo(const uint32_t x, const uint32_t y)
+  {
+    uint32_t result;
+    asm("mul.lo.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t mul_hi(const uint32_t x, const uint32_t y)
+  {
+    uint32_t result;
+    asm("mul.hi.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t mad_lo(const uint32_t x, const uint32_t y, const uint32_t z)
+  {
+    uint32_t result;
+    asm("mad.lo.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t mad_hi(const uint32_t x, const uint32_t y, const uint32_t z)
+  {
+    uint32_t result;
+    asm("mad.hi.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t mad_lo_cc(const uint32_t x, const uint32_t y, const uint32_t z)
+  {
+    uint32_t result;
+    asm volatile("mad.lo.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t mad_hi_cc(const uint32_t x, const uint32_t y, const uint32_t z)
+  {
+    uint32_t result;
+    asm volatile("mad.hi.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t madc_lo(const uint32_t x, const uint32_t y, const uint32_t z)
+  {
+    uint32_t result;
+    asm volatile("madc.lo.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t madc_hi(const uint32_t x, const uint32_t y, const uint32_t z)
+  {
+    uint32_t result;
+    asm volatile("madc.hi.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t madc_lo_cc(const uint32_t x, const uint32_t y, const uint32_t z)
+  {
+    uint32_t result;
+    asm volatile("madc.lo.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
+    return result;
+  }
+
+  __device__ __forceinline__ uint32_t madc_hi_cc(const uint32_t x, const uint32_t y, const uint32_t z)
+  {
+    uint32_t result;
+    asm volatile("madc.hi.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
+    return result;
+  }
+
+  __device__ __forceinline__ uint64_t mov_b64(uint32_t lo, uint32_t hi)
+  { // mov.b64 with a {lo, hi} vector operand: packs two 32-bit registers into one 64-bit value, lo in the low half
+    uint64_t result;
+    asm("mov.b64 %0, {%1,%2};" : "=l"(result) : "r"(lo), "r"(hi));
+    return result;
+  }
+
+  // Gives u64 overloads a dedicated namespace.
+  // Callers should know exactly what they're calling (no implicit conversions).
+  namespace u64 {
+
+    __device__ __forceinline__ uint64_t add(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm("add.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t add_cc(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm volatile("add.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t addc(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm volatile("addc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t addc_cc(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm volatile("addc.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t sub(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm("sub.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t sub_cc(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm volatile("sub.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t subc(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm volatile("subc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t subc_cc(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm volatile("subc.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t mul_lo(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm("mul.lo.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t mul_hi(const uint64_t x, const uint64_t y)
+    {
+      uint64_t result;
+      asm("mul.hi.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t mad_lo(const uint64_t x, const uint64_t y, const uint64_t z)
+    {
+      uint64_t result;
+      asm("mad.lo.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t mad_hi(const uint64_t x, const uint64_t y, const uint64_t z)
+    {
+      uint64_t result;
+      asm("mad.hi.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t mad_lo_cc(const uint64_t x, const uint64_t y, const uint64_t z)
+    {
+      uint64_t result;
+      asm volatile("mad.lo.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t mad_hi_cc(const uint64_t x, const uint64_t y, const uint64_t z)
+    {
+      uint64_t result;
+      asm volatile("mad.hi.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t madc_lo(const uint64_t x, const uint64_t y, const uint64_t z)
+    {
+      uint64_t result;
+      asm volatile("madc.lo.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t madc_hi(const uint64_t x, const uint64_t y, const uint64_t z)
+    {
+      uint64_t result;
+      asm volatile("madc.hi.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t madc_lo_cc(const uint64_t x, const uint64_t y, const uint64_t z)
+    {
+      uint64_t result;
+      asm volatile("madc.lo.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
+      return result;
+    }
+
+    __device__ __forceinline__ uint64_t madc_hi_cc(const uint64_t x, const uint64_t y, const uint64_t z)
+    {
+      uint64_t result;
+      asm volatile("madc.hi.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
+      return result;
+    }
+
+  } // namespace u64
+
+  __device__ __forceinline__ void bar_arrive(const unsigned name, const unsigned count)
+  { // bar.arrive: signals arrival at barrier `name` (expected thread count `count`) without waiting
+    asm volatile("bar.arrive %0, %1;" : : "r"(name), "r"(count) : "memory");
+  }
+
+  __device__ __forceinline__ void bar_sync(const unsigned name, const unsigned count)
+  { // bar.sync: waits at barrier `name` until `count` threads have arrived; the "memory" clobber stops reordering
+    asm volatile("bar.sync %0, %1;" : : "r"(name), "r"(count) : "memory");
+  }
+
+} // namespace ptx
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/quadratic_extension.cuh b/sumcheck/cuda/icicle/fields/quadratic_extension.cuh
new file mode 100644
index 00000000..10065386
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/quadratic_extension.cuh
@@ -0,0 +1,206 @@
+#pragma once
+
+#include "field.cuh"
+#include "gpu-utils/modifiers.cuh"
+#include "gpu-utils/sharedmem.cuh"
+
+template <typename CONFIG, class T>
+class ExtensionField
+{
+private:
+  friend T;
+
+  typedef typename T::Wide FWide;
+
+  struct ExtensionWide {
+    FWide real;
+    FWide imaginary;
+
+    friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys)
+    {
+      return ExtensionWide{xs.real + ys.real, xs.imaginary + ys.imaginary};
+    }
+
+    friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys)
+    {
+      return ExtensionWide{xs.real - ys.real, xs.imaginary - ys.imaginary};
+    }
+  };
+
+public:
+  typedef T FF;
+  static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
+
+  FF real;
+  FF imaginary;
+
+  static constexpr HOST_DEVICE_INLINE ExtensionField zero() { return ExtensionField{FF::zero(), FF::zero()}; }
+
+  static constexpr HOST_DEVICE_INLINE ExtensionField one() { return ExtensionField{FF::one(), FF::zero()}; }
+
+  static constexpr HOST_DEVICE_INLINE ExtensionField to_montgomery(const ExtensionField& xs)
+  {
+    return ExtensionField{xs.real * FF{CONFIG::montgomery_r}, xs.imaginary * FF{CONFIG::montgomery_r}};
+  }
+
+  static constexpr HOST_DEVICE_INLINE ExtensionField from_montgomery(const ExtensionField& xs)
+  {
+    return ExtensionField{xs.real * FF{CONFIG::montgomery_r_inv}, xs.imaginary * FF{CONFIG::montgomery_r_inv}};
+  }
+
+  static HOST_INLINE ExtensionField rand_host() { return ExtensionField{FF::rand_host(), FF::rand_host()}; }
+
+  static void rand_host_many(ExtensionField* out, int size)
+  {
+    for (int i = 0; i < size; i++)
+      out[i] = rand_host();
+  }
+
+  template <unsigned REDUCTION_SIZE = 1> // applies FF::sub_modulus<REDUCTION_SIZE> to both components
+  static constexpr HOST_DEVICE_INLINE ExtensionField sub_modulus(const ExtensionField& xs)
+  { // `template` is required (FF is dependent). NOTE(review): args are passed by address; confirm FF takes pointers.
+    return ExtensionField{FF::template sub_modulus<REDUCTION_SIZE>(&xs.real), FF::template sub_modulus<REDUCTION_SIZE>(&xs.imaginary)};
+  }
+
+  friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs)
+  {
+    os << "{ Real: " << xs.real << " }; { Imaginary: " << xs.imaginary << " }";
+    return os;
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys)
+  {
+    return ExtensionField{xs.real + ys.real, xs.imaginary + ys.imaginary};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys)
+  {
+    return ExtensionField{xs.real - ys.real, xs.imaginary - ys.imaginary};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator+(FF xs, const ExtensionField& ys)
+  {
+    return ExtensionField{xs + ys.real, ys.imaginary};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator-(FF xs, const ExtensionField& ys)
+  {
+    return ExtensionField{xs - ys.real, FF::neg(ys.imaginary)};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const FF& ys)
+  {
+    return ExtensionField{xs.real + ys, xs.imaginary};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const FF& ys)
+  {
+    return ExtensionField{xs.real - ys, xs.imaginary};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1> // unreduced product from three base-field multiplications
+  static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys)
+  { // MODULUS_MULTIPLE is not used in this body.
+    FWide real_prod = FF::mul_wide(xs.real, ys.real);
+    FWide imaginary_prod = FF::mul_wide(xs.imaginary, ys.imaginary);
+    FWide prod_of_sums = FF::mul_wide(xs.real + xs.imaginary, ys.real + ys.imaginary); // holds both cross terms
+    FWide nonresidue_times_im = FF::template mul_unsigned<CONFIG::nonresidue>(imaginary_prod);
+    nonresidue_times_im = CONFIG::nonresidue_is_negative ? FWide::neg(nonresidue_times_im) : nonresidue_times_im;
+    return ExtensionWide{real_prod + nonresidue_times_im, prod_of_sums - real_prod - imaginary_prod};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const FF& ys)
+  {
+    return ExtensionWide{FF::mul_wide(xs.real, ys), FF::mul_wide(xs.imaginary, ys)};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const FF& xs, const ExtensionField& ys)
+  {
+    return mul_wide(ys, xs);
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionWide& xs)
+  {
+    return ExtensionField{
+      FF::template reduce<MODULUS_MULTIPLE>(xs.real), FF::template reduce<MODULUS_MULTIPLE>(xs.imaginary)};
+  }
+
+  template <class T1, class T2>
+  friend HOST_DEVICE_INLINE ExtensionField operator*(const T1& xs, const T2& ys)
+  {
+    ExtensionWide xy = mul_wide(xs, ys);
+    return reduce(xy);
+  }
+
+  friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys)
+  {
+    return (xs.real == ys.real) && (xs.imaginary == ys.imaginary);
+  }
+
+  friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) { return !(xs == ys); }
+
+  template <const ExtensionField& multiplier>
+  static HOST_DEVICE_INLINE ExtensionField mul_const(const ExtensionField& xs)
+  {
+    static constexpr FF mul_real = multiplier.real;
+    static constexpr FF mul_imaginary = multiplier.imaginary;
+    const FF xs_real = xs.real;
+    const FF xs_imaginary = xs.imaginary;
+    FF real_prod = FF::template mul_const<mul_real>(xs_real);
+    FF imaginary_prod = FF::template mul_const<mul_imaginary>(xs_imaginary);
+    FF re_im = FF::template mul_const<mul_real>(xs_imaginary);
+    FF im_re = FF::template mul_const<mul_imaginary>(xs_real);
+    FF nonresidue_times_im = FF::template mul_unsigned<CONFIG::nonresidue>(imaginary_prod);
+    nonresidue_times_im = CONFIG::nonresidue_is_negative ? FF::neg(nonresidue_times_im) : nonresidue_times_im;
+    return ExtensionField{real_prod + nonresidue_times_im, re_im + im_re};
+  }
+
+  template <uint32_t multiplier, unsigned REDUCTION_SIZE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField& xs)
+  {
+    return {FF::template mul_unsigned<multiplier>(xs.real), FF::template mul_unsigned<multiplier>(xs.imaginary)};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs)
+  {
+    // TODO: change to a more efficient squaring
+    return mul_wide<MODULUS_MULTIPLE>(xs, xs);
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs)
+  {
+    // TODO: change to a more efficient squaring
+    return xs * xs;
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs)
+  {
+    return ExtensionField{FF::neg(xs.real), FF::neg(xs.imaginary)};
+  }
+
+  // Inverse as conjugate times 1 / norm. Zero is meant to map to zero (presumably FF::inverse(0) == 0 -- confirm).
+  static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs)
+  {
+    ExtensionField xs_conjugate = {xs.real, FF::neg(xs.imaginary)}; // same real part, negated imaginary part
+    FF nonresidue_times_im = FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr(xs.imaginary));
+    nonresidue_times_im = CONFIG::nonresidue_is_negative ? FF::neg(nonresidue_times_im) : nonresidue_times_im;
+    // TODO: wide here
+    FF xs_norm_squared = FF::sqr(xs.real) - nonresidue_times_im; // real^2 - (signed nonresidue) * imaginary^2
+    return xs_conjugate * ExtensionField{FF::inverse(xs_norm_squared), FF::zero()}; // scale by the base-field inverse
+  }
+};
+
+template <typename CONFIG, class T>
+struct SharedMemory<ExtensionField<CONFIG, T>> {
+  __device__ ExtensionField<CONFIG, T>* getPointer()
+  {
+    extern __shared__ ExtensionField<CONFIG, T> s_ext2_scalar_[];
+    return s_ext2_scalar_;
+  }
+};
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/quartic_extension.cuh b/sumcheck/cuda/icicle/fields/quartic_extension.cuh
new file mode 100644
index 00000000..8fead58c
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/quartic_extension.cuh
@@ -0,0 +1,256 @@
+#pragma once
+
+#include "field.cuh"
+#include "gpu-utils/modifiers.cuh"
+#include "gpu-utils/sharedmem.cuh"
+
+template <typename CONFIG, class T>
+class ExtensionField
+{
+private:
+  typedef typename T::Wide FWide;
+
+  struct ExtensionWide {
+    FWide real;
+    FWide im1;
+    FWide im2;
+    FWide im3;
+
+    friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys)
+    {
+      return ExtensionWide{xs.real + ys.real, xs.im1 + ys.im1, xs.im2 + ys.im2, xs.im3 + ys.im3};
+    }
+
+    friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys)
+    {
+      return ExtensionWide{xs.real - ys.real, xs.im1 - ys.im1, xs.im2 - ys.im2, xs.im3 - ys.im3};
+    }
+  };
+
+public:
+  typedef T FF;
+  static constexpr unsigned TLC = 4 * CONFIG::limbs_count;
+
+  FF real;
+  FF im1;
+  FF im2;
+  FF im3;
+
+  static constexpr HOST_DEVICE_INLINE ExtensionField zero()
+  {
+    return ExtensionField{FF::zero(), FF::zero(), FF::zero(), FF::zero()};
+  }
+
+  static constexpr HOST_DEVICE_INLINE ExtensionField one()
+  {
+    return ExtensionField{FF::one(), FF::zero(), FF::zero(), FF::zero()};
+  }
+
+  static constexpr HOST_DEVICE_INLINE ExtensionField to_montgomery(const ExtensionField& xs)
+  {
+    return ExtensionField{
+      FF::to_montgomery(xs.real), FF::to_montgomery(xs.im1), FF::to_montgomery(xs.im2), FF::to_montgomery(xs.im3)};
+  }
+
+  static constexpr HOST_DEVICE_INLINE ExtensionField from_montgomery(const ExtensionField& xs)
+  {
+    return ExtensionField{
+      FF::from_montgomery(xs.real), FF::from_montgomery(xs.im1), FF::from_montgomery(xs.im2),
+      FF::from_montgomery(xs.im3)};
+  }
+
+  static HOST_INLINE ExtensionField rand_host()
+  {
+    return ExtensionField{FF::rand_host(), FF::rand_host(), FF::rand_host(), FF::rand_host()};
+  }
+
+  static void rand_host_many(ExtensionField* out, int size)
+  {
+    for (int i = 0; i < size; i++)
+      out[i] = rand_host();
+  }
+
+  template <unsigned REDUCTION_SIZE = 1> // applies FF::sub_modulus<REDUCTION_SIZE> to each of the four components
+  static constexpr HOST_DEVICE_INLINE ExtensionField sub_modulus(const ExtensionField& xs)
+  { // `template` is required (FF is dependent). NOTE(review): args are passed by address; confirm FF takes pointers.
+    return ExtensionField{
+      FF::template sub_modulus<REDUCTION_SIZE>(&xs.real), FF::template sub_modulus<REDUCTION_SIZE>(&xs.im1),
+      FF::template sub_modulus<REDUCTION_SIZE>(&xs.im2), FF::template sub_modulus<REDUCTION_SIZE>(&xs.im3)};
+  }
+
+  friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs)
+  {
+    os << "{ Real: " << xs.real << " }; { Im1: " << xs.im1 << " }; { Im2: " << xs.im2 << " }; { Im3: " << xs.im3
+       << " };";
+    return os;
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys)
+  {
+    return ExtensionField{xs.real + ys.real, xs.im1 + ys.im1, xs.im2 + ys.im2, xs.im3 + ys.im3};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys)
+  {
+    return ExtensionField{xs.real - ys.real, xs.im1 - ys.im1, xs.im2 - ys.im2, xs.im3 - ys.im3};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator+(FF xs, const ExtensionField& ys)
+  {
+    return ExtensionField{xs + ys.real, ys.im1, ys.im2, ys.im3};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator-(FF xs, const ExtensionField& ys)
+  {
+    return ExtensionField{xs - ys.real, FF::neg(ys.im1), FF::neg(ys.im2), FF::neg(ys.im3)};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const FF& ys)
+  {
+    return ExtensionField{xs.real + ys, xs.im1, xs.im2, xs.im3};
+  }
+
+  friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const FF& ys)
+  {
+    return ExtensionField{xs.real - ys, xs.im1, xs.im2, xs.im3};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys)
+  {
+    if (CONFIG::nonresidue_is_negative)
+      return ExtensionWide{
+        FF::mul_wide(xs.real, ys.real) -
+          FF::template mul_unsigned<CONFIG::nonresidue>(
+            FF::mul_wide(xs.im1, ys.im3) + FF::mul_wide(xs.im2, ys.im2) + FF::mul_wide(xs.im3, ys.im1)),
+        FF::mul_wide(xs.real, ys.im1) + FF::mul_wide(xs.im1, ys.real) -
+          FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, ys.im3) + FF::mul_wide(xs.im3, ys.im2)),
+        FF::mul_wide(xs.real, ys.im2) + FF::mul_wide(xs.im1, ys.im1) + FF::mul_wide(xs.im2, ys.real) -
+          FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, ys.im3)),
+        FF::mul_wide(xs.real, ys.im3) + FF::mul_wide(xs.im1, ys.im2) + FF::mul_wide(xs.im2, ys.im1) +
+          FF::mul_wide(xs.im3, ys.real)};
+    else
+      return ExtensionWide{
+        FF::mul_wide(xs.real, ys.real) +
+          FF::template mul_unsigned<CONFIG::nonresidue>(
+            FF::mul_wide(xs.im1, ys.im3) + FF::mul_wide(xs.im2, ys.im2) + FF::mul_wide(xs.im3, ys.im1)),
+        FF::mul_wide(xs.real, ys.im1) + FF::mul_wide(xs.im1, ys.real) +
+          FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, ys.im3) + FF::mul_wide(xs.im3, ys.im2)),
+        FF::mul_wide(xs.real, ys.im2) + FF::mul_wide(xs.im1, ys.im1) + FF::mul_wide(xs.im2, ys.real) +
+          FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, ys.im3)),
+        FF::mul_wide(xs.real, ys.im3) + FF::mul_wide(xs.im1, ys.im2) + FF::mul_wide(xs.im2, ys.im1) +
+          FF::mul_wide(xs.im3, ys.real)};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const FF& ys)
+  {
+    return ExtensionWide{
+      FF::mul_wide(xs.real, ys), FF::mul_wide(xs.im1, ys), FF::mul_wide(xs.im2, ys), FF::mul_wide(xs.im3, ys)};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const FF& xs, const ExtensionField& ys)
+  {
+    return ExtensionWide{
+      FF::mul_wide(xs, ys.real), FF::mul_wide(xs, ys.im1), FF::mul_wide(xs, ys.im2), FF::mul_wide(xs, ys.im3)};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionWide& xs)
+  {
+    return ExtensionField{
+      FF::template reduce<MODULUS_MULTIPLE>(xs.real), FF::template reduce<MODULUS_MULTIPLE>(xs.im1),
+      FF::template reduce<MODULUS_MULTIPLE>(xs.im2), FF::template reduce<MODULUS_MULTIPLE>(xs.im3)};
+  }
+
+  template <class T1, class T2>
+  friend HOST_DEVICE_INLINE ExtensionField operator*(const T1& xs, const T2& ys)
+  {
+    ExtensionWide xy = mul_wide(xs, ys);
+    return reduce(xy);
+  }
+
+  friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys)
+  {
+    return (xs.real == ys.real) && (xs.im1 == ys.im1) && (xs.im2 == ys.im2) && (xs.im3 == ys.im3);
+  }
+
+  friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) { return !(xs == ys); }
+
+  template <uint32_t multiplier, unsigned REDUCTION_SIZE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField& xs)
+  {
+    return {
+      FF::template mul_unsigned<multiplier>(xs.real), FF::template mul_unsigned<multiplier>(xs.im1),
+      FF::template mul_unsigned<multiplier>(xs.im2), FF::template mul_unsigned<multiplier>(xs.im3)};
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs)
+  {
+    // TODO: change to a more efficient squaring
+    return mul_wide<MODULUS_MULTIPLE>(xs, xs);
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs)
+  {
+    // TODO: change to a more efficient squaring
+    return xs * xs;
+  }
+
+  template <unsigned MODULUS_MULTIPLE = 1>
+  static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs)
+  {
+    return {FF::neg(xs.real), FF::neg(xs.im1), FF::neg(xs.im2), FF::neg(xs.im3)};
+  }
+
+  // inverse of zero is set to be zero which is what we want most of the time
+  static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs)
+  {
+    FF x, x0, x2;
+    if (CONFIG::nonresidue_is_negative) {
+      x0 = FF::reduce(
+        FF::sqr_wide(xs.real) +
+        FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im1, xs.im3 + xs.im3) - FF::sqr_wide(xs.im2)));
+      x2 = FF::reduce(
+        FF::mul_wide(xs.real, xs.im2 + xs.im2) - FF::sqr_wide(xs.im1) +
+        FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr_wide(xs.im3)));
+      x = FF::reduce(FF::sqr_wide(x0) + FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr_wide(x2)));
+    } else {
+      x0 = FF::reduce(
+        FF::sqr_wide(xs.real) -
+        FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im1, xs.im3 + xs.im3) - FF::sqr_wide(xs.im2)));
+      x2 = FF::reduce(
+        FF::mul_wide(xs.real, xs.im2 + xs.im2) - FF::sqr_wide(xs.im1) -
+        FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr_wide(xs.im3)));
+      x = FF::reduce(FF::sqr_wide(x0) - FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr_wide(x2)));
+    }
+    FF x_inv = FF::inverse(x);
+    x0 = x0 * x_inv;
+    x2 = x2 * x_inv;
+    return {
+      FF::reduce(
+        (CONFIG::nonresidue_is_negative
+           ? (FF::mul_wide(xs.real, x0) + FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, x2)))
+           : (FF::mul_wide(xs.real, x0)) - FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, x2)))),
+      FF::reduce(
+        (CONFIG::nonresidue_is_negative
+           ? FWide::neg(FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, x2)))
+           : FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, x2))) -
+        FF::mul_wide(xs.im1, x0)),
+      FF::reduce(FF::mul_wide(xs.im2, x0) - FF::mul_wide(xs.real, x2)),
+      FF::reduce(FF::mul_wide(xs.im1, x2) - FF::mul_wide(xs.im3, x0)),
+    };
+  }
+};
+
+template <class CONFIG, class T>
+struct SharedMemory<ExtensionField<CONFIG, T>> {
+  __device__ ExtensionField<CONFIG, T>* getPointer()
+  {
+    extern __shared__ ExtensionField<CONFIG, T> s_ext4_scalar_[];
+    return s_ext4_scalar_;
+  }
+};
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/bls12_377_base.cuh b/sumcheck/cuda/icicle/fields/snark_fields/bls12_377_base.cuh
new file mode 100644
index 00000000..dc7a13fe
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/bls12_377_base.cuh
@@ -0,0 +1,25 @@
+#pragma once
+#ifndef BLS12_377_BASE_PARAMS_H
+#define BLS12_377_BASE_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/params_gen.cuh"
+
+namespace bls12_377 {
+  struct fq_config {
+    static constexpr storage<12> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f,
+                                            0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
+    PARAMS(modulus)
+
+    static constexpr storage<12> rou = {0xc563b9a1, 0x7eca603c, 0x06fe0bc3, 0x06df0a43, 0x0ddff8c6, 0xb44d994a,
+                                        0x4512a3d4, 0x40fbe05b, 0x8aeffc9b, 0x30f15248, 0x05198a80, 0x0036a92e};
+    TWIDDLES(modulus, rou)
+
+    // nonresidue to generate the extension field
+    static constexpr uint32_t nonresidue = 5;
+    // true if nonresidue is negative
+    static constexpr bool nonresidue_is_negative = true;
+  };
+} // namespace bls12_377
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/bls12_377_scalar.cuh b/sumcheck/cuda/icicle/fields/snark_fields/bls12_377_scalar.cuh
new file mode 100644
index 00000000..3924f59f
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/bls12_377_scalar.cuh
@@ -0,0 +1,26 @@
+#pragma once
+#ifndef BLS12_377_SCALAR_PARAMS_H
+#define BLS12_377_SCALAR_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/field.cuh"
+#include "fields/params_gen.cuh"
+
+namespace bls12_377 {
+  struct fp_config {
+    static constexpr storage<8> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
+                                           0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
+    PARAMS(modulus)
+
+    static constexpr storage<8> rou = {0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3,
+                                       0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6};
+    TWIDDLES(modulus, rou)
+  };
+
+  /**
+   * Scalar field. Is always a prime field.
+   */
+  typedef Field<fp_config> scalar_t;
+} // namespace bls12_377
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/bls12_381_base.cuh b/sumcheck/cuda/icicle/fields/snark_fields/bls12_381_base.cuh
new file mode 100644
index 00000000..54fc0ab5
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/bls12_381_base.cuh
@@ -0,0 +1,21 @@
+#pragma once
+#ifndef BLS12_381_BASE_PARAMS_H
+#define BLS12_381_BASE_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/params_gen.cuh"
+
+namespace bls12_381 {
+  struct fq_config {
+    static constexpr storage<12> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0,
+                                            0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
+    PARAMS(modulus)
+
+    // nonresidue to generate the extension field
+    static constexpr uint32_t nonresidue = 1;
+    // true if nonresidue is negative
+    static constexpr bool nonresidue_is_negative = true;
+  };
+} // namespace bls12_381
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/bls12_381_scalar.cuh b/sumcheck/cuda/icicle/fields/snark_fields/bls12_381_scalar.cuh
new file mode 100644
index 00000000..c6bd12fe
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/bls12_381_scalar.cuh
@@ -0,0 +1,26 @@
+#pragma once
+#ifndef BLS12_381_SCALAR_PARAMS_H
+#define BLS12_381_SCALAR_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/field.cuh"
+#include "fields/params_gen.cuh"
+
+namespace bls12_381 {
+  struct fp_config {
+    static constexpr storage<8> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
+                                           0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
+    PARAMS(modulus)
+
+    static constexpr storage<8> rou = {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2,
+                                       0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
+    TWIDDLES(modulus, rou)
+  };
+
+  /**
+   * Scalar field. Is always a prime field.
+   */
+  typedef Field<fp_config> scalar_t;
+} // namespace bls12_381
+
+#endif
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/bn254_base.cuh b/sumcheck/cuda/icicle/fields/snark_fields/bn254_base.cuh
new file mode 100644
index 00000000..95f18ed3
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/bn254_base.cuh
@@ -0,0 +1,21 @@
+#pragma once
+#ifndef BN254_BASE_PARAMS_H
+#define BN254_BASE_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/params_gen.cuh"
+
+namespace bn254 {
+  struct fq_config {
+    static constexpr storage<8> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
+                                           0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
+    PARAMS(modulus)
+
+    // nonresidue to generate the extension field
+    static constexpr uint32_t nonresidue = 1;
+    // true if nonresidue is negative
+    static constexpr bool nonresidue_is_negative = true;
+  };
+} // namespace bn254
+
+#endif
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/bn254_scalar.cuh b/sumcheck/cuda/icicle/fields/snark_fields/bn254_scalar.cuh
new file mode 100644
index 00000000..67478e2d
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/bn254_scalar.cuh
@@ -0,0 +1,26 @@
+#pragma once
+#ifndef BN254_SCALAR_PARAMS_H
+#define BN254_SCALAR_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/field.cuh"
+#include "fields/params_gen.cuh"
+
+namespace bn254 {
+  struct fp_config {
+    static constexpr storage<8> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
+                                           0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
+    PARAMS(modulus)
+
+    static constexpr storage<8> rou = {0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e,
+                                       0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0};
+    TWIDDLES(modulus, rou)
+  };
+
+  /**
+   * Scalar field. Is always a prime field.
+   */
+  typedef Field<fp_config> scalar_t;
+} // namespace bn254
+
+#endif
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/bw6_761_base.cuh b/sumcheck/cuda/icicle/fields/snark_fields/bw6_761_base.cuh
new file mode 100644
index 00000000..4ec110ff
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/bw6_761_base.cuh
@@ -0,0 +1,18 @@
+#pragma once
+#ifndef BW6_761_BASE_BASE_H
+#define BW6_761_BASE_BASE_H
+
+#include "fields/storage.cuh"
+#include "fields/params_gen.cuh"
+
+namespace bw6_761 {
+  struct fq_config {
+    static constexpr storage<24> modulus = {0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae,
+                                            0x5667a8f8, 0x98a116c2, 0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed,
+                                            0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638, 0x8087be41, 0x528275ef,
+                                            0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
+    PARAMS(modulus)
+  };
+} // namespace bw6_761
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/bw6_761_scalar.cuh b/sumcheck/cuda/icicle/fields/snark_fields/bw6_761_scalar.cuh
new file mode 100644
index 00000000..304595a7
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/bw6_761_scalar.cuh
@@ -0,0 +1,18 @@
+#pragma once
+#ifndef BW6_761_SCALAR_PARAMS_H
+#define BW6_761_SCALAR_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/field.cuh"
+#include "fields/snark_fields/bls12_377_base.cuh"
+
+namespace bw6_761 {
+  typedef bls12_377::fq_config fp_config;
+
+  /**
+   * Scalar field. Is always a prime field.
+   */
+  typedef Field<fp_config> scalar_t;
+} // namespace bw6_761
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/grumpkin_base.cuh b/sumcheck/cuda/icicle/fields/snark_fields/grumpkin_base.cuh
new file mode 100644
index 00000000..355f2f30
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/grumpkin_base.cuh
@@ -0,0 +1,12 @@
+#pragma once
+#ifndef GRUMPKIN_BASE_PARAMS_H
+#define GRUMPKIN_BASE_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/snark_fields/bn254_scalar.cuh"
+
+namespace grumpkin {
+  typedef bn254::fp_config fq_config;
+}
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/snark_fields/grumpkin_scalar.cuh b/sumcheck/cuda/icicle/fields/snark_fields/grumpkin_scalar.cuh
new file mode 100644
index 00000000..4354c8de
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/snark_fields/grumpkin_scalar.cuh
@@ -0,0 +1,18 @@
+#pragma once
+#ifndef GRUMPKIN_SCALAR_PARAMS_H
+#define GRUMPKIN_SCALAR_PARAMS_H
+
+#include "fields/storage.cuh"
+#include "fields/field.cuh"
+#include "fields/snark_fields/bn254_base.cuh"
+
+namespace grumpkin {
+  typedef bn254::fq_config fp_config;
+
+  /**
+   * Scalar field. Is always a prime field.
+   */
+  typedef Field<fp_config> scalar_t;
+} // namespace grumpkin
+
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/stark_fields/babybear.cuh b/sumcheck/cuda/icicle/fields/stark_fields/babybear.cuh
new file mode 100644
index 00000000..57f3f06e
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/stark_fields/babybear.cuh
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "fields/storage.cuh"
+#include "fields/field.cuh"
+#include "fields/quartic_extension.cuh"
+#include "fields/params_gen.cuh"
+
+namespace babybear {
+  struct fp_config {
+    static constexpr storage<1> modulus = {0x78000001};
+    PARAMS(modulus)
+
+    static constexpr storage<1> rou = {0x00000089};
+    TWIDDLES(modulus, rou)
+
+    // nonresidue to generate the extension field
+    static constexpr uint32_t nonresidue = 11;
+    // true if nonresidue is negative.
+    static constexpr bool nonresidue_is_negative = false;
+  };
+
+  /**
+   * Scalar field. Is always a prime field.
+   */
+  typedef Field<fp_config> scalar_t;
+
+  /**
+   * Extension field of `scalar_t`, enabled if the `-DEXT_FIELD` build option is set.
+   */
+  typedef ExtensionField<fp_config, scalar_t> extension_t;
+} // namespace babybear
diff --git a/sumcheck/cuda/icicle/fields/stark_fields/m31.cuh b/sumcheck/cuda/icicle/fields/stark_fields/m31.cuh
new file mode 100644
index 00000000..b45592ce
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/stark_fields/m31.cuh
@@ -0,0 +1,224 @@
+#pragma once
+
+#include "fields/storage.cuh"
+#include "fields/field.cuh"
+#include "fields/quartic_extension.cuh"
+
+namespace m31 {
+  template <class CONFIG>
+  class MersenneField : public Field<CONFIG>
+  {
+  public:
+    HOST_DEVICE_INLINE MersenneField(const MersenneField& other) : Field<CONFIG>(other) {}
+    HOST_DEVICE_INLINE MersenneField(const uint32_t& x = 0) : Field<CONFIG>({x}) {}
+    HOST_DEVICE_INLINE MersenneField(storage<CONFIG::limbs_count> x) : Field<CONFIG>{x} {}
+    HOST_DEVICE_INLINE MersenneField(const Field<CONFIG>& other) : Field<CONFIG>(other) {}
+
+    static constexpr HOST_DEVICE_INLINE MersenneField zero() { return MersenneField(CONFIG::zero); }
+
+    static constexpr HOST_DEVICE_INLINE MersenneField one() { return MersenneField(CONFIG::one.limbs[0]); }
+
+    static constexpr HOST_DEVICE_INLINE MersenneField from(uint32_t value) { return MersenneField(value); }
+
+    static HOST_INLINE MersenneField rand_host() { return MersenneField(Field<CONFIG>::rand_host()); }
+
+    static void rand_host_many(MersenneField* out, int size)
+    {
+      for (int i = 0; i < size; i++)
+        out[i] = rand_host();
+    }
+
+    HOST_DEVICE_INLINE MersenneField& operator=(const Field<CONFIG>& other)
+    {
+      if (this != &other) { Field<CONFIG>::operator=(other); }
+      return *this;
+    }
+
+    HOST_DEVICE_INLINE uint32_t get_limb() const { return this->limbs_storage.limbs[0]; }
+
+    //  The `Wide` struct represents a redundant 32-bit form of the Mersenne Field.
+    struct Wide {
+      uint32_t storage;
+      static constexpr HOST_DEVICE_INLINE Wide from_field(const MersenneField& xs)
+      {
+        Wide out{};
+        out.storage = xs.get_limb();
+        return out;
+      }
+      static constexpr HOST_DEVICE_INLINE Wide from_number(const uint32_t& xs)
+      {
+        Wide out{};
+        out.storage = xs;
+        return out;
+      }
+      friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys)
+      {
+        uint64_t tmp = (uint64_t)xs.storage + ys.storage;                   // max: 2^33 - 2 = 2^32(1) + (2^32 - 2)
+        tmp = ((tmp >> 32) << 1) + (uint32_t)(tmp);                         // 2(1)+(2^32-2) = 2^32(1)+(0)
+        return from_number((uint32_t)((tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(1) + 0 = 2
+      }
+      friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys)
+      {
+        uint64_t tmp = CONFIG::modulus_3 + xs.storage -
+                       ys.storage; // max: 3(2^31-1) + 2^32-1 - 0 = 2^33 + 2^31-4 = 2^32(2) + (2^31-4)
+        return from_number(((uint32_t)(tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(2)+(2^31-4) = 2^31
+      }
+      template <unsigned MODULUS_MULTIPLE = 1>
+      static constexpr HOST_DEVICE_INLINE Wide neg(const Wide& xs)
+      {
+        uint64_t tmp = CONFIG::modulus_3 - xs.storage;                      // max: 3(2^31-1) - 0 = 2^32(1) + (2^31 - 3)
+        return from_number(((uint32_t)(tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(1)+(2^31-3) = 2^31 - 1
+      }
+      friend HOST_DEVICE_INLINE Wide operator*(Wide xs, const Wide& ys) // folds the 64-bit product, 2^32 == 2 (mod p)
+      {
+        uint64_t t1 = (uint64_t)xs.storage * ys.storage; // max: 2^64 - 2^33+1 = 2^32(2^32 - 2) + 1
+        t1 = ((t1 >> 32) << 1) + (uint32_t)(t1);         // max: 2(2^32 - 2) + 1 = 2^32(1) + (2^32 - 3)
+        return from_number((((uint32_t)(t1 >> 32)) << 1) + (uint32_t)(t1)); // max: 2(1) + (2^32 - 3) = 2^32 - 1
+      }
+    };
+
+    static constexpr HOST_DEVICE_INLINE MersenneField div2(const MersenneField& xs, const uint32_t& power = 1)
+    {
+      uint32_t t = xs.get_limb();
+      return MersenneField{{((t >> power) | (t << (31 - power))) & MersenneField::get_modulus().limbs[0]}};
+    }
+
+    static constexpr HOST_DEVICE_INLINE MersenneField neg(const MersenneField& xs)
+    {
+      uint32_t t = xs.get_limb();
+      return MersenneField{{t == 0 ? t : MersenneField::get_modulus().limbs[0] - t}};
+    }
+
+    template <unsigned MODULUS_MULTIPLE = 1> // MODULUS_MULTIPLE is not used by this implementation
+    static constexpr HOST_DEVICE_INLINE MersenneField reduce(Wide xs) // Wide (any 32-bit value) -> [0, modulus)
+    {
+      const uint32_t modulus = MersenneField::get_modulus().limbs[0];
+      uint32_t tmp = (xs.storage >> 31) + (xs.storage & modulus); // fold bit 31 (2^31 == 1 mod p); max: 2^31
+      tmp = (tmp >> 31) + (tmp & modulus);                        // fold again from tmp, not xs; max: 2^31 - 1
+      return MersenneField{{tmp == modulus ? 0 : tmp}};           // modulus itself is congruent to 0
+    }
+
+    static constexpr HOST_DEVICE_INLINE MersenneField inverse(const MersenneField& x)
+    {
+      uint32_t xs = x.limbs_storage.limbs[0];
+      if (xs <= 1) return xs;
+      uint32_t a = 1, b = 0, y = xs, z = MersenneField::get_modulus().limbs[0], e, m = z;
+      while (1) {
+#ifdef __CUDA_ARCH__
+        e = __ffs(y) - 1;
+#else
+        e = __builtin_ctz(y);
+#endif
+        y >>= e;
+        if (a >= m) {
+          a = (a & m) + (a >> 31);
+          if (a == m) a = 0;
+        }
+        a = ((a >> e) | (a << (31 - e))) & m;
+        if (y == 1) return a;
+        e = a + b;
+        b = a;
+        a = e;
+        e = y + z;
+        z = y;
+        y = e;
+      }
+    }
+
+    friend HOST_DEVICE_INLINE MersenneField operator+(MersenneField xs, const MersenneField& ys)
+    {
+      uint32_t m = MersenneField::get_modulus().limbs[0];
+      uint32_t t = xs.get_limb() + ys.get_limb();
+      if (t > m) t = (t & m) + (t >> 31);
+      if (t == m) t = 0;
+      return MersenneField{{t}};
+    }
+
+    friend HOST_DEVICE_INLINE MersenneField operator-(MersenneField xs, const MersenneField& ys)
+    {
+      return xs + neg(ys);
+    }
+
+    friend HOST_DEVICE_INLINE MersenneField operator*(MersenneField xs, const MersenneField& ys)
+    {
+      uint64_t x = (uint64_t)(xs.get_limb()) * ys.get_limb();
+      uint32_t t = ((x >> 31) + (x & MersenneField::get_modulus().limbs[0]));
+      uint32_t m = MersenneField::get_modulus().limbs[0];
+      if (t > m) t = (t & m) + (t >> 31);
+      if (t > m) t = (t & m) + (t >> 31);
+      if (t == m) t = 0;
+      return MersenneField{{t}};
+    }
+
+    static constexpr HOST_DEVICE_INLINE Wide mul_wide(const MersenneField& xs, const MersenneField& ys)
+    {
+      return Wide::from_field(xs) * Wide::from_field(ys);
+    }
+
+    template <unsigned MODULUS_MULTIPLE = 1>
+    static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const MersenneField& xs)
+    {
+      return mul_wide(xs, xs);
+    }
+
+    static constexpr HOST_DEVICE_INLINE MersenneField sqr(const MersenneField& xs) { return xs * xs; }
+
+    static constexpr HOST_DEVICE_INLINE MersenneField to_montgomery(const MersenneField& xs) { return xs; }
+
+    static constexpr HOST_DEVICE_INLINE MersenneField from_montgomery(const MersenneField& xs) { return xs; }
+
+    static constexpr HOST_DEVICE_INLINE MersenneField pow(MersenneField base, int exp)
+    {
+      MersenneField res = one();
+      while (exp > 0) {
+        if (exp & 1) res = res * base;
+        base = base * base;
+        exp >>= 1;
+      }
+      return res;
+    }
+  };
+  struct fp_config {
+    static constexpr unsigned limbs_count = 1;
+    static constexpr unsigned omegas_count = 1;
+    static constexpr unsigned modulus_bit_count = 31;
+    static constexpr unsigned num_of_reductions = 1;
+
+    static constexpr storage<limbs_count> modulus = {0x7fffffff};
+    static constexpr storage<limbs_count> modulus_2 = {0xfffffffe};
+    static constexpr uint64_t modulus_3 = 0x17ffffffd;
+    static constexpr storage<limbs_count> modulus_4 = {0xfffffffc};
+    static constexpr storage<limbs_count> neg_modulus = {0x87ffffff};
+    static constexpr storage<2 * limbs_count> modulus_wide = {0x7fffffff, 0x00000000};
+    static constexpr storage<2 * limbs_count> modulus_squared = {0x00000001, 0x3fffffff};
+    static constexpr storage<2 * limbs_count> modulus_squared_2 = {0x00000002, 0x7ffffffe};
+    static constexpr storage<2 * limbs_count> modulus_squared_4 = {0x00000004, 0xfffffffc};
+
+    static constexpr storage<limbs_count> m = {0x80000001};
+    static constexpr storage<limbs_count> one = {0x00000001};
+    static constexpr storage<limbs_count> zero = {0x00000000};
+    static constexpr storage<limbs_count> montgomery_r = {0x00000001};
+    static constexpr storage<limbs_count> montgomery_r_inv = {0x00000001};
+
+    static constexpr storage_array<omegas_count, limbs_count> omega = {{{0x7ffffffe}}};
+
+    static constexpr storage_array<omegas_count, limbs_count> omega_inv = {{{0x7ffffffe}}};
+
+    static constexpr storage_array<omegas_count, limbs_count> inv = {{{0x40000000}}};
+
+    // nonresidue to generate the extension field
+    static constexpr uint32_t nonresidue = 11;
+    // true if nonresidue is negative.
+    static constexpr bool nonresidue_is_negative = false;
+  };
+
+  /**
+   * Scalar field. Is always a prime field.
+   */
+  typedef MersenneField<fp_config> scalar_t;
+
+  /**
+   * Extension field of `scalar_t`, enabled if the `-DEXT_FIELD` build option is set.
+   */
+  typedef ExtensionField<fp_config, scalar_t> extension_t;
+} // namespace m31
diff --git a/sumcheck/cuda/icicle/fields/stark_fields/stark252.cuh b/sumcheck/cuda/icicle/fields/stark_fields/stark252.cuh
new file mode 100644
index 00000000..d61f757e
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/stark_fields/stark252.cuh
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "fields/storage.cuh"
+#include "fields/field.cuh"
+#include "fields/params_gen.cuh"
+
+// modulus = 3618502788666131213697322783095070105623107215331596699973092056135872020481 (2^251+17*2^192+1)
+namespace stark252 {
+  struct fp_config {
+    static constexpr storage<8> modulus = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
+                                           0x00000000, 0x00000000, 0x00000011, 0x08000000};
+    PARAMS(modulus)
+
+    static constexpr storage<8> rou = {0x42f8ef94, 0x6070024f, 0xe11a6161, 0xad187148,
+                                       0x9c8b0fa5, 0x3f046451, 0x87529cfa, 0x005282db};
+    TWIDDLES(modulus, rou)
+  };
+
+  /**
+   * Scalar field. Is always a prime field.
+   */
+  typedef Field<fp_config> scalar_t;
+} // namespace stark252
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/fields/storage.cuh b/sumcheck/cuda/icicle/fields/storage.cuh
new file mode 100644
index 00000000..a8d43212
--- /dev/null
+++ b/sumcheck/cuda/icicle/fields/storage.cuh
@@ -0,0 +1,25 @@
+#pragma once
+#include <cstdint>
+
+#define LIMBS_ALIGNMENT(x) ((x) % 4 == 0 ? 16 : ((x) % 2 == 0 ? 8 : 4))
+
+template <unsigned LIMBS_COUNT>
+struct
+#ifdef __CUDA_ARCH__
+  __align__(LIMBS_ALIGNMENT(LIMBS_COUNT))
+#endif
+    storage
+{
+  static constexpr unsigned LC = LIMBS_COUNT;
+  uint32_t limbs[LIMBS_COUNT];
+};
+
+template <unsigned OMEGAS_COUNT, unsigned LIMBS_COUNT>
+struct
+#ifdef __CUDA_ARCH__
+  __align__(LIMBS_ALIGNMENT(LIMBS_COUNT))
+#endif
+    storage_array
+{
+  storage<LIMBS_COUNT> storages[OMEGAS_COUNT];
+};
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/gpu-utils/device_context.cuh b/sumcheck/cuda/icicle/gpu-utils/device_context.cuh
new file mode 100644
index 00000000..b1448a5a
--- /dev/null
+++ b/sumcheck/cuda/icicle/gpu-utils/device_context.cuh
@@ -0,0 +1,58 @@
+#pragma once
+#ifndef DEVICE_CONTEXT_H
+#define DEVICE_CONTEXT_H
+
+#include <cuda_runtime.h>
+#include "gpu-utils/error_handler.cuh"
+
+namespace device_context {
+
+  constexpr std::size_t MAX_DEVICES = 32;
+
+  /**
+   * Properties of the device used in icicle functions.
+   */
+  struct DeviceContext {
+    cudaStream_t& stream;  /**< Stream to use. Default value: 0. */
+    std::size_t device_id; /**< Index of the currently used GPU. Default value: 0. */
+    cudaMemPool_t mempool; /**< Mempool to use. Default value: 0. */
+  };
+
+  /**
+   * Return default device context that corresponds to using the default stream of the first GPU
+   */
+  inline DeviceContext get_default_device_context() // TODO: naming convention ?
+  {
+    static cudaStream_t default_stream = (cudaStream_t)0;
+    return DeviceContext{
+      (cudaStream_t&)default_stream, // stream
+      0,                             // device_id
+      0,                             // mempool
+    };
+  }
+
+  // checking whether a pointer is on host or device and asserts device matches provided device
+  static bool is_host_ptr(const void* p, int device_id = 0)
+  {
+    cudaPointerAttributes attributes;
+    CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
+    const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
+                            attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
+    const bool is_on_cur_device = !is_on_host && attributes.device == device_id;
+    const bool is_valid_ptr = is_on_host || is_on_cur_device;
+    if (!is_valid_ptr) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Invalid ptr"); }
+
+    return is_on_host;
+  }
+
+  static int get_cuda_device(const void* p)
+  {
+    cudaPointerAttributes attributes;
+    CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
+    const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
+                            attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
+    return is_on_host ? -1 : attributes.device;
+  }
+
+} // namespace device_context
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/icicle/gpu-utils/error_handler.cuh b/sumcheck/cuda/icicle/gpu-utils/error_handler.cuh
new file mode 100644
index 00000000..1254ec60
--- /dev/null
+++ b/sumcheck/cuda/icicle/gpu-utils/error_handler.cuh
@@ -0,0 +1,152 @@
+#pragma once
+#ifndef ERR_H
+#define ERR_H
+
+#include <iostream>
+
+#include <cuda_runtime.h>
+#include <stdexcept>
+#include <string>
+
+enum class IcicleError_t {
+  IcicleSuccess = 0,
+  InvalidArgument = 1,
+  MemoryAllocationError = 2,
+  UndefinedError = 999999999,
+};
+
+std::string inline IcicleGetErrorString(IcicleError_t error)
+{
+  switch (error) {
+  case IcicleError_t::IcicleSuccess:
+    return "Success";
+  case IcicleError_t::InvalidArgument:
+    return "Invalid argument";
+  case IcicleError_t::MemoryAllocationError:
+    return "Memory allocation error";
+  case IcicleError_t::UndefinedError:
+    return "Undefined error occurred";
+  default:
+    return "Unknown error code";
+  }
+}
+
+class IcicleError : public std::runtime_error
+{
+private:
+  int errCode; // Field to store the error code
+
+public:
+  // Constructor for cudaError_t with optional message
+  IcicleError(cudaError_t cudaError, const std::string& msg = "")
+      : std::runtime_error("CUDA Error: " + std::string(cudaGetErrorString(cudaError)) + " " + msg),
+        errCode(static_cast<int>(cudaError))
+  {
+  }
+
+  // Constructor for cudaError_t with const char* message
+  IcicleError(cudaError_t cudaError, const char* msg) : IcicleError(cudaError, std::string(msg)) {}
+
+  // Constructor for IcicleError_t with optional message
+  IcicleError(IcicleError_t icicleError, const std::string& msg = "")
+      : std::runtime_error("Icicle Error: " + IcicleGetErrorString(icicleError) + " " + msg),
+        errCode(static_cast<int>(icicleError))
+  {
+  }
+
+  // Constructor for IcicleError_t with const char* message
+  IcicleError(IcicleError_t icicleError, const char* msg) : IcicleError(icicleError, std::string(msg)) {}
+
+  // Getter for errCode
+  int getErrorCode() const { return errCode; }
+};
+
+// TODO: ? do{..}while(0) as per https://hownot2code.wordpress.com/2016/12/05/do-while-0-in-macros/
+
+#define CHK_ERR(err, func, file, line) check(err, func, file, line)
+#define CHK_LOG(val)                   check((val), #val, __FILE__, __LINE__)
+#define CHK_VAL(val, file, line)       check((val), #val, file, line)
+
+cudaError_t inline check(cudaError_t err, const char* const func, const char* const file, const int line)
+{
+  if (err != cudaSuccess) {
+    std::cerr << "CUDA Runtime Error by: " << func << " at: " << file << ":" << line << std::endl;
+    std::cerr << cudaGetErrorString(err) << std::endl << std::endl;
+  }
+
+  return err;
+}
+
+// TODO: one macro that optionally (by compile-time switch) doesn't throw
+#define CHK_STICKY_NO_THROW(val) checkCudaErrorIsSticky((val), #val, __FILE__, __LINE__, false)
+
+#define CHK_LAST_STICKY_NO_THROW()                                                                                     \
+  checkCudaErrorIsSticky(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__, false)
+
+#define CHK_LAST() checkCudaErrorIsSticky(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__)
+
+#define CHK_STICKY(val) checkCudaErrorIsSticky((val), #val, __FILE__, __LINE__)
+
+#define THROW_ICICLE_CUDA(val)                       throwIcicleCudaErr(val, __FUNCTION__, __FILE__, __LINE__)
+#define THROW_ICICLE_CUDA_ERR(val, func, file, line) throwIcicleCudaErr(val, func, file, line)
+void inline throwIcicleCudaErr(
+  cudaError_t err, const char* const func, const char* const file, const int line, bool isUnrecoverable = true)
+{
+  // TODO: fmt::format introduced only in C++20
+  std::string err_msg = (isUnrecoverable ? "!!!Unrecoverable!!! : " : "") + std::string{cudaGetErrorString(err)} +
+                        " : detected by: " + func + " at: " + file + ":" + std::to_string(line) +
+                        "\nThe error is reported there and may be caused by prior calls.\n";
+  std::cerr << err_msg << std::endl; // TODO: Logging
+  throw IcicleError{err, err_msg};
+}
+
+#define THROW_ICICLE(val, reason, func, file, line) throwIcicleErr(val, reason, func, file, line)
+#define THROW_ICICLE_ERR(val, reason)               throwIcicleErr(val, reason, __FUNCTION__, __FILE__, __LINE__)
+void inline throwIcicleErr(
+  IcicleError_t err, const char* const reason, const char* const func, const char* const file, const int line)
+{
+  std::string err_msg = std::string{IcicleGetErrorString(err)} + " : by: " + func + " at: " + file + ":" +
+                        std::to_string(line) + " error: " + reason;
+  std::cerr << err_msg << std::endl; // TODO: Logging
+  throw IcicleError{err, err_msg};
+}
+
+// Logs a failed CUDA call and escalates errors that persist after a device sync (throws unless isThrowing is false).
+cudaError_t inline checkCudaErrorIsSticky(
+  cudaError_t err, const char* const func, const char* const file, const int line, bool isThrowing = true)
+{
+  if (err != cudaSuccess) {
+    // check for sticky (unrecoverable) error when the only option is to restart process
+    cudaError_t err2 = cudaDeviceSynchronize();
+    bool is_logged = false; // must start false: it is read below on paths that never assign it
+    if (err2 != cudaSuccess) { // we suspect sticky error
+      if (err != err2) {
+        is_logged = true;
+        CHK_ERR(err, func, file, line);
+      }
+      // we are practically almost sure error is sticky
+      if (isThrowing) {
+        THROW_ICICLE_CUDA_ERR(err, func, file, line);
+      } else {
+        err = err2; // non-throwing mode: report the error returned by the sync instead
+      }
+    }
+    if (!is_logged) CHK_ERR(err, func, file, line); // log once if not already logged above
+  }
+  return err;
+}
+
+// most common macros to use
+#define CHK_INIT_IF_RETURN()                                                                                           \
+  {                                                                                                                    \
+    cudaError_t err_result = CHK_LAST();                                                                               \
+    if (err_result != cudaSuccess) return err_result;                                                                  \
+  }
+
+#define CHK_IF_RETURN(val)                                                                                             \
+  {                                                                                                                    \
+    cudaError_t err_result = CHK_STICKY(val);                                                                          \
+    if (err_result != cudaSuccess) return err_result;                                                                  \
+  }
+
+#endif
diff --git a/sumcheck/cuda/icicle/gpu-utils/modifiers.cuh b/sumcheck/cuda/icicle/gpu-utils/modifiers.cuh
new file mode 100644
index 00000000..04ad395d
--- /dev/null
+++ b/sumcheck/cuda/icicle/gpu-utils/modifiers.cuh
@@ -0,0 +1,11 @@
+#if defined(DEVMODE) || defined(DEBUG)
+#define INLINE_MACRO
+#define UNROLL
+#else
+#define INLINE_MACRO __forceinline__
+#define UNROLL       #pragma unroll
+#endif
+
+#define HOST_INLINE        __host__ INLINE_MACRO
+#define DEVICE_INLINE      __device__ INLINE_MACRO
+#define HOST_DEVICE_INLINE __host__ __device__ INLINE_MACRO
diff --git a/sumcheck/cuda/icicle/gpu-utils/sharedmem.cuh b/sumcheck/cuda/icicle/gpu-utils/sharedmem.cuh
new file mode 100644
index 00000000..404af55e
--- /dev/null
+++ b/sumcheck/cuda/icicle/gpu-utils/sharedmem.cuh
@@ -0,0 +1,221 @@
+// Based on https://leimao.github.io/blog/CUDA-Shared-Memory-Templated-Kernel/
+// This approach may be outdated, but it is the only one that worked here.
+
+// -------------------------------------------------------------
+// cuDPP -- CUDA Data Parallel Primitives library
+// -------------------------------------------------------------
+// $Revision: 5636 $
+// $Date: 2009-07-02 13:39:38 +1000 (Thu, 02 Jul 2009) $
+// -------------------------------------------------------------
+// This source code is distributed under the terms of license.txt
+// in the root directory of this source distribution.
+// -------------------------------------------------------------
+
+/**
+ * @file
+ * sharedmem.cuh
+ *
+ * @brief Shared memory declaration struct for templatized types.
+ *
+ * Because dynamically sized shared memory arrays are declared "extern" in CUDA,
+ * we can't templatize their types directly.  To get around this, we declare a
+ * simple wrapper struct that will declare the extern array with a different
+ * name depending on the type.  This avoids linker errors about multiple
+ * definitions.
+ *
+ * To use dynamically allocated shared memory in a templatized __global__ or
+ * __device__ function, just replace code like this:
+ *
+ * <pre>
+ *  template<class T>
+ *  __global__ void
+ *  foo( T* d_out, T* d_in)
+ *  {
+ *      // Shared mem size is determined by the host app at run time
+ *      extern __shared__  T sdata[];
+ *      ...
+ *      doStuff(sdata);
+ *      ...
+ *  }
+ * </pre>
+ *
+ *  With this
+ * <pre>
+ *  template<class T>
+ *  __global__ void
+ *  foo( T* d_out, T* d_in)
+ *  {
+ *      // Shared mem size is determined by the host app at run time
+ *      SharedMemory<T> smem;
+ *      T* sdata = smem.getPointer();
+ *      ...
+ *      doStuff(sdata);
+ *      ...
+ *  }
+ * </pre>
+ */
+
+#ifndef _SHAREDMEM_H_
+#define _SHAREDMEM_H_
+
+/** @brief Wrapper class for templatized dynamic shared memory arrays.
+ *
+ * Template specialization on the type \a T is used to declare a differently
+ * named dynamic shared memory array for each type
+ * (\code extern __shared__ T s_type[] \endcode).
+ *
+ * Specializations are provided in this file for:
+ * \c int, \c unsigned \c int, \c char, \c unsigned \c char, \c short,
+ * \c unsigned \c short, \c long, \c unsigned \c long, \c long \c long,
+ * \c unsigned \c long \c long, \c bool, \c float, \c double and \c uchar4.
+ * Further specializations can be added for user defined types.
+ *
+ * This primary template is the fallback for every type without a
+ * specialization; its getPointer() returns a null pointer.
+ */
+template <typename T>
+struct SharedMemory {
+  //! @brief Fallback accessor for types that have no specialization.
+  //! @returns Always a null pointer
+  __device__ T* getPointer()
+  {
+    // A link-time guard against un-specialized types used to live here but is disabled:
+    //   extern __device__ void Error_UnsupportedType();
+    //   Error_UnsupportedType();
+    return nullptr;
+  }
+  // TODO: Use operator overloading to make this class look like a regular array
+};
+
+// Specializations of SharedMemory for the following types:
+// int, uint, char, uchar, short, ushort, long, ulong, long long, unsigned long long,
+// bool, float, double, and uchar4.
+// One could also specialize it for user-defined types.
+
+//! Specialization for \c int: exposes the dynamic shared memory array declared as \c s_int.
+template <>
+struct SharedMemory<int> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_int
+  __device__ int* getPointer()
+  {
+    extern __shared__ int s_int[];
+    return s_int;
+  }
+};
+
+//! Specialization for \c unsigned \c int: exposes the dynamic shared memory array declared as \c s_uint.
+template <>
+struct SharedMemory<unsigned int> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_uint
+  __device__ unsigned int* getPointer()
+  {
+    extern __shared__ unsigned int s_uint[];
+    return s_uint;
+  }
+};
+
+//! Specialization for \c char: exposes the dynamic shared memory array declared as \c s_char.
+template <>
+struct SharedMemory<char> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_char
+  __device__ char* getPointer()
+  {
+    extern __shared__ char s_char[];
+    return s_char;
+  }
+};
+
+//! Specialization for \c unsigned \c char: exposes the dynamic shared memory array declared as \c s_uchar.
+template <>
+struct SharedMemory<unsigned char> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_uchar
+  __device__ unsigned char* getPointer()
+  {
+    extern __shared__ unsigned char s_uchar[];
+    return s_uchar;
+  }
+};
+
+//! Specialization for \c short: exposes the dynamic shared memory array declared as \c s_short.
+template <>
+struct SharedMemory<short> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_short
+  __device__ short* getPointer()
+  {
+    extern __shared__ short s_short[];
+    return s_short;
+  }
+};
+
+//! Specialization for \c unsigned \c short: exposes the dynamic shared memory array declared as \c s_ushort.
+template <>
+struct SharedMemory<unsigned short> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_ushort
+  __device__ unsigned short* getPointer()
+  {
+    extern __shared__ unsigned short s_ushort[];
+    return s_ushort;
+  }
+};
+
+//! Specialization for \c long: exposes the dynamic shared memory array declared as \c s_long.
+template <>
+struct SharedMemory<long> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_long
+  __device__ long* getPointer()
+  {
+    extern __shared__ long s_long[];
+    return s_long;
+  }
+};
+
+//! Specialization for \c unsigned \c long: exposes the dynamic shared memory array declared as \c s_ulong.
+template <>
+struct SharedMemory<unsigned long> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_ulong
+  __device__ unsigned long* getPointer()
+  {
+    extern __shared__ unsigned long s_ulong[];
+    return s_ulong;
+  }
+};
+
+//! Specialization for \c long \c long: exposes the dynamic shared memory array declared as \c s_longlong.
+template <>
+struct SharedMemory<long long> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_longlong
+  __device__ long long* getPointer()
+  {
+    extern __shared__ long long s_longlong[];
+    return s_longlong;
+  }
+};
+
+//! Specialization for \c unsigned \c long \c long: exposes the dynamic shared memory array declared as \c s_ulonglong.
+template <>
+struct SharedMemory<unsigned long long> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_ulonglong
+  __device__ unsigned long long* getPointer()
+  {
+    extern __shared__ unsigned long long s_ulonglong[];
+    return s_ulonglong;
+  }
+};
+
+//! Specialization for \c bool: exposes the dynamic shared memory array declared as \c s_bool.
+template <>
+struct SharedMemory<bool> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_bool
+  __device__ bool* getPointer()
+  {
+    extern __shared__ bool s_bool[];
+    return s_bool;
+  }
+};
+
+//! Specialization for \c float: exposes the dynamic shared memory array declared as \c s_float.
+template <>
+struct SharedMemory<float> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_float
+  __device__ float* getPointer()
+  {
+    extern __shared__ float s_float[];
+    return s_float;
+  }
+};
+
+//! Specialization for \c double: exposes the dynamic shared memory array declared as \c s_double.
+template <>
+struct SharedMemory<double> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_double
+  __device__ double* getPointer()
+  {
+    extern __shared__ double s_double[];
+    return s_double;
+  }
+};
+
+//! Specialization for \c uchar4: exposes the dynamic shared memory array declared as \c s_uchar4.
+template <>
+struct SharedMemory<uchar4> {
+  //! @returns Pointer to the runtime-sized shared memory array \c s_uchar4
+  __device__ uchar4* getPointer()
+  {
+    extern __shared__ uchar4 s_uchar4[];
+    return s_uchar4;
+  }
+};
+
+#endif //_SHAREDMEM_H_
+
+// Leave this at the end of the file
+// Local Variables:
+// mode:c++
+// c-file-style: "NVIDIA"
+// End:
\ No newline at end of file
diff --git a/sumcheck/cuda/include/LinearGKR/scratchpad.cuh b/sumcheck/cuda/include/LinearGKR/scratchpad.cuh
new file mode 100644
index 00000000..e2ccd02f
--- /dev/null
+++ b/sumcheck/cuda/include/LinearGKR/scratchpad.cuh
@@ -0,0 +1,58 @@
+#pragma once
+
+#include "circuit/circuit.cuh"
+
+namespace gkr{
+
+    /**
+     * Host-side scratch buffers used while proving a circuit.
+     *
+     * prepare() sizes every buffer from the largest layer of the circuit and
+     * allocates them with malloc(); the destructor releases them with free().
+     * All pointers start out as nullptr, so destroying an instance on which
+     * prepare() was never called is safe, and calling prepare() again releases
+     * the previous buffers before allocating new ones.
+     *
+     * The buffers are returned by malloc() uninitialised; no constructors of
+     * F / F_primitive are run.
+     *
+     * NOTE(review): the class still has the implicitly generated copy
+     * operations, so copying an instance would make two objects free the same
+     * buffers. Confirm no caller copies a scratch pad.
+     */
+    template<typename F, typename F_primitive>
+    class GKRScratchPad{
+    private:
+        // Allocates an uninitialised array of `count` elements of type T with malloc().
+        template<typename T>
+        static T* _alloc_array(size_t count){
+            return reinterpret_cast<T*>(malloc(count * sizeof(T)));
+        }
+
+        // Frees every buffer and resets the pointers so a later release is a no-op.
+        void _mem_release(){
+            free(v_evals);
+            free(hg_evals);
+            free(eq_evals_at_rx);
+            free(eq_evals_at_rz1);
+            free(eq_evals_at_rz2);
+            free(eq_evals_first_half);
+            free(eq_evals_second_half);
+            free(gate_exists);
+
+            v_evals = nullptr;
+            hg_evals = nullptr;
+            eq_evals_at_rx = nullptr;
+            eq_evals_at_rz1 = nullptr;
+            eq_evals_at_rz2 = nullptr;
+            eq_evals_first_half = nullptr;
+            eq_evals_second_half = nullptr;
+            gate_exists = nullptr;
+        }
+
+        // (Re)allocates all buffers.
+        //   max_nb_output: element count of the output-sized buffers
+        //   max_nb_input:  element count of the input-sized buffers
+        void _mem_init(uint32_t max_nb_output, uint32_t max_nb_input){
+            // Drop buffers left over from an earlier prepare() call.
+            _mem_release();
+
+            v_evals = _alloc_array<F>(max_nb_input);
+            hg_evals = _alloc_array<F>(max_nb_input);
+            eq_evals_at_rx = _alloc_array<F_primitive>(max_nb_input);
+
+            eq_evals_at_rz1 = _alloc_array<F_primitive>(max_nb_output);
+            eq_evals_at_rz2 = _alloc_array<F_primitive>(max_nb_output);
+            eq_evals_first_half = _alloc_array<F_primitive>(max_nb_output);
+            eq_evals_second_half = _alloc_array<F_primitive>(max_nb_output);
+
+            gate_exists = _alloc_array<bool>(max_nb_input);
+        }
+
+    public:
+        // Input-sized buffers (2^max_nb_input_vars elements each).
+        F *v_evals = nullptr, *hg_evals = nullptr;
+        F_primitive *eq_evals_at_rx = nullptr;
+        // Output-sized buffers (2^max_nb_output_vars elements each).
+        F_primitive *eq_evals_at_rz1 = nullptr, *eq_evals_at_rz2 = nullptr;
+        F_primitive *eq_evals_first_half = nullptr, *eq_evals_second_half = nullptr;
+        // Input-sized flag buffer.
+        bool *gate_exists = nullptr;
+
+        // Sizes the buffers for the widest layer of `circuit` and allocates them.
+        void prepare(const Circuit<F, F_primitive> &circuit){
+            uint32_t max_nb_output_vars = 0, max_nb_input_vars = 0;
+            for (int l_idx = 0; l_idx < circuit.layers_len; l_idx++){
+                // Bind by reference: only two counters are read, no need to copy the layer.
+                const CircuitLayer<F, F_primitive>& layer = circuit.layers[l_idx];
+                max_nb_output_vars = std::max(max_nb_output_vars, layer.nb_output_vars);
+                max_nb_input_vars = std::max(max_nb_input_vars, layer.nb_input_vars);
+            }
+            _mem_init(uint32_t(1) << max_nb_output_vars, uint32_t(1) << max_nb_input_vars);
+        }
+
+        ~GKRScratchPad(){
+            _mem_release();
+        }
+    };
+
+}// namespace gkr
+
diff --git a/sumcheck/cuda/include/LinearGKR/sumcheck.cuh b/sumcheck/cuda/include/LinearGKR/sumcheck.cuh
new file mode 100644
index 00000000..4be1412e
--- /dev/null
+++ b/sumcheck/cuda/include/LinearGKR/sumcheck.cuh
@@ -0,0 +1,193 @@
+#pragma once
+
+#include <vector>  // Only Verifier needs vector
+#include <cassert>
+#include <cstdio>
+
+#include "fiat_shamir/transcript.cuh"
+#include "circuit/circuit.cuh"
+#include "sumcheck_helper.cuh"
+#include "sumcheck_verifier_utils.cuh"
+
+namespace gkr{
+
+    /**
+     * Prover side of the sum-check for one GKR layer.
+     *
+     * Runs 2 * poly.nb_input_vars rounds. In every round the helper produces
+     * three evaluations, which are appended to the transcript before a
+     * challenge is drawn from it and fed back to the helper. When the round
+     * index reaches poly.nb_input_vars the helper's _prepare_phase_two() is
+     * invoked; helper.vx_claim() is appended after the last round of the
+     * first half and helper.vy_claim() after the final round.
+     *
+     * Timings are accumulated into `timer` and a breakdown is printed to
+     * std::cout before returning.
+     *
+     * NOTE(review): `rz1s` and `rz2s` are pointers passed BY VALUE. The
+     * buffers allocated below are assigned to the local copies only, so the
+     * caller never observes them and they cannot be freed by the caller.
+     * Returning them requires changing the parameters to `F_primitive*&`
+     * (or similar), which has to be coordinated with the call sites.
+     */
+    template<typename F, typename F_primitive>
+    void sumcheck_prove_gkr_layer(
+            // Circuit
+            const CircuitLayer<F, F_primitive>& poly,
+
+            // rz1[rz1_outer_len][rz1_inner_len]
+            const F_primitive* rz1, const uint32_t & rz1_inner_len,
+
+            // rz2[rz2_outer_len][rz2_inner_len]
+            const F_primitive* rz2, const uint32_t & rz2_inner_len,
+
+            // Random Combination
+            const F_primitive& alpha, const F_primitive& beta,
+
+            // Proof Transcript
+            Transcript<F, F_primitive>& transcript,
+
+            // Scratchpad
+            GKRScratchPad<F, F_primitive>& scratch_pad,
+
+            // Return results
+            F_primitive* rz1s, F_primitive* rz2s,
+
+            // Timer
+            TimingBreakdown& timer
+
+    ){
+        using hr_clock = std::chrono::high_resolution_clock;
+
+        // Define the helper
+        SumcheckGKRHelper<F, F_primitive> helper;
+
+        // Prepare for GKR (accumulated in milliseconds)
+        auto start = hr_clock::now();
+        helper.prepare(poly,
+                       rz1, rz1_inner_len,
+                       rz2,  rz2_inner_len,
+                       alpha, beta, scratch_pad);
+        auto end = hr_clock::now();
+        timer.prepare_time += (double) std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+
+        // Sumcheck Main Loop
+        for (uint32_t i_var = 0; i_var < (2 * poly.nb_input_vars); i_var++){
+            // Prepare for Phase two (prepare Y)
+            start = hr_clock::now();
+            if (i_var == poly.nb_input_vars){ helper._prepare_phase_two(); }
+            end = hr_clock::now();
+            timer.prepare_time += (double) std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+
+            // Polynomial Evaluation
+            F evals[3];
+            helper.poly_evals_at(i_var, 2, evals, timer);
+
+            // Fiat Shamir to get random challenge (accumulated in nanoseconds)
+            start = hr_clock::now();
+            transcript.append_f(evals[0]);
+            transcript.append_f(evals[1]);
+            transcript.append_f(evals[2]);
+            auto r = transcript.challenge_f();
+            end = hr_clock::now();
+            timer.fiathash_time += (double) std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
+
+            // Evaluate on challenge
+            helper.receive_challenge(i_var, r, timer);
+
+            // After the last round of the first phase, record the X claim in the transcript
+            if (i_var == poly.nb_input_vars - 1){
+                transcript.append_f(helper.vx_claim());
+            }
+        }
+
+        // The Y claim is recorded once all rounds are done
+        transcript.append_f(helper.vy_claim());
+
+        const uint32_t rx_len = helper.rx_len;
+        const uint32_t ry_len = helper.ry_len;
+
+        // Copy the challenges out of the helper.
+        // NOTE(review): see the function comment - these buffers are not visible to the caller.
+        rz1s = (F_primitive*) malloc(sizeof(F_primitive) * rx_len);
+        rz2s = (F_primitive*) malloc(sizeof(F_primitive) * ry_len);
+
+        for(uint32_t x_i = 0; x_i < rx_len; x_i++){
+            rz1s[x_i] = helper.rx[x_i];
+        }
+        for(uint32_t y_i = 0; y_i < ry_len; y_i++){
+            rz2s[y_i] = helper.ry[y_i];
+        }
+
+        // Print out timing breakdown at the end of proof
+        std::cout << "Total LinearGKR Prepare:\t"     << (float) timer.prepare_time  / 1000.0 << "\ts" << std::endl;
+        std::cout << "-------------------------------------------" << std::endl;
+        std::cout << "Total CPU <> CUDA (PCIe):\t"     << (float) timer.pcie_time / 1000.0 << "\tms" << std::endl;
+        std::cout << "-------------------------------------------" << std::endl;
+        std::cout << "    - PolyEval:  \t\t"    << (float) timer.polyeval_time / 1000.0 << "\tms" << std::endl;
+        std::cout << "    - Fiat-Shamir:  \t\t" << (float) timer.fiathash_time / 1000000.0 << "\tms" << std::endl;
+        std::cout << "    - Challenge:  \t\t"   << (float) timer.challenge_time/ 1000.0 << "\tms" << std::endl;
+        std::cout << "Total Sum-check:  \t\t"  <<
+        (float) ((timer.challenge_time + timer.polyeval_time) / 1000.0 + (timer.fiathash_time / 1000000.0)) << "\tms" << std::endl;
+        std::cout << "-------------------------------------------" << std::endl;
+    }
+
+    /**
+     * Verifier side of the sum-check for one GKR layer.
+     *
+     * Replays 2 * poly.nb_input_vars rounds from `proof`: each round reads three
+     * evaluations, appends them to the transcript, draws a challenge and checks
+     * that the first two evaluations add up to the running sum. The challenges of
+     * the first half are collected in rx, those of the second half in ry.
+     *
+     * Returns {verified, rx, ry, vx_claim, vy_claim}.
+     */
+    template<typename F, typename F_primitive>
+    std::tuple<
+    bool,
+    std::vector<F_primitive>, std::vector<F_primitive>,
+    F, F > sumcheck_verify_gkr_layer(
+            const CircuitLayer<F, F_primitive>& poly,
+            const F_primitive* rz1,
+            const F_primitive* rz2,
+            const F& claimed_v1,
+            const F& claimed_v2,
+            const F_primitive& alpha,
+            const F_primitive& beta,
+            Proof<F>& proof,
+            Transcript<F, F_primitive>& transcript){
+
+        const uint32_t nb_vars = poly.nb_input_vars;
+
+        // Running claim, starting from the random combination of the two input claims
+        F sum = claimed_v1 * alpha + claimed_v2 * beta;
+
+        // Challenges of phase one (rx) and phase two (ry); `challenges` points at the active one
+        std::vector<F_primitive> rx, ry;
+        std::vector<F_primitive>* challenges = &rx;
+
+        F vx_claim;
+        bool verified = true;
+
+        for (uint32_t round = 0; round < (2 * nb_vars); round++){
+            // Three evaluations of this round's polynomial, read in proof order
+            const std::vector<F> round_evals = {proof.get_next_and_step(), proof.get_next_and_step(), proof.get_next_and_step()};
+
+            for (const F& eval : round_evals){
+                transcript.append_f(eval);
+            }
+            auto r = transcript.challenge_f();
+            challenges->emplace_back(r);
+
+            // Consistency check against the running claim, then fold in the challenge
+            verified &= ((round_evals[0] + round_evals[1]) == sum);
+            sum = degree_2_eval(round_evals, r);
+
+            // Last round of phase one: consume the X claim and switch to collecting Y challenges
+            const bool phase_one_done = (round == nb_vars - 1);
+            if (phase_one_done){
+                auto t_begin = std::chrono::high_resolution_clock::now();
+                vx_claim = proof.get_next_and_step();
+                sum -= vx_claim * eval_sparse_circuit_connect_poly<F, F_primitive, 1>(poly.add,
+                                                                                      rz1, poly.nb_output_vars,
+                                                                                      rz2, poly.nb_output_vars,
+                                                                                      alpha, beta, {rx}
+                );
+                transcript.append_f(vx_claim);
+                auto t_end = std::chrono::high_resolution_clock::now();
+                float elapsed_ms = (float) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_begin).count();
+                if(verbose) printf("verify: X sparse eval time = %f s\n", elapsed_ms/1000.0);
+
+                challenges = &ry;
+            }
+        }
+
+        // Final check: the remaining claim must match the mul-gate term
+        auto t_begin = std::chrono::high_resolution_clock::now();
+        F vy_claim = proof.get_next_and_step();
+        verified &= (sum == vx_claim * vy_claim * eval_sparse_circuit_connect_poly<F, F_primitive, 2>(poly.mul,
+                                                                                                      rz1, poly.nb_output_vars,
+                                                                                                      rz2, poly.nb_output_vars,
+                                                                                                      alpha, beta, {rx, ry}));
+        transcript.append_f(vy_claim);
+        auto t_end = std::chrono::high_resolution_clock::now();
+        float elapsed_ms = (float) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_begin).count();
+        if(verbose) printf("verify: Y sparse eval time = %f s\n", elapsed_ms/1000.0);
+
+        return {verified, rx, ry, vx_claim, vy_claim};
+    }
+}
diff --git a/sumcheck/cuda/include/LinearGKR/sumcheck_common.cuh b/sumcheck/cuda/include/LinearGKR/sumcheck_common.cuh
new file mode 100644
index 00000000..2279cdb2
--- /dev/null
+++ b/sumcheck/cuda/include/LinearGKR/sumcheck_common.cuh
@@ -0,0 +1,172 @@
+#pragma once
+
+#include <cuda_runtime.h>
+#include <chrono>
+
+#include "circuit/circuit.cuh"
+#include "field/M31.cuh"
+#include "field/M31ext3.cuh"
+#include "field/bn254.cuh"
+
+namespace gkr{
+
+    // Runtime switches.
+    // Both are `static` at namespace scope in a header, so every translation unit
+    // that includes this file gets its own independent copy.
+    //   verbose: enables optional diagnostic output in code that checks it.
+    //   useGPU:  selects the CUDA code paths of _eq_evals_at_primitive and _eq_evals_at.
+    static bool verbose = false;
+    static bool useGPU = true;
+
+    // Accumulators for the time spent in the different stages of proving.
+    // Every field starts at zero; note that the fields do NOT share a unit -
+    // the unit of each one is given in its comment.
+    struct TimingBreakdown{
+        // Linear GKR, prepare time                 (ms)
+        double prepare_time = 0.0;
+
+        // PCIe transfer time                       (us)
+        double pcie_time = 0.0;
+
+        // Sum-check's Polynomial Evaluation time   (us)
+        double polyeval_time = 0.0;
+
+        // Sum-check's Fiat-Shamir Hash time        (ns)
+        double fiathash_time = 0.0;
+
+        // Sum-check's Receive Challenge time       (us)
+        double challenge_time = 0.0;
+    };
+
+    // Evaluates eq(x, y) = x * y + (1 - x) * (1 - y) for a single pair of values.
+    // The expanded form 2xy - x - y + 1 is used; it relies on F_primitive
+    // supporting arithmetic with the integer literals 2 and 1.
+    // Callable from both host and device code.
+    template<typename F_primitive>
+    __host__ __device__
+    inline F_primitive _eq(const F_primitive& x, const F_primitive& y){
+        // x * y + (1 - x) * (1 - y)
+        return x * y * 2 - x - y + 1;
+    }
+
+    // Kernel for one doubling step of the eq table.
+    // For every index below nb_cur_evals it writes
+    //   dst[idx]                = src[idx] * (*eq_z_i_zero)
+    //   dst[idx + nb_cur_evals] = src[idx] * (*eq_z_i_one)
+    // so dst must hold 2 * nb_cur_evals elements. One thread handles one index.
+    template<typename F_primitive>
+    __global__
+    void _eq_evals_kernel(const F_primitive* __restrict__ eq_evals_src,
+                                F_primitive* __restrict__ eq_evals_dst,
+                          const F_primitive* eq_z_i_one,
+                          const F_primitive* eq_z_i_zero,
+                                uint32_t nb_cur_evals){
+        const uint32_t tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+        // Surplus threads of the last block have nothing to do
+        if(tid >= nb_cur_evals){
+            return;
+        }
+
+        eq_evals_dst[tid + nb_cur_evals] = eq_evals_src[tid] * (*eq_z_i_one);
+        eq_evals_dst[tid]                = eq_evals_src[tid] * (*eq_z_i_zero);
+    }
+
+    // Builds the table of eq evaluations for the point r[0 .. r_len), scaled by mul_factor.
+    //
+    // eq_evals[0] is seeded with mul_factor; round i then doubles the table from
+    // 2^i to 2^(i+1) entries:
+    //   eq_evals[j + 2^i] = eq_evals[j] * eq(r[i], 1)
+    //   eq_evals[j]       = eq_evals[j] * eq(r[i], 0)
+    // eq_evals must therefore have room for 2^r_len elements.
+    //
+    // Rounds with i > 10 run on the GPU when useGPU is set, the smaller ones on the
+    // CPU. The device buffers are allocated once, sized for the last (largest)
+    // round, and reused by every GPU round instead of being allocated and freed
+    // in each round.
+    //
+    // NOTE(review): return codes of the CUDA runtime calls are not checked.
+    template<typename F_primitive>
+    void _eq_evals_at_primitive(const F_primitive* r,
+                                const uint32_t & r_len,
+                                const F_primitive& mul_factor,
+                                F_primitive* eq_evals){
+        // Rounds with an index above this value are large enough to be worth a kernel launch
+        const uint32_t last_cpu_round = 10;
+        // At least one round runs on the device iff the final round index (r_len - 1) exceeds the threshold
+        const bool use_device = useGPU && r_len > last_cpu_round + 1;
+
+        F_primitive* d_eq_evals_src = nullptr;
+        F_primitive* d_eq_evals_dst = nullptr;
+        F_primitive* d_eq_z_i_one   = nullptr;
+        F_primitive* d_eq_z_i_zero  = nullptr;
+        if(use_device){
+            // Source size of the last round; its destination is twice as large
+            const size_t max_src_evals = size_t(1) << (r_len - 1);
+            cudaMalloc((void **)&d_eq_evals_src, max_src_evals * sizeof(F_primitive));
+            cudaMalloc((void **)&d_eq_evals_dst, 2 * max_src_evals * sizeof(F_primitive));
+            cudaMalloc((void **)&d_eq_z_i_one, sizeof(F_primitive));
+            cudaMalloc((void **)&d_eq_z_i_zero, sizeof(F_primitive));
+        }
+
+        eq_evals[0] = mul_factor;
+        for (uint32_t i = 0; i < r_len; i++){
+            // Unsigned shift: well defined for every i < 32
+            const uint32_t nb_cur_evals = uint32_t(1) << i; // max(nb_cur_evals) = 1 << (r_len - 1)
+            F_primitive eq_z_i_zero = _eq(r[i], F_primitive::zero());
+            F_primitive eq_z_i_one  = _eq(r[i], F_primitive::one());
+
+            if(use_device && i > last_cpu_round){
+                // Move input
+                cudaMemcpy(d_eq_evals_src,    eq_evals,    nb_cur_evals * sizeof(F_primitive), cudaMemcpyHostToDevice);
+                cudaMemcpy(d_eq_z_i_one,    &eq_z_i_one,    sizeof(F_primitive), cudaMemcpyHostToDevice);
+                cudaMemcpy(d_eq_z_i_zero,    &eq_z_i_zero,    sizeof(F_primitive), cudaMemcpyHostToDevice);
+                // Launch kernel
+                uint32_t num_thread = 256;
+                uint32_t num_block = (nb_cur_evals + num_thread - 1) / num_thread;
+                _eq_evals_kernel<<<num_block, num_thread>>>(
+                        d_eq_evals_src, d_eq_evals_dst,
+                        d_eq_z_i_one, d_eq_z_i_zero,
+                        nb_cur_evals);
+                // Copy the result back; this blocking copy also orders the kernel
+                // before the next round reuses the device buffers
+                cudaMemcpy(eq_evals,    d_eq_evals_dst,    2 * nb_cur_evals * sizeof(F_primitive), cudaMemcpyDeviceToHost);
+            }else{
+                // Too small, run on CPU.
+                // The upper half is written first because it reads eq_evals[j],
+                // which the second statement overwrites.
+                for (uint32_t j = 0; j < nb_cur_evals; j++){
+                    eq_evals[j + nb_cur_evals] = eq_evals[j] * eq_z_i_one;
+                    eq_evals[j]                = eq_evals[j] * eq_z_i_zero;
+                }
+            }
+        }
+
+        if(use_device){
+            cudaFree(d_eq_evals_src);
+            cudaFree(d_eq_evals_dst);
+            cudaFree(d_eq_z_i_one);
+            cudaFree(d_eq_z_i_zero);
+        }
+    }
+
+    // Kernel that combines two half-size tables into the full table:
+    //   d_eq_evals[i] = d_sqrtN1st[low bits of i] * d_sqrtN2nd[high bits of i]
+    // where the low part consists of r_len / 2 bits. One thread handles one index i
+    // of the 2^r_len outputs.
+    template<typename F_primitive>
+    __global__
+    void cross_prod_eq(const F_primitive* __restrict__ d_sqrtN1st,
+                       const F_primitive* __restrict__ d_sqrtN2nd,
+                             F_primitive* __restrict__ d_eq_evals,
+                             uint32_t r_len
+                             ){
+        // Same index split as the CPU implementation
+        const auto low_bits = r_len / 2;
+        const auto low_mask = (1 << low_bits) - 1;
+
+        const uint32_t i = blockIdx.x * blockDim.x + threadIdx.x;
+
+        // Surplus threads of the last block have nothing to do
+        if(i >= (1 << r_len)){
+            return;
+        }
+
+        const uint32_t low_idx  = i &  low_mask;
+        const uint32_t high_idx = i >> low_bits;
+        d_eq_evals[i] = d_sqrtN1st[low_idx] * d_sqrtN2nd[high_idx];
+    }
+
+    // Computes the multilinear extension eq(a, b) at a = r for every bit string b.
+    // The bit strings are interpreted as little-endian numbers, and every result is
+    // multiplied by `mul_factor`.
+    //
+    // r is split into a low part (r_len / 2 entries) and a high part (the rest).
+    // The table of each part is built separately into sqrtN1st / sqrtN2nd, and the
+    // 2^r_len entries of eq_evals are the pairwise products of those two tables.
+    // The products are computed on the GPU when useGPU is set, otherwise on the CPU.
+    template<typename F_primitive>
+    void _eq_evals_at(const F_primitive*    r,
+                      const uint32_t&       r_len,
+                      const F_primitive& mul_factor,
+                      F_primitive* eq_evals,
+                      F_primitive* sqrtN1st,
+                      F_primitive* sqrtN2nd){
+
+        auto low_bits = r_len / 2;
+        auto low_mask = (1 << low_bits) - 1;
+
+        // Half-size tables; only the low one carries mul_factor
+        _eq_evals_at_primitive(r, low_bits, mul_factor, sqrtN1st);
+        _eq_evals_at_primitive(&r[low_bits], r_len - low_bits, F_primitive(1), sqrtN2nd);
+
+        if(!useGPU){
+            // CPU cross product
+            for (uint32_t i = 0; i < (uint32_t)(1 << r_len); i++){
+                uint32_t low_idx  = i &  low_mask;
+                uint32_t high_idx = i >> low_bits;
+                eq_evals[i] = sqrtN1st[low_idx] * sqrtN2nd[high_idx];
+            }
+            return;
+        }
+
+        // GPU cross product
+        uint32_t num_thread = 128;
+        uint32_t num_block = ((1 << r_len) + num_thread - 1) / num_thread;
+
+        // Buffer sizes in bytes
+        const size_t low_bytes  = sizeof(F_primitive) * (1 << low_bits);
+        const size_t high_bytes = sizeof(F_primitive) * (1 << (r_len - low_bits));
+        const size_t full_bytes = sizeof(F_primitive) * (1 << r_len);
+
+        // Device buffers
+        F_primitive* d_sqrtN1st;
+        F_primitive* d_sqrtN2nd;
+        F_primitive* d_eq_evals;
+        cudaMalloc((void **)&d_sqrtN1st, low_bytes);
+        cudaMalloc((void **)&d_sqrtN2nd, high_bytes);
+        cudaMalloc((void **)&d_eq_evals, full_bytes);
+
+        // Host -> device
+        cudaMemcpy(d_sqrtN1st, sqrtN1st, low_bytes, cudaMemcpyHostToDevice);
+        cudaMemcpy(d_sqrtN2nd, sqrtN2nd, high_bytes, cudaMemcpyHostToDevice);
+
+        cross_prod_eq<<<num_block, num_thread>>>(d_sqrtN1st, d_sqrtN2nd, d_eq_evals, r_len);
+
+        // Device -> host
+        cudaMemcpy(eq_evals, d_eq_evals, full_bytes, cudaMemcpyDeviceToHost);
+
+        cudaFree(d_sqrtN1st);
+        cudaFree(d_sqrtN2nd);
+        cudaFree(d_eq_evals);
+    }
+} // namespace gkr
diff --git a/sumcheck/cuda/include/LinearGKR/sumcheck_helper.cuh b/sumcheck/cuda/include/LinearGKR/sumcheck_helper.cuh
new file mode 100644
index 00000000..202d6d09
--- /dev/null
+++ b/sumcheck/cuda/include/LinearGKR/sumcheck_helper.cuh
@@ -0,0 +1,535 @@
+#pragma once
+
+#include "circuit/circuit.cuh"
+#include "sumcheck_common.cuh"
+#include "scratchpad.cuh"
+
+#define MAX_RESULT_LEN  32
+
+namespace gkr{
+
+    // CUDA kernel that folds two tables with the challenge r = *d_r.
+    // For every i < size:
+    //   d_bookkeeping_f[i]      = d_src_v[2i] + (d_src_v[2i+1] - d_src_v[2i]) * r
+    //   d_bookkeeping_hg_dst[i] = hg_src[2i]  + (hg_src[2i+1]  - hg_src[2i])  * r
+    // One thread handles one output index.
+    template<typename F, typename F_primitive>
+    __global__
+    void sumcheck_kernel(F_primitive*       d_r,                   // Challenge kernel received
+                         F* __restrict__    d_src_v,               // Read  only
+                         F* __restrict__    d_bookkeeping_f,       // Write only
+                         F* __restrict__    d_bookkeeping_hg_src,  // Read  only
+                         F* __restrict__    d_bookkeeping_hg_dst,  // Write only
+                         uint32_t size
+    ){
+        const uint32_t out_idx = blockDim.x * blockIdx.x + threadIdx.x;
+
+        // Every thread loads the challenge, including the surplus ones
+        F_primitive challenge = *d_r;
+
+        if(out_idx >= size){
+            return;
+        }
+
+        // Positions of the pair that is folded into out_idx
+        const uint32_t even = 2 * out_idx;
+        const uint32_t odd  = 2 * out_idx + 1;
+
+        d_bookkeeping_f     [out_idx] = d_src_v[even]              + (d_src_v[odd]              - d_src_v[even]             ) * challenge;
+        d_bookkeeping_hg_dst[out_idx] = d_bookkeeping_hg_src[even] + (d_bookkeeping_hg_src[odd] - d_bookkeeping_hg_src[even]) * challenge;
+    }
+
+    // CUDA kernel computing per-block partial sums of three products.
+    // For every idx < evalSize, with (f0, f1) = d_src_v[2idx], d_src_v[2idx+1] and
+    // (h0, h1) = d_bookkeeping_hg[2idx], d_bookkeeping_hg[2idx+1], the terms are
+    //   p0 = f0 * h0,  p1 = f1 * h1,  p2 = (f0 + f1) * (h0 + h1).
+    // Each block sums its terms in shared memory and writes the three sums to
+    // d_block_results[3 * blockIdx.x + {0, 1, 2}].
+    //
+    // Launch requirements visible in the code:
+    //   - dynamic shared memory of 3 * blockDim.x * sizeof(F) bytes;
+    //   - the halving reduction only covers every element when blockDim.x is a
+    //     power of two.
+    template<typename F>
+    __global__
+    void poly_eval_kernel(F* __restrict__ d_src_v,
+                          F* __restrict__ d_bookkeeping_hg,
+                          F* __restrict__ d_block_results,
+                          int evalSize){
+        int idx = blockIdx.x * blockDim.x + threadIdx.x;
+        int tid = threadIdx.x;
+
+        // Dynamic shared memory is declared as raw bytes and viewed as F.
+        // Declaring `extern __shared__ F name[]` directly in a template gives the
+        // same extern array a different type in every instantiation, which is
+        // rejected once the kernel is instantiated for more than one F.
+        // assumes alignof(F) <= 16 - TODO confirm for all field types
+        extern __shared__ __align__(16) unsigned char poly_eval_smem[];
+        F* s_data = reinterpret_cast<F*>(poly_eval_smem);
+        F* s_p0 = s_data;
+        F* s_p1 = &s_data[blockDim.x];
+        F* s_p2 = &s_data[2 * blockDim.x];
+
+        // Threads beyond evalSize contribute zero to the sums
+        s_p0[tid] = F::zero();
+        s_p1[tid] = F::zero();
+        s_p2[tid] = F::zero();
+
+        if (idx < evalSize) {
+            auto f_v_0 = d_src_v[idx * 2];
+            auto f_v_1 = d_src_v[idx * 2 + 1];
+            auto hg_v_0 = d_bookkeeping_hg[idx * 2];
+            auto hg_v_1 = d_bookkeeping_hg[idx * 2 + 1];
+
+            s_p0[tid] = f_v_0 * hg_v_0;
+            s_p1[tid] = f_v_1 * hg_v_1;
+            s_p2[tid] = (f_v_0 + f_v_1) * (hg_v_0 + hg_v_1);
+        }
+
+        __syncthreads();
+
+        // Perform parallel reduction in shared memory
+        for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
+            if (tid < stride) {
+                s_p0[tid] += s_p0[tid + stride];
+                s_p1[tid] += s_p1[tid + stride];
+                s_p2[tid] += s_p2[tid + stride];
+            }
+            __syncthreads();
+        }
+
+        // Write the block result to global memory
+        if (tid == 0) {
+            d_block_results[blockIdx.x * 3] = s_p0[0];
+            d_block_results[blockIdx.x * 3 + 1] = s_p1[0];
+            d_block_results[blockIdx.x * 3 + 2] = s_p2[0];
+        }
+    }
+
+    // CUDA kernel performing one further reduction level over per-block results.
+    // The source holds num_src_blocks triples (p0, p1, p2); every thread loads one
+    // triple (zeros beyond num_src_blocks), each block sums its triples in shared
+    // memory and writes one triple to d_block_results_dst[3 * blockIdx.x + {0, 1, 2}].
+    //
+    // Launch requirements visible in the code:
+    //   - dynamic shared memory of 3 * blockDim.x * sizeof(F) bytes;
+    //   - the halving reduction only covers every element when blockDim.x is a
+    //     power of two.
+    template<typename F>
+    __global__
+    void reduce_blocks(const F* __restrict__ d_block_results_src,
+                             F* __restrict__ d_block_results_dst,
+                             uint32_t num_src_blocks) {
+        uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+        uint32_t tid = threadIdx.x;
+
+        // Dynamic shared memory is declared as raw bytes and viewed as F.
+        // Declaring `extern __shared__ F name[]` directly in a template gives the
+        // same extern array a different type in every instantiation, which is
+        // rejected once the kernel is instantiated for more than one F.
+        // assumes alignof(F) <= 16 - TODO confirm for all field types
+        extern __shared__ __align__(16) unsigned char reduce_blocks_smem[];
+        F* s_data = reinterpret_cast<F*>(reduce_blocks_smem);
+        F* s_p0 = s_data;
+        F* s_p1 = &s_data[blockDim.x];
+        F* s_p2 = &s_data[2 * blockDim.x];
+
+        // Load data into shared memory
+        if(idx < num_src_blocks) {
+            s_p0[tid] = d_block_results_src[idx * 3];
+            s_p1[tid] = d_block_results_src[idx * 3 + 1];
+            s_p2[tid] = d_block_results_src[idx * 3 + 2];
+        } else {
+            s_p0[tid] = F::zero();
+            s_p1[tid] = F::zero();
+            s_p2[tid] = F::zero();
+        }
+        __syncthreads();
+
+        // Perform parallel reduction in shared memory
+        for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
+            if (tid < stride) {
+                s_p0[tid] += s_p0[tid + stride];
+                s_p1[tid] += s_p1[tid + stride];
+                s_p2[tid] += s_p2[tid + stride];
+            }
+            __syncthreads();
+        }
+
+        // Write the block result to global memory
+        if (tid == 0) {
+            d_block_results_dst[blockIdx.x * 3] = s_p0[0];
+            d_block_results_dst[blockIdx.x * 3 + 1] = s_p1[0];
+            d_block_results_dst[blockIdx.x * 3 + 2] = s_p2[0];
+        }
+    }
+
+    template<typename F, typename F_primitive>
+    class SumcheckMultiLinearProdHelper {
+    public:
+        uint32_t nb_vars;
+        uint32_t sumcheck_var_idx;
+        uint32_t cur_eval_size;
+        F* bookkeeping_f;
+        F* bookkeeping_hg;
+        const F* initial_v;
+
+        // CUDA device memory
+        bool gpuMode = false;
+        F* d_r;
+        F* d_src_v;
+        F* d_bookkeeping_f;
+        F* d_bookkeeping_hg_src;
+        F* d_bookkeeping_hg_dst;
+        F* d_block_results;
+        F* d_blocks_reduce;
+        bool d_blocks_reduce_malloced = false;
+
+        // Binds the helper to externally provided buffers and resets the round state.
+        // Only the pointers are stored; nothing is copied or allocated here.
+        //   nb_vars_: number of variables; the evaluation size becomes 1 << nb_vars_
+        //   p1_evals: stored as bookkeeping_f
+        //   p2_evals: stored as bookkeeping_hg
+        //   v:        stored as initial_v
+        void prepare(uint32_t nb_vars_, F* p1_evals, F* p2_evals, const F* v){
+            // Buffers
+            initial_v      = v;
+            bookkeeping_f  = p1_evals;
+            bookkeeping_hg = p2_evals;
+
+            // Round state
+            nb_vars          = nb_vars_;
+            cur_eval_size    = 1 << nb_vars_;
+            sumcheck_var_idx = 0;
+        }
+
+        // GPU path of one sumcheck round: launches poly_eval_kernel over evalSize
+        // pairs, reduces the per-block partial results on the device, and copies
+        // the final three field elements back into p0, p1 and p2.
+        //
+        //   src_v    : host pointer to the values; only read when var_idx == 0,
+        //              where 2 * evalSize elements are uploaded to d_src_v.
+        //   p0/p1/p2 : outputs, overwritten with the three reduced results.
+        //   evalSize : number of work items handed to the kernel.
+        //   var_idx  : current round; round 0 triggers the device allocations, and
+        //              its parity picks which bookkeeping_hg device buffer is read.
+        //   timer    : pcie_time receives the allocation/upload time, polyeval_time
+        //              receives kernel + reduction + read-back time (microseconds).
+        //
+        // NOTE(review): return codes of the CUDA runtime calls and of malloc are
+        // not checked anywhere in this function.
+        void poly_eval_kernel_wrapper(const F*  __restrict__ src_v,
+                                      F& p0,
+                                      F& p1,
+                                      F& p2,
+                                      int evalSize,
+                                      uint32_t& var_idx,
+                                      TimingBreakdown& timer){
+            auto start = std::chrono::high_resolution_clock::now();
+
+            // Define CUDA parameters
+            // Threads per block: evalSize clamped into [32, 512].
+            int num_thread = (evalSize >= 512) ? 512 : (evalSize <= 32 ? 32 : evalSize);
+            int num_block_src = (evalSize + num_thread - 1) / num_thread;
+
+            // First round only: allocate the device buffers and upload the inputs.
+            // They are released in receive_challenge once the GPU phase ends.
+            if(var_idx == 0){
+                // Allocate memory for src_v
+                cudaMalloc((void **)&d_src_v, 2 * evalSize * sizeof(F));
+                cudaMemcpy(d_src_v,    src_v,    2 * evalSize * sizeof(F), cudaMemcpyHostToDevice);
+
+                // Allocate memory for bookkeeping_hg
+                cudaMalloc((void **)&d_bookkeeping_hg_src, 2 * evalSize * sizeof(F));
+                cudaMalloc((void **)&d_bookkeeping_hg_dst, evalSize * sizeof(F));
+                cudaMemcpy(d_bookkeeping_hg_src,    bookkeeping_hg,    2 * evalSize * sizeof(F), cudaMemcpyHostToDevice);
+
+                // Allocate memory for block results
+                // Three field elements (p0, p1, p2) per launched block.
+                cudaMalloc((void **)&d_block_results, num_block_src * 3 * sizeof(F));
+            }
+
+            auto end = std::chrono::high_resolution_clock::now();
+
+            timer.pcie_time += (double) std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
+
+            start = std::chrono::high_resolution_clock::now();
+            // Calculate the size of shared memory
+            size_t sharedMemSize = 3 * num_thread * sizeof(F);
+
+            // Launch Kernel
+            // Even rounds read d_bookkeeping_hg_src, odd rounds read d_bookkeeping_hg_dst.
+            poly_eval_kernel<<<num_block_src, num_thread, sharedMemSize>>>(
+                    d_src_v,
+                    (var_idx % 2 == 0) ? d_bookkeeping_hg_src : d_bookkeeping_hg_dst,
+                    d_block_results,
+                    evalSize
+            );
+
+            // Reduce over block results
+            // Ping-pong between d_block_results and d_blocks_reduce until a single
+            // block remains; choose_reduce records which buffer holds the latest data.
+            bool choose_reduce = false;
+            int num_block_old = num_block_src;
+            while(num_block_src > 1){
+                int num_block_dst = (num_block_src + num_thread - 1) / num_thread;
+                // NOTE(review): d_blocks_reduce is allocated once, sized for the first
+                // reduction that needs it; this presumably relies on evalSize only
+                // shrinking in later rounds - confirm.
+                if(!d_blocks_reduce_malloced){
+                    cudaMalloc((void **)&d_blocks_reduce, num_block_dst * 3 * sizeof(F));
+                    d_blocks_reduce_malloced = true;
+                }
+                reduce_blocks<<<num_block_dst, num_thread, sharedMemSize>>>(
+                        choose_reduce ? d_blocks_reduce : d_block_results,
+                        choose_reduce ? d_block_results : d_blocks_reduce,
+                        num_block_src
+                        );
+                choose_reduce = !choose_reduce;
+                num_block_src = num_block_dst;
+            }
+
+            // Allocate host memory for block results and copy from device
+            F* h_block_results = (F*)malloc(3 * sizeof(F));
+            cudaMemcpy(h_block_results, choose_reduce ? d_blocks_reduce : d_block_results, 3 * sizeof(F), cudaMemcpyDeviceToHost);
+
+            // Hand the three reduced values back to the caller
+            p0 = h_block_results[0];
+            p1 = h_block_results[1];
+            p2 = h_block_results[2];
+
+            // Clean up and record time
+            free(h_block_results);
+            end = std::chrono::high_resolution_clock::now();
+            auto total = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            timer.polyeval_time += (double) total.count();
+
+            // Debug print
+            // Reports the block count of the initial launch, not of the reductions.
+            if(verbose) printf("#block = %d, #thread = %d, time = %.1f us\n", num_block_old, num_thread, (float) total.count());
+        }
+
+        // Polynomial Evaluation
+        void poly_eval_at(uint32_t var_idx, uint32_t degree, const bool *gate_exists, F* evals, TimingBreakdown& timer){
+            // Produces the round polynomial's values at 0, 1 and 2 in evals[0..2].
+            // `degree` is accepted for interface compatibility and is not read.
+
+            // Round 0 reads the caller-supplied values; later rounds read the folded table.
+            const F* src_v = initial_v;
+            if(var_idx != 0){ src_v = bookkeeping_f; }
+
+            // Number of (even, odd) pairs summed in this round.
+            int evalSize = 1 << (nb_vars - var_idx - 1);
+
+            F acc0 = F::zero();  // sum of f(even) * hg(even)
+            F acc1 = F::zero();  // sum of f(odd)  * hg(odd)
+            F acc2 = F::zero();  // sum of (f(even) + f(odd)) * (hg(even) + hg(odd))
+
+            // Switch between CPU vs. GPU implementation
+            if(!useGPU){
+                auto t_begin = std::chrono::high_resolution_clock::now();
+                if(verbose) printf("CPU: poly_eval_at : var_idx = %u, eval_size = %d\n", var_idx, evalSize);
+                for (int pair = 0; pair < evalSize; pair++){
+                    const int even = pair * 2;
+                    const int odd  = pair * 2 + 1;
+                    // Pairs without any gate contribute nothing and are skipped.
+                    if (gate_exists[even] || gate_exists[odd]){
+                        auto f_even  = src_v[even];
+                        auto f_odd   = src_v[odd];
+                        auto hg_even = bookkeeping_hg[even];
+                        auto hg_odd  = bookkeeping_hg[odd];
+                        acc0 += f_even * hg_even;
+                        acc1 += f_odd * hg_odd;
+                        acc2 += (f_even + f_odd) * (hg_even + hg_odd);
+                    }
+                }
+                auto t_end = std::chrono::high_resolution_clock::now();
+                auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(t_end - t_begin);
+                timer.polyeval_time += (double) elapsed.count();
+            }else{
+                if(verbose) printf("CUDA: poly_eval_at : var_idx = %u, eval_size = %d, ", var_idx, evalSize);
+                poly_eval_kernel_wrapper(src_v, acc0, acc1, acc2, evalSize, var_idx, timer);
+            }
+
+            // Turn the raw third sum into the evaluation at 2: 6*p1 + 3*p0 - 2*p2.
+            acc2 = acc1 * F(6) + acc0 * F(3) - acc2 * F(2);
+            evals[0] = acc0;
+            evals[1] = acc1;
+            evals[2] = acc2;
+        }
+
+        // Receive Challenge of MLE Helper
+        // Folds both bookkeeping tables with the challenge r, halving the active
+        // size, then advances the round counter. Runs on the GPU while gpuMode is
+        // set and on the CPU otherwise; gate_exists is only read/updated on the
+        // CPU path. Elapsed time is added to timer.challenge_time (microseconds).
+        void receive_challenge(uint32_t var_idx,     // Index variable, nothing to do with computation
+                               const F_primitive& r, // Random challenge
+                               bool *gate_exists,     // Existence of gates
+                               TimingBreakdown& timer
+                               ){
+            // Select the source
+            // Round 0 folds the original values; later rounds fold bookkeeping_f in place.
+            auto* src_v = (var_idx == 0 ? initial_v : bookkeeping_f);
+
+            // Sanity check
+            assert(var_idx == sumcheck_var_idx && var_idx < nb_vars);
+
+            // Allocate the remaining device buffers if it is the first iteration
+            // NOTE(review): d_src_v and the d_bookkeeping_hg_* buffers used below are
+            // allocated in poly_eval_kernel_wrapper, so this presumably requires
+            // poly_eval_at to have run for round 0 first - confirm.
+            if(useGPU && var_idx == 0){
+                gpuMode = true;
+                // Memory Allocation on GPU
+                cudaMalloc((void **)&d_r,                  sizeof(F));
+                cudaMalloc((void **)&d_bookkeeping_f,      (cur_eval_size >> 1) * sizeof(F)); // write-only
+            }
+
+            // Switch between CUDA and CPU
+            if(gpuMode){
+                auto start = std::chrono::high_resolution_clock::now();
+
+                // Memory copy from Host to Device
+                // NOTE(review): copies sizeof(F) bytes out of an F_primitive object; this
+                // is only in bounds if sizeof(F_primitive) >= sizeof(F) - confirm.
+                cudaMemcpy(d_r,&r, sizeof(F), cudaMemcpyHostToDevice);
+
+                // Launch Kernel
+                // One work item per output element; threads per block clamped into [32, 512].
+                int eval_size = cur_eval_size >> 1;
+                int num_thread = (eval_size >= 512) ? 512 : (eval_size <= 32 ? 32 : eval_size);
+                int num_block  = (eval_size + num_thread - 1) / num_thread;
+
+                // The two bookkeeping_hg device buffers swap source/destination roles
+                // with the parity of var_idx.
+                sumcheck_kernel<<<num_block, num_thread>>>(
+                        d_r,
+                        d_src_v,
+                        d_bookkeeping_f,
+                        (var_idx % 2 == 0) ? d_bookkeeping_hg_src : d_bookkeeping_hg_dst,
+                        (var_idx % 2 == 0) ? d_bookkeeping_hg_dst : d_bookkeeping_hg_src,
+                        eval_size
+                );
+                cudaDeviceSynchronize(); // Not needed for functional correctness, but required for the timing measurement
+
+                // Copy result back
+                // Device-to-device: the folded values become the next round's source.
+                cudaMemcpy(d_src_v,  d_bookkeeping_f, eval_size * sizeof(F),    cudaMemcpyDeviceToDevice);
+                auto end = std::chrono::high_resolution_clock::now();
+                auto total = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                timer.challenge_time += ((double) total.count());
+                if(verbose) printf("CUDA: receive_chal : var_idx = %u, eval_size = %u, #block = %d, #thread = %d, time = %.1f us\n",
+                                   var_idx, eval_size, num_block, num_thread, (float) total.count());
+            }else{
+                auto start = std::chrono::high_resolution_clock::now();
+                for (uint32_t i = 0; i < (cur_eval_size >> 1); i++){
+                    // Entry i is folded from entries 2i and 2i+1: lo + (hi - lo) * r.
+                    // bookkeeping_f is folded in both branches; bookkeeping_hg is set
+                    // to zero when neither source entry has a gate.
+                    if (!gate_exists[2 * i] && !gate_exists[2 * i + 1]){
+                        gate_exists   [i] = false;
+                        bookkeeping_f [i] = src_v[2 * i]          + (src_v[2 * i + 1]          - src_v[2 * i]         ) * r;
+                        bookkeeping_hg[i] = 0;
+                    }else{
+                        gate_exists   [i] = true;
+                        bookkeeping_f [i] = src_v[2 * i]          + (src_v[2 * i + 1]          - src_v[2 * i]         ) * r;
+                        bookkeeping_hg[i] = bookkeeping_hg[2 * i] + (bookkeeping_hg[2 * i + 1] - bookkeeping_hg[2 * i]) * r;
+                    }
+                }
+                auto end = std::chrono::high_resolution_clock::now();
+                auto total = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                timer.challenge_time += ((double) total.count());
+                if(verbose) printf("CPU: receive_chal : var_idx = %u, eval_size = %u, time = %.1f us\n", var_idx, cur_eval_size >> 1, (float) total.count());
+            }
+
+            // Leave GPU mode once the fold has produced a single element
+            if(gpuMode && (cur_eval_size >> 1) == 1){
+                gpuMode = false;
+                // Copy back the final v claim
+                // Only bookkeeping_f[0] is copied to the host; bookkeeping_hg is not.
+                cudaMemcpy(bookkeeping_f,  d_bookkeeping_f, sizeof(F),    cudaMemcpyDeviceToHost);
+                // Free all CUDA memory
+                cudaFree(d_r);
+                cudaFree(d_src_v);
+                cudaFree(d_bookkeeping_f);
+                cudaFree(d_bookkeeping_hg_src);
+                cudaFree(d_bookkeeping_hg_dst);
+                cudaFree(d_block_results);
+                if(d_blocks_reduce_malloced) {
+                    d_blocks_reduce_malloced = false;
+                    cudaFree(d_blocks_reduce);
+                }
+            }
+
+            // Advance to the next round.
+            cur_eval_size >>= 1;
+            sumcheck_var_idx++;
+        }
+    };
+
+    // Drives the two-phase sumcheck for one circuit layer: phase one runs over
+    // the x variables through x_helper, phase two over the y variables through
+    // y_helper. Both helpers work on buffers owned by the shared scratchpad.
+    template<typename F, typename F_primitive>
+    class SumcheckGKRHelper{
+    public:
+
+        // Layer being proven and the scratchpad; both are borrowed, not owned.
+        CircuitLayer<F, F_primitive> const* poly_ptr;
+        F_primitive alpha, beta;
+        GKRScratchPad<F, F_primitive>* pad_ptr;
+        // Challenges received so far in phase one (rx) and phase two (ry).
+        F_primitive rx[MAX_RESULT_LEN]; uint32_t rx_len = 0;
+        F_primitive ry[MAX_RESULT_LEN]; uint32_t ry_len = 0;
+        SumcheckMultiLinearProdHelper<F, F_primitive> x_helper, y_helper;
+        uint32_t nb_input_vars;
+        uint32_t nb_output_vars;
+
+        // Builds the phase-one table in pad_ptr->hg_evals and marks in gate_exists
+        // which input indices receive a contribution. Each sub-step prints its
+        // wall-clock time in seconds to std::cout.
+        void _prepare_g_x_vals(
+                const F_primitive* rz1, const uint32_t & rz1_len,
+                const F_primitive* rz2, const uint32_t & rz2_len,
+                const F_primitive& alpha,
+                const F_primitive& beta,
+                const SparseCircuitConnection<F_primitive, 2>& mul,
+                const SparseCircuitConnection<F_primitive, 1>& add,
+                const MultiLinearPoly<F>& vals,
+                bool* gate_exists){
+            F *hg_vals = pad_ptr->hg_evals;
+
+            // Clear the table and the gate markers before accumulating.
+            for(int i = 0; i < vals.evals_len; i++){ hg_vals[i] = 0; }
+            for(int i = 0; i < vals.evals_len; i++){ gate_exists[i] = false; }
+
+            auto start = std::chrono::high_resolution_clock::now();
+            // Both eq tables share the same two half-size scratch buffers.
+            _eq_evals_at(rz1, rz1_len, alpha, pad_ptr->eq_evals_at_rz1, pad_ptr -> eq_evals_first_half, pad_ptr -> eq_evals_second_half);
+            _eq_evals_at(rz2, rz2_len, beta, pad_ptr->eq_evals_at_rz2, pad_ptr -> eq_evals_first_half, pad_ptr -> eq_evals_second_half);
+            F_primitive * eq_evals_at_rz1 = pad_ptr->eq_evals_at_rz1;
+            F_primitive const* eq_evals_at_rz2 = pad_ptr->eq_evals_at_rz2;
+            auto end = std::chrono::high_resolution_clock::now();
+            auto total = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+            std::cout << "    - phase 1: two eq evals \t" << (float) total.count() / 1000.0 << "\ts" << std::endl;
+
+            start = std::chrono::high_resolution_clock::now();
+            // Merge the rz2 table into the rz1 table in place.
+            // NOTE(review): iterates 1 << rz1_len entries of both tables, which
+            // presumably assumes rz1_len == rz2_len - confirm.
+            for (int i = 0; i < (1 << rz1_len); ++i){
+                eq_evals_at_rz1[i] = eq_evals_at_rz1[i] + eq_evals_at_rz2[i];
+            }
+            end = std::chrono::high_resolution_clock::now();
+            total = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+            std::cout << "    - phase 1: vec addition \t" << (float) total.count() / 1000.0 << "\ts" << std::endl;
+
+            start = std::chrono::high_resolution_clock::now();
+            for(long unsigned int i = 0; i < mul.sparse_evals_len; i++){
+                // g(x) += eq(rz, z) * v(y) * coef
+                const Gate<F_primitive, 2> &gate = mul.sparse_evals[i];
+                uint32_t x = gate.i_ids[0];
+                uint32_t y = gate.i_ids[1];
+                uint32_t z = gate.o_id;
+                hg_vals[x] += vals.evals[y] * (gate.coef * eq_evals_at_rz1[z]);
+                gate_exists[x] = true;
+            }
+            end = std::chrono::high_resolution_clock::now();
+            total = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+            std::cout << "    - phase 1: build gx(mult) \t" << (float) total.count() / 1000.0 << "\ts" << std::endl;
+
+            start = std::chrono::high_resolution_clock::now();
+            for(long unsigned int i = 0; i < add.sparse_evals_len; i++){
+                // g(x) += eq(rz, z) * coef
+                const Gate<F_primitive, 1> &gate = add.sparse_evals[i];
+                uint32_t x = gate.i_ids[0];
+                uint32_t z = gate.o_id;
+                hg_vals[x] += gate.coef * eq_evals_at_rz1[z];
+                gate_exists[x] = true;
+            }
+            end = std::chrono::high_resolution_clock::now();
+            total = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+            std::cout << "    - phase 1: build gx(add) \t" << (float) total.count() / 1000.0 << "\ts" << std::endl;
+        }
+
+        // Builds the phase-two table in pad_ptr->hg_evals from the mul gates only,
+        // using the phase-one claim v_rx and the challenges stored in rx. Reuses
+        // the merged eq table left in the scratchpad by _prepare_g_x_vals and
+        // prints the elapsed time in seconds to std::cout.
+        void _prepare_h_y_vals(
+                const F& v_rx,
+                const SparseCircuitConnection<F_primitive, 2>& mul,
+                bool *gate_exists){
+            auto start = std::chrono::high_resolution_clock::now();
+            F *hg_vals = pad_ptr->hg_evals;
+            // Reset hg_vals;
+            for(int i = 0; i < (1 << rx_len); i++){
+                hg_vals[i] = 0;
+                gate_exists[i] = false;
+            }
+
+            F_primitive const* eq_evals_at_rz1 = pad_ptr->eq_evals_at_rz1; // already computed in g_x preparation
+            _eq_evals_at(rx, rx_len, F_primitive::one(), pad_ptr->eq_evals_at_rx, pad_ptr -> eq_evals_first_half, pad_ptr -> eq_evals_second_half);
+            F_primitive const* eq_evals_at_rx = pad_ptr->eq_evals_at_rx;
+
+            for(int i = 0; i < mul.sparse_evals_len; i++){
+                const Gate<F_primitive, 2> &gate = mul.sparse_evals[i];
+                // h(y) += v_rx * eq(rz, z) * eq(rx, x) * coef
+                uint32_t x = gate.i_ids[0];
+                uint32_t y = gate.i_ids[1];
+                uint32_t z = gate.o_id;
+                hg_vals[y] += v_rx * (eq_evals_at_rz1[z] * eq_evals_at_rx[x] * gate.coef);
+                gate_exists[y] = true;
+            }
+            auto end = std::chrono::high_resolution_clock::now();
+            auto total = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+            std::cout << "    - phase 2: build hy(mult) \t" << (float) total.count() / 1000.0 << "\ts" << std::endl;
+        }
+
+        // Switches the scratchpad over to phase two and re-arms y_helper.
+        // vx_claim() is read before hg_evals is rebuilt.
+        void _prepare_phase_two(){
+            _prepare_h_y_vals(vx_claim(), poly_ptr->mul, pad_ptr->gate_exists);
+            // TODO: may use the memory v_x_evals as long as the value vx_claim is saved
+            y_helper.prepare(nb_input_vars, pad_ptr->v_evals, pad_ptr->hg_evals, poly_ptr->input_layer_vals.evals);
+        }
+
+        // Stores the layer, challenges and scratchpad, then sets up phase one.
+        // NOTE(review): rx_len / ry_len are not reset here, so reusing one helper
+        // for several layers presumably needs them cleared by the caller - confirm.
+        void prepare(
+                const CircuitLayer<F, F_primitive>& poly,
+                const F_primitive* rz1, const uint32_t & rz1_len,
+                const F_primitive* rz2, const uint32_t & rz2_len,
+                const F_primitive& alpha_,
+                const F_primitive& beta_,
+                GKRScratchPad<F, F_primitive>& scratch_pad){
+
+            // Assign pointer
+            nb_input_vars = poly.nb_input_vars;
+            nb_output_vars = poly.nb_output_vars;
+            alpha = alpha_;
+            beta = beta_;
+            poly_ptr = &poly;
+            pad_ptr = &scratch_pad;
+
+            // phase one
+            _prepare_g_x_vals(rz1, rz1_len,
+                              rz2, rz2_len,
+                              alpha, beta,
+                              poly.mul,poly.add,
+                              poly.input_layer_vals,
+                              pad_ptr->gate_exists);
+            x_helper.prepare(nb_input_vars, pad_ptr->v_evals, pad_ptr->hg_evals, poly.input_layer_vals.evals);
+        }
+
+        // Evaluates the current round polynomial into evals, dispatching to the
+        // x helper for var_idx < nb_input_vars and to the y helper otherwise.
+        void poly_evals_at(uint32_t var_idx, uint32_t degree, F* evals, TimingBreakdown& timer){
+            if (var_idx < nb_input_vars){
+                return x_helper.poly_eval_at(var_idx, degree, pad_ptr->gate_exists, evals, timer);
+            }else{
+                // Phase two. NOTE(review): _prepare_phase_two() is not invoked here;
+                // presumably the caller runs it before the first phase-two round - confirm.
+                return y_helper.poly_eval_at(var_idx - nb_input_vars, degree, pad_ptr->gate_exists, evals, timer);
+            }
+        }
+
+        // Forwards the challenge to the active helper and records it in rx or ry.
+        void receive_challenge(uint32_t var_idx, const F_primitive& r, TimingBreakdown& timer){
+            if (var_idx < nb_input_vars){
+                // Call x's sumcheck
+                x_helper.receive_challenge(var_idx, r, pad_ptr->gate_exists, timer);
+                assert(rx_len < MAX_RESULT_LEN);
+                rx[rx_len] = r;
+                rx_len += 1;
+            }else{
+                // Call y's sumcheck
+                y_helper.receive_challenge(var_idx - nb_input_vars, r, pad_ptr->gate_exists, timer);
+                assert(ry_len < MAX_RESULT_LEN);
+                ry[ry_len] = r;
+                ry_len += 1;
+            }
+        }
+
+        // Both claims read the same scratchpad slot, pad_ptr->v_evals[0]; which
+        // claim it holds depends on which phase last finished folding into it.
+        F vx_claim(){
+            return pad_ptr->v_evals[0];
+        }
+
+        F vy_claim(){
+            return pad_ptr->v_evals[0];
+        }
+    };
+} // namespace gkr
diff --git a/sumcheck/cuda/include/LinearGKR/sumcheck_verifier_utils.cuh b/sumcheck/cuda/include/LinearGKR/sumcheck_verifier_utils.cuh
new file mode 100644
index 00000000..a5aaa42b
--- /dev/null
+++ b/sumcheck/cuda/include/LinearGKR/sumcheck_verifier_utils.cuh
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <vector>  // Only Verifier needs vector
+
+#include "circuit/circuit.cuh"
+#include "sumcheck_common.cuh"
+
+namespace gkr{
+    // Evaluates at x the quadratic that takes the values vals[0], vals[1] and
+    // vals[2] at the points 0, 1 and 2 respectively.
+    template<typename F, typename F_primitive>
+    F degree_2_eval(const std::vector<F>& vals, const F_primitive& x){
+        // Coefficients of p(t) = constant + linear * t + quadratic * t^2.
+        const F& constant = vals[0];
+        F quadratic = F::INV_2 * (vals[2] - vals[1] * 2 + vals[0]);
+        F linear = vals[1] - vals[0] - quadratic;
+
+        // Horner form.
+        auto inner = quadratic * x + linear;
+        return constant + inner * x;
+    }
+
+
+    // Evaluates the sparse wiring polynomial: for every gate, multiplies the sum
+    // of the two output-side eq values by the eq value of each input wire and by
+    // the gate coefficient, and accumulates the result.
+    template<typename F, typename F_primitive, uint32_t nb_input>
+    F_primitive eval_sparse_circuit_connect_poly(
+        const SparseCircuitConnection<F_primitive, nb_input>& poly,
+        const F_primitive* rz1, const uint32_t & rz1_len,
+        const F_primitive* rz2, const uint32_t & rz2_len,
+        const F_primitive& alpha,
+        const F_primitive& beta,
+        const std::vector<std::vector<F_primitive>>& ris
+        ){
+
+        // Output-side eq tables, scaled by alpha and beta respectively.
+        std::vector<F_primitive> eq_out_1(1 << rz1_len);
+        _eq_evals_at_primitive(rz1, rz1_len, alpha, eq_out_1.data());
+
+        std::vector<F_primitive> eq_out_2(1 << rz2_len);
+        _eq_evals_at_primitive(rz2, rz2_len, beta, eq_out_2.data());
+
+        // One eq table per gate input, built from the matching challenge vector.
+        std::vector<std::vector<F_primitive>> eq_in(nb_input);
+        for (uint32_t wire = 0; wire < nb_input; wire++){
+            const std::vector<F_primitive>& point = ris[wire];
+            eq_in[wire].resize(1 << (point.size()));
+            _eq_evals_at_primitive(point.data(), point.size(), F_primitive::one(), eq_in[wire].data());
+        }
+
+        F_primitive total = F_primitive::zero();
+        int g_id = 0;
+        while (g_id < poly.sparse_evals_len){
+            Gate<F_primitive, nb_input> gate = poly.sparse_evals[g_id];
+            auto term = (eq_out_1[gate.o_id] + eq_out_2[gate.o_id]);
+            for (uint32_t wire = 0; wire < nb_input; wire++){
+                term *= eq_in[wire][gate.i_ids[wire]];
+            }
+            total += term * gate.coef;
+            g_id++;
+        }
+
+        return total;
+    }
+}
\ No newline at end of file
diff --git a/sumcheck/cuda/include/circuit/circuit.cuh b/sumcheck/cuda/include/circuit/circuit.cuh
new file mode 100644
index 00000000..8ad72906
--- /dev/null
+++ b/sumcheck/cuda/include/circuit/circuit.cuh
@@ -0,0 +1,163 @@
+#pragma once
+
+// Capacity of the fixed-size layer array held by Circuit.
+#define MAX_NUM_LAYERS 10
+
+namespace gkr{
+
+    // Evaluate the MLE
+    // Evaluates the multilinear polynomial given by its 2^x_len table `evals`
+    // at the point x by folding one variable per round. `evals` is not modified.
+    template<typename F, typename F_primitive>
+    F eval_multilinear(const F* evals, const uint32_t& evals_len, const F_primitive* x, const uint32_t& x_len){
+        assert((1UL << x_len) == evals_len);
+
+        // Fold on a private copy so the caller's table stays untouched.
+        F* table = (F*) malloc(evals_len * sizeof(F));
+        for(uint32_t k = 0; k < evals_len; k++){
+            table[k] = evals[k];
+        }
+
+        // Each round replaces the pair (2k, 2k+1) by lo + (hi - lo) * point.
+        uint32_t half = evals_len >> 1;
+        for (uint32_t round = 0; round < x_len; round++, half >>= 1){
+            F_primitive point = x[round];
+            for (uint32_t k = 0; k < half; k++){
+                F lo = table[2 * k];
+                F hi = table[2 * k + 1];
+                table[k] = lo + (hi - lo) * point;
+            }
+        }
+
+        F folded = table[0];
+        free(table);
+        return folded;
+    }
+
+    // Class of Multi-linear evaluation
+    // Dense multilinear polynomial stored as a table of evaluations.
+    // The table pointer is a plain, non-owning-by-RAII pointer: copies of this
+    // object share it and no destructor releases it.
+    template<typename F>
+    class MultiLinearPoly{
+    public:
+        uint32_t nb_vars = 0;    // number of variables
+        F* evals = nullptr;      // evaluation table
+        uint32_t evals_len = 0;  // number of entries in `evals`
+
+        // Returns a polynomial over nb_vars variables whose 2^nb_vars entries are
+        // drawn from F::random(); the table is allocated with malloc.
+        static MultiLinearPoly random(uint32_t nb_vars) {
+            const uint32_t table_len = 1 << nb_vars;
+            F* table = (F*)malloc(table_len * sizeof(F));
+            for (uint32_t k = 0; k < table_len; k++){
+                table[k] = F::random();
+            }
+
+            MultiLinearPoly sampled;
+            sampled.nb_vars = nb_vars;
+            sampled.evals = table;
+            sampled.evals_len = table_len;
+            return sampled;
+        }
+    };
+
+    // One single gate
+    // A single gate: nb_input input wire ids, one output wire id and a coefficient.
+    template<typename F, uint32_t nb_input>
+    class Gate{
+    public:
+        uint32_t     i_ids[nb_input];
+        uint32_t     o_id;
+        alignas(8) F coef;
+
+        // Leaves the wire ids uninitialised.
+        Gate(){}
+
+        // Copies the nb_input ids out of `i_ids` and stores the output id and coefficient.
+        Gate(uint32_t o_id, uint32_t i_ids[nb_input], F coef) : o_id(o_id), coef(coef) {
+            uint32_t* dst = this->i_ids;
+            for (uint32_t k = 0; k != nb_input; ++k){
+                dst[k] = i_ids[k];
+            }
+        }
+    };
+
+    // The sparse connection
+    // Sparse wiring of one gate type, stored as a flat array of gates.
+    template<typename F, uint32_t nb_input>
+    class SparseCircuitConnection{
+    public:
+        uint32_t nb_output_vars = 0;
+        uint32_t nb_input_vars = 0;
+        Gate<F, nb_input>* sparse_evals = nullptr;
+        uint32_t sparse_evals_len = 0;
+
+        // Builds a deterministic test wiring with one gate per output index.
+        // Input j of gate `out` is (out + j * 2^nb_output_vars) mod 2^nb_input_vars
+        // and every coefficient is F::one(). The gate array is allocated with malloc.
+        static SparseCircuitConnection random(uint32_t nb_output_vars, uint32_t nb_input_vars){
+            const uint32_t nb_outputs = 1 << nb_output_vars;
+            const uint32_t nb_inputs  = 1 << nb_input_vars;
+
+            SparseCircuitConnection conn;
+            conn.nb_output_vars = nb_output_vars;
+            conn.nb_input_vars = nb_input_vars;
+            conn.sparse_evals_len = nb_outputs;
+            conn.sparse_evals = (Gate<F, nb_input>*) malloc(nb_outputs * sizeof(Gate<F, nb_input>));
+
+            for (uint32_t out = 0; out < nb_outputs; out++){
+                // Using `out` as the output id makes sure every output gate is used.
+                uint32_t wires[nb_input];
+                uint32_t cursor = out;
+                for (uint32_t j = 0; j < nb_input; j++, cursor += nb_outputs){
+                    wires[j] = cursor % nb_inputs;
+                }
+                conn.sparse_evals[out] = Gate<F, nb_input> (out, wires, F::one());
+            }
+            return conn;
+        }
+    };
+
+    // One Layer of GKR circuit
+    // One layer of a GKR circuit: its input values plus the add and mul wirings.
+    template<typename F, typename F_primitive>
+    class CircuitLayer{
+    public:
+        uint32_t nb_output_vars;
+        uint32_t nb_input_vars;
+        MultiLinearPoly<F> input_layer_vals;
+        MultiLinearPoly<F> output_layer_vals;
+
+        SparseCircuitConnection<F_primitive, 1> add;
+        SparseCircuitConnection<F_primitive, 2> mul;
+
+        // Builds a layer with random input values and the deterministic test
+        // wirings from SparseCircuitConnection::random. output_layer_vals is
+        // left at its default (empty) state.
+        static CircuitLayer random(uint32_t nb_output_vars, uint32_t nb_input_vars){
+            CircuitLayer layer;
+            layer.nb_input_vars = nb_input_vars;
+            layer.nb_output_vars = nb_output_vars;
+
+            layer.input_layer_vals = MultiLinearPoly<F>::random(nb_input_vars);
+            layer.mul = SparseCircuitConnection<F_primitive, 2>::random(nb_output_vars, nb_input_vars);
+            layer.add = SparseCircuitConnection<F_primitive, 1>::random(nb_output_vars, nb_input_vars);
+            return layer;
+        }
+
+        // Accumulates every gate's contribution into `output`, indexed by the
+        // gate's output id. `output` is added to, not cleared, and `output_len`
+        // is not read.
+        void evaluate(F* output, uint32_t output_len) const {
+            F* in = input_layer_vals.evals;
+
+            // Multiplication gates: in[a] * in[b] * coef.
+            int g = 0;
+            while (g < mul.sparse_evals_len){
+                Gate<F_primitive, 2> gate = mul.sparse_evals[g];
+                output[gate.o_id] += in[gate.i_ids[0]] * in[gate.i_ids[1]] * gate.coef;
+                g++;
+            }
+
+            // Addition gates: in[a] * coef.
+            g = 0;
+            while (g < add.sparse_evals_len){
+                Gate<F_primitive, 1> gate = add.sparse_evals[g];
+                output[gate.o_id] += in[gate.i_ids[0]] * gate.coef;
+                g++;
+            }
+        }
+    };
+
+    // GKR Layered Circuit
+    // Layered GKR circuit with a fixed capacity of MAX_NUM_LAYERS layers.
+    template<typename F, typename F_primitive>
+    class Circuit{
+    public:
+        CircuitLayer<F, F_primitive> layers[MAX_NUM_LAYERS];
+        uint32_t layers_len = 0;
+
+        // Appends a copy of `layer`; asserts that the capacity is not exceeded.
+        void add_layer(const CircuitLayer<F, F_primitive>& layer){
+            assert(layers_len < MAX_NUM_LAYERS);
+            layers[layers_len] = layer;
+            layers_len = layers_len + 1;
+        }
+
+        // Evaluates the layers in order. The output of layer i becomes the input
+        // values of layer i + 1; the output of the last layer is stored in its
+        // own output_layer_vals. Does nothing for an empty circuit.
+        //
+        // Each output table is freshly malloc'ed and zeroed, because
+        // CircuitLayer::evaluate accumulates into the buffer it is given.
+        // NOTE(review): the table previously referenced by the destination is
+        // overwritten without being freed, since layer copies may share it -
+        // confirm the intended ownership.
+        void evaluate(){
+            for (uint32_t i = 0; i < layers_len; ++i){
+                const uint32_t output_len = 1u << layers[i].nb_output_vars;
+                F* output = (F*) malloc(output_len * sizeof(F));
+                assert(output != nullptr);
+                for (uint32_t j = 0; j < output_len; ++j){
+                    output[j] = F::zero();
+                }
+                layers[i].evaluate(output, output_len);
+
+                MultiLinearPoly<F>& dst = (i + 1 < layers_len)
+                        ? layers[i + 1].input_layer_vals
+                        : layers[i].output_layer_vals;
+                dst.nb_vars = layers[i].nb_output_vars;
+                dst.evals = output;
+                dst.evals_len = output_len;
+            }
+        }
+    };
+
+} // namespace gkr
\ No newline at end of file
diff --git a/sumcheck/cuda/include/fiat_shamir/transcript.cuh b/sumcheck/cuda/include/fiat_shamir/transcript.cuh
new file mode 100644
index 00000000..53156eff
--- /dev/null
+++ b/sumcheck/cuda/include/fiat_shamir/transcript.cuh
@@ -0,0 +1,99 @@
+#pragma once
+
+#include "hash/hashes.cuh"
+
+// Size in bytes of the running digest kept by Transcript.
+#define DIGEST_SIZE         32
+// Capacity in bytes of the fixed proof buffer held by Proof.
+#define MAX_PROOF_BYTE_SIZE 8192
+
+namespace gkr{
+
+    // Fixed-capacity byte buffer holding a serialized proof, with a write
+    // cursor (bytes_write_ptr) for the prover and a read cursor (idx) for the
+    // verifier.
+    template<typename F>
+    class Proof{
+    public:
+        uint32_t idx, bytes_write_ptr, commitment_nb_bytes, opening_nb_bytes;
+        uint8_t bytes[MAX_PROOF_BYTE_SIZE];
+
+        // Starts with both cursors and both byte counters at zero.
+        Proof(){
+            idx = 0;
+            bytes_write_ptr = 0;
+            commitment_nb_bytes = 0;
+            opening_nb_bytes = 0;
+        }
+
+        // Appends `len` bytes at the write cursor.
+        // The capacity is asserted BEFORE writing so an oversized append cannot
+        // scribble past the buffer; the accepted sizes are the same as before
+        // (the cursor must stay strictly below MAX_PROOF_BYTE_SIZE).
+        void append_bytes(const uint8_t* __restrict__ input_bytes, uint32_t len){
+            assert(bytes_write_ptr <= MAX_PROOF_BYTE_SIZE && len < MAX_PROOF_BYTE_SIZE - bytes_write_ptr);
+            for(uint32_t l_idx = 0; l_idx < len; l_idx++){
+                bytes[bytes_write_ptr + l_idx] = input_bytes[l_idx];
+            }
+            bytes_write_ptr += len;
+        }
+
+        // Rewinds both cursors; the buffer contents are left as they are.
+        inline void reset(){
+            idx = 0;
+            bytes_write_ptr = 0;
+        }
+
+        // Pointer to the next unread byte.
+        const uint8_t* bytes_head(){
+            return bytes + idx;
+        }
+
+        // Advances the read cursor by nb_bytes (no bounds check).
+        inline void step(uint32_t nb_bytes){
+            idx += nb_bytes;
+        }
+
+        // Decodes one F at the read cursor via F::from_bytes and advances the
+        // cursor by sizeof(F).
+        inline F get_next_and_step(){
+            F f;
+            f.from_bytes(bytes + idx);
+            step(sizeof(F));
+            return f;
+        }
+    };
+
+    // Fiat-Shamir transcript: collects the proof bytes and derives challenges
+    // by hashing the bytes appended since the previous challenge.
+    template <typename F, typename F_primitive>
+    class Transcript{
+    private:
+        // Refreshes `digest`. If bytes were appended since the last call they
+        // are hashed into the digest and the window start is moved forward;
+        // otherwise the digest itself is hashed again.
+        inline void _hash_to_digest(){
+            uint32_t hash_end_idx = proof.bytes_write_ptr;
+            if (hash_end_idx - hash_start_idx > 0)
+            {
+                hasher.hash(digest, proof.bytes + hash_start_idx, hash_end_idx - hash_start_idx);
+                hash_start_idx = hash_end_idx;
+            }
+            else
+            {
+                hasher.hash(digest, digest, DIGEST_SIZE);
+            }
+        }
+
+    public:
+        Proof<F> proof;
+        SHA256Hasher hasher;
+        uint32_t hash_start_idx;  // first proof byte not yet absorbed into digest
+        uint8_t digest[DIGEST_SIZE];
+
+        // Starts with an empty proof and an all-zero digest. Members are
+        // initialised directly instead of being assigned from temporaries.
+        Transcript() : proof(), hasher(), hash_start_idx(0) {
+            for(unsigned char & i : digest){ i = 0; }
+        }
+
+        // Appends raw bytes to the proof.
+        void append_bytes(const uint8_t* bytes, uint32_t len){
+            proof.append_bytes(bytes, len);
+        }
+
+        // Serializes f at the proof's write cursor and advances the cursor by
+        // sizeof(F). The remaining capacity is asserted first, so an oversized
+        // write cannot run past the proof buffer.
+        void append_f(const F &f){
+            uint32_t cur_size = proof.bytes_write_ptr;
+            assert(cur_size <= MAX_PROOF_BYTE_SIZE && sizeof(F) <= MAX_PROOF_BYTE_SIZE - cur_size);
+            f.to_bytes(proof.bytes + cur_size);
+            proof.bytes_write_ptr = cur_size + sizeof(F);
+        }
+
+        // Refreshes the digest and decodes a challenge from its leading bytes.
+        F_primitive challenge_f(){
+            _hash_to_digest();
+
+            F_primitive f;
+            assert(sizeof(F_primitive) <= DIGEST_SIZE);
+            f.from_bytes(digest);
+            return f;
+        }
+    };
+
+}
\ No newline at end of file
diff --git a/sumcheck/cuda/include/field/M31.cuh b/sumcheck/cuda/include/field/M31.cuh
new file mode 100644
index 00000000..995cb2ab
--- /dev/null
+++ b/sumcheck/cuda/include/field/M31.cuh
@@ -0,0 +1,90 @@
+#pragma once
+
+#include "basefield.cuh"
+
+namespace gkr::M31_field {
+
+    // The Mersenne prime 2^31 - 1.
+    const int mod = 2147483647;
+    // One folding step: replaces x by (low 31 bits of x) + (x >> 31). The result
+    // is congruent to x modulo `mod` but may still be >= mod, so callers that
+    // need a canonical value follow it with a conditional subtraction.
+    // The argument is evaluated more than once.
+    #define mod_reduce_int(x) (x = (((x) & mod) + ((x) >> 31)))
+
+    // Element of the prime field modulo 2^31 - 1, stored in a uint32_t.
+    // Equality compares the stored word directly, so the arithmetic below keeps
+    // values canonical (strictly below `mod`).
+    class M31 final : public BaseField<M31> {
+    public:
+        // internal storage of M31
+        uint32_t x;
+
+        static M31 INV_2;
+
+        __host__ __device__
+        static M31 zero() { return new_unchecked(0); }
+
+        __host__ __device__
+        static M31 one() { return new_unchecked(1); }
+
+        // Builds an element from rand(); the value is reduced by the constructor.
+        __host__
+        static M31 random() {
+            return M31{static_cast<uint32_t>(rand())};
+        }
+
+        // Stores x as-is; the caller guarantees it is already reduced.
+        __host__ __device__
+        static inline M31 new_unchecked(uint32_t x){
+            M31 f;
+            f.x = x;
+            return f;
+        }
+
+        __host__ __device__
+        M31() { this->x = 0; }
+
+        // Reduces any 32-bit value to its canonical representative.
+        // The folding step alone can leave `mod` (for v == mod) or 2^31 (for
+        // v == 0xFFFFFFFF); the conditional subtraction maps these to 0 and 1.
+        __host__ __device__
+        M31(uint32_t v){
+            mod_reduce_int(v);
+            if (v >= mod) v -= mod;
+            this->x = v;
+        }
+
+        // Sum of two reduced operands stays below 2^32, so one conditional
+        // subtraction is enough.
+        __host__ __device__
+        inline M31 operator+(const M31 &rhs) const{
+            M31 result;
+            result.x = (x + rhs.x);
+            if (result.x >= mod) result.x -= mod;
+            return result;
+        }
+
+        // 64-bit product folded once, then conditionally reduced; for reduced
+        // operands the result is below `mod`.
+        __host__ __device__
+        inline M31 operator*(const M31 &rhs) const{
+            int64_t xx = static_cast<int64_t>(x) * rhs.x;
+            mod_reduce_int(xx);
+            if (xx >= mod) xx -= mod;
+            return new_unchecked(xx);
+        }
+
+        // Additive inverse; zero maps to zero rather than to `mod`.
+        __host__ __device__
+        inline M31 operator-() const{
+            uint32_t xx = (this->x == 0) ? 0 : (mod - this->x);
+            return new_unchecked(xx);
+        }
+
+        __host__ __device__
+        inline M31 operator-(const M31 &rhs) const{
+            return *this + (-rhs);
+        }
+
+        __host__ __device__
+        bool operator==(const M31 &rhs) const{
+            return this->x == rhs.x;
+        };
+
+        // From field to transcript as bytes
+        // NOTE(review): writes sizeof(*this) bytes while from_bytes reads 4; the
+        // two agree only if sizeof(M31) == 4 - confirm against BaseField.
+        void to_bytes(uint8_t* output) const{
+            memcpy(output, this, sizeof(*this));
+        };
+
+        // Convert from transcript bytes to Field
+        // Reads 4 bytes and reduces them to a canonical value.
+        void from_bytes(const uint8_t* input){
+            memcpy(this, input, 4);
+            mod_reduce_int(x);
+            if (x >= mod) x -= mod;
+        };
+    };
+
+    // Inverse of 2: 2 * 2^30 = 2^31, which is 1 modulo 2^31 - 1.
+    // NOTE(review): this is a non-inline definition inside a header; including
+    // the header from more than one translation unit presumably yields duplicate
+    // definitions - confirm.
+    M31 M31::INV_2 = (1 << 30);
+}
\ No newline at end of file
diff --git a/sumcheck/cuda/include/field/M31ext3.cuh b/sumcheck/cuda/include/field/M31ext3.cuh
new file mode 100644
index 00000000..8d1bcb04
--- /dev/null
+++ b/sumcheck/cuda/include/field/M31ext3.cuh
@@ -0,0 +1,163 @@
+#pragma once
+
+#include "basefield.cuh"
+
+namespace gkr::M31ext3_field {
+    // Modulus of the M31 base field: the Mersenne prime 2^31 - 1
+    const int mod = 2147483647;
+    #define mod_reduce_int(x) (x = (((x) & mod) + ((x) >> 31)))
+
+    // Cubic extension of the M31 base field. An element is c0 + c1*X + c2*X^2,
+    // stored as fs = (c0, c1, c2) with each coefficient expected in [0, mod);
+    // operator* reduces products with X^3 = 5.
+    class M31ext3 final : public BaseField<M31ext3> {
+    private:
+        // a * b mod p for reduced a, b. The product is below 2^62, so one fold
+        // followed by one conditional subtraction is enough.
+        __host__ __device__
+        static inline uint32_t mul_m31(const uint32_t& a, const uint32_t& b){
+            uint64_t xx = static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
+            mod_reduce_int(xx);
+            return static_cast<uint32_t>((xx >= mod) ? xx - mod : xx);
+        }
+
+        // a + b mod p for reduced a, b (the 32-bit sum cannot wrap).
+        __host__ __device__
+        static inline uint32_t add_m31(const uint32_t& a, const uint32_t& b){
+            uint32_t res = a + b;
+            if(res >= mod) res -= mod;
+            return res;
+        }
+    public:
+        // internal storage of M31 extension-3: coefficients (c0, c1, c2)
+        uint32_t fs[3] = {0, 0, 0};
+
+        // Inverse of 2; defined after the class.
+        static M31ext3 INV_2;
+
+        // Additive identity (0, 0, 0).
+        __host__ __device__
+        static M31ext3 zero() {
+            return M31ext3();
+        }
+
+        // Multiplicative identity (1, 0, 0).
+        __host__ __device__
+        static M31ext3 one() {
+            M31ext3 f;  // the default constructor zeroes every coefficient
+            f.fs[0] = 1;
+            return f;
+        }
+
+        // Host-only: draws each coefficient from rand() and reduces it into [0, mod).
+        __host__
+        static M31ext3 random(){
+            M31ext3 f;
+            for(int i = 0; i < 3; i++){
+                f.fs[i] = static_cast<uint32_t>(rand());
+                mod_reduce_int(f.fs[i]);
+                if (f.fs[i] >= mod) f.fs[i] -= mod;  // the fold alone can leave exactly mod
+            }
+            return f;
+        }
+
+        // Stores x verbatim as the constant coefficient; no reduction is applied.
+        __host__ __device__
+        static inline M31ext3 new_unchecked(uint32_t x){
+            M31ext3 f;
+            f.fs[0] = x;
+            return f;
+        }
+
+        // Default-constructs the zero element.
+        __host__ __device__
+        M31ext3() {
+            for(int i = 0; i < 3; i++) this->fs[i] = 0;
+        }
+
+        // Embeds v mod p as the constant coefficient; fs[1] and fs[2] stay zero.
+        __host__ __device__
+        M31ext3(uint32_t v){
+            mod_reduce_int(v);
+            // One fold maps 0xFFFFFFFF to 2^31 and mod to mod, so finish the
+            // reduction; otherwise operator== would see two encodings of one value.
+            this->fs[0] = (v >= mod) ? v - mod : v;
+        }
+
+        // Coefficient-wise addition modulo p.
+        __host__ __device__
+        inline M31ext3 operator+(const M31ext3 &rhs) const{
+            M31ext3 result;
+            for(int i = 0; i < 3; i++){
+                result.fs[i] = add_m31(fs[i], rhs.fs[i]);
+            }
+            return result;
+        }
+
+        // Polynomial product reduced with X^3 = 5. With a = *this and b = rhs:
+        //   res[0] = a0*b0 + 5*(a1*b2 + a2*b1)
+        //   res[1] = a0*b1 + a1*b0 + 5*a2*b2
+        //   res[2] = a0*b2 + a1*b1 + a2*b0
+        __host__ __device__
+        inline M31ext3 operator*(const M31ext3 &rhs) const{
+            const uint32_t *a = fs, *b = rhs.fs;
+            M31ext3 f;
+
+            f.fs[0] = add_m31(
+                    mul_m31(a[0], b[0]),
+                    mul_m31(5, add_m31(mul_m31(a[1], b[2]), mul_m31(a[2], b[1]))));
+
+            f.fs[1] = add_m31(
+                    add_m31(mul_m31(a[0], b[1]), mul_m31(a[1], b[0])),
+                    mul_m31(5, mul_m31(a[2], b[2])));
+
+            f.fs[2] = add_m31(
+                    add_m31(mul_m31(a[0], b[2]), mul_m31(a[1], b[1])),
+                    mul_m31(a[2], b[0]));
+
+            return f;
+        }
+
+        // Additive inverse: each non-zero coefficient c becomes mod - c.
+        __host__ __device__
+        inline M31ext3 operator-() const{
+            M31ext3 f;
+            for(int i = 0; i < 3; i++) {
+                if (fs[i] != 0) f.fs[i] = mod - fs[i];
+            }
+            return f;
+        }
+
+        // Subtraction, computed as *this + (-rhs).
+        __host__ __device__
+        inline M31ext3 operator-(const M31ext3 &rhs) const{
+            return *this + (-rhs);
+        }
+
+        // Compares the stored coefficients one by one.
+        __host__ __device__
+        bool operator==(const M31ext3 &rhs) const{
+            for(int i = 0; i < 3; i++) {
+                if (fs[i] != rhs.fs[i]) return false;
+            }
+            return true;
+        }
+
+        // From field to transcript as bytes (host side): copies sizeof(*this)
+        // bytes of this object into output.
+        void to_bytes(uint8_t* output) const{
+            memcpy(output, this, sizeof(*this));
+        }
+
+        // Convert from transcript bytes to Field (host side): loads 12 bytes from
+        // input over this object, then fully reduces each coefficient.
+        void from_bytes(const uint8_t* input){
+            memcpy(this, input, 12);
+            for(int i = 0; i < 3; i++) {
+                mod_reduce_int(fs[i]);
+                if (fs[i] >= mod) fs[i] -= mod;
+            }
+        }
+    };
+    M31ext3 M31ext3::INV_2 = (1 << 30);  // (2^30, 0, 0); 2 * 2^30 = mod + 1. NOTE(review): non-inline definition in a header, so two translation units including it would both define this symbol
+}
\ No newline at end of file
diff --git a/sumcheck/cuda/include/field/basefield.cuh b/sumcheck/cuda/include/field/basefield.cuh
new file mode 100644
index 00000000..646e8733
--- /dev/null
+++ b/sumcheck/cuda/include/field/basefield.cuh
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <cstdint>
+
+namespace gkr{
+
+    // CRTP base for field element types: F derives from BaseField<F>.
+    // Members that are only declared here have no definition in this header;
+    // derived classes are expected to provide their own. The operators defined
+    // inline forward to F through a static_cast.
+    template <typename F>
+    class BaseField{
+    public:
+        __host__ __device__
+        static F zero();
+        __host__ __device__
+        static F one();
+        __host__ __device__
+        static F random();
+
+        __host__ __device__
+        F operator+(const F &rhs) const;
+        __host__ __device__
+        F operator*(const F &rhs) const;
+        __host__ __device__
+        F operator-() const;
+        __host__ __device__
+        F operator-(const F &rhs) const;
+        __host__ __device__
+        bool operator==(const F &rhs) const;
+
+        // Compare through F so that F's own operator== is selected; `*this == rhs`
+        // on the base would pick the declaration above, which has no definition here.
+        __host__ __device__
+        bool operator!=(const F &rhs) const { return !(static_cast<const F &>(*this) == rhs); }
+
+        // Compound assignments, each built on the matching binary operator of F.
+        __host__ __device__
+        void operator+=(const F &rhs) { *static_cast<F *>(this) = *static_cast<F *>(this) + rhs; }
+        __host__ __device__
+        void operator-=(const F &rhs) { *static_cast<F *>(this) = *static_cast<F *>(this) - rhs; }
+        __host__ __device__
+        void operator*=(const F &rhs) { *static_cast<F *>(this) = *static_cast<F *>(this) * rhs; }
+
+        // Byte (de)serialization for the transcript.
+        // Host Function that runs on CPU
+        void to_bytes(uint8_t *output) const;
+        void from_bytes(const uint8_t *input);
+    };
+}
\ No newline at end of file
diff --git a/sumcheck/cuda/include/field/bn254.cuh b/sumcheck/cuda/include/field/bn254.cuh
new file mode 100644
index 00000000..f868e6e3
--- /dev/null
+++ b/sumcheck/cuda/include/field/bn254.cuh
@@ -0,0 +1,109 @@
+#pragma once
+
+#include "basefield.cuh"
+
+// Icicle Library
+#ifdef useBN254
+#include "fields/id.h"
+#define CURVE_ID BN254
+#define FIELD_ID BN254
+#include "fields/field_config.cuh"
+#include "curves/curve_config.cuh"
+#include "gpu-utils/modifiers.cuh"
+
+typedef curve_config::scalar_t  field_t;
+
+namespace gkr::BN254_field {
+
+    class Bn254 final : public BaseField<Bn254> {
+    public:
+        // Field element wrapper around field_t; every arithmetic operator below delegates to it.
+        // internal: the wrapped value (field_t is curve_config::scalar_t)
+        field_t fd __attribute__((packed));
+
+        static Bn254 INV_2;
+
+        __host__ __device__
+        static Bn254 zero() {
+            Bn254 f;
+            f.fd = field_t ::zero();
+            return f;
+        }
+
+        __host__ __device__
+        static Bn254 one() {
+            Bn254 f;
+            f.fd = field_t ::one();
+            return f;
+        }
+
+        __host__
+        static Bn254 random(){
+            Bn254 f;
+            f.fd = field_t ::rand_host();
+            return f;
+        }
+
+        __host__ __device__
+        Bn254() {
+            this->fd = field_t ::zero();
+        }
+
+        __host__ __device__
+        Bn254(uint32_t v){
+            this->fd = field_t ::from(v);
+        }
+
+        __host__ __device__
+        Bn254(field_t ff){
+            this->fd = ff;
+        }
+
+        __host__ __device__
+        inline Bn254 operator+(const Bn254 &rhs) const{
+            Bn254 result;
+            result.fd = fd + rhs.fd;
+            return result;
+        }
+
+        __host__ __device__
+        inline Bn254 operator*(const Bn254 &rhs) const{
+            Bn254 result;
+            result.fd = fd * rhs.fd;
+            return result;
+        }
+
+        __host__ __device__
+        inline Bn254 operator-() const{
+            Bn254 res;
+            res.fd = field_t::neg(fd);
+            return res;
+        }
+
+        __host__ __device__
+        inline Bn254 operator-(const Bn254 &rhs) const{
+            Bn254 res;
+            res.fd = fd - rhs.fd;
+            return res;
+        }
+
+        __host__ __device__
+        bool operator==(const Bn254 &rhs) const{
+            return fd == rhs.fd;
+        };
+
+        // From field to transcript as bytes (host side): copies sizeof(*this) bytes of this object
+        void to_bytes(uint8_t* output) const{
+            memcpy(output, this, sizeof(*this));
+        };
+
+        // Convert from transcript bytes to Field (host side): loads 32 bytes, then subtracts the modulus while lt(modulus, fd) holds
+        void from_bytes(const uint8_t* input){
+            memcpy(this, input, 32);
+            while (field_t::lt( field_t{field_t::get_modulus()}, fd))
+                fd = fd - field_t{field_t::get_modulus()};  // NOTE(review): presumably lt is strict, so fd == modulus is left as-is — confirm
+        };
+    };
+    Bn254 Bn254::INV_2 = field_t ::inverse(field_t::from(2));  // NOTE(review): non-inline definition in a header — confirm it is included from a single translation unit
+}
+#endif
\ No newline at end of file
diff --git a/sumcheck/cuda/include/hash/hashes.cuh b/sumcheck/cuda/include/hash/hashes.cuh
new file mode 100644
index 00000000..191022f8
--- /dev/null
+++ b/sumcheck/cuda/include/hash/hashes.cuh
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "sha256.cuh"
+
+namespace gkr{
+    // Trait of Hasher: the base hash() is a no-op; subclasses override it.
+    class Hasher{
+    public:
+        virtual ~Hasher() = default;  // polymorphic base: allow deletion through a Hasher*
+        virtual void hash(uint8_t *output, const uint8_t *input, uint32_t input_len){}
+    };
+    // SHA2-256 Hasher for Fiat-shamir: each hash() call resets the state first, so it is a one-shot digest
+    class SHA256Hasher: public Hasher{
+    public:
+        CSHA256 btc_sha256_hasher;
+        void hash(uint8_t *output, const uint8_t *input, uint32_t input_len) override {
+            btc_sha256_hasher.Reset().Write(input, input_len).Finalize(output);  // output receives 32 bytes
+        }
+    };
+}
\ No newline at end of file
diff --git a/sumcheck/cuda/include/hash/sha256.cuh b/sumcheck/cuda/include/hash/sha256.cuh
new file mode 100644
index 00000000..dcca649c
--- /dev/null
+++ b/sumcheck/cuda/include/hash/sha256.cuh
@@ -0,0 +1,225 @@
+#pragma once
+
+#include <cstdint>
+
+namespace gkr {
+
+    inline uint32_t internal_bswap_32(uint32_t x){
+        // Reverse the four bytes: byte i moves to byte 3 - i.
+        return (x >> 24) | ((x >> 8) & 0x0000ff00U) | ((x << 8) & 0x00ff0000U) | (x << 24);
+    }
+
+    inline uint64_t internal_bswap_64(uint64_t x){
+        // Reverse the eight bytes: peel bytes off the low end of x and push
+        // them onto the low end of the result.
+        uint64_t reversed = 0;
+        for (int i = 0; i < 8; i++) {
+            reversed = (reversed << 8) | (x & 0xffull);
+            x >>= 8;
+        }
+        return reversed;
+    }
+
+    inline uint32_t be32toh_internal(uint32_t big_endian_32bits){
+        return internal_bswap_32(big_endian_32bits);  // unconditional swap: matches be32toh only on little-endian hosts
+    }
+
+    inline uint32_t htobe32_internal(uint32_t host_32bits){
+        return internal_bswap_32(host_32bits);  // unconditional swap: matches htobe32 only on little-endian hosts
+    }
+
+    inline uint64_t htobe64_internal(uint64_t host_64bits){
+        return internal_bswap_64(host_64bits);  // unconditional swap: matches htobe64 only on little-endian hosts
+    }
+
+    uint32_t static inline ReadBE32(const unsigned char* ptr){
+        // Assemble the word most-significant byte first; unlike memcpy plus an
+        // unconditional swap, this gives the same value on any host byte order.
+        return (uint32_t(ptr[0]) << 24) | (uint32_t(ptr[1]) << 16) | (uint32_t(ptr[2]) << 8) | uint32_t(ptr[3]);
+    }
+
+    void static inline WriteBE32(unsigned char* ptr, uint32_t x){
+        // Store x most-significant byte first, independent of host byte order.
+        for (int i = 0; i < 4; i++) ptr[i] = static_cast<unsigned char>(x >> (24 - 8 * i));
+    }
+
+    void static inline WriteBE64(unsigned char* ptr, uint64_t x){
+        // Store x most-significant byte first, independent of host byte order.
+        for (int i = 0; i < 8; i++) ptr[i] = static_cast<unsigned char>(x >> (56 - 8 * i));
+    }
+
+    uint32_t inline Ch(uint32_t x, uint32_t y, uint32_t z) { return z ^ (x & (y ^ z)); }  // choose: bits of y where x is 1, bits of z where x is 0
+    uint32_t inline Maj(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (z & (x | y)); }  // bitwise majority of x, y, z
+    uint32_t inline Sigma0(uint32_t x) { return (x >> 2 | x << 30) ^ (x >> 13 | x << 19) ^ (x >> 22 | x << 10); }  // ROTR2 ^ ROTR13 ^ ROTR22
+    uint32_t inline Sigma1(uint32_t x) { return (x >> 6 | x << 26) ^ (x >> 11 | x << 21) ^ (x >> 25 | x << 7); }  // ROTR6 ^ ROTR11 ^ ROTR25
+    uint32_t inline sigma0(uint32_t x) { return (x >> 7 | x << 25) ^ (x >> 18 | x << 14) ^ (x >> 3); }  // ROTR7 ^ ROTR18 ^ SHR3
+    uint32_t inline sigma1(uint32_t x) { return (x >> 17 | x << 15) ^ (x >> 19 | x << 13) ^ (x >> 10); }  // ROTR17 ^ ROTR19 ^ SHR10
+
+    /** One round of SHA-256: adds t1 into d and replaces h with t1 + t2. */
+    void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k){
+        // t1 must be taken from the old h before h is overwritten below.
+        const uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k;
+        d += t1;
+        h = t1 + (Sigma0(a) + Maj(a, b, c));
+    }
+
+    /** Initialize SHA-256 state: loads the eight initial hash words into s[0..7]. */
+    void inline Initialize(uint32_t* s){
+        // Initial hash words, in state order.
+        const uint32_t initial[8] = {
+            0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul,
+            0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul,
+        };
+        for (int i = 0; i < 8; i++) {
+            s[i] = initial[i];
+        }
+    }
+
+    /** Perform a number of SHA-256 transformations, processing 64-byte chunks: s is updated in place from `blocks` consecutive chunks. */
+    void inline Transform(uint32_t* s, const unsigned char* chunk, size_t blocks){  // inline: defined in a header, so it must not be emitted once per including translation unit
+        while (blocks--) {
+            uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
+            uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
+            // Rounds 0-15: the message words come straight from the chunk, read big-endian.
+            Round(a, b, c, d, e, f, g, h, 0x428a2f98 + (w0 = ReadBE32(chunk + 0)));
+            Round(h, a, b, c, d, e, f, g, 0x71374491 + (w1 = ReadBE32(chunk + 4)));
+            Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf + (w2 = ReadBE32(chunk + 8)));
+            Round(f, g, h, a, b, c, d, e, 0xe9b5dba5 + (w3 = ReadBE32(chunk + 12)));
+            Round(e, f, g, h, a, b, c, d, 0x3956c25b + (w4 = ReadBE32(chunk + 16)));
+            Round(d, e, f, g, h, a, b, c, 0x59f111f1 + (w5 = ReadBE32(chunk + 20)));
+            Round(c, d, e, f, g, h, a, b, 0x923f82a4 + (w6 = ReadBE32(chunk + 24)));
+            Round(b, c, d, e, f, g, h, a, 0xab1c5ed5 + (w7 = ReadBE32(chunk + 28)));
+            Round(a, b, c, d, e, f, g, h, 0xd807aa98 + (w8 = ReadBE32(chunk + 32)));
+            Round(h, a, b, c, d, e, f, g, 0x12835b01 + (w9 = ReadBE32(chunk + 36)));
+            Round(g, h, a, b, c, d, e, f, 0x243185be + (w10 = ReadBE32(chunk + 40)));
+            Round(f, g, h, a, b, c, d, e, 0x550c7dc3 + (w11 = ReadBE32(chunk + 44)));
+            Round(e, f, g, h, a, b, c, d, 0x72be5d74 + (w12 = ReadBE32(chunk + 48)));
+            Round(d, e, f, g, h, a, b, c, 0x80deb1fe + (w13 = ReadBE32(chunk + 52)));
+            Round(c, d, e, f, g, h, a, b, 0x9bdc06a7 + (w14 = ReadBE32(chunk + 56)));
+            Round(b, c, d, e, f, g, h, a, 0xc19bf174 + (w15 = ReadBE32(chunk + 60)));
+            // Rounds 16-31: each message word is extended in place from earlier words.
+            Round(a, b, c, d, e, f, g, h, 0xe49b69c1 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
+            Round(h, a, b, c, d, e, f, g, 0xefbe4786 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
+            Round(g, h, a, b, c, d, e, f, 0x0fc19dc6 + (w2 += sigma1(w0) + w11 + sigma0(w3)));
+            Round(f, g, h, a, b, c, d, e, 0x240ca1cc + (w3 += sigma1(w1) + w12 + sigma0(w4)));
+            Round(e, f, g, h, a, b, c, d, 0x2de92c6f + (w4 += sigma1(w2) + w13 + sigma0(w5)));
+            Round(d, e, f, g, h, a, b, c, 0x4a7484aa + (w5 += sigma1(w3) + w14 + sigma0(w6)));
+            Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc + (w6 += sigma1(w4) + w15 + sigma0(w7)));
+            Round(b, c, d, e, f, g, h, a, 0x76f988da + (w7 += sigma1(w5) + w0 + sigma0(w8)));
+            Round(a, b, c, d, e, f, g, h, 0x983e5152 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
+            Round(h, a, b, c, d, e, f, g, 0xa831c66d + (w9 += sigma1(w7) + w2 + sigma0(w10)));
+            Round(g, h, a, b, c, d, e, f, 0xb00327c8 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
+            Round(f, g, h, a, b, c, d, e, 0xbf597fc7 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
+            Round(e, f, g, h, a, b, c, d, 0xc6e00bf3 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
+            Round(d, e, f, g, h, a, b, c, 0xd5a79147 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
+            Round(c, d, e, f, g, h, a, b, 0x06ca6351 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
+            Round(b, c, d, e, f, g, h, a, 0x14292967 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
+            // Rounds 32-47.
+            Round(a, b, c, d, e, f, g, h, 0x27b70a85 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
+            Round(h, a, b, c, d, e, f, g, 0x2e1b2138 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
+            Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc + (w2 += sigma1(w0) + w11 + sigma0(w3)));
+            Round(f, g, h, a, b, c, d, e, 0x53380d13 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
+            Round(e, f, g, h, a, b, c, d, 0x650a7354 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
+            Round(d, e, f, g, h, a, b, c, 0x766a0abb + (w5 += sigma1(w3) + w14 + sigma0(w6)));
+            Round(c, d, e, f, g, h, a, b, 0x81c2c92e + (w6 += sigma1(w4) + w15 + sigma0(w7)));
+            Round(b, c, d, e, f, g, h, a, 0x92722c85 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
+            Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
+            Round(h, a, b, c, d, e, f, g, 0xa81a664b + (w9 += sigma1(w7) + w2 + sigma0(w10)));
+            Round(g, h, a, b, c, d, e, f, 0xc24b8b70 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
+            Round(f, g, h, a, b, c, d, e, 0xc76c51a3 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
+            Round(e, f, g, h, a, b, c, d, 0xd192e819 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
+            Round(d, e, f, g, h, a, b, c, 0xd6990624 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
+            Round(c, d, e, f, g, h, a, b, 0xf40e3585 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
+            Round(b, c, d, e, f, g, h, a, 0x106aa070 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
+            // Rounds 48-63; the last two words are not stored back because nothing reads them afterwards.
+            Round(a, b, c, d, e, f, g, h, 0x19a4c116 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
+            Round(h, a, b, c, d, e, f, g, 0x1e376c08 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
+            Round(g, h, a, b, c, d, e, f, 0x2748774c + (w2 += sigma1(w0) + w11 + sigma0(w3)));
+            Round(f, g, h, a, b, c, d, e, 0x34b0bcb5 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
+            Round(e, f, g, h, a, b, c, d, 0x391c0cb3 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
+            Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a + (w5 += sigma1(w3) + w14 + sigma0(w6)));
+            Round(c, d, e, f, g, h, a, b, 0x5b9cca4f + (w6 += sigma1(w4) + w15 + sigma0(w7)));
+            Round(b, c, d, e, f, g, h, a, 0x682e6ff3 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
+            Round(a, b, c, d, e, f, g, h, 0x748f82ee + (w8 += sigma1(w6) + w1 + sigma0(w9)));
+            Round(h, a, b, c, d, e, f, g, 0x78a5636f + (w9 += sigma1(w7) + w2 + sigma0(w10)));
+            Round(g, h, a, b, c, d, e, f, 0x84c87814 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
+            Round(f, g, h, a, b, c, d, e, 0x8cc70208 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
+            Round(e, f, g, h, a, b, c, d, 0x90befffa + (w12 += sigma1(w10) + w5 + sigma0(w13)));
+            Round(d, e, f, g, h, a, b, c, 0xa4506ceb + (w13 += sigma1(w11) + w6 + sigma0(w14)));
+            Round(c, d, e, f, g, h, a, b, 0xbef9a3f7 + (w14 + sigma1(w12) + w7 + sigma0(w15)));
+            Round(b, c, d, e, f, g, h, a, 0xc67178f2 + (w15 + sigma1(w13) + w8 + sigma0(w0)));
+            // Fold the working variables back into the chaining state, then move to the next chunk.
+            s[0] += a;
+            s[1] += b;
+            s[2] += c;
+            s[3] += d;
+            s[4] += e;
+            s[5] += f;
+            s[6] += g;
+            s[7] += h;
+            chunk += 64;
+        }
+    }
+
+    /** A hasher class for SHA-256: Write() absorbs data, Finalize() emits the digest, Reset() starts over. */
+    class CSHA256{
+    private:
+        uint32_t s[8];  // chaining state (eight 32-bit words)
+        unsigned char buf[64];  // bytes of the current, not yet processed, 64-byte block
+        uint64_t bytes{0};  // total number of bytes written so far
+
+    public:
+        static const size_t OUTPUT_SIZE = 32;  // digest length in bytes
+        // Starts from the SHA-256 initial state.
+        CSHA256(){
+            gkr::Initialize(s);
+        }
+        // Absorbs len bytes from data; returns *this so calls can be chained.
+        CSHA256& Write(const unsigned char* data, size_t len){
+            const unsigned char* end = data + len;
+            size_t bufsize = bytes % 64;
+            if (bufsize && bufsize + len >= 64) {
+                // Fill the buffer, and process it.
+                memcpy(buf + bufsize, data, 64 - bufsize);
+                bytes += 64 - bufsize;
+                data += 64 - bufsize;
+                gkr::Transform(s, buf, 1);
+                bufsize = 0;
+            }
+            if (end - data >= 64) {
+                size_t blocks = (end - data) / 64;  // whole blocks are hashed straight from the input
+                Transform(s, data, blocks);
+                data += 64 * blocks;
+                bytes += 64 * blocks;
+            }
+            if (end > data) {
+                // Fill the buffer with what remains.
+                memcpy(buf + bufsize, data, end - data);
+                bytes += end - data;
+            }
+            return *this;
+        }
+        // Appends the padding and the 64-bit big-endian bit length, then writes the 32-byte digest to hash.
+        void Finalize(unsigned char hash[OUTPUT_SIZE]){
+            static const unsigned char pad[64] = {0x80};
+            unsigned char sizedesc[8];
+            WriteBE64(sizedesc, bytes << 3);  // message length in bits, captured before padding is written
+            Write(pad, 1 + ((119 - (bytes % 64)) % 64));  // pad until exactly 8 bytes remain in the last block
+            Write(sizedesc, 8);
+            WriteBE32(hash, s[0]);
+            WriteBE32(hash + 4, s[1]);
+            WriteBE32(hash + 8, s[2]);
+            WriteBE32(hash + 12, s[3]);
+            WriteBE32(hash + 16, s[4]);
+            WriteBE32(hash + 20, s[5]);
+            WriteBE32(hash + 24, s[6]);
+            WriteBE32(hash + 28, s[7]);
+        }
+        // Restores the initial state and zeroes the byte count; Finalize() does not do this itself.
+        CSHA256& Reset(){
+            bytes = 0;
+            gkr::Initialize(s);
+            return *this;
+        }
+    };
+}
\ No newline at end of file
diff --git a/sumcheck/cuda/src/sumcheck_cuda.cu b/sumcheck/cuda/src/sumcheck_cuda.cu
new file mode 100644
index 00000000..b601d857
--- /dev/null
+++ b/sumcheck/cuda/src/sumcheck_cuda.cu
@@ -0,0 +1,169 @@
+#include <cstdio>
+#include <tuple>
+#include <chrono>
+#include <iostream>
+#include <ctime>
+#include <getopt.h>
+#include <iomanip>
+
+#include "LinearGKR/sumcheck.cuh"
+#include "circuit/circuit.cuh"
+
+// Prints the one-line command-line synopsis (usage/help information) to stdout
+void print_usage() {
+    std::cout << "Usage: ./sumcheck.bin -m [cpu|gpu] -p [2^(size) of circuit] [-v]" << std::endl;
+}
+
+// Benchmark driver: builds one random GKR layer, proves it with the sumcheck
+// prover on the CPU or the GPU, then checks the proof with the CPU verifier.
+int main(int argc, char* argv[]){
+    // Seed the random number generator with the current time
+    srand(time(nullptr));
+
+    using namespace gkr;
+
+    // Define optional parameters
+    const char* mode = nullptr;
+    uint32_t circuit_size = 20;
+
+    // Parse command line options
+    int opt;
+    while ((opt = getopt(argc, argv, "m:p:v")) != -1) {
+        switch (opt) {
+            case 'm':
+                mode = optarg;  // Get the argument for -m (mode)
+                break;
+            case 'p': {
+                // Accept only an integer in [0, 31]: 1u << circuit_size below needs circuit_size < 32
+                char* parse_end = nullptr;
+                const long requested = std::strtol(optarg, &parse_end, 10);
+                if (parse_end == optarg || *parse_end != '\0' || requested < 0 || requested > 31) {
+                    std::cerr << "Invalid circuit size. Use an integer in [0, 31]." << std::endl;
+                    print_usage();
+                    return 1;
+                }
+                circuit_size = static_cast<uint32_t>(requested);
+                break;
+            }
+            case 'v':
+                verbose = true;  // Set verbose to true if -v is passed
+                break;
+            default:
+                print_usage();  // Print help if invalid options are passed
+                return 1;
+        }
+    }
+
+    // Set useGPU from -m; without -m it keeps whatever value it already had
+    if (mode != nullptr) {
+        const bool want_gpu = strcmp(mode, "gpu") == 0;
+        if (!want_gpu && strcmp(mode, "cpu") != 0) {
+            std::cerr << "Invalid mode. Use 'cpu' or 'gpu'." << std::endl;
+            print_usage();
+            return 1;  // Exit if an invalid mode is passed
+        }
+        useGPU = want_gpu;
+    }
+
+    // Output the current settings if verbose is enabled
+    if (verbose) {
+        std::cout << "Verbose mode enabled." << std::endl;
+        std::cout << "Using " << (useGPU ? "GPU" : "CPU") << " for computation." << std::endl;
+    }
+
+    // Choose the Field
+#ifdef useBN254
+    using F           = BN254_field::Bn254;
+    using F_primitive = BN254_field::Bn254;
+    auto field_type   = "BN254";
+#elif defined(useM31ext3)
+    using F           = M31ext3_field::M31ext3;
+    using F_primitive = M31ext3_field::M31ext3;
+    auto field_type   = "M31ext3";
+#else
+    using F           = M31_field::M31;
+    using F_primitive = M31_field::M31;
+    auto field_type   = "M31";
+#endif
+    std::cout   << "\n-------------------------------------------\n\t\t* Random *" << std::endl
+                << "-------------------------------------------" << std::endl ;
+
+    // Determine the size of circuit
+    uint32_t nb_output_vars = circuit_size, nb_input_vars = circuit_size;
+
+    // Create timer
+    struct TimingBreakdown timer;
+
+    // Create Random Circuit
+    std::cout << "Randomizing Input ..." << std::endl;
+    CircuitLayer<F, F_primitive> layer = CircuitLayer<F, F_primitive>::random(nb_output_vars, nb_input_vars);
+    Circuit<F, F_primitive> circuit;
+    circuit.add_layer(layer);
+    std::cout << "Randomization Done!" << std::endl;
+
+    // Evaluate the output
+    std::cout << "Evaluating Output ..." << std::endl;
+    const uint32_t output_len = 1u << nb_output_vars;  // unsigned shift: defined up to 31
+    F* output = (F*) malloc(output_len * sizeof(F));
+    auto* rz1 = (F_primitive*) malloc (nb_output_vars * sizeof(F_primitive));
+    auto* rz2 = (F_primitive*) malloc (nb_output_vars * sizeof(F_primitive));
+    // malloc(0) may legitimately return nullptr, so rz1/rz2 are only checked when non-empty
+    const bool rz_missing = nb_output_vars != 0 && (rz1 == nullptr || rz2 == nullptr);
+    if (output == nullptr || rz_missing) {
+        std::cerr << "Failed to allocate the evaluation buffers." << std::endl;
+        free(output); free(rz1); free(rz2);  // free(nullptr) is a no-op
+        return 1;
+    }
+    layer.evaluate(output, output_len);
+    std::cout << "Evaluation Done!" << std::endl;
+
+    // Generate random numbers to commit
+    std::cout << "Commit Output ..." << std::endl;
+    for (uint32_t i = 0; i < nb_output_vars; i++){
+        rz1[i] = F_primitive::random();
+        rz2[i] = F_primitive::random();
+    }
+    F claim_v1 = eval_multilinear(output, output_len, rz1, nb_output_vars);
+    F claim_v2 = eval_multilinear(output, output_len, rz2, nb_output_vars);
+    std::cout << "Commit Done!" << std::endl;
+
+    F_primitive alpha = F_primitive::random();
+    F_primitive beta = F_primitive::random();
+
+    GKRScratchPad<F, F_primitive> spad{};
+    spad.prepare(circuit);
+
+    // Define the result of sumcheck prove
+    F_primitive* rz1s = nullptr;
+    F_primitive* rz2s = nullptr;
+
+    // Entering GPU proof generation
+    auto start = std::chrono::high_resolution_clock::now();
+    Transcript<F, F_primitive> prover_transcript;
+    std::cout << std::fixed << std::setprecision(4); // Set precision
+    std::cout   << "\n-------------------------------------------"  << std::endl;
+    std::cout   << (useGPU ? "\t\t* GPU Prover *" : "\t\t* CPU Prover *") << std::endl;
+    std::cout   << "-------------------------------------------"    << std::endl ;
+    sumcheck_prove_gkr_layer(layer, rz1, nb_output_vars, rz2, nb_output_vars,
+                             alpha, beta, prover_transcript, spad,
+                             rz1s, rz2s, timer);
+    Proof<F> &proof = prover_transcript.proof;
+    free(rz1s); free(rz2s);
+    auto end = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+    std::cout << "Input size: 2 ^ " << nb_input_vars << ", Output size: 2 ^ " << nb_output_vars << std::endl;
+    std::cout << "Field type: " << field_type << std::endl;
+    std::cout << "Prove time: " << (float)duration.count() / 1000 << " seconds" << std::endl;
+    printf("Proof size: %u bytes\n", proof.bytes_write_ptr);
+
+    // Doing Verification on CPU
+    std::cout   << "\n-------------------------------------------\n\t\t* Verifier *" << std::endl
+                << "-------------------------------------------" << std::endl ;
+    Transcript<F, F_primitive> verifier_transcript;
+    bool verified = std::get<0>(sumcheck_verify_gkr_layer(layer, rz1, rz2,
+                                                          claim_v1, claim_v2, alpha, beta,
+                                                          proof, verifier_transcript));
+    printf("Verify pass = %d\n", verified);
+    // Release the buffers allocated for the evaluation and the challenge points
+    free(output); free(rz1); free(rz2);
+}
diff --git a/sumcheck/src/prover_helper/power_gate.rs b/sumcheck/src/prover_helper/power_gate.rs
index 056a8118..e8a935a7 100644
--- a/sumcheck/src/prover_helper/power_gate.rs
+++ b/sumcheck/src/prover_helper/power_gate.rs
@@ -151,8 +151,13 @@ impl<const D: usize> SumcheckPowerGateHelper<D> {
                 hg_v[1] = bk_hg_1[i * 2 + 1];
                 p_add[0] += C::challenge_mul_field(&hg_v[0], &f_v[0]);
                 p_add[1] += C::challenge_mul_field(&hg_v[1], &f_v[1]);
+
+                let s_f_v = f_v[0] + f_v[1];
+                let s_hg_v = hg_v[0] + hg_v[1];
+                p_add[2] += C::challenge_mul_field(&s_hg_v, &s_f_v);
             }
-            p_add[2] = p_add[1] + p_add[1] - p_add[0] + C::Field::from(2);
+            p_add[2] = p_add[1].mul_by_6() + p_add[0].mul_by_3() - p_add[2].double();
+
             // interpolate p_add into 7 points
             Self::interpolate_3::<C>(&p_add, &mut p);
             p
diff --git a/sumcheck/src/prover_helper/sumcheck_gkr_vanilla.rs b/sumcheck/src/prover_helper/sumcheck_gkr_vanilla.rs
index 58124b56..24239a15 100644
--- a/sumcheck/src/prover_helper/sumcheck_gkr_vanilla.rs
+++ b/sumcheck/src/prover_helper/sumcheck_gkr_vanilla.rs
@@ -19,8 +19,7 @@ pub(crate) struct SumcheckGkrVanillaHelper<'a, C: GKRConfig> {
     rz1: &'a Option<Vec<C::ChallengeField>>,
     r_simd: &'a [C::ChallengeField],
     r_mpi: &'a [C::ChallengeField],
-    alpha: C::ChallengeField,
-    beta: Option<C::ChallengeField>,
+    alpha: Option<C::ChallengeField>,
 
     pub(crate) input_var_num: usize,
     pub(crate) simd_var_num: usize,
@@ -30,6 +29,7 @@ pub(crate) struct SumcheckGkrVanillaHelper<'a, C: GKRConfig> {
     mpi_var_helper: SumcheckSimdProdGateHelper,
 
     mpi_config: &'a MPIConfig,
+    is_output_layer: bool,
 }
 
 /// internal helper functions
@@ -57,10 +57,10 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
         rz1: &'a Option<Vec<C::ChallengeField>>,
         r_simd: &'a [C::ChallengeField],
         r_mpi: &'a [C::ChallengeField],
-        alpha: C::ChallengeField,
-        beta: Option<C::ChallengeField>,
+        alpha: Option<C::ChallengeField>,
         sp: &'a mut ProverScratchPad<C>,
         mpi_config: &'a MPIConfig,
+        is_output_layer: bool,
     ) -> Self {
         let simd_var_num = C::get_field_pack_size().trailing_zeros() as usize;
         SumcheckGkrVanillaHelper {
@@ -76,7 +76,6 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
             r_simd,
             r_mpi,
             alpha,
-            beta,
 
             input_var_num: layer.input_var_num,
             simd_var_num,
@@ -87,6 +86,7 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
                 mpi_config.world_size().trailing_zeros() as usize,
             ),
             mpi_config,
+            is_output_layer,
         }
     }
 
@@ -166,7 +166,12 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
         var_idx: usize,
         degree: usize,
     ) -> [C::ChallengeField; 3] {
-        self.poly_evals_at_rx(var_idx, degree)
+        let [p0, p1, p2] = self.poly_evals_at_rx(var_idx, degree);
+        [
+            p0 * self.sp.phase2_coef,
+            p1 * self.sp.phase2_coef,
+            p2 * self.sp.phase2_coef,
+        ]
     }
 
     #[inline]
@@ -227,25 +232,29 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
 
     #[inline]
     pub(crate) fn prepare_simd(&mut self) {
-        EqPolynomial::<C::ChallengeField>::eq_eval_at(
-            self.r_simd,
-            &C::ChallengeField::one(),
-            &mut self.sp.eq_evals_at_r_simd0,
-            &mut self.sp.eq_evals_first_half,
-            &mut self.sp.eq_evals_second_half,
-        );
+        if self.is_output_layer || self.alpha.is_none() {
+            EqPolynomial::<C::ChallengeField>::eq_eval_at(
+                self.r_simd,
+                &C::ChallengeField::one(),
+                &mut self.sp.eq_evals_at_r_simd0,
+                &mut self.sp.eq_evals_first_half,
+                &mut self.sp.eq_evals_second_half,
+            );
+        }
     }
 
     #[inline]
     pub(crate) fn prepare_mpi(&mut self) {
-        // TODO: No need to evaluate it at all world ranks, remove redundancy later.
-        EqPolynomial::<C::ChallengeField>::eq_eval_at(
-            self.r_mpi,
-            &C::ChallengeField::one(),
-            &mut self.sp.eq_evals_at_r_mpi0,
-            &mut self.sp.eq_evals_first_half,
-            &mut self.sp.eq_evals_second_half,
-        );
+        if self.is_output_layer || self.alpha.is_none() {
+            // TODO: No need to evaluate it at all world ranks, remove redundancy later.
+            EqPolynomial::<C::ChallengeField>::eq_eval_at(
+                self.r_mpi,
+                &C::ChallengeField::one(),
+                &mut self.sp.eq_evals_at_r_mpi0,
+                &mut self.sp.eq_evals_first_half,
+                &mut self.sp.eq_evals_second_half,
+            );
+        }
     }
 
     #[inline]
@@ -254,7 +263,6 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
         let add = &self.layer.add;
         let vals = &self.layer.input_vals;
         let eq_evals_at_rz0 = &mut self.sp.eq_evals_at_rz0;
-        let eq_evals_at_rz1 = &mut self.sp.eq_evals_at_rz1;
         let gate_exists = &mut self.sp.gate_exists_5;
         let hg_vals = &mut self.sp.hg_evals;
         // hg_vals[0..vals.len()].fill(F::zero()); // FIXED: consider memset unsafe?
@@ -266,26 +274,32 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
             std::ptr::write_bytes(gate_exists.as_mut_ptr(), 0, vals.len());
         }
 
-        EqPolynomial::<C::ChallengeField>::eq_eval_at(
-            self.rz0,
-            &self.alpha,
-            eq_evals_at_rz0,
-            &mut self.sp.eq_evals_first_half,
-            &mut self.sp.eq_evals_second_half,
-        );
+        assert_eq!(self.rz1.is_none(), self.alpha.is_none());
 
-        // they should both be some or both be none though
-        if self.rz1.is_some() && self.beta.is_some() {
+        if self.is_output_layer || self.rz1.is_none() {
+            // Case 1: Output layer. There is only 1 claim
+            // Case 2: Internal layer, but there is only 1 claim to prove,
+            //  eq_evals_at_rx was thus skipped in the previous round
+            EqPolynomial::<C::ChallengeField>::eq_eval_at(
+                self.rz0,
+                &C::ChallengeField::ONE,
+                eq_evals_at_rz0,
+                &mut self.sp.eq_evals_first_half,
+                &mut self.sp.eq_evals_second_half,
+            );
+        } else {
+            let alpha = self.alpha.unwrap();
+            let eq_evals_at_rx_previous = &self.sp.eq_evals_at_rx;
             EqPolynomial::<C::ChallengeField>::eq_eval_at(
                 self.rz1.as_ref().unwrap(),
-                &self.beta.unwrap(),
-                eq_evals_at_rz1,
+                &alpha,
+                eq_evals_at_rz0,
                 &mut self.sp.eq_evals_first_half,
                 &mut self.sp.eq_evals_second_half,
             );
 
-            for i in 0..1 << self.rz0.len() {
-                eq_evals_at_rz0[i] += eq_evals_at_rz1[i];
+            for i in 0..(1 << self.rz0.len()) {
+                eq_evals_at_rz0[i] += eq_evals_at_rx_previous[i];
             }
         }
 
@@ -343,22 +357,24 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
             std::ptr::write_bytes(gate_exists.as_mut_ptr(), 0, fill_len);
         }
 
+        // TODO-Optimization: For root process, _eq_vec does not have to be recomputed
+        self.sp.phase2_coef =
+            EqPolynomial::<C::ChallengeField>::eq_vec(self.r_mpi, &self.r_mpi_var)
+                * self.sp.eq_evals_at_r_simd0[0]
+                * v_rx_rsimd_rw;
+
+        // EQ Polys for next round
         EqPolynomial::<C::ChallengeField>::eq_eval_at(
             &self.r_mpi_var,
-            &C::ChallengeField::one(),
+            &C::ChallengeField::ONE,
             &mut self.sp.eq_evals_at_r_mpi0,
             &mut self.sp.eq_evals_first_half,
             &mut self.sp.eq_evals_second_half,
         );
 
-        // TODO: For root process, _eq_vec does not have to be recomputed
-        let coef = EqPolynomial::<C::ChallengeField>::eq_vec(self.r_mpi, &self.r_mpi_var)
-            * self.sp.eq_evals_at_r_simd0[0]
-            * v_rx_rsimd_rw;
-
         EqPolynomial::<C::ChallengeField>::eq_eval_at(
             &self.rx,
-            &coef,
+            &C::ChallengeField::ONE,
             eq_evals_at_rx,
             &mut self.sp.eq_evals_first_half,
             &mut self.sp.eq_evals_second_half,
@@ -366,7 +382,7 @@ impl<'a, C: GKRConfig> SumcheckGkrVanillaHelper<'a, C> {
 
         EqPolynomial::<C::ChallengeField>::eq_eval_at(
             &self.r_simd_var,
-            &C::ChallengeField::one(),
+            &C::ChallengeField::ONE,
             &mut self.sp.eq_evals_at_r_simd0,
             &mut self.sp.eq_evals_first_half,
             &mut self.sp.eq_evals_second_half,
diff --git a/sumcheck/src/scratch_pad.rs b/sumcheck/src/scratch_pad.rs
index fbe4df00..71c8f6e3 100644
--- a/sumcheck/src/scratch_pad.rs
+++ b/sumcheck/src/scratch_pad.rs
@@ -19,7 +19,6 @@ pub struct ProverScratchPad<C: GKRConfig> {
 
     pub eq_evals_at_rx: Vec<C::ChallengeField>,
     pub eq_evals_at_rz0: Vec<C::ChallengeField>,
-    pub eq_evals_at_rz1: Vec<C::ChallengeField>,
     pub eq_evals_at_r_simd0: Vec<C::ChallengeField>,
     pub eq_evals_at_r_mpi0: Vec<C::ChallengeField>,
     pub eq_evals_first_half: Vec<C::ChallengeField>,
@@ -27,6 +26,8 @@ pub struct ProverScratchPad<C: GKRConfig> {
 
     pub gate_exists_5: Vec<bool>,
     pub gate_exists_1: Vec<bool>,
+
+    pub phase2_coef: C::ChallengeField,
 }
 
 impl<C: GKRConfig> ProverScratchPad<C> {
@@ -45,7 +46,6 @@ impl<C: GKRConfig> ProverScratchPad<C> {
 
             eq_evals_at_rx: vec![C::ChallengeField::default(); max_input_num],
             eq_evals_at_rz0: vec![C::ChallengeField::default(); max_output_num],
-            eq_evals_at_rz1: vec![C::ChallengeField::default(); max_output_num],
             eq_evals_at_r_simd0: vec![C::ChallengeField::default(); C::get_field_pack_size()],
             eq_evals_at_r_mpi0: vec![C::ChallengeField::default(); mpi_world_size],
             eq_evals_first_half: vec![
@@ -65,6 +65,7 @@ impl<C: GKRConfig> ProverScratchPad<C> {
 
             gate_exists_5: vec![false; max_input_num],
             gate_exists_1: vec![false; max_input_num],
+            phase2_coef: C::ChallengeField::ZERO,
         }
     }
 }
@@ -72,7 +73,6 @@ impl<C: GKRConfig> ProverScratchPad<C> {
 pub struct VerifierScratchPad<C: GKRConfig> {
     // ====== for evaluating cst, add and mul ======
     pub eq_evals_at_rz0: Vec<C::ChallengeField>,
-    pub eq_evals_at_rz1: Vec<C::ChallengeField>,
     pub eq_evals_at_r_simd: Vec<C::ChallengeField>,
     pub eq_evals_at_r_mpi: Vec<C::ChallengeField>,
 
@@ -145,7 +145,6 @@ impl<C: GKRConfig> VerifierScratchPad<C> {
 
         Self {
             eq_evals_at_rz0: vec![C::ChallengeField::zero(); max_io_size],
-            eq_evals_at_rz1: vec![C::ChallengeField::zero(); max_io_size],
             eq_evals_at_r_simd: vec![C::ChallengeField::zero(); simd_size],
             eq_evals_at_r_mpi: vec![C::ChallengeField::zero(); config.mpi_config.world_size()],
 
diff --git a/sumcheck/src/sumcheck.rs b/sumcheck/src/sumcheck.rs
index 569cd699..f7ad0e54 100644
--- a/sumcheck/src/sumcheck.rs
+++ b/sumcheck/src/sumcheck.rs
@@ -1,6 +1,7 @@
+use arith::FieldSerde;
 use circuit::CircuitLayer;
 use config::{GKRConfig, MPIConfig};
-use transcript::{Transcript, TranscriptInstance};
+use transcript::Transcript;
 
 use crate::{
     prover_helper::{SumcheckGkrSquareHelper, SumcheckGkrVanillaHelper},
@@ -10,25 +11,34 @@ use crate::{
 // FIXME
 #[allow(clippy::too_many_arguments)]
 #[allow(clippy::type_complexity)]
-pub fn sumcheck_prove_gkr_layer<C: GKRConfig>(
+pub fn sumcheck_prove_gkr_layer<C: GKRConfig, T: Transcript<C::ChallengeField>>(
     layer: &CircuitLayer<C>,
     rz0: &[C::ChallengeField],
     rz1: &Option<Vec<C::ChallengeField>>,
     r_simd: &[C::ChallengeField],
     r_mpi: &[C::ChallengeField],
-    alpha: C::ChallengeField,
-    beta: Option<C::ChallengeField>,
-    transcript: &mut TranscriptInstance<C::FiatShamirHashType>,
+    alpha: Option<C::ChallengeField>,
+    transcript: &mut T,
     sp: &mut ProverScratchPad<C>,
     mpi_config: &MPIConfig,
+    is_output_layer: bool,
 ) -> (
     Vec<C::ChallengeField>,
     Option<Vec<C::ChallengeField>>,
     Vec<C::ChallengeField>,
     Vec<C::ChallengeField>,
 ) {
-    let mut helper =
-        SumcheckGkrVanillaHelper::new(layer, rz0, rz1, r_simd, r_mpi, alpha, beta, sp, mpi_config);
+    let mut helper = SumcheckGkrVanillaHelper::new(
+        layer,
+        rz0,
+        rz1,
+        r_simd,
+        r_mpi,
+        alpha,
+        sp,
+        mpi_config,
+        is_output_layer,
+    );
 
     helper.prepare_simd();
     helper.prepare_mpi();
@@ -37,41 +47,37 @@ pub fn sumcheck_prove_gkr_layer<C: GKRConfig>(
     helper.prepare_x_vals();
     for i_var in 0..helper.input_var_num {
         let evals = helper.poly_evals_at_rx(i_var, 2);
-        let r = mpi_config
-            .transcript_io::<C::ChallengeField, C::FiatShamirHashType>(&evals, transcript);
+        let r = mpi_config.transcript_io::<C::ChallengeField, T>(&evals, transcript);
         helper.receive_rx(i_var, r);
     }
 
     helper.prepare_simd_var_vals();
     for i_var in 0..helper.simd_var_num {
         let evals = helper.poly_evals_at_r_simd_var(i_var, 3);
-        let r = mpi_config
-            .transcript_io::<C::ChallengeField, C::FiatShamirHashType>(&evals, transcript);
+        let r = mpi_config.transcript_io::<C::ChallengeField, T>(&evals, transcript);
         helper.receive_r_simd_var(i_var, r);
     }
 
     helper.prepare_mpi_var_vals();
     for i_var in 0..mpi_config.world_size().trailing_zeros() as usize {
         let evals = helper.poly_evals_at_r_mpi_var(i_var, 3);
-        let r = mpi_config
-            .transcript_io::<C::ChallengeField, C::FiatShamirHashType>(&evals, transcript);
+        let r = mpi_config.transcript_io::<C::ChallengeField, T>(&evals, transcript);
         helper.receive_r_mpi_var(i_var, r);
     }
 
     let vx_claim = helper.vx_claim();
-    transcript.append_field_element::<C::ChallengeField>(&vx_claim);
+    transcript.append_field_element(&vx_claim);
 
     // gkr phase 2 over variable y
     if !layer.structure_info.max_degree_one {
         helper.prepare_y_vals();
         for i_var in 0..helper.input_var_num {
             let evals = helper.poly_evals_at_ry(i_var, 2);
-            let r = mpi_config
-                .transcript_io::<C::ChallengeField, C::FiatShamirHashType>(&evals, transcript);
+            let r = mpi_config.transcript_io::<C::ChallengeField, T>(&evals, transcript);
             helper.receive_ry(i_var, r);
         }
         let vy_claim = helper.vy_claim();
-        transcript.append_field_element::<C::ChallengeField>(&vy_claim);
+        transcript.append_field_element(&vy_claim);
     }
 
     let rx = helper.rx;
@@ -88,10 +94,10 @@ pub fn sumcheck_prove_gkr_layer<C: GKRConfig>(
 
 // FIXME
 #[allow(clippy::needless_range_loop)] // todo: remove
-pub fn sumcheck_prove_gkr_square_layer<C: GKRConfig>(
+pub fn sumcheck_prove_gkr_square_layer<C: GKRConfig, T: Transcript<C::ChallengeField>>(
     layer: &CircuitLayer<C>,
     rz0: &[C::ChallengeField],
-    transcript: &mut TranscriptInstance<C::FiatShamirHashType>,
+    transcript: &mut T,
     sp: &mut ProverScratchPad<C>,
 ) -> Vec<C::ChallengeField> {
     const D: usize = 7;
@@ -104,22 +110,28 @@ pub fn sumcheck_prove_gkr_square_layer<C: GKRConfig>(
         let evals: [C::Field; D] = helper.poly_evals_at(i_var);
 
         for deg in 0..D {
-            transcript.append_field_element::<C::Field>(&evals[deg]);
+            let mut buf = vec![];
+            evals[deg].serialize_into(&mut buf).unwrap();
+            transcript.append_u8_slice(&buf);
         }
 
-        let r = transcript.generate_challenge::<C::ChallengeField>();
+        let r = transcript.generate_challenge_field_element();
 
         log::trace!("i_var={} evals: {:?} r: {:?}", i_var, evals, r);
 
         helper.receive_challenge(i_var, r);
         if i_var == layer.input_var_num - 1 {
             log::trace!("vx claim: {:?}", helper.vx_claim());
-            transcript.append_field_element::<C::Field>(&helper.vx_claim());
+            let mut buf = vec![];
+            helper.vx_claim().serialize_into(&mut buf).unwrap();
+            transcript.append_u8_slice(&buf);
         }
     }
 
     log::trace!("claimed vx = {:?}", helper.vx_claim());
-    transcript.append_field_element::<C::Field>(&helper.vx_claim());
+    let mut buf = vec![];
+    helper.vx_claim().serialize_into(&mut buf).unwrap();
+    transcript.append_u8_slice(&buf);
 
     helper.rx
 }
diff --git a/sumcheck/src/verifier_helper.rs b/sumcheck/src/verifier_helper.rs
index b5fd845f..7837ec89 100644
--- a/sumcheck/src/verifier_helper.rs
+++ b/sumcheck/src/verifier_helper.rs
@@ -13,33 +13,33 @@ impl GKRVerifierHelper {
     #[inline(always)]
     pub fn prepare_layer<C: GKRConfig>(
         layer: &CircuitLayer<C>,
-        alpha: &C::ChallengeField,
-        beta: &Option<C::ChallengeField>,
+        alpha: &Option<C::ChallengeField>,
         rz0: &[C::ChallengeField],
         rz1: &Option<Vec<C::ChallengeField>>,
         r_simd: &Vec<C::ChallengeField>,
         r_mpi: &Vec<C::ChallengeField>,
         sp: &mut VerifierScratchPad<C>,
+        is_output_layer: bool,
     ) {
-        EqPolynomial::<C::ChallengeField>::eq_eval_at(
-            rz0,
-            alpha,
-            &mut sp.eq_evals_at_rz0,
-            &mut sp.eq_evals_first_part,
-            &mut sp.eq_evals_second_part,
-        );
+        assert_eq!(alpha.is_none(), rz1.is_none());
 
-        if beta.is_some() && rz1.is_some() {
+        if is_output_layer {
             EqPolynomial::<C::ChallengeField>::eq_eval_at(
-                rz1.as_ref().unwrap(),
-                beta.as_ref().unwrap(),
-                &mut sp.eq_evals_at_rz1,
+                rz0,
+                &C::ChallengeField::ONE,
+                &mut sp.eq_evals_at_rz0,
                 &mut sp.eq_evals_first_part,
                 &mut sp.eq_evals_second_part,
             );
-
-            for i in 0..(1usize << layer.output_var_num) {
-                sp.eq_evals_at_rz0[i] += sp.eq_evals_at_rz1[i];
+        } else {
+            // use results from previous layer
+            let output_len = 1 << rz0.len();
+            sp.eq_evals_at_rz0[..output_len].copy_from_slice(&sp.eq_evals_at_rx[..output_len]);
+            if alpha.is_some() && rz1.is_some() {
+                let alpha = alpha.unwrap();
+                for i in 0..(1usize << layer.output_var_num) {
+                    sp.eq_evals_at_rz0[i] += alpha * sp.eq_evals_at_ry[i];
+                }
             }
         }
 
@@ -74,10 +74,6 @@ impl GKRVerifierHelper {
         let mpi_world_size = sp.eq_evals_at_r_mpi.len();
         let local_input_size = public_input.len() / mpi_world_size;
 
-        let simd_sum: C::ChallengeField = sp.eq_evals_at_r_simd.iter().sum();
-        let mpi_sum: C::ChallengeField = sp.eq_evals_at_r_mpi.iter().sum();
-        let simd_mpi_sum = simd_sum * mpi_sum;
-
         for cst_gate in cst_gates {
             let tmp = match cst_gate.coef_type {
                 CoefType::PublicInput(input_idx) => {
@@ -100,17 +96,15 @@ impl GKRVerifierHelper {
                             &sp.eq_evals_at_r_simd,
                         )
                 }
-                _ => {
-                    C::challenge_mul_circuit_field(
-                        &sp.eq_evals_at_rz0[cst_gate.o_id],
-                        &cst_gate.coef,
-                    ) * simd_mpi_sum
-                }
+                _ => C::challenge_mul_circuit_field(
+                    &sp.eq_evals_at_rz0[cst_gate.o_id],
+                    &cst_gate.coef,
+                ),
             };
             v += tmp;
         }
 
-        v * simd_sum * mpi_sum
+        v
     }
 
     #[inline(always)]
diff --git a/transcript/Cargo.toml b/transcript/Cargo.toml
index 8dbdb235..c74e5d4b 100644
--- a/transcript/Cargo.toml
+++ b/transcript/Cargo.toml
@@ -7,4 +7,4 @@ edition = "2021"
 arith = { path = "../arith" }
 
 sha2 = "0.10.8"
-tiny-keccak = { version = "2.0.2", features = [ "sha3" ] }
\ No newline at end of file
+tiny-keccak = { version = "2.0.2", features = [ "sha3", "keccak" ] }
\ No newline at end of file
diff --git a/transcript/src/fiat_shamir_hash.rs b/transcript/src/fiat_shamir_hash.rs
index f044f7f4..268d1e35 100644
--- a/transcript/src/fiat_shamir_hash.rs
+++ b/transcript/src/fiat_shamir_hash.rs
@@ -1,10 +1,15 @@
+use arith::{Field, FieldSerde};
+
 pub mod sha2_256;
 pub use sha2_256::*;
 
 pub mod keccak_256;
 pub use keccak_256::*;
 
-pub trait FiatShamirHash {
+pub mod mimc;
+pub use mimc::*;
+
+pub trait FiatShamirBytesHash {
     /// The size of the hash output in bytes.
     const DIGEST_SIZE: usize;
 
@@ -17,3 +22,11 @@ pub trait FiatShamirHash {
     /// Hash the input in place.
     fn hash_inplace(buffer: &mut [u8]);
 }
+
+pub trait FiatShamirFieldHash<F: Field + FieldSerde> {
+    /// Create a new hash instance.
+    fn new() -> Self;
+
+    /// Hash a slice of field elements and return the result as a field element.
+    fn hash(&self, input: &[F]) -> F;
+}
diff --git a/transcript/src/fiat_shamir_hash/keccak_256.rs b/transcript/src/fiat_shamir_hash/keccak_256.rs
index 0d8e2e94..21645eca 100644
--- a/transcript/src/fiat_shamir_hash/keccak_256.rs
+++ b/transcript/src/fiat_shamir_hash/keccak_256.rs
@@ -1,11 +1,11 @@
 use tiny_keccak::{Hasher, Sha3};
 
-use super::FiatShamirHash;
+use super::FiatShamirBytesHash;
 
-#[derive(Clone)]
+#[derive(Clone, Default)]
 pub struct Keccak256hasher {}
 
-impl FiatShamirHash for Keccak256hasher {
+impl FiatShamirBytesHash for Keccak256hasher {
     const DIGEST_SIZE: usize = 32;
 
     #[inline]
diff --git a/transcript/src/fiat_shamir_hash/mimc.rs b/transcript/src/fiat_shamir_hash/mimc.rs
new file mode 100644
index 00000000..ced9cbe1
--- /dev/null
+++ b/transcript/src/fiat_shamir_hash/mimc.rs
@@ -0,0 +1,91 @@
+use arith::{Field, FieldSerde};
+
+use tiny_keccak::{Hasher, Keccak};
+
+use super::FiatShamirFieldHash;
+
+#[derive(Debug, Clone, Default)]
+pub struct MIMCConstants<F: Field> {
+    cts: Vec<F>,
+    n_rounds: i64,
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct MIMCHasher<F: Field> {
+    constants: MIMCConstants<F>,
+}
+
+impl<F: Field + FieldSerde> FiatShamirFieldHash<F> for MIMCHasher<F> {
+    fn new() -> Self {
+        Self {
+            constants: generate_mimc_constants::<F>(),
+        }
+    }
+
+    fn hash(&self, input: &[F]) -> F {
+        let mut h: F = F::ZERO;
+        for a in input {
+            let r = self.mimc5_hash(&h, a);
+            h += r + a;
+        }
+        h
+    }
+}
+
+impl<F: Field + FieldSerde> MIMCHasher<F> {
+    #[inline(always)]
+    pub fn pow5(x: F) -> F {
+        let x2 = x * x;
+        let x4 = x2 * x2;
+        x4 * x
+    }
+
+    pub fn mimc5_hash(&self, h: &F, x_in: &F) -> F {
+        let mut x = *x_in;
+
+        for i in 0..self.constants.n_rounds as usize {
+            x = Self::pow5(x + h + self.constants.cts[i]);
+        }
+        x + h
+    }
+}
+
+const SEED: &str = "seed";
+pub fn generate_mimc_constants<F: Field>() -> MIMCConstants<F> {
+    let mut keccak = Keccak::v256();
+    let mut h = [0u8; 32];
+    keccak.update(SEED.as_bytes());
+    keccak.finalize(&mut h);
+    let mut keccak = Keccak::v256();
+    let mut h_iv = [0u8; 32];
+    let seed_iv = format!("{}{}", SEED, "_iv");
+    keccak.update(seed_iv.as_bytes());
+    keccak.finalize(&mut h_iv);
+
+    let n_rounds: i64 = 110;
+    let cts = get_constants(SEED, n_rounds);
+    MIMCConstants::<F> { cts, n_rounds }
+}
+
+pub fn get_constants<F: Field>(seed: &str, n_rounds: i64) -> Vec<F> {
+    let mut cts: Vec<F> = Vec::new();
+
+    let mut keccak = Keccak::v256();
+    let mut h = [0u8; 32];
+    keccak.update(seed.as_bytes());
+    keccak.finalize(&mut h);
+
+    for _ in 0..n_rounds {
+        let mut keccak = Keccak::v256();
+        keccak.update(&h);
+        keccak.finalize(&mut h);
+
+        // big endian -> little endian, in order to match the one in gnark
+        // or probably we can change the implementation there
+        let mut h_reverse = h;
+        h_reverse.reverse();
+
+        cts.push(F::from_uniform_bytes(&h_reverse));
+    }
+    cts
+}
diff --git a/transcript/src/fiat_shamir_hash/sha2_256.rs b/transcript/src/fiat_shamir_hash/sha2_256.rs
index 6033d5c8..ba6606be 100644
--- a/transcript/src/fiat_shamir_hash/sha2_256.rs
+++ b/transcript/src/fiat_shamir_hash/sha2_256.rs
@@ -1,11 +1,11 @@
 use sha2::{digest::Output, Digest, Sha256};
 
-use super::FiatShamirHash;
+use super::FiatShamirBytesHash;
 
 #[derive(Debug, Clone, Default)]
 pub struct SHA256hasher;
 
-impl FiatShamirHash for SHA256hasher {
+impl FiatShamirBytesHash for SHA256hasher {
     const DIGEST_SIZE: usize = 32;
 
     #[inline]
diff --git a/transcript/src/lib.rs b/transcript/src/lib.rs
index 66734364..890669d3 100644
--- a/transcript/src/lib.rs
+++ b/transcript/src/lib.rs
@@ -1,8 +1,10 @@
 mod fiat_shamir_hash;
-pub use fiat_shamir_hash::{FiatShamirHash, Keccak256hasher, SHA256hasher};
+pub use fiat_shamir_hash::{
+    FiatShamirBytesHash, FiatShamirFieldHash, Keccak256hasher, MIMCHasher, SHA256hasher,
+};
 
 mod transcript;
-pub use transcript::{Transcript, TranscriptInstance};
+pub use transcript::{BytesHashTranscript, FieldHashTranscript, Transcript};
 
 mod proof;
 pub use proof::Proof;
diff --git a/transcript/src/tests.rs b/transcript/src/tests.rs
index acc565c2..4b1336ac 100644
--- a/transcript/src/tests.rs
+++ b/transcript/src/tests.rs
@@ -1,3 +1,5 @@
+use crate::fiat_shamir_hash::{FiatShamirFieldHash, MIMCHasher};
+use arith::{BN254Fr, FieldSerde};
 use sha2::{Digest, Sha256};
 
 const EXAMPLE_IN: [u8; 32] = [
@@ -15,3 +17,24 @@ fn check_sha256_aligned() {
     println!("{:?}", out);
     assert_eq!(out, EXAMPLE_OUT.into());
 }
+
+const MIMC5_BN254_IN: u32 = 123;
+
+// The result is generated by the current version (10/20/2024) of mimc5 itself.
+// The point is to pin down a hash output so later we can refer to it.
+// There is a similar test in recursion located at recursion/modules/transcript/hash_test.go
+const MIMC5_BN254_ONT: [u8; 32] = [
+    23, 0, 30, 22, 99, 236, 217, 86, 113, 255, 221, 106, 184, 226, 45, 109, 67, 123, 85, 88, 103,
+    54, 177, 150, 88, 18, 208, 172, 76, 143, 30, 5,
+];
+
+#[test]
+fn check_mimc5_aligned() {
+    let mimc = MIMCHasher::<BN254Fr>::new();
+    let input = BN254Fr::from(MIMC5_BN254_IN);
+    let output = mimc.hash(&[input]);
+    assert_eq!(
+        output,
+        BN254Fr::deserialize_from(&MIMC5_BN254_ONT[..]).unwrap()
+    );
+}
diff --git a/transcript/src/transcript.rs b/transcript/src/transcript.rs
index c2a34494..05da7d76 100644
--- a/transcript/src/transcript.rs
+++ b/transcript/src/transcript.rs
@@ -2,77 +2,152 @@ use std::marker::PhantomData;
 
 use arith::{Field, FieldSerde};
 
-use crate::{fiat_shamir_hash::FiatShamirHash, Proof};
+use crate::{
+    fiat_shamir_hash::{FiatShamirBytesHash, FiatShamirFieldHash},
+    Proof,
+};
 
-pub trait Transcript<H: FiatShamirHash> {
+pub trait Transcript<F: Field + FieldSerde> {
     /// Create a new transcript.
     fn new() -> Self;
 
     /// Append a field element to the transcript.
-    fn append_field_element<F>(&mut self, f: &F)
-    where
-        F: FieldSerde,
-    {
-        let mut buf = vec![];
-        f.serialize_into(&mut buf).unwrap();
-        self.append_u8_slice(&buf);
-    }
+    fn append_field_element(&mut self, f: &F);
 
-    /// Append a byte slice to the transcript.
+    /// Append a slice of bytes
     fn append_u8_slice(&mut self, buffer: &[u8]);
 
     /// Generate a challenge.
-    fn generate_challenge<F: Field>(&mut self) -> F;
+    fn generate_challenge_field_element(&mut self) -> F;
+
+    /// Generate a slice of random bytes of some fixed size
+    /// Use this function when you need some randomness other than the native field
+    fn generate_challenge_u8_slice(&mut self, n_bytes: usize) -> Vec<u8>;
 
     /// Generate a challenge vector.
     #[inline]
-    fn generate_challenge_vector<F: Field>(&mut self, n: usize) -> Vec<F> {
+    fn generate_challenge_field_elements(&mut self, n: usize) -> Vec<F> {
         let mut challenges = Vec::with_capacity(n);
         for _ in 0..n {
-            challenges.push(self.generate_challenge());
+            challenges.push(self.generate_challenge_field_element());
         }
         challenges
     }
+
+    /// Produce the proof
+    /// It is not recommended to append/challenge after calling this function
+    fn finalize_and_get_proof(&self) -> Proof;
+
+    /// Return current state of the transcript
+    /// Note: this may incur an additional hash to shrink the state
+    fn hash_and_return_state(&mut self) -> Vec<u8>;
+
+    /// Set the state
+    /// Note: Any unhashed data will be discarded
+    fn set_state(&mut self, state: &[u8]);
+
+    /// Lock the proof: no changes will be made to the proof until `unlock_proof` is called.
+    fn lock_proof(&mut self);
+
+    /// unlock proof
+    fn unlock_proof(&mut self);
 }
 
 #[derive(Clone, Default, Debug, PartialEq)]
-pub struct TranscriptInstance<H: FiatShamirHash> {
-    phantom: PhantomData<H>,
+pub struct BytesHashTranscript<F: Field + FieldSerde, H: FiatShamirBytesHash> {
+    phantom: PhantomData<(F, H)>,
 
     /// The digest bytes.
     pub digest: Vec<u8>,
 
     /// The proof bytes.
-    pub proof: Proof,
+    proof: Proof,
 
     /// The pointer to the proof bytes indicating where the hash starts.
     hash_start_index: usize,
+
+    /// locking point
+    proof_locked: bool,
+    proof_locked_at: usize,
 }
 
-impl<H: FiatShamirHash> Transcript<H> for TranscriptInstance<H> {
+impl<F: Field + FieldSerde, H: FiatShamirBytesHash> Transcript<F> for BytesHashTranscript<F, H> {
     fn new() -> Self {
-        TranscriptInstance {
+        Self {
             phantom: PhantomData,
             digest: vec![0u8; H::DIGEST_SIZE],
             proof: Proof::default(),
             hash_start_index: 0,
+            proof_locked: false,
+            proof_locked_at: 0,
         }
     }
 
+    fn append_field_element(&mut self, f: &F) {
+        let mut buf = vec![];
+        f.serialize_into(&mut buf).unwrap();
+        self.append_u8_slice(&buf);
+    }
+
     /// Append a byte slice to the transcript.
     fn append_u8_slice(&mut self, buffer: &[u8]) {
         self.proof.bytes.extend_from_slice(buffer);
     }
 
     /// Generate a challenge.
-    fn generate_challenge<F: Field>(&mut self) -> F {
+    fn generate_challenge_field_element(&mut self) -> F {
         self.hash_to_digest();
         assert!(F::SIZE <= H::DIGEST_SIZE);
         F::from_uniform_bytes(&self.digest.clone().try_into().unwrap())
     }
+
+    fn generate_challenge_u8_slice(&mut self, n_bytes: usize) -> Vec<u8> {
+        let mut ret = vec![];
+        let mut cur_n_bytes = 0usize;
+
+        while cur_n_bytes < n_bytes {
+            self.hash_to_digest();
+            ret.extend_from_slice(&self.digest);
+            cur_n_bytes += H::DIGEST_SIZE;
+        }
+
+        ret.resize(n_bytes, 0);
+        ret
+    }
+
+    fn finalize_and_get_proof(&self) -> Proof {
+        self.proof.clone()
+    }
+
+    fn hash_and_return_state(&mut self) -> Vec<u8> {
+        self.hash_to_digest();
+        self.digest.clone()
+    }
+
+    fn set_state(&mut self, state: &[u8]) {
+        self.hash_start_index = self.proof.bytes.len(); // discard unhashed data
+        assert!(state.len() == H::DIGEST_SIZE);
+        self.digest = state.to_vec();
+    }
+
+    fn lock_proof(&mut self) {
+        assert!(!self.proof_locked);
+        self.proof_locked = true;
+        self.proof_locked_at = self.proof.bytes.len();
+    }
+
+    fn unlock_proof(&mut self) {
+        assert!(self.proof_locked);
+        self.proof_locked = false;
+        if self.hash_start_index < self.proof.bytes.len() {
+            self.hash_to_digest();
+        }
+        self.proof.bytes.resize(self.proof_locked_at, 0);
+        self.hash_start_index = self.proof.bytes.len();
+    }
 }
 
-impl<H: FiatShamirHash> TranscriptInstance<H> {
+impl<F: Field + FieldSerde, H: FiatShamirBytesHash> BytesHashTranscript<F, H> {
     /// Hash the input into the output.
     pub fn hash_to_digest(&mut self) {
         let hash_end_index = self.proof.bytes.len();
@@ -87,3 +162,118 @@ impl<H: FiatShamirHash> TranscriptInstance<H> {
         }
     }
 }
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct FieldHashTranscript<F: Field + FieldSerde, H: FiatShamirFieldHash<F>> {
+    /// Internal hasher, kept around because creating a new one is a little costly
+    pub hasher: H,
+
+    /// The current digest, held as a single field element (not raw bytes).
+    pub digest: F,
+
+    /// The proof; appended data is recorded in its bytes while the proof is unlocked
+    pub proof: Proof,
+
+    /// Field elements queued up to be hashed by the next `hash_to_digest` call
+    pub data_pool: Vec<F>,
+
+    /// Whether the proof is locked; while locked, appended data is hashed but not recorded
+    pub proof_locked: bool,
+}
+
+impl<F: Field + FieldSerde, H: FiatShamirFieldHash<F>> Transcript<F> for FieldHashTranscript<F, H> {
+    /// Creates an empty, unlocked transcript whose digest is `F::default()`.
+    #[inline(always)]
+    fn new() -> Self {
+        Self {
+            hasher: H::new(),
+            digest: F::default(),
+            proof: Proof::default(),
+            data_pool: vec![],
+            proof_locked: false,
+        }
+    }
+
+    /// Queues `f` for hashing and, unless the proof is locked, appends it to the proof.
+    fn append_field_element(&mut self, f: &F) {
+        if !self.proof_locked {
+            // Serialize only when the bytes are actually recorded in the proof.
+            f.serialize_into(&mut self.proof.bytes).unwrap();
+        }
+        self.data_pool.push(*f);
+    }
+
+    /// Absorbs raw bytes as field elements (32 bytes each); recorded in the proof unless locked.
+    fn append_u8_slice(&mut self, buffer: &[u8]) {
+        if !self.proof_locked {
+            self.proof.bytes.extend_from_slice(buffer);
+        }
+        for chunk in buffer.chunks(32) {
+            // A short final chunk is zero-padded on the right to 32 bytes.
+            let mut block = [0u8; 32];
+            block[..chunk.len()].copy_from_slice(chunk);
+            self.data_pool.push(F::from_uniform_bytes(&block));
+        }
+    }
+
+    /// Hashes the pending data and returns the new digest as the challenge.
+    fn generate_challenge_field_element(&mut self) -> F {
+        self.hash_to_digest();
+        self.digest
+    }
+
+    /// Squeezes `n_bytes` challenge bytes out of successive serialized digests.
+    fn generate_challenge_u8_slice(&mut self, n_bytes: usize) -> Vec<u8> {
+        let mut bytes = vec![];
+        while bytes.len() < n_bytes {
+            self.hash_to_digest();
+            // Serialize straight into the output: a reused scratch buffer
+            // keeps growing and would re-emit every earlier digest.
+            self.digest.serialize_into(&mut bytes).unwrap();
+        }
+        bytes.resize(n_bytes, 0);
+        bytes
+    }
+
+    /// Returns a copy of the proof recorded so far.
+    fn finalize_and_get_proof(&self) -> Proof {
+        self.proof.clone()
+    }
+
+    /// Hashes the pending data and returns the serialized digest.
+    fn hash_and_return_state(&mut self) -> Vec<u8> {
+        self.hash_to_digest();
+        let mut state = vec![];
+        self.digest.serialize_into(&mut state).unwrap();
+        state
+    }
+
+    /// Drops unhashed data and loads the digest serialized in `state`; panics if that fails.
+    fn set_state(&mut self, state: &[u8]) {
+        self.data_pool.clear();
+        self.digest = F::deserialize_from(state).unwrap();
+    }
+
+    /// Stops recording appended data in the proof (it is still hashed); panics if locked.
+    fn lock_proof(&mut self) {
+        assert!(!self.proof_locked);
+        self.proof_locked = true;
+    }
+
+    /// Resumes recording appended data in the proof; panics if the proof is not locked.
+    fn unlock_proof(&mut self) {
+        assert!(self.proof_locked);
+        self.proof_locked = false;
+    }
+}
+
+impl<F: Field + FieldSerde, H: FiatShamirFieldHash<F>> FieldHashTranscript<F, H> {
+    /// Sets the digest to the hash of the queued data (emptying the queue), or of itself if none.
+    pub fn hash_to_digest(&mut self) {
+        self.digest = match self.data_pool.is_empty() {
+            true => self.hasher.hash(&[self.digest]),
+            false => self.hasher.hash(&self.data_pool),
+        };
+        self.data_pool.clear();
+    }
+}