diff --git a/crates/fflonk/src/allocator/pool.rs b/crates/fflonk/src/allocator/pool.rs
index c7aa9df..ded16f4 100644
--- a/crates/fflonk/src/allocator/pool.rs
+++ b/crates/fflonk/src/allocator/pool.rs
@@ -29,7 +29,7 @@ pub struct PoolAllocator;
 
 impl DeviceAllocator for PoolAllocator {
     fn allocate(&self, layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>> {
-       unimplemented!("Pool allocator can't do static allocation/deallocation")
+        unimplemented!("Pool allocator can't do static allocation/deallocation")
     }
 
     fn deallocate(&self, ptr: std::ptr::NonNull<u8>, layout: std::alloc::Layout) {
diff --git a/crates/fflonk/src/relations/copy_perm.rs b/crates/fflonk/src/relations/copy_perm.rs
index d423536..bca9d48 100644
--- a/crates/fflonk/src/relations/copy_perm.rs
+++ b/crates/fflonk/src/relations/copy_perm.rs
@@ -204,11 +204,7 @@ where
 
     // get monomial finally
     ntt::bitreverse(&mut grand_prod_bitreversed, stream)?;
-    ntt::inplace_coset_ifft_for_gen_on(
-        &mut grand_prod_bitreversed,
-        &coset_shift_inv,
-        stream,
-    )?;
+    ntt::inplace_coset_ifft_for_gen_on(&mut grand_prod_bitreversed, &coset_shift_inv, stream)?;
 
     Ok(Poly::from_buffer(grand_prod_bitreversed))
 }
diff --git a/crates/gpu-ffi/src/arithmetic.rs b/crates/gpu-ffi/src/arithmetic.rs
index b782d27..2b45f9b 100644
--- a/crates/gpu-ffi/src/arithmetic.rs
+++ b/crates/gpu-ffi/src/arithmetic.rs
@@ -398,7 +398,6 @@ pub fn raw_evaluate(
         return Err(GpuError::SchedulingErr);
     }
 
-
     Ok(())
 }
 
diff --git a/crates/gpu-ffi/src/bindings_extra.rs b/crates/gpu-ffi/src/bindings_extra.rs
index e3263d7..57295b0 100644
--- a/crates/gpu-ffi/src/bindings_extra.rs
+++ b/crates/gpu-ffi/src/bindings_extra.rs
@@ -9,12 +9,7 @@ pub fn call_host_fn<F: FnMut()>(stream: bc_stream, cb: &F) -> Result<(), GpuErro
     let callback_data = cb as *const _ as *mut ::std::os::raw::c_void;
 
     unsafe {
-        if bc_launch_host_fn(
-            stream,
-            Some(callback_wrapper::<F>),
-            callback_data,
-        ) != 0
-        {
+        if bc_launch_host_fn(stream, Some(callback_wrapper::<F>), callback_data) != 0 {
             return Err(GpuError::SchedulingErr);
         }
     }
@@ -33,38 +28,28 @@ pub fn malloc_from_pool_async(
     Ok(())
 }
 
-pub fn device_disable_peer_access(
-    device_id: usize,
-) -> Result<(), GpuError> {
+pub fn device_disable_peer_access(device_id: usize) -> Result<(), GpuError> {
     if unsafe { bc_device_disable_peer_access(device_id as i32) } != 0 {
         return Err(GpuError::DevicePeerAccessErr);
     }
     Ok(())
 }
 
-pub fn device_enable_peer_access(
-    device_id: i32,
-) -> Result<(), GpuError> {
+pub fn device_enable_peer_access(device_id: i32) -> Result<(), GpuError> {
     if unsafe { bc_device_enable_peer_access(device_id) } != 0 {
         return Err(GpuError::DevicePeerAccessErr);
     }
     Ok(())
 }
 
-pub fn mem_pool_disable_peer_access(
-    pool: bc_mem_pool,
-    device_id: usize,
-) -> Result<(), GpuError> {
+pub fn mem_pool_disable_peer_access(pool: bc_mem_pool, device_id: usize) -> Result<(), GpuError> {
     if unsafe { bc_mem_pool_disable_peer_access(pool, device_id as i32) } != 0 {
         return Err(GpuError::MemPoolPeerAccessErr);
     }
     Ok(())
 }
 
-pub fn mem_pool_enable_peer_access(
-    pool: bc_mem_pool,
-    device_id: i32,
-) -> Result<(), GpuError> {
+pub fn mem_pool_enable_peer_access(pool: bc_mem_pool, device_id: i32) -> Result<(), GpuError> {
     if unsafe { bc_mem_pool_enable_peer_access(pool, device_id) } != 0 {
         return Err(GpuError::MemPoolPeerAccessErr);
     }
@@ -237,7 +222,7 @@ impl bc_event {
         }
     }
 
-    pub fn sync(self) ->  Result<(), GpuError> {
+    pub fn sync(self) -> Result<(), GpuError> {
         if unsafe { bc_event_synchronize(self) } != 0 {
             return Err(GpuError::EventSyncErr);
         }
@@ -257,7 +242,14 @@ impl ntt_configuration {
     ) -> Self {
         let log_extension_degree = log_2(lde_factor as usize);
         let coset_index = bitreverse(coset_index, log_extension_degree as usize);
-        let mut this = Self::new(ctx, inputs as *mut c_void, outputs, log_values_count, false, false);
+        let mut this = Self::new(
+            ctx,
+            inputs as *mut c_void,
+            outputs,
+            log_values_count,
+            false,
+            false,
+        );
         this.coset_index = coset_index as u32;
         this.log_extension_degree = log_extension_degree;
 
diff --git a/crates/gpu-ffi/src/lib.rs b/crates/gpu-ffi/src/lib.rs
index 9c2f834..bef8e51 100644
--- a/crates/gpu-ffi/src/lib.rs
+++ b/crates/gpu-ffi/src/lib.rs
@@ -5,14 +5,14 @@
 pub mod error;
 pub mod utils;
 
-pub mod other;
 pub mod bindings;
 pub mod bindings_extra;
+pub mod other;
 pub mod wrapper;
 
-pub use other::*;
 pub use bindings::*;
 pub use bindings_extra::*;
+pub use other::*;
 
 pub use error::*;
 pub use utils::*;
diff --git a/crates/gpu-ffi/src/msm.rs b/crates/gpu-ffi/src/msm.rs
index 755e86d..c7ee024 100644
--- a/crates/gpu-ffi/src/msm.rs
+++ b/crates/gpu-ffi/src/msm.rs
@@ -51,6 +51,6 @@ pub fn raw_msm(ctx: &GpuContext, d_scalars: *mut c_void, len: usize) -> Result<V
     let mut result = vec![0u8; result_buf_len];
 
     copy_and_free(&mut result[..], d_result, ctx.get_d2h_stream())?;
-    
+
     Ok(result)
 }
diff --git a/crates/gpu-ffi/src/ntt.rs b/crates/gpu-ffi/src/ntt.rs
index 57c6663..1964b51 100644
--- a/crates/gpu-ffi/src/ntt.rs
+++ b/crates/gpu-ffi/src/ntt.rs
@@ -10,7 +10,7 @@ pub fn ntt(
     let d_scalars = alloc_and_copy(ctx, scalars, ctx.get_h2d_stream())?;
     ctx.wait_h2d()?;
 
-    raw_ntt(ctx, d_scalars, len, bits_reversed, inverse,)
+    raw_ntt(ctx, d_scalars, len, bits_reversed, inverse)
 }
 
 pub fn raw_ntt(
@@ -20,7 +20,7 @@ pub fn raw_ntt(
     bits_reversed: bool,
     inverse: bool,
 ) -> Result<(), GpuError> {
-    let log_scalars_count = log_2(len/FIELD_ELEMENT_LEN);
+    let log_scalars_count = log_2(len / FIELD_ELEMENT_LEN);
     let cfg = ntt_configuration::new(
         ctx,
         d_scalars,
@@ -49,7 +49,7 @@ pub fn ifft_then_msm(
     h2d_finished.record(ctx.get_h2d_stream())?;
     ctx.get_exec_stream().wait(h2d_finished)?;
 
-    raw_ntt(ctx, d_scalars, len, bits_reversed, true,)?;
+    raw_ntt(ctx, d_scalars, len, bits_reversed, true)?;
     let result = raw_msm(ctx, d_scalars, len)?;
 
     let exec_finished = bc_event::new()?;
@@ -86,7 +86,7 @@ pub fn raw_coset_ntt(
     coset_idx: usize,
     inverse: bool,
 ) -> Result<(), GpuError> {
-    let log_scalars_count = log_2(len/FIELD_ELEMENT_LEN);
+    let log_scalars_count = log_2(len / FIELD_ELEMENT_LEN);
     let mut cfg = ntt_configuration::new_for_lde(
         ctx,
         d_scalars,
@@ -171,11 +171,7 @@ pub fn lde(
     Ok(result)
 }
 
-pub fn fft(
-    ctx: &GpuContext,
-    scalars: &mut [u8],
-    bits_reversed: bool,    
-) -> Result<(), GpuError> {
+pub fn fft(ctx: &GpuContext, scalars: &mut [u8], bits_reversed: bool) -> Result<(), GpuError> {
     ntt(ctx, scalars, bits_reversed, false)
 }
 
@@ -214,7 +210,7 @@ pub fn multi_ntt(
         h2d_finished.record(ctx.get_h2d_stream())?;
         ctx.get_exec_stream().wait(h2d_finished)?;
 
-        raw_ntt(ctx, d_scalars, len,  bits_reversed, inverse)?;
+        raw_ntt(ctx, d_scalars, len, bits_reversed, inverse)?;
 
         let exec_finished = bc_event::new()?;
         exec_finished.record(ctx.get_exec_stream())?;
@@ -227,10 +223,6 @@ pub fn multi_ntt(
     Ok(())
 }
 
-pub fn ifft(
-    ctx: &GpuContext,
-    scalars: &mut [u8],
-    bits_reversed: bool,
-) -> Result<(), GpuError> {
+pub fn ifft(ctx: &GpuContext, scalars: &mut [u8], bits_reversed: bool) -> Result<(), GpuError> {
     ntt(ctx, scalars, bits_reversed, true)
 }
diff --git a/crates/gpu-ffi/src/utils.rs b/crates/gpu-ffi/src/utils.rs
index 3a3a043..f2e099e 100644
--- a/crates/gpu-ffi/src/utils.rs
+++ b/crates/gpu-ffi/src/utils.rs
@@ -16,7 +16,6 @@ pub fn log_2(num: usize) -> u32 {
     pow
 }
 
-
 #[inline(always)]
 pub fn bitreverse(n: usize, l: usize) -> usize {
     let mut r = n.reverse_bits();
@@ -27,7 +26,6 @@ pub fn bitreverse(n: usize, l: usize) -> usize {
     r
 }
 
-
 // pub fn decode_projective_point<E: Engine>(encoding: [Vec<u8>; 3]) -> E::G1 {
 //     let [encoding_x, encoding_y, encoding_z] = encoding;
 //     let mut repr = <<E::G1 as CurveProjective>::Base as PrimeField>::Repr::default();
@@ -64,7 +62,7 @@ pub fn bitreverse(n: usize, l: usize) -> usize {
 //     for _ in 0..num_chunks {
 //         let (current_bases, rest) = bases.split_at(chunk_size);
 //         bases = rest;
-        
+
 //         let (mut current_encoding_x, rest) = encoding_x.split_at(chunk_size * 32);
 //         encoding_x = rest;
 //         let (mut current_encoding_y, rest) = encoding_y.split_at(chunk_size * 32);
@@ -95,9 +93,9 @@ pub fn bitreverse(n: usize, l: usize) -> usize {
 //     [final_encoding_x, final_encoding_y]
 // }
 
-// pub fn decode_scalars<E: Engine>(encoding: &[u8]) -> Vec<E::Fr> {    
+// pub fn decode_scalars<E: Engine>(encoding: &[u8]) -> Vec<E::Fr> {
 //     let len = encoding.len() / 32;
-//     let mut result = vec![E::Fr::zero(); len];    
+//     let mut result = vec![E::Fr::zero(); len];
 //     unsafe{std::ptr::copy(encoding.as_ptr() as *const E::Fr, result.as_mut_ptr(), len)};
 //     result
 // }
diff --git a/crates/gpu-ffi/src/wrapper.rs b/crates/gpu-ffi/src/wrapper.rs
index cb26df1..47ad35b 100644
--- a/crates/gpu-ffi/src/wrapper.rs
+++ b/crates/gpu-ffi/src/wrapper.rs
@@ -63,7 +63,7 @@ impl GpuContext {
         if unsafe { ntt_set_up() } != 0 {
             return Err(GpuError::CreateContextErr);
         }
-        
+
         Ok(Self {
             device_id: device_id,
             mem_pool: mem_pool,