diff --git a/Project.toml b/Project.toml
index 975a02f..b6606ce 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,10 +6,12 @@ version = "1.0.0-DEV"
 [deps]
 ChebParticleMesh = "1983ef0c-217d-4026-99b0-9163e7750d85"
 ExTinyMD = "fec76197-d59f-46dd-a0ed-76a83c21f7aa"
+FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastChebInterp = "cf66c380-9a80-432c-aff8-4f9c79c0bdde"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
 OMEinsum = "ebe7aa44-baf0-506c-a96f-8464559b3922"
+Polynomials = "f27b6e38-b328-58d1-80ce-0feddd5e7a45"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 SumOfExpVPMR = "2c69873a-e0bb-44e1-90b4-d15ac3b7e936"
 
diff --git a/src/FFCT/Chebyshev.jl b/src/FFCT/Chebyshev.jl
new file mode 100644
index 0000000..7f9b655
--- /dev/null
+++ b/src/FFCT/Chebyshev.jl
@@ -0,0 +1,66 @@
+function USeries_direct(z::T, k_xi::T, k_yj::T, uspara::USeriesPara{T}, M_mid::Int) where{T}
+    t = zero(T)
+    k2 = k_xi^2 + k_yj^2
+
+    for l in M_mid + 1:length(uspara.sw)
+        sl, wl = uspara.sw[l]
+        t += π * wl * sl^2 * exp(-sl^2 * k2 / 4) * exp(-z^2 / sl^2)
+    end
+
+    return t
+end
+
+function ChebUSeries_k(k_xi::T, k_yj::T, L_z::T, uspara::USeriesPara{T}, M::Int, Q::Int) where{T}
+
+    f = z -> USeries_direct(z, k_xi, k_yj, uspara, M)
+    x = chebpoints(Q, zero(T), L_z)
+
+    return chebinterp(f.(x), zero(T), L_z)
+end
+
+function ChebUSeries(k_x::Vector{T}, k_y::Vector{T}, L_z::T, uspara::USeriesPara{T}, M::Int, Q::Int) where{T}
+
+    cheb_mat = Array{ChebPoly{1, T, T}, 2}(undef, (length(k_x), length(k_y)))
+
+    for i in 1:length(k_x)
+        for j in 1:length(k_y)
+            cheb_mat[i, j] = ChebUSeries_k(k_x[i], k_y[j], L_z, uspara, M, Q)
+        end
+    end
+
+    return cheb_mat
+end
+
+@inline function chebpoly(n::Int, x::T, scale::T) where{T}
+    return cos(n * acos(x / scale))
+end
+
+@inbounds function real2Cheb!(H_r::Array{Complex{T}, 3}, H_c::Array{Complex{T}, 3}, r_z::Vector{T}, L_z::T) where{T}
+
+    set_zeros!(H_c)
+    N_z = size(H_r, 3)
+
+    for k in 1:N_z, l in 1:N_z
+        cheb_temp = k == 1 ? 0.5 : chebpoly(k - 1, r_z[l] - L_z / T(2), L_z / T(2))
+        for j in 1:size(H_r, 2), i in 1:size(H_r, 1)
+            H_c[i, j, k] += 2 / N_z * H_r[i, j, l] * cheb_temp
+        end
+    end
+
+    return H_c
+end
+
+@inbounds function Cheb2real!(H_c::Array{Complex{T}, 3}, H_r::Array{Complex{T}, 3}, r_z::Vector{T}, L_z::T) where{T}
+
+    set_zeros!(H_r)
+    N_z = size(H_c, 3)
+
+    for k in 1:N_z, l in 1:N_z
+        cheb_temp = chebpoly(k - 1, r_z[l] - L_z / T(2), L_z / T(2))
+        for j in 1:size(H_c, 2), i in 1:size(H_c, 1)
+            H_r[i, j, l] += H_c[i, j, k] * cheb_temp
+        end
+    end
+
+    return H_r
+end
\ No newline at end of file
diff --git a/src/FFCT/Taylor.jl b/src/FFCT/Taylor.jl
new file mode 100644
index 0000000..c05302a
--- /dev/null
+++ b/src/FFCT/Taylor.jl
@@ -0,0 +1,52 @@
+function Greens_Q2d_direct(z::T, k_xi::T, k_yj::T, uspara::USeriesPara{T}, M_mid::Int) where{T}
+    t = zero(T)
+    k2 = k_xi^2 + k_yj^2
+
+    for l in M_mid + 1:length(uspara.sw)
+        sl, wl = uspara.sw[l]
+        t += π * wl * sl^2 * exp(-sl^2 * k2 / 4) * exp(-z^2 / sl^2)
+    end
+
+    return t
+end
+
+
+#(x^n / n!)
+@inbounds @inline function special_powern(x::T, n::Int) where{T}
+    t = one(T)
+    for i in 1:n
+        t *= x / T(i)
+    end
+    return t
+end
+
+# the resulting Polynomial is in the form of f = x -> poly((x / L_z)^2)
+function TaylorUSeries_k(k_xi::T, k_yj::T, L_z::T, uspara::USeriesPara{T}, M_mid::Int, Q::Int) where{T}
+    coefs = zeros(T, Q)
+    k2 = k_xi^2 + k_yj^2
+    if !(k2 ≈ zero(T))
+        for l in M_mid + 1:length(uspara.sw)
+            sl, wl = uspara.sw[l]
+            for n in 0:Q - 1
+                coefs[n + 1] += (-1)^n * π * wl * sl^2 * exp(-sl^2 * k2 / 4) * special_powern(L_z^2 / sl^2, n)
+            end
+        end
+    end
+    return coefs
+end
+
+function TaylorUSeries(k_x::Vector{T}, k_y::Vector{T}, L_z::T, uspara::USeriesPara{T}, M_mid::Int, Q::Int) where{T}
+
+    taylor_mats = [zeros(T, (length(k_x), length(k_y))) for i in 1:Q]
+
+    for i in 1:length(k_x)
+        for j in 1:length(k_y)
+            coefs = TaylorUSeries_k(k_x[i], k_y[j], L_z, uspara, M_mid, Q)
+            for k in 1:Q
+                taylor_mats[k][i, j] = coefs[k]
+            end
+        end
+    end
+
+    return taylor_mats
+end
\ No newline at end of file
diff --git a/src/FFCT/cheb_interpolate.jl b/src/FFCT/cheb_interpolate.jl
deleted file mode 100644
index a98b2b1..0000000
--- a/src/FFCT/cheb_interpolate.jl
+++ /dev/null
@@ -1,32 +0,0 @@
-function USeries_direct(z::T, k_xi::T, k_yj::T, uspara::USeriesPara{T}, M_mid::Int) where{T}
-    t = zero(T)
-    k2 = k_xi^2 + k_yj^2
-
-    for l in M_mid + 1:length(uspara.sw)
-        sl, wl = uspara.sw[l]
-        t += π * wl * exp(-sl^2 * k2 / 4) * exp(-z^2 / sl^2) * (T(2) - T(4) * z^2 / sl^2 + k2 * sl^2)
-    end
-
-    return t
-end
-
-function ChebUSeries_k(k_xi::T, k_yj::T, L_z::T, uspara::USeriesPara{T}, M::Int, Q::Int) where{T}
-
-    f = z -> USeries_direct(z, k_xi, k_yj, uspara, M)
-    x = chebpoints(Q, zero(T), L_z)
-
-    return chebinterp(f.(x), zero(T), L_z)
-end
-
-function ChebUSeries(k_x::Vector{T}, k_y::Vector{T}, L_z::T, uspara::USeriesPara{T}, M::Int, Q::Int) where{T}
-
-    cheb_mat = Array{ChebPoly{1, T, T}, 2}(undef, (length(k_x), length(k_y)))
-
-    for i in 1:length(k_x)
-        for j in 1:length(k_y)
-            cheb_mat[i, j] = ChebUSeries_k(k_x[i], k_y[j], L_z, uspara, M, Q)
-        end
-    end
-
-    return cheb_mat
-end
\ No newline at end of file
diff --git a/src/FFCT/gather.jl b/src/FFCT/gather.jl
new file mode 100644
index 0000000..9fe606b
--- /dev/null
+++ b/src/FFCT/gather.jl
@@ -0,0 +1,54 @@
+@inbounds function gather_nu_single(q::T, pos::NTuple{3, T}, L::NTuple{3, T}, k_x::Vector{T}, k_y::Vector{T}, phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, H_c::Array{Complex{T}, 3}) where{T}
+    x, y, z = pos
+    L_x, L_y, L_z = L
+    ϕ = zero(Complex{T})
+    
+    revise_phase_pos!(phase_x, phase_y, k_x, k_y, x - T(L[1] / 2), y - T(L[2] / 2))
+
+    for k in 1:size(H_c, 3)
+        cheb_val = chebpoly(k - 1, z - L_z / T(2), L_z / T(2))
+        ϕ += dot(phase_x', (@view H_c[:, :, k]), phase_y)* cheb_val
+    end
+
+    return q * ϕ / (2 * L_x * L_y)
+end
+
+function gather_nu(qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, k_x::Vector{T}, k_y::Vector{T}, phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, H_c::Array{Complex{T}, 3}) where{T}
+    E = zero(Complex{T})
+    for i in 1:length(qs)
+        E += gather_nu_single(qs[i], poses[i], L, k_x, k_y, phase_x, phase_y, H_c)
+    end
+    return real(E)
+end
+
+@inbounds function gather_thin_single(q::T, pos::NTuple{3, T}, L_z::T, pad_grid::Array{Complex{T}, 3}, gridinfo::GridInfo{2, T}, cheb_value::Vector{Array{T, 1}}, cheb_coefs::NTuple{2, ChebCoef{T}}) where{T}
+    
+    potential_i = zero(T)
+    idl = gridinfo.index_list
+    x, y, z = pos
+
+    near_id_image = image_grid_id((pos[1], pos[2]), gridinfo)
+    near_pos_image = image_grid_pos(near_id_image, gridinfo)
+    for i in 1:2
+        dx = pos[i] - near_pos_image[i]
+        pwcheb_eval!(dx, cheb_value[i], cheb_coefs[i])
+    end
+
+    for i in gridinfo.iter_list
+        image_id = near_id_image.id .+ i
+        for k in 1:size(pad_grid, 3)
+            cheb_val = chebpoly(k - 1, z - L_z / T(2), L_z / T(2))
+            potential_i += real(pad_grid[idl[1][image_id[1]], idl[2][image_id[2]], k]) * prod(cheb_value[j][i[j] + gridinfo.w[j] + 1] for j in 1:2) * cheb_val
+        end
+    end
+
+    return q * prod(gridinfo.h) * potential_i
+end
+
+function gather_thin(qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, H_c::Array{Complex{T}, 3}, gridinfo::GridInfo{2, T}, cheb_value::Vector{Array{T, 1}}, cheb_coefs::NTuple{2, ChebCoef{T}}) where{T}
+    E = zero(Complex{T})
+    for i in 1:length(qs)
+        E += gather_thin_single(qs[i], poses[i], L[3], H_c, gridinfo, cheb_value, cheb_coefs)
+    end
+    return real(E) / 2
+end
\ No newline at end of file
diff --git a/src/FFCT/interpolate.jl b/src/FFCT/interpolate.jl
new file mode 100644
index 0000000..cb223e2
--- /dev/null
+++ b/src/FFCT/interpolate.jl
@@ -0,0 +1,126 @@
+function interpolate_nu_loop!(
+    H_r::Array{Complex{T}, 3},
+    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T},
+    k_x::Vector{T}, k_y::Vector{T}, 
+    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, 
+    r_z::Vector{T}, uspara::USeriesPara{T}, M_mid::Int) where{T}
+
+    set_zeros!(H_r)
+
+    for n in 1:length(qs)
+        x, y, z = poses[n]
+        q = qs[n]
+        revise_phase_neg!(phase_x, phase_y, k_x, k_y, x - L[1] / 2, y - L[2] / 2)
+
+        for k in 1:size(H_r, 3)
+            r_zk = r_z[k]
+            for l in M_mid + 1:length(uspara.sw)
+                sl, wl = uspara.sw[l]
+
+                for j in 1:size(H_r, 2)
+                    k_yj = k_y[j]
+                    for i in 1:size(H_r, 1)
+                        k_xi = k_x[i]
+                        k2 = k_xi^2 + k_yj^2
+                        phase = phase_x[i] * phase_y[j]
+                        if !(k2 ≈ zero(T))
+                            H_r[i, j, k] += q * π * wl * sl^2 * phase * exp(-sl^2 * k2 / 4) * exp(-(z - r_zk)^2 / sl^2)
+                        end
+                    end
+                end
+            end
+        end
+    end
+
+    return H_r
+end
+
+function interpolate_nu_cheb!(
+    H_r::Array{Complex{T}, 3},
+    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T},
+    k_x::Vector{T}, k_y::Vector{T}, 
+    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, 
+    r_z::Vector{T}, cheb_mat::Array{ChebPoly{1, T, T}, 2}) where{T}
+
+    set_zeros!(H_r)
+
+    for n in 1:length(qs)
+        x, y, z = poses[n]
+        q = qs[n]
+        revise_phase_neg!(phase_x, phase_y, k_x, k_y, x - L[1] / 2, y - L[2] / 2)
+
+        for k in 1:size(H_r, 3)
+            r_zk = r_z[k]
+            for j in 1:size(H_r, 2)
+                k_yj = k_y[j]
+                for i in 1:size(H_r, 1)
+                    k_xi = k_x[i]
+                    k2 = k_xi^2 + k_yj^2
+                    if !(k2 ≈ zero(T))
+                        phase = phase_x[i] * phase_y[j]
+                        H_r[i, j, k] += q * phase * cheb_mat[i, j](abs(z - r_zk))
+                    end
+                end
+            end
+        end
+    end
+
+    return H_r
+end
+
+
+@inbounds function interpolate_thin_single!(q::T, pos::NTuple{3, T}, pad_grid::Array{Complex{T}, 3}, gridinfo::GridInfo{2, T}, cheb_value::Vector{Array{T, 1}}, cheb_coefs::NTuple{2, ChebCoef{T}}, r_z::Vector{T}, Taylor_order::Int, L_z::T) where{T}
+
+    idl = gridinfo.index_list
+    pos_new = (pos[1], pos[2])
+
+    near_id_image = image_grid_id(pos_new, gridinfo)
+    near_pos_image = image_grid_pos(near_id_image, gridinfo)
+    for i in 1:2
+        dx = pos[i] - near_pos_image[i]
+        pwcheb_eval!(dx, cheb_value[i], cheb_coefs[i])
+    end
+
+    for i in gridinfo.iter_list
+        image_id = near_id_image.id .+ i
+        for k in 1:size(pad_grid, 3)
+            qn = q * ((pos[3] - r_z[k]) / L_z)^(2 * (Taylor_order - 1))
+            pad_grid[idl[1][image_id[1]], idl[2][image_id[2]], k] += Complex{T}(qn * prod(cheb_value[j][i[j] + gridinfo.w[j] + 1] for j in 1:2))
+        end
+    end
+
+    return nothing
+end
+
+@inbounds function interpolate_thin!(qs::Vector{T}, poses::Vector{NTuple{3, T}}, pad_grids::Vector{Array{Complex{T}, 3}}, gridinfo::GridInfo{2, T}, cheb_value::Vector{Array{T, 1}}, cheb_coefs::NTuple{2, ChebCoef{T}}, r_z::Vector{T}, L_z::T) where{T}
+
+    @assert length(qs) == length(poses)
+    set_zeros!.(pad_grids)
+
+    for Taylor_order in 1:length(pad_grids)
+        for i in 1:length(qs)
+            interpolate_thin_single!(qs[i], poses[i], pad_grids[Taylor_order], gridinfo, cheb_value, cheb_coefs, r_z, Taylor_order, L_z)
+        end
+    end
+
+    return nothing
+end
+
+function ϕkz_direct(z0::T, qs::Vector{T}, poses::Vector{NTuple{3, T}}, k_x::T, k_y::T, uspara::USeriesPara{T}, M_mid::Int) where{T}
+
+    s = zero(T)
+
+    for n in 1:length(qs)
+        x, y, z = poses[n]
+        q = qs[n]
+
+        for l in M_mid + 1:length(uspara.sw)
+            sl, wl = uspara.sw[l]
+            k2 = k_x^2 + k_y^2
+            phase = exp( - T(1)im * (k_x * x + k_y * y))
+            s += q * π * wl * sl^2 * phase * exp(-sl^2 * k2 / 4) * exp(-(z - z0)^2 / sl^2)
+        end
+    end
+
+    return s
+end
\ No newline at end of file
diff --git a/src/FFCT/linear_eqs.jl b/src/FFCT/linear_eqs.jl
deleted file mode 100644
index 3a58515..0000000
--- a/src/FFCT/linear_eqs.jl
+++ /dev/null
@@ -1,79 +0,0 @@
-@inbounds function boundaries_single!(
-    q::T, pos::NTuple{3, T}, L::NTuple{3, T},
-    b_l::Array{Complex{T}, 2}, b_u::Array{Complex{T}, 2}, 
-    k_x::Vector{T}, k_y::Vector{T}, us_mat::Array{Complex{T}, 3},
-    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, 
-    L_z::T, uspara::USeriesPara{T}, M_mid::Int) where{T}
-
-    x, y, z = pos
-
-    revise_phase_neg!(phase_x, phase_y, k_x, k_y, x - L[1] / 2, y - L[2] / 2)
-
-    for l in M_mid + 1:length(uspara.sw)
-        sl, wl = uspara.sw[l]
-        temp = q * π * wl * sl^2
-        temp_l = temp * exp(- z^2  / sl^2)
-        temp_u = temp * exp(- (L_z - z)^2  / sl^2)
-        for j in 1:size(b_l, 2)
-            for i in 1:size(b_l, 1)
-                phase = phase_x[i] * phase_y[j]
-                exp_temp = us_mat[i, j, l - M_mid]
-                b_l[i, j] += temp_l * exp_temp * phase
-                b_u[i, j] += temp_u * exp_temp * phase
-            end
-        end
-    end
-
-    return b_l, b_u
-end
-
-function boundaries!(
-    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T},
-    b_l::Array{Complex{T}, 2}, b_u::Array{Complex{T}, 2}, 
-    k_x::Vector{T}, k_y::Vector{T}, us_mat::Array{Complex{T}, 3},
-    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, 
-    L_z::T, uspara::USeriesPara{T}, M_mid::Int) where{T}
-
-    set_zeros!(b_l)
-    set_zeros!(b_u)
-
-    for i in 1:length(qs)
-        boundaries_single!(qs[i], poses[i], L, b_l, b_u, k_x, k_y, us_mat, phase_x, phase_y, L_z, uspara, M_mid)
-    end
-
-    return b_l, b_u
-end
-
-@inbounds function solve_eqs!(rhs::Vector{Complex{T}}, sol::Vector{Complex{T}}, H_c::Array{Complex{T}, 3}, H_s::Array{Complex{T}, 3}, b_l::Array{Complex{T}, 2}, b_u::Array{Complex{T}, 2}, ivsm::Array{T, 4}, L_z::T) where{T}
-
-    set_zeros!(H_s)
-
-    N_x = Int((size(H_c, 1) - 1)/2)
-    N_y = Int((size(H_c, 2) - 1)/2)
-    N_z = size(H_c, 3)
-    for j in 1:size(H_c, 2), i in 1:size(H_c, 1)
-        if !((i == N_x + 1) && (j == N_y + 1))
-            for l in 1:N_z
-                rhs[l] = - H_c[i, j, l]
-            end
-            rhs[N_z + 1] = b_l[i, j]
-            rhs[N_z + 2] = b_u[i, j]
-
-            mul!(sol, (@view ivsm[:, :, i, j]), rhs, true, false)
-
-            H_s[i, j, 1] = T(2) * sol[N_z + 1]
-            H_s[i, j, 2] = L_z^2 * (sol[2] - sol[4]) / T(32) + L_z * sol[N_z + 2] / T(2)
-
-            sol[N_z + 1] = zero(Complex{T})
-            sol[N_z + 2] = zero(Complex{T})
-
-            for s in 3:N_z
-                k = s - 1
-                H_s[i, j, s] = L_z^2 / (8 * k) * 
-                ((sol[s - 2] - sol[s])/(2 * (k - 1)) - (sol[s] - sol[s + 2])/(2 * (k + 1)))
-            end
-        end
-    end
-
-    return H_s
-end
\ No newline at end of file
diff --git a/src/FFCT/nugrid_gather.jl b/src/FFCT/nugrid_gather.jl
deleted file mode 100644
index 65b630e..0000000
--- a/src/FFCT/nugrid_gather.jl
+++ /dev/null
@@ -1,22 +0,0 @@
-@inbounds function gather_nu_single(q::T, pos::NTuple{3, T}, L::NTuple{3, T}, k_x::Vector{T}, k_y::Vector{T}, phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, H_s::Array{Complex{T}, 3}) where{T}
-    x, y, z = pos
-    L_x, L_y, L_z = L
-    ϕ = zero(Complex{T})
-    
-    revise_phase_pos!(phase_x, phase_y, k_x, k_y, x - T(L[1] / 2), y - T(L[2] / 2))
-
-    for k in 1:size(H_s, 3)
-        cheb_val = k == 1 ? T(0.5) : chebpoly(k - 1, z - L_z / T(2), L_z / T(2))
-        ϕ += dot(phase_x', (@view H_s[:, :, k]), phase_y)* cheb_val
-    end
-
-    return q * ϕ / (2 * L_x * L_y)
-end
-
-function gather_nu(qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, k_x::Vector{T}, k_y::Vector{T}, phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, H_s::Array{Complex{T}, 3}) where{T}
-    E = zero(Complex{T})
-    for i in 1:length(qs)
-        E += gather_nu_single(qs[i], poses[i], L, k_x, k_y, phase_x, phase_y, H_s)
-    end
-    return real(E)
-end
\ No newline at end of file
diff --git a/src/FFCT/nugrid_interpolate.jl b/src/FFCT/nugrid_interpolate.jl
deleted file mode 100644
index 6dc858f..0000000
--- a/src/FFCT/nugrid_interpolate.jl
+++ /dev/null
@@ -1,170 +0,0 @@
-@inbounds function revise_z_coef!(z_coef::Array{T, 3}, exp_coef::Array{T, 3}, r_z::Vector{T}, poses::Vector{NTuple{3, T}}, uspara::USeriesPara{T}, M_mid::Int) where{T}
-
-    for n in 1:size(poses, 1)
-        x, y, z = poses[n]
-        for k in 1:size(r_z, 1)
-            r_zk = r_z[k]
-            for l in M_mid + 1:length(uspara.sw)
-                sl, wl = uspara.sw[l]
-                temp_z = (z - r_zk)^2 / sl^2
-                temp_exp = wl * exp(-temp_z)
-                exp_coef[l - M_mid, k, n] = temp_exp * sl^2
-                z_coef[l - M_mid, k, n] = (T(2) - T(4) * temp_z) * temp_exp
-            end
-        end
-    end
-
-    return nothing
-end
-
-
-"""
-H1[i, j, k] := phase_xys[i, j, n] * z_coef[l, k, n] * us_mat[i, j, l]
-H2[i, j, k] := phase_xys[i, j, n] * exp_coef[l, k, n] * k_mat[i, j] * us_mat[i, j, l]
-"""
-
-@inbounds function interpolate_nu_einsum!(
-    H_r::Array{Complex{T}, 3},
-    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T},
-    k_x::Vector{T}, k_y::Vector{T}, k_mat::Array{T, 2}, 
-    phase_xs::Array{Complex{T}, 2}, phase_ys::Array{Complex{T}, 2}, phase_xys::Array{Complex{T}, 3}, 
-    z_coef::Array{T, 3}, exp_coef::Array{T, 3}, temp_ijlk::Array{Complex{T}, 4}, temp_ijl::Array{Complex{T}, 3},
-    r_z::Vector{T}, us_mat::Array{Complex{T}, 3}, uspara::USeriesPara{T}, M_mid::Int, size_dict::Dict{Char, Int64}) where{T}
-
-    revise_phase_neg_all!(qs, poses, L, phase_xs, phase_ys, phase_xys, k_x, k_y)
-    revise_z_coef!(z_coef, exp_coef, r_z, poses, uspara, M_mid)
-
-    # H1
-    einsum!(ein"ijn, lkn -> ijlk", (phase_xys, z_coef), temp_ijlk, true, false, size_dict)
-    einsum!(ein"ijlk, ijl -> ijk", (temp_ijlk, us_mat), H_r, true, false, size_dict)
-
-    # H2
-    einsum!(ein"ijn, lkn -> ijlk", (phase_xys, exp_coef), temp_ijlk, true, false, size_dict)
-    einsum!(ein"ij, ijl -> ijl", (k_mat, us_mat), temp_ijl, true, false, size_dict)
-    einsum!(ein"ijlk, ijl -> ijk", (temp_ijlk, temp_ijl), H_r, true, true, size_dict)
-
-    H_r .*= π
-
-    return H_r
-end
-
-@inbounds function interpolate_nu_einsum_non_inplace!(
-    H_r::Array{Complex{T}, 3},
-    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T},
-    k_x::Vector{T}, k_y::Vector{T}, k_mat::Array{T, 2}, 
-    phase_xs::Array{Complex{T}, 2}, phase_ys::Array{Complex{T}, 2}, phase_xys::Array{Complex{T}, 3}, 
-    z_coef::Array{T, 3}, exp_coef::Array{T, 3}, 
-    r_z::Vector{T}, us_mat::Array{Complex{T}, 3}, uspara::USeriesPara{T}, M_mid::Int) where{T}
-
-    revise_phase_neg_all!(qs, poses, L, phase_xs, phase_ys, phase_xys, k_x, k_y)
-    revise_z_coef!(z_coef, exp_coef, r_z, poses, uspara, M_mid)
-
-    @ein H1[i, j, k] := phase_xys[i, j, n] * z_coef[l, k, n] * us_mat[i, j, l]
-    @ein H2[i, j, k] := phase_xys[i, j, n] * exp_coef[l, k, n] * k_mat[i, j] * us_mat[i, j, l]
-
-    H_r = π .* (H1 .+ H2)
-
-    return H_r
-end
-
-@inbounds function interpolate_nu_cheb_single!(
-    H_r::Array{Complex{T}, 3},
-    q::T, pos::NTuple{3, T}, L::NTuple{3, T},
-    k_x::Vector{T}, k_y::Vector{T}, 
-    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, 
-    r_z::Vector{T}, cheb_mat::Array{ChebPoly{1, T, T}, 2}) where{T}
-
-    x, y, z = pos
-    revise_phase_neg!(phase_x, phase_y, k_x, k_y, x - L[1] / 2, y - L[2] / 2)
-
-    
-    for j in 1:size(H_r, 2)
-        phase_yj = phase_y[j]
-        for i in 1:size(H_r, 1)
-            phase_xi = phase_x[i]
-            phase = phase_xi * phase_yj
-            f_cheb = cheb_mat[i, j]
-            for k in 1:size(H_r, 3)
-                r_zk = r_z[k]
-                H_r[i, j, k] += q * phase * f_cheb(abs(z - r_zk))
-            end
-        end
-    end
-
-    return H_r
-end
-
-@inbounds function interpolate_nu_cheb!(
-    H_r::Array{Complex{T}, 3},
-    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T},
-    k_x::Vector{T}, k_y::Vector{T}, 
-    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, 
-    r_z::Vector{T}, cheb_mat::Array{ChebPoly{1, T, T}, 2}) where{T}
-
-    set_zeros!(H_r)
-    for i in 1:length(qs)
-        interpolate_nu_cheb_single!(H_r, qs[i], poses[i], L, k_x, k_y, phase_x, phase_y, r_z, cheb_mat)
-    end
-
-    return H_r
-end
-
-@inbounds function interpolate_nu_loop_single!(
-    H_r::Array{Complex{T}, 3},
-    q::T, pos::NTuple{3, T}, 
-    N::NTuple{3, Int}, L::NTuple{3, T}, k_x::Vector{T}, k_y::Vector{T}, 
-    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, 
-    r_z::Vector{T}, us_mat::Array{Complex{T}, 3}, uspara::USeriesPara{T}, M_mid::Int) where{T}
-
-    x, y, z = pos
-    revise_phase_neg!(phase_x, phase_y, k_x, k_y, x - L[1] / 2, y - L[2] / 2)
-
-    for k in 1:size(H_r, 3)
-        r_zk = r_z[k]
-        for l in M_mid + 1:length(uspara.sw)
-            sl, wl = uspara.sw[l]
-
-            for j in 1:size(H_r, 2)
-                k_yj = k_y[j]
-                for i in 1:size(H_r, 1)
-                    k_xi = k_x[i]
-                    phase = phase_x[i] * phase_y[j]
-                    us = us_mat[i, j, l - M_mid]
-                    H_r[i, j, k] += q * π * wl * phase * (T(2) - T(4) * (z - r_zk)^2 / sl^2 + (k_xi^2 + k_yj^2) * sl^2) * exp(- (z - r_zk)^2 / sl^2) * us
-                end
-            end
-        end
-    end
-
-    return H_r
-end
-
-function interpolate_nu_loop!(
-    H_r::Array{Complex{T}, 3},
-    qs::Vector{T}, poses::Vector{NTuple{3, T}}, 
-    N::NTuple{3, Int}, L::NTuple{3, T}, k_x::Vector{T}, k_y::Vector{T}, 
-    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}}, 
-    r_z::Vector{T}, us_mat::Array{Complex{T}, 3}, uspara::USeriesPara{T}, M_mid::Int) where{T}
-
-    set_zeros!(H_r)
-    for i in 1:length(qs)
-        interpolate_nu_loop_single!(H_r, qs[i], poses[i], N, L, k_x, k_y, phase_x, phase_y, r_z, us_mat, uspara, M_mid)
-    end
-
-    return H_r
-end
-
-@inbounds function real2Cheb!(H_r::Array{Complex{T}, 3}, H_c::Array{Complex{T}, 3}, r_z::Vector{T}, L_z::T) where{T}
-
-    set_zeros!(H_c)
-    N_z = size(H_r, 3)
-
-    for k in 1:N_z, l in 1:N_z
-        cheb_temp = chebpoly(k - 1, r_z[l] - L_z / T(2), L_z / T(2))
-        for j in 1:size(H_r, 2), i in 1:size(H_r, 1)
-            H_c[i, j, k] += 2 / N_z * H_r[i, j, l] * cheb_temp
-        end
-    end
-
-    return H_c
-end
\ No newline at end of file
diff --git a/src/FFCT/precompute.jl b/src/FFCT/precompute.jl
index 90bd034..00c75ad 100644
--- a/src/FFCT/precompute.jl
+++ b/src/FFCT/precompute.jl
@@ -4,68 +4,26 @@
     return fill!(a, zero(T))
 end
 
-@inline function chebpoly(n::Int, x::T, scale::T) where{T}
-    return cos(n * acos(x / scale))
-end
-
 # precompute the parameters to be used
-function FFCT_precompute(L::NTuple{3, T}, N_grid::NTuple{3, Int}, uspara::USeriesPara{T}, M_mid::Int, n_atoms::Int) where{T}
+function long_paras_gen(L::NTuple{3, T}, N_grid::NTuple{3, Int}, n_atoms::Int) where{T}
     L_x, L_y, L_z = L
     N_x, N_y, N_z = N_grid
     k_x = Vector{T}([2π * i / L_x for i in -N_x:N_x])
     k_y = Vector{T}([2π * i / L_y for i in -N_y:N_y])
     r_z = Vector{T}([L_z / 2 * ( 1.0 - cos((2i - 1)*π / 2N_z) ) for i in 1:N_z])
-    
-    k_mat = zeros(T, 2N_x + 1, 2N_y + 1)
-    kx_mat = zeros(T, 2N_x + 1, 2N_y + 1)
-    ky_mat = zeros(T, 2N_x + 1, 2N_y + 1)
-
-    for i in 1:2N_x + 1, j in 1:2N_y + 1
-        k_mat[i, j] = k_x[i]^2 + k_y[j]^2
-        kx_mat[i, j] = k_x[i]
-        ky_mat[i, j] = k_y[j]
-    end
 
     # xy k space grid, z real space grid
     H_r = zeros(Complex{T}, 2N_x + 1, 2N_y + 1, N_z)
     # xy k space grid, z ChebCoefs
     H_c = zeros(Complex{T}, 2N_x + 1, 2N_y + 1, N_z)
-    # solution to the linear eqs, xy k space grid, z ChebCoefs
-    H_s = zeros(Complex{T}, 2N_x + 1, 2N_y + 1, N_z)
-
-    M = length(uspara.sw)
-    M_range = M - M_mid
-    us_mat = zeros(T, 2N_x + 1, 2N_y + 1, M_range)
-    for l in 1:M_range
-        sl, wl = uspara.sw[l + M_mid]
-        us_mat[:, :, l] = exp.( - sl^2 .* k_mat ./ 4)
-    end
-
-    ivsm = inverse_mat(N_grid, L[3], k_x, k_y)
-
-    # the boundary condition at z = 0 and z = L_z
-    b_l = zeros(Complex{T}, 2N_x + 1, 2N_y + 1)
-    b_u = zeros(Complex{T}, 2N_x + 1, 2N_y + 1)
+
     phase_x = zeros(Complex{T}, 2N_x + 1)
     phase_y = zeros(Complex{T}, 2N_y + 1)
 
-    phase_xs = zeros(Complex{T}, 2N_x + 1, n_atoms)
-    phase_ys = zeros(Complex{T}, 2N_y + 1, n_atoms)
-    phase_xys = zeros(Complex{T}, 2N_x + 1, 2N_y + 1, n_atoms)
-
-    rhs = zeros(Complex{T}, N_z + 2)
-    sol = zeros(Complex{T}, N_z + 2)
-
     sort_z = zeros(Int, n_atoms)
     z = zeros(T, n_atoms)
-
-    size_dict = Dict('i' => 2N_x + 1, 'j' => 2N_y + 1, 'k' => N_z, 'l' => M_range, 'n' => n_atoms)
-    temp_ijlk = zeros(Complex{T}, (size_dict['i'], size_dict['j'], size_dict['l'], size_dict['k']))
-    temp_ijl = zeros(Complex{T}, (size_dict['i'], size_dict['j'], size_dict['l']))
-    z_coef = zeros(T, M_range, N_z, n_atoms)
-    exp_coef = zeros(T, M_range, N_z, n_atoms)
     
-    return k_x, k_y, k_mat, r_z, Complex{T}.(us_mat), H_r, H_c, H_s, ivsm, b_l, b_u, phase_x, phase_y, phase_xs, phase_ys, phase_xys, rhs, sol, sort_z, z, size_dict, temp_ijlk, temp_ijl, z_coef, exp_coef
+    return k_x, k_y, r_z, H_r, H_c, phase_x, phase_y,  sort_z, z
 end
 
 @inbounds function revise_phase_neg!(phase_x::AbstractArray{Complex{T}, 1}, phase_y::AbstractArray{Complex{T}, 1}, k_x::Vector{T}, k_y::Vector{T}, x::T, y::T) where{T}
@@ -84,76 +42,37 @@ end
     return nothing
 end
 
-@inbounds function revise_phase_neg_all!(qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, phase_xs::Array{Complex{T}, 2}, phase_ys::Array{Complex{T}, 2}, phase_xys::Array{Complex{T}, 3}, k_x::Vector{T}, k_y::Vector{T}) where{T}
+function thin_grids_gen(N_real::NTuple{2, Int}, R_z::Int, w::NTuple{2, Int}, β::NTuple{2, T}, L::NTuple{3, T}, cheb_order::Int, uspara::USeriesPara{T}, Taylor_Q::Int) where{T}
+    periodicity = (true, true)
+    extra_pad = (0, 0)
 
-    for n in 1:size(poses, 1)
-        x, y, z = poses[n]
-        revise_phase_neg!((@view phase_xs[:, n]), (@view phase_ys[:, n]), k_x, k_y, x - L[1]/2, y - L[2]/2)
-    end
+    gridinfo = GridInfo(N_real, w, periodicity, extra_pad, (L[1], L[2]))
+    pad_grids = [zeros(Complex{T}, N_real[1], N_real[2], R_z) for i in 1:Taylor_Q]
 
-    for n in 1:size(phase_xys, 3), j in 1:size(phase_xys, 2), i in 1:size(phase_xys, 1)
-        phase_xys[i, j, n] = phase_xs[i, n] * phase_ys[j, n] * qs[n]
-    end
+    f_window = [x -> Wkb(x, (w[i] + 0.5) * gridinfo.h[i], β[i]) for i in 1:2]
+    cheb_coefs = tuple([ChebCoef(f_window[i], gridinfo.h[i], w[i], cheb_order) for i in 1:2]...)
 
-    return nothing
+    F_f_window = [x -> FWkb(x, (w[i] + 0.5) * gridinfo.h[i], β[i]) for i in 1:2]
+
+    k_x = gridinfo.k[1]
+    k_y = gridinfo.k[2]
+    taylor_mats = TaylorUSeries(k_x, k_y, L[3], uspara, 0, Taylor_Q)
+
+    func_scale = (k_x, k_y) -> (F_f_window[1](k_x) * F_f_window[2](k_y))^(-2)
+    sf0 = ScalingFactor(func_scale, gridinfo)
+    scalefactors = [ScalingFactor(func_scale, sf0.factors .* taylor_mats[i]) for i in 1:Taylor_Q]
+
+    return (gridinfo, pad_grids, cheb_coefs, scalefactors)
 end
 
-# precompute the inverse matrix
-function inverse_mat(N_grid::NTuple{3, Int}, L_z::T, k_x::Vector{T}, k_y::Vector{T}) where{T}
-    N_x, N_y, N_z = N_grid
-    inverse_mat = zeros(T, N_z + 2, N_z + 2, 2 * N_x + 1, 2 * N_y + 1)
-
-    scale = L_z / T(2)
-
-    A = zeros(T, N_z, N_z + 2)
-    A[2,2] = 1 / 8 * scale^2
-    A[2,4] = - 1 / 8 * scale^2
-    for s in 3:N_z
-        k = s - 1
-        A[s, s - 2] = scale^2 / (2 * k * 2(k - 1))
-        A[s, s] = - (1 / 2(k - 1) + 1 / 2(k + 1)) * scale^2 / 2k
-        A[s, s + 2] = scale^2 / (2k * 2(k+1))
-    end
-
-    B = zeros(T, N_z, 2)
-    B[1, 1] = T(2)
-    B[2, 2] = scale
-
-    C = zeros(T, 2, N_z + 2)
-    for s = 3:N_z
-        k = s - 1
-        C[1, s-2] += scale^2 * (1/(2*k)) * (1/(2*(k-1))) * (-1)^k
-        C[1, s] -= scale^2 * ((1/(2*k)) * (1/(2*(k-1))) + (1/(2*k)) * (1/(2*(k+1)))) * (-1)^k
-        C[1, s+2] += scale^2 * (1/(2*k)) * (1/(2*(k+1))) * (-1)^k
-        
-        C[2, s-2] += scale^2 * (1/(2*k)) * (1/(2*(k-1)))
-        C[2, s] -= scale^2 * ((1/(2*k)) * (1/(2*(k-1))) + (1/(2*k)) * (1/(2*(k+1))))
-        C[2, s+2] += scale^2 * (1/(2*k)) * (1/(2*(k+1)))
-    end
-    C[1, 2] += 1/8 * scale^2 * (-1)
-    C[1, 4] -= 1/8 * scale^2 * (-1)
-    C[2, 2] += 1/8 * scale^2
-    C[2, 4] -= 1/8 * scale^2
-
-    D = zeros(T, 2, 2)
-    D[1, 1] = T(1)
-    D[1, 2] = - scale
-    D[2, 1] = T(1)
-    D[2, 2] = scale
-
-    divisor = zeros(T, N_z + 2, N_z + 2)
-    for i in 1:2N_x + 1, j in 1:2N_y + 1
-        k = sqrt(k_x[i]^2 + k_y[j]^2)
-        if k != zero(T)
-            nu = - k^2
-            divisor[1:N_z, 1:N_z] = A[1:N_z, 1:N_z] .* nu .+ T.(I(N_z))
-            divisor[1:N_z, N_z+1:N_z+2] = nu .* B[1:N_z, 1:2]
-            divisor[N_z+1:N_z+2, 1:N_z] = C[1:2, 1:N_z]
-            divisor[N_z+1:N_z+2, N_z+1:N_z+2] = D
-
-            inverse_mat[:,:,i,j] = inv(divisor)
-        end
-    end
-
-    return inverse_mat
+function thin_paras_gen(N_real::NTuple{2, Int}, R_z::Int, w::NTuple{2, Int}, β::NTuple{2, T}, L::NTuple{3, T}, cheb_order::Int, uspara::USeriesPara{T}, Taylor_Q::Int) where{T}
+    gridinfo, pad_grids, cheb_coefs, scalefactors = thin_grids_gen(N_real, R_z, w, β, L, cheb_order, uspara, Taylor_Q)
+
+    H_r = zeros(Complex{T}, N_real[1], N_real[2], R_z)
+    H_c = zeros(Complex{T}, N_real[1], N_real[2], R_z)
+
+    cheb_value = [zeros(T, size(cheb_coefs[i].coef, 1) - 1) for i in 1:2]
+    r_z = Vector{T}([L[3] / 2 * ( 1.0 - cos((2i - 1)*π / 2R_z) ) for i in 1:R_z])
+
+    return gridinfo, pad_grids, cheb_coefs, scalefactors, H_r, H_c, cheb_value, r_z
 end
\ No newline at end of file
diff --git a/src/FFCT/scaling.jl b/src/FFCT/scaling.jl
new file mode 100644
index 0000000..0934d45
--- /dev/null
+++ b/src/FFCT/scaling.jl
@@ -0,0 +1,35 @@
+function scale_thin!(pad_grids::Vector{Array{Complex{T}, 3}}, scalefactors::Vector{ScalingFactor{2, T}}, H_r::Array{Complex{T}, 3}) where{T}
+    for i in 1:length(pad_grids)
+        piecewise_mul!(pad_grids[i], scalefactors[i].factors)
+    end
+    return sum!(H_r, pad_grids)
+end
+
+function piecewise_mul!(grid::Array{Complex{T}, 3}, factor::Array{Complex{T}, 2}) where{T}
+    for k in 1:size(grid, 3)
+        (@view grid[:, :, k]) .*= factor
+    end
+    return grid
+end
+
+function piecewise_fft!(grid::Array{Complex{T}, 3}) where{T}
+    for k in 1:size(grid, 3)
+        fft!(@view grid[:, :, k])
+    end
+    return grid
+end
+
+function piecewise_ifft!(grid::Array{Complex{T}, 3}) where{T}
+    for k in 1:size(grid, 3)
+        ifft!(@view grid[:, :, k])
+    end
+    return grid
+end
+
+function sum!(pad_gird0::Array{Complex{T}, 3}, pad_grids::Vector{Array{Complex{T}, 3}}) where{T}
+    set_zeros!(pad_gird0)
+    for i in 1:length(pad_grids)
+        pad_gird0 .+= pad_grids[i]
+    end
+    return pad_gird0
+end
\ No newline at end of file
diff --git a/src/FSSoGInteraction.jl b/src/FSSoGInteraction.jl
index f8a746e..83fc926 100644
--- a/src/FSSoGInteraction.jl
+++ b/src/FSSoGInteraction.jl
@@ -58,10 +58,10 @@ function FSSoGInteraction(
     gridinfo, gridbox, cheb_coefs, scalefactor = mid_paras_gen(N_real_mid, w, β, L, extra_pad_ratio, cheb_order, uspara, M_mid)
     
     # some of these term are for other methods, not used
-    k_x, k_y, k_mat, r_z, us_mat, H_r, H_c, H_s, ivsm, b_l, b_u, phase_x, phase_y, phase_xs, phase_ys, phase_xys, rhs, sol, sort_z, z, size_dict, temp_ijlk, temp_ijl, z_coef, exp_coef = FFCT_precompute(L, N_grid_long, uspara, M_mid, n_atoms)
+    k_x, k_y, r_z, H_r, H_c, phase_x, phase_y, sort_z, z = long_paras_gen(L, N_grid_long, n_atoms)
     cheb_mat = ChebUSeries(k_x, k_y, L[3], uspara, M_mid, Q_long)
 
-    return FSSoGInteraction{T}(b, σ, ω, M, ϵ, L, boundary, n_atoms, uspara, position, charge, uspara_cheb, r_c, F0, gridinfo, gridbox, cheb_coefs, scalefactor, M_mid, k_x, k_y, r_z, phase_x, phase_y, us_mat, b_l, b_u, rhs, sol, ivsm, H_r, H_c, H_s, cheb_mat, z, sort_z, soepara)
+    return FSSoGInteraction{T}(b, σ, ω, M, ϵ, L, boundary, n_atoms, uspara, position, charge, uspara_cheb, r_c, F0, gridinfo, gridbox, cheb_coefs, scalefactor, M_mid, k_x, k_y, r_z, phase_x, phase_y, H_r, H_c, cheb_mat, z, sort_z, soepara)
 end
 
 function FSSoGInteraction(
@@ -76,4 +76,41 @@ function FSSoGInteraction(
     b, σ, ω, M = T(preset_parameters[preset][1]), T(preset_parameters[preset][2]), T(preset_parameters[preset][3]), Int(preset_parameters[preset][4])
     
     return FSSoGInteraction(b, σ, ω, M, L, n_atoms, r_c, Q_short, r_min, N_real_mid, w, β, extra_pad_ratio, cheb_order, M_mid, N_grid_long, Q_long, soepara, ϵ = ϵ)
+end
+
+function FSSoGThinInteraction(
+    b::T, σ::T, ω::T, M::Int,
+    L::NTuple{3, T}, n_atoms::Int,
+    r_c::T, Q_short::Int, r_min::T,
+    N_real::NTuple{2, Int}, R_z::Int, w::NTuple{2, Int}, β::NTuple{2, T}, cheb_order::Int, Taylor_Q::Int, soepara::SoePara{Complex{T}}; 
+    ϵ::T = one(T)) where{T}
+
+    uspara = USeriesPara(b, σ, ω, M)
+    position = [tuple(zeros(T, 3)...) for i in 1:n_atoms]
+    charge = zeros(T, n_atoms)
+
+    boundary = Q2dBoundary(L...)
+
+    uspara_cheb = Es_USeries_Cheb(uspara, r_min, r_c, Q_short)
+    F0 = F0_cal(b, σ, ω, M)
+
+    gridinfo, pad_grids, cheb_coefs, scalefactors, H_r, H_c, cheb_value, r_z = thin_paras_gen(N_real, R_z, w, β, L, cheb_order, uspara, Taylor_Q)
+
+    sort_z = zeros(Int, n_atoms)
+    z = zeros(T, n_atoms)
+
+    return FSSoGThinInteraction{T}(b, σ, ω, M, ϵ, L, boundary, n_atoms, uspara, position, charge, uspara_cheb, r_c, F0, r_z, H_r, H_c, gridinfo, pad_grids, scalefactors, cheb_coefs, cheb_value, z, sort_z, soepara)
+end
+
+function FSSoGThinInteraction(
+    L::NTuple{3, T}, n_atoms::Int,
+    r_c::T, Q_short::Int, r_min::T,
+    N_real::NTuple{2, Int}, R_z::Int, w::NTuple{2, Int}, β::NTuple{2, T}, cheb_order::Int, Taylor_Q::Int, soepara::SoePara{Complex{T}}; 
+    preset::Int = 1, ϵ::T = one(T)) where{T}
+
+    @assert preset ≤ length(preset_parameters)
+    
+    b, σ, ω, M = T(preset_parameters[preset][1]), T(preset_parameters[preset][2]), T(preset_parameters[preset][3]), Int(preset_parameters[preset][4])
+    
+    return FSSoGThinInteraction(b, σ, ω, M, L, n_atoms, r_c, Q_short, r_min, N_real, R_z, w, β, cheb_order, Taylor_Q, soepara, ϵ = ϵ)
 end
\ No newline at end of file
diff --git a/src/FastSpecSoG.jl b/src/FastSpecSoG.jl
index ff20296..6f047c9 100644
--- a/src/FastSpecSoG.jl
+++ b/src/FastSpecSoG.jl
@@ -1,36 +1,40 @@
 module FastSpecSoG
 
-using ExTinyMD, LinearAlgebra, SpecialFunctions, ChebParticleMesh, SumOfExpVPMR, LoopVectorization, OMEinsum, FastChebInterp
+using ExTinyMD, LinearAlgebra, SpecialFunctions, ChebParticleMesh, SumOfExpVPMR, LoopVectorization, OMEinsum, FastChebInterp, Polynomials, FFTW
 
 import FastChebInterp: ChebPoly
 
 export USeriesPara, U_series, BSA, ChebUSeries, proper_M
-export FSSoG_naive, FSSoGInteraction
+export FSSoG_naive, FSSoGInteraction, FSSoGThinInteraction
 export short_energy_naive, long_energy_naive, energy_naive
 
 export Es_Cheb_precompute, short_energy_Cheb
 
 export mid_paras_gen, energy_mid, energy_long, energy_short
-export FFCT_precompute, boundaries!, inverse_mat
-export interpolate_nu_einsum!, interpolate_nu_cheb!, interpolate_nu_loop!
-export real2Cheb!, solve_eqs!, gather_nu
-
-export energy_long_loop_k, energy_long_einsum_k, energy_long_cheb_k, energy_long_0
+export long_paras_gen, interpolate_nu_cheb!, interpolate_nu_loop!
+export real2Cheb!, Cheb2real!, gather_nu
+export energy_long_loop_k, energy_long_cheb_k, energy_long_0
 
 export long_energy_us_k, long_energy_us_0, long_energy_sw_0, long_energy_sw_k
 export SoePara, SoePara4, SoePara8, SoePara16
 
+export TaylorUSeries_k, TaylorUSeries
+export thin_paras_gen, interpolate_thin!, gather_thin, scale_thin!
+export piecewise_fft!, piecewise_ifft!, piecewise_mul!
+export energy_long_thin_k
+
 include("types.jl")
 
 include("U_series.jl")
 include("FSSoGInteraction.jl")
 
 include("FFCT/precompute.jl")
-include("FFCT/nugrid_interpolate.jl")
-include("FFCT/linear_eqs.jl")
-include("FFCT/nugrid_gather.jl")
+include("FFCT/interpolate.jl")
+include("FFCT/gather.jl")
 include("FFCT/zeroth_order.jl")
-include("FFCT/cheb_interpolate.jl")
+include("FFCT/Chebyshev.jl")
+include("FFCT/Taylor.jl")
+include("FFCT/scaling.jl")
 
 include("energy/energy_short.jl")
 include("energy/energy_mid.jl")
diff --git a/src/energy/energy.jl b/src/energy/energy.jl
index 5ee5a7b..5ab5b09 100644
--- a/src/energy/energy.jl
+++ b/src/energy/energy.jl
@@ -21,4 +21,17 @@ function ExTinyMD.energy(interaction::FSSoGInteraction{T}, neighbor::CellList3D{
     E_short = energy_short(interaction, neighbor)
 
     return E_long + E_mid + E_short
+end
+
+function ExTinyMD.energy(interaction::FSSoGThinInteraction{T}, neighbor::CellList3D{T}, info::SimulationInfo{T}, atoms::Vector{Atom{T}}) where{T}
+
+    for i in 1:interaction.n_atoms
+        interaction.charge[i] = atoms[info.particle_info[i].id].charge
+        interaction.position[i] = info.particle_info[i].position.coo
+    end
+
+    E_long = energy_long(interaction)
+    E_short = energy_short(interaction, neighbor)
+
+    return E_long + E_short
 end
\ No newline at end of file
diff --git a/src/energy/energy_long.jl b/src/energy/energy_long.jl
index 4a7b1ee..222bb25 100644
--- a/src/energy/energy_long.jl
+++ b/src/energy/energy_long.jl
@@ -1,105 +1,84 @@
-# N_x, N_y are the uniform grid numbers in the x and y directions
-# N_z is the Chebyshev grid number in the z direction
-# n_atoms is the number of particles
-# M_num is the number of Gaussians M_mid+1:M
-
-# This part will be done in the following steps:
-# 1. Generate the inverse matrix, k matrix
-# 2. interpolate the particles onto the grid (Fourier grids in xy and chebgrid in z)
-# 3. Convert the grid in z into the Chebyshev series
-# 4. solve the equation for the boundary condition
-# 5. gather the energy
-
-function energy_long_loop_k(
-    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, N_grid::NTuple{3, Int}, M_mid::Int, 
-    k_x::Vector{T}, k_y::Vector{T}, r_z::Vector{T}, 
-    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}},
-    us_mat::Array{Complex{T}, 3}, b_l::Array{Complex{T}, 2}, b_u::Array{Complex{T}, 2}, 
-    rhs::Vector{Complex{T}}, sol::Vector{Complex{T}}, ivsm::Array{T, 4},
-    H_r::Array{Complex{T}, 3}, H_c::Array{Complex{T}, 3}, H_s::Array{Complex{T}, 3}, 
-    uspara::USeriesPara{T}
+function energy_long_0(
+    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, M_mid::Int, 
+    z::Vector{T}, sort_z::Vector{Int}, 
+    uspara::USeriesPara{T}, soepara::SoePara{Complex{T}}
     ) where{T}
 
     @assert M_mid ≤ length(uspara.sw)
 
-    b_l, b_u = boundaries!(qs, poses, L, b_l, b_u, k_x, k_y, us_mat, phase_x, phase_y, L[3], uspara, M_mid)
-    H_r = interpolate_nu_loop!(H_r, qs, poses, N_grid, L, k_x, k_y, phase_x, phase_y, r_z, us_mat, uspara, M_mid)
-    H_c = real2Cheb!(H_r, H_c, r_z, L[3])
-    H_s = solve_eqs!(rhs, sol, H_c, H_s, b_l, b_u, ivsm, L[3])
-    E_k = gather_nu(qs, poses, L, k_x, k_y, phase_x, phase_y, H_s)
-
-    @debug "long range energy, einsum, FFCT" E_k
+    revise_z!(z, sort_z, poses)
+    E_0 = zeroth_order(qs, z, soepara, uspara, sort_z, L, M_mid)
 
-    return E_k
+    return E_0
 end
 
-function energy_long_einsum_k(
-    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, M_mid::Int, 
-    k_x::Vector{T}, k_y::Vector{T}, k_mat::Array{T, 2}, r_z::Vector{T}, 
-    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}},
-    phase_xs::Array{Complex{T}, 2}, phase_ys::Array{Complex{T}, 2}, phase_xys::Array{Complex{T}, 3},
-    temp_ijlk::Array{Complex{T}, 4}, temp_ijl::Array{Complex{T}, 3}, size_dict::Dict{Char, Int64}, z_coef::Array{T, 3}, exp_coef::Array{T, 3}, 
-    us_mat::Array{Complex{T}, 3}, b_l::Array{Complex{T}, 2}, b_u::Array{Complex{T}, 2}, 
-    rhs::Vector{Complex{T}}, sol::Vector{Complex{T}}, ivsm::Array{T, 4},
-    H_r::Array{Complex{T}, 3}, H_c::Array{Complex{T}, 3}, H_s::Array{Complex{T}, 3}, 
-    uspara::USeriesPara{T}
-    ) where{T}
+function energy_long(interaction::FSSoGInteraction{T}) where{T}
 
-    @assert M_mid ≤ length(uspara.sw)
+    E_k = energy_long_cheb_k(interaction.charge, interaction.position, interaction.L, interaction.k_x, interaction.k_y, interaction.r_z, interaction.phase_x, interaction.phase_y, interaction.H_r, interaction.H_c, interaction.cheb_mat)
 
-    b_l, b_u = boundaries!(qs, poses, L, b_l, b_u, k_x, k_y, us_mat, phase_x, phase_y, L[3], uspara, M_mid)
-    H_r = interpolate_nu_einsum!(H_r, qs, poses, L, k_x, k_y, k_mat, phase_xs, phase_ys, phase_xys, z_coef, exp_coef, temp_ijlk, temp_ijl, r_z, us_mat, uspara, M_mid, size_dict)
-    H_c = real2Cheb!(H_r, H_c, r_z, L[3])
-    H_s = solve_eqs!(rhs, sol, H_c, H_s, b_l, b_u, ivsm, L[3])
-    E_k = gather_nu(qs, poses, L, k_x, k_y, phase_x, phase_y, H_s)
+    E_0 = energy_long_0(interaction.charge, interaction.position, interaction.L, interaction.M_mid, interaction.z, interaction.sort_z, interaction.uspara, interaction.soepara)
+
+    return (E_k + E_0) / (4π * interaction.ϵ)
+end
 
-    @debug "long range energy, loop, FFCT" E_k
+function energy_long(interaction::FSSoGThinInteraction{T}) where{T}
 
-    return E_k
+    E_k = energy_long_thin_k(interaction.charge, interaction.position, interaction.L, interaction.r_z, interaction.H_r, interaction.H_c, interaction.gridinfo, interaction.pad_grids, interaction.scalefactors, interaction.cheb_coefs, interaction.cheb_value)
+
+    E_0 = energy_long_0(interaction.charge, interaction.position, interaction.L, 0, interaction.z, interaction.sort_z, interaction.uspara, interaction.soepara)
+    return (E_k + E_0) / (4π * interaction.ϵ)
 end
 
-function energy_long_cheb_k(
+function energy_long_loop_k(
     qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, 
     M_mid::Int, 
     k_x::Vector{T}, k_y::Vector{T}, r_z::Vector{T}, 
     phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}},
-    us_mat::Array{Complex{T}, 3}, b_l::Array{Complex{T}, 2}, b_u::Array{Complex{T}, 2}, 
-    rhs::Vector{Complex{T}}, sol::Vector{Complex{T}}, ivsm::Array{T, 4},
-    H_r::Array{Complex{T}, 3}, H_c::Array{Complex{T}, 3}, H_s::Array{Complex{T}, 3}, 
-    uspara::USeriesPara{T}, cheb_mat::Array{ChebPoly{1, T, T}, 2}
-    ) where{T}
+    H_r::Array{Complex{T}, 3}, H_c::Array{Complex{T}, 3},
+    uspara::USeriesPara{T}) where{T}
 
     @assert M_mid ≤ length(uspara.sw)
 
-    b_l, b_u = boundaries!(qs, poses, L, b_l, b_u, k_x, k_y, us_mat, phase_x, phase_y, L[3], uspara, M_mid)
-    H_r = interpolate_nu_cheb!(H_r,qs, poses, L, k_x, k_y, phase_x, phase_y, r_z, cheb_mat)
+    H_r = interpolate_nu_loop!(H_r, qs, poses, L, k_x, k_y, phase_x, phase_y, r_z, uspara, M_mid)
     H_c = real2Cheb!(H_r, H_c, r_z, L[3])
-    H_s = solve_eqs!(rhs, sol, H_c, H_s, b_l, b_u, ivsm, L[3])
-    E_k = gather_nu(qs, poses, L, k_x, k_y, phase_x, phase_y, H_s)
+    E_k = gather_nu(qs, poses, L, k_x, k_y, phase_x, phase_y, H_c)
 
-    @debug "long range energy, cheb, FFCT" E_k
+    @debug "long range energy, cube, loop" E_k
 
     return E_k
 end
 
-function energy_long_0(
-    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, M_mid::Int, 
-    z::Vector{T}, sort_z::Vector{Int}, 
-    uspara::USeriesPara{T}, soepara::SoePara{Complex{T}}
-    ) where{T}
+function energy_long_cheb_k(
+    qs::Vector{T}, poses::Vector{NTuple{3, T}}, L::NTuple{3, T}, 
+    k_x::Vector{T}, k_y::Vector{T}, r_z::Vector{T}, 
+    phase_x::Vector{Complex{T}}, phase_y::Vector{Complex{T}},
+    H_r::Array{Complex{T}, 3}, H_c::Array{Complex{T}, 3}, cheb_mat::Array{ChebPoly{1, T, T}, 2}) where{T}
 
-    @assert M_mid ≤ length(uspara.sw)
+    H_r = interpolate_nu_cheb!(H_r, qs, poses, L, k_x, k_y, phase_x, phase_y, r_z, cheb_mat)
+    H_c = real2Cheb!(H_r, H_c, r_z, L[3])
+    E_k = gather_nu(qs, poses, L, k_x, k_y, phase_x, phase_y, H_c)
 
-    revise_z!(z, sort_z, poses)
-    E_0 = zeroth_order(qs, z, soepara, uspara, sort_z, L, M_mid)
+    @debug "long range energy, cube, cheb" E_k
 
-    return E_0
+    return E_k
 end
 
-function energy_long(interaction::FSSoGInteraction{T}) where{T}
+function energy_long_thin_k(
+    qs::Vector{T}, poses::Vector{NTuple{3, T}},
+    L::NTuple{3, T}, r_z::Vector{T}, 
+    H_r::Array{Complex{T}, 3}, H_c::Array{Complex{T}, 3},
+    gridinfo::GridInfo{2, T}, pad_grids::Vector{Array{Complex{T}, 3}}, scalefactors::Vector{ScalingFactor{2, T}}, 
+    cheb_coefs::NTuple{2, ChebCoef{T}}, cheb_value::Vector{Array{T, 1}}
+    ) where{T}
 
-    E_k = energy_long_cheb_k(interaction.charge, interaction.position, interaction.L, interaction.M_mid, interaction.k_x, interaction.k_y, interaction.r_z, interaction.phase_x, interaction.phase_y, interaction.us_mat, interaction.b_l, interaction.b_u, interaction.rhs, interaction.sol, interaction.ivsm, interaction.H_r, interaction.H_c, interaction.H_s, interaction.uspara, interaction.cheb_mat)
-    E_0 = energy_long_0(interaction.charge, interaction.position, interaction.L, interaction.M_mid, interaction.z, interaction.sort_z, interaction.uspara, interaction.soepara)
+    interpolate_thin!(qs, poses, pad_grids, gridinfo, cheb_value, cheb_coefs, r_z, L[3])
+    piecewise_fft!.(pad_grids)
+    H_r = scale_thin!(pad_grids, scalefactors, H_r)
+    H_c = real2Cheb!(H_r, H_c, r_z, L[3])
+    H_c = piecewise_ifft!(H_c)
+    E_k = gather_thin(qs, poses, L, H_c, gridinfo, cheb_value, cheb_coefs)
 
-    return (E_k + E_0) / (4π * interaction.ϵ)
+    @debug "long range energy, thin" E_k
+
+    return E_k
 end
\ No newline at end of file
diff --git a/src/energy/energy_mid.jl b/src/energy/energy_mid.jl
index 80a2d42..cef8968 100644
--- a/src/energy/energy_mid.jl
+++ b/src/energy/energy_mid.jl
@@ -10,6 +10,7 @@ function Tdk(kx::T, ky::T, kz::T, uspara::USeriesPara{T}, M_mid::Int) where{T}
     return sqrt(π)/4 * Tdk_value
 end
 
+# using the KB window function as default
 function mid_paras_gen(N_real::NTuple{3, Int}, w::NTuple{3, Int}, β::NTuple{3, T}, L::NTuple{3, T}, extra_pad_ratio::Int, cheb_order::Int, uspara::USeriesPara{T}, M_mid::Int) where{T}
     periodicity = (true, true, false)
     extra_pad = extra_pad_ratio .* w
diff --git a/src/energy/energy_short.jl b/src/energy/energy_short.jl
index 7b43d1a..c88fd5b 100644
--- a/src/energy/energy_short.jl
+++ b/src/energy/energy_short.jl
@@ -58,4 +58,8 @@ end
 
 function energy_short(interaction::FSSoGInteraction{T}, neighbor::CellList3D{T}) where{T}
     return short_energy_Cheb(interaction.uspara_cheb, interaction.r_c, interaction.F0, interaction.boundary, neighbor, interaction.position, interaction.charge) / interaction.ϵ
+end
+
+function energy_short(interaction::FSSoGThinInteraction{T}, neighbor::CellList3D{T}) where{T}
+    return short_energy_Cheb(interaction.uspara_cheb, interaction.r_c, interaction.F0, interaction.boundary, neighbor, interaction.position, interaction.charge) / interaction.ϵ
 end
\ No newline at end of file
diff --git a/src/types.jl b/src/types.jl
index 1c5ac58..617b539 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -48,17 +48,45 @@ mutable struct FSSoGInteraction{T} <: ExTinyMD.AbstractInteraction
     r_z::Vector{T}
     phase_x::Vector{Complex{T}}
     phase_y::Vector{Complex{T}}
-    us_mat::Array{Complex{T}, 3}
-    b_l::Array{Complex{T}, 2}
-    b_u::Array{Complex{T}, 2}
-    rhs::Vector{Complex{T}}
-    sol::Vector{Complex{T}}
-    ivsm::Array{T, 4}
     H_r::Array{Complex{T}, 3}
     H_c::Array{Complex{T}, 3}
-    H_s::Array{Complex{T}, 3}
     cheb_mat::Array{ChebPoly{1, T, T}, 2}
 
+    # for FFCT zero order term
+    z::Vector{T}
+    sort_z::Vector{Int}
+    soepara::SoePara{Complex{T}}
+end
+
+mutable struct FSSoGThinInteraction{T} <: ExTinyMD.AbstractInteraction
+    b::T
+    σ::T
+    ω::T
+    M::Int
+    ϵ::T
+    L::NTuple{3, T}
+    boundary::Boundary{T}
+    n_atoms::Int
+
+    uspara::USeriesPara{T}
+    position::Vector{NTuple{3, T}}
+    charge::Vector{T}
+
+    # for short range
+    uspara_cheb::ChebPoly{1, T, T}
+    r_c::T
+    F0::T
+
+    # for long range by 2dfft
+    r_z::Vector{T}
+    H_r::Array{Complex{T}, 3}
+    H_c::Array{Complex{T}, 3}
+    gridinfo::GridInfo{2, T}
+    pad_grids::Vector{Array{Complex{T}, 3}}
+    scalefactors::Vector{ScalingFactor{2, T}}
+    cheb_coefs::NTuple{2, ChebCoef{T}}
+    cheb_value::Vector{Array{T, 1}}
+
     # for FFCT zero order term
     z::Vector{T}
     sort_z::Vector{Int}
diff --git a/test/Chebyshev.jl b/test/Chebyshev.jl
new file mode 100644
index 0000000..a670bfb
--- /dev/null
+++ b/test/Chebyshev.jl
@@ -0,0 +1,34 @@
+@testset "Cheb expansion of U_series" begin
+    @info "testing the Cheb expansion"
+    k_xi = 0.1
+    k_yj = 0.2
+    L_z = 10.0
+    z = [0.01:0.01:L_z...]
+    f_cheb = FastSpecSoG.ChebUSeries_k(k_xi, k_yj, L_z, USeriesPara(2), 3, 16)
+    direct_U = map(x -> FastSpecSoG.USeries_direct(x, k_xi, k_yj, USeriesPara(2), 3), z)
+    @test isapprox(f_cheb.(z), direct_U)
+end
+
+@testset "real2Cheb and Cheb2real" begin
+    n_atoms = 32
+    L = (100.0, 100.0, 1.0)
+
+    qs = rand(n_atoms)
+    qs .-= sum(qs) ./ n_atoms
+    poses = [tuple(L .* rand(3)...) for i in 1:n_atoms]
+
+    N_grid = (32, 32, 32)
+    uspara = USeriesPara(2)
+    M_mid = 0
+
+    k_x, k_y, r_z, H_r1, H_c, phase_x, phase_y,  sort_z, z = long_paras_gen(L, N_grid, n_atoms)
+    cheb_mat = ChebUSeries(k_x, k_y, L[3], uspara, M_mid, 64)
+
+    H_r2 = copy(H_r1)
+
+    H_r = interpolate_nu_loop!(H_r1, qs, poses, L, k_x, k_y, phase_x, phase_y, r_z, uspara, M_mid)
+    H_c = real2Cheb!(H_r1, H_c, r_z, L[3])
+    H_r2 = Cheb2real!(H_c, H_r1, r_z, L[3])
+
+    @test isapprox(H_r, H_r2; rtol = 1e-14)
+end
\ No newline at end of file
diff --git a/test/Project.toml b/test/Project.toml
index 8a8c659..81c0861 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,6 +1,8 @@
 [deps]
 EwaldSummations = "329efeb5-5dc7-45f3-8303-d0bcfeef1c8a"
 ExTinyMD = "fec76197-d59f-46dd-a0ed-76a83c21f7aa"
+FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
+Polynomials = "f27b6e38-b328-58d1-80ce-0feddd5e7a45"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 TaylorSeries = "6aa5eb33-94cf-58f4-a9d0-e4b2c4fc25ea"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/Taylor.jl b/test/Taylor.jl
new file mode 100644
index 0000000..08974d7
--- /dev/null
+++ b/test/Taylor.jl
@@ -0,0 +1,17 @@
+@testset "Taylor expansions" begin
+    @info "Testing the TaylorUSeries_k"
+    for preset in 1:6
+        @testset "Preset $preset" begin
+            uspara = USeriesPara(preset)
+            M_mid = 0
+            L_z = abs(1.0 + randn())
+            Q = 64
+            k_x, k_y = randn(2)
+            poly = Polynomial(TaylorUSeries_k(k_x, k_y, L_z, uspara, M_mid, Q))
+            xs = [0.0:0.01:L_z...]
+            fd = x -> FastSpecSoG.Greens_Q2d_direct(x, k_x, k_y, uspara, M_mid)
+            fp = x -> poly((x / L_z)^2)
+            @test fp.(xs) ≈ fd.(xs)
+        end
+    end
+end
\ No newline at end of file
diff --git a/test/cheb_interpolate.jl b/test/cheb_interpolate.jl
deleted file mode 100644
index e398911..0000000
--- a/test/cheb_interpolate.jl
+++ /dev/null
@@ -1,10 +0,0 @@
-@testset "Cheb expansion of U_series" begin
-    @info "testing the Cheb expansion"
-    k_xi = 0.1
-    k_yj = 0.2
-    L_z = 10.0
-    z = [0.01:0.01:L_z...]
-    f_cheb = FastSpecSoG.ChebUSeries_k(k_xi, k_yj, L_z, USeriesPara(2), 3, 16)
-    direct_U = map(x -> FastSpecSoG.USeries_direct(x, k_xi, k_yj, USeriesPara(2), 3), z)
-    @test isapprox(f_cheb.(z), direct_U)
-end
\ No newline at end of file
diff --git a/test/energy.jl b/test/energy.jl
index ff52ae4..d25cd92 100644
--- a/test/energy.jl
+++ b/test/energy.jl
@@ -1,6 +1,6 @@
-@testset "energy" begin
+@testset "energy cube" begin
 
-    @info "testing the energy"
+    @info "testing the energy cube"
 
     n_atoms = 100
     L = 100.0
@@ -33,7 +33,7 @@
     preset = 2
     M_mid = 3
 
-    N_grid = (16, 16, 31)
+    N_grid = (32, 32, 32)
     Q = 24
     r_c = 10.0
 
@@ -45,4 +45,53 @@
     @test abs(energy_ewald - energy_sog_naive) < 1e-4
     @test abs(energy_ewald - energy_sog) < 1e-4
     @test abs(energy_sog_naive - energy_sog) < 1e-6
+end
+
+@testset "energy thin" begin
+
+    @info "testing the energy thin"
+
+    n_atoms = 100
+    Lx = 100.0
+    Ly = 100.0
+    Lz = 1.0
+    boundary = ExTinyMD.Q2dBoundary(Lx, Ly, Lz)
+
+    atoms = Vector{Atom{Float64}}()
+    for i in 1:n_atoms÷2
+        push!(atoms, Atom(type = 1, mass = 1.0, charge = 2.0))
+    end
+
+    for i in n_atoms÷2 + 1 : n_atoms
+        push!(atoms, Atom(type = 2, mass = 1.0, charge = - 2.0))
+    end
+
+    info = SimulationInfo(n_atoms, atoms, (0.0, Lx, 0.0, Ly, 0.0, Lz), boundary; min_r = 1.0, temp = 1.0)
+
+    Ewald2D_interaction = Ewald2DInteraction(n_atoms, 5.0, 0.2, (Lx, Ly, Lz), ϵ = 1.0)
+    Ewald2D_neighbor = CellList3D(info, Ewald2D_interaction.r_c, boundary, 1)
+    energy_ewald = energy(Ewald2D_interaction, Ewald2D_neighbor, info, atoms)
+
+    N_real = (128, 128)
+    R_z = 32
+    w = (16, 16)
+    β = 5.0 .* w
+    cheb_order = 16
+    preset = 2
+    Q = 24
+    Taylor_Q = 24
+    r_c = 10.0
+
+    fssog_interaction = FSSoGThinInteraction((Lx, Ly, Lz), n_atoms, r_c, Q, 0.5, N_real, R_z, w, β, cheb_order, Taylor_Q, SoePara16(); preset = preset, ϵ = 1.0)
+
+    fssog_neighbor = CellList3D(info, fssog_interaction.r_c, fssog_interaction.boundary, 1)
+    energy_sog = ExTinyMD.energy(fssog_interaction, fssog_neighbor, info, atoms)
+
+    fssog_naive = FSSoG_naive((Lx, Ly, Lz), n_atoms, 10.0, 2.0, preset = 2)
+    fssog_neighbor = CellList3D(info, fssog_naive.r_c, boundary, 1)
+    energy_sog_naive = energy_naive(fssog_naive, fssog_neighbor, info, atoms)
+
+    @test abs(energy_ewald - energy_sog) < 1e-3
+    @test abs(energy_ewald - energy_sog_naive) < 1e-3
+    @test abs(energy_sog - energy_sog_naive) < 1e-6
 end
\ No newline at end of file
diff --git a/test/energy_long.jl b/test/energy_long.jl
index 5d055ed..dea8950 100644
--- a/test/energy_long.jl
+++ b/test/energy_long.jl
@@ -1,37 +1,90 @@
-@testset "energy_long long range" begin
+@testset "energy long range" begin
     @info "testing the long range part of the energy"
-    n_atoms = 8
+    n_atoms = 32
     L = (100.0, 100.0, 100.0)
 
     qs = rand(n_atoms)
     qs .-= sum(qs) ./ n_atoms
     poses = [tuple(L .* rand(3)...) for i in 1:n_atoms]
 
-    # using the 3DFFT
     N_grid = (32, 32, 32)
     uspara = USeriesPara(2)
     soepara = SoePara16()
     M_mid = 5
 
-    k_x, k_y, k_mat, r_z, us_mat, H_r, H_c, H_s, ivsm, b_l, b_u, phase_x, phase_y, phase_xs, phase_ys, phase_xys, rhs, sol, sort_z, z, size_dict, temp_ijlk, temp_ijl, z_coef, exp_coef = FFCT_precompute(L, N_grid, USeriesPara(2), M_mid, n_atoms)
+    k_x, k_y, r_z, H_r, H_c, phase_x, phase_y, sort_z, z = long_paras_gen(L, N_grid, n_atoms)
+    cheb_mat = ChebUSeries(k_x, k_y, L[3], uspara, M_mid, 64)
 
-    @info "running the FFCT for the long range part of the energy"
-    E_FFCT_loop_k = energy_long_loop_k(qs, poses, L, N_grid, M_mid, k_x, k_y, r_z, phase_x, phase_y, us_mat, b_l, b_u, rhs, sol, ivsm, H_r, H_c, H_s, uspara)
-    
-    E_FFCT_einsum_k = energy_long_einsum_k(qs, poses, L, M_mid, k_x, k_y, k_mat, r_z, phase_x, phase_y, phase_xs, phase_ys, phase_xys, temp_ijlk, temp_ijl, size_dict, z_coef, exp_coef, us_mat, b_l, b_u, rhs, sol, ivsm, H_r, H_c, H_s, uspara)
-
-    E_FFCT_cheb_k = energy_long_cheb_k(qs, poses, L, M_mid, k_x, k_y, r_z, phase_x, phase_y, us_mat, b_l, b_u, rhs, sol, ivsm, H_r, H_c, H_s, uspara, ChebUSeries(k_x, k_y, L[3], uspara, M_mid, 64))
+    E_long_loop_k = energy_long_loop_k(qs, poses, L, M_mid, k_x, k_y, r_z, phase_x, phase_y, H_r, H_c, uspara)
+    E_long_cheb_k = energy_long_cheb_k(qs, poses, L, k_x, k_y, r_z, phase_x, phase_y, H_r, H_c, cheb_mat)
 
-    E_FFCT_0 = energy_long_0(qs, poses, L, M_mid, z, sort_z, uspara, soepara)
+    E_long_0 = energy_long_0(qs, poses, L, M_mid, z, sort_z, uspara, soepara)
 
     @info "running the direct summation for the long range part of the energy"
     # using the direct summation
-    E_direct_k = long_energy_us_k(qs, poses, 30, L, uspara, M_mid + 1, length(uspara.sw))
+    E_direct_k = long_energy_us_k(qs, poses, 50, L, uspara, M_mid + 1, length(uspara.sw))
     E_direct_0 = long_energy_us_0(qs, poses, L, uspara, M_mid + 1, length(uspara.sw))
 
-    @test isapprox(E_FFCT_0, E_direct_0, atol = 1e-8)
-    @test E_FFCT_einsum_k ≈ E_FFCT_loop_k
-    @test isapprox(E_FFCT_loop_k, E_direct_k, atol=1e-8)
-    @test isapprox(E_FFCT_einsum_k, E_direct_k, atol=1e-8)
-    @test isapprox(E_FFCT_cheb_k, E_direct_k, atol=1e-8)
+    @test isapprox(E_long_0, E_direct_0, atol = 1e-4)
+    @test isapprox(E_long_loop_k, E_long_cheb_k)
+    @test isapprox(E_long_loop_k, E_direct_k, atol=1e-8)
+    @test isapprox(E_long_cheb_k, E_direct_k, atol=1e-8)
+end
+
+@testset "thin systems long, fft" begin
+
+    @info "testing the long range part of the thin system, 2dfft"
+
+    preset = 2
+    
+    n_atoms = 32
+    L = (100.0, 100.0, 1.0)
+
+    qs = rand(n_atoms)
+    qs .-= sum(qs) ./ n_atoms
+    poses = [tuple(L .* rand(3)...) for i in 1:n_atoms]
+
+    # 2dfft
+    N_real = (128, 128)
+    R_z = 16
+    w = (16, 16)
+    β = 5.0 .* w
+    cheb_order = 16
+    uspara = USeriesPara(preset)
+    Taylor_Q = 16
+
+    gridinfo, pad_grids, cheb_coefs, scalefactors, H_r, H_c, cheb_value, r_z = thin_paras_gen(N_real, R_z, w, β, L, cheb_order, uspara, Taylor_Q)
+
+    E_thin_k = energy_long_thin_k(qs, poses, L, r_z, H_r, H_c, gridinfo, pad_grids, scalefactors, cheb_coefs, cheb_value)
+
+    E_direct_k = long_energy_us_k(qs, poses, 30, L, uspara, 1, length(uspara.sw))
+
+    @test isapprox(E_thin_k, E_direct_k)
+end
+
+@testset "thin systems long, loop & cheb" begin
+
+    @info "testing the long range part of the thin system, loop and cheb"
+    n_atoms = 32
+    L = (100.0, 100.0, 0.5)
+
+    qs = rand(n_atoms)
+    qs .-= sum(qs) ./ n_atoms
+    poses = [tuple(L .* rand(3)...) for i in 1:n_atoms]
+
+    N_grid = (32, 32, 64)
+    uspara = USeriesPara(2)
+    M_mid = 0
+
+    k_x, k_y, r_z, H_r, H_c, phase_x, phase_y,  sort_z, z = long_paras_gen(L, N_grid, n_atoms)
+    cheb_mat = ChebUSeries(k_x, k_y, L[3], uspara, M_mid, 64)
+
+    E_long_loop_k = energy_long_loop_k(qs, poses, L, M_mid, k_x, k_y, r_z, phase_x, phase_y, H_r, H_c, uspara)
+    E_long_cheb_k = energy_long_cheb_k(qs, poses, L, k_x, k_y, r_z, phase_x, phase_y, H_r, H_c, cheb_mat)
+
+    E_direct_k = long_energy_us_k(qs, poses, 30, L, uspara, M_mid + 1, length(uspara.sw))
+    
+    @test isapprox(E_long_loop_k, E_long_cheb_k)
+    @test isapprox(E_long_loop_k, E_direct_k, atol=1e-8)
+    @test isapprox(E_long_cheb_k, E_direct_k, atol=1e-8)
 end
\ No newline at end of file
diff --git a/test/interpolate.jl b/test/interpolate.jl
new file mode 100644
index 0000000..1f1aa95
--- /dev/null
+++ b/test/interpolate.jl
@@ -0,0 +1,28 @@
+@testset "interpolate on non-uniform grid" begin
+    @info "testing the interpolation on non-uniform grid"
+    n_atoms = 8
+    L = (100.0, 100.0, 100.0)
+
+    qs = rand(n_atoms)
+    qs .-= sum(qs) ./ n_atoms
+    poses = [tuple(L .* rand(3)...) for i in 1:n_atoms]
+
+    # using the FFCT
+    N_grid = (16, 16, 16)
+    uspara = USeriesPara(2)
+    Q = 64
+    soepara = SoePara16()
+
+    for M_mid in 2:length(uspara.sw) - 1
+        @testset "M_mid = $M_mid" begin
+            k_x, k_y, r_z, H_r, H_c, phase_x, phase_y,  sort_z, z = long_paras_gen(L, N_grid, n_atoms)
+            cheb_mat = ChebUSeries(k_x, k_y, L[3], uspara, M_mid, Q)
+
+            H_r_loop = copy(interpolate_nu_loop!(copy(H_r), qs, poses, L, k_x, k_y, phase_x, phase_y, r_z, uspara, M_mid))
+
+            H_r_cheb = copy(interpolate_nu_cheb!(copy(H_r), qs, poses, L, k_x, k_y, phase_x, phase_y, r_z, cheb_mat))
+
+            @test isapprox(H_r_loop, H_r_cheb; atol=1e-8)
+        end
+    end
+end
\ No newline at end of file
diff --git a/test/interpolate_nu.jl b/test/interpolate_nu.jl
deleted file mode 100644
index 1a3b006..0000000
--- a/test/interpolate_nu.jl
+++ /dev/null
@@ -1,36 +0,0 @@
-@testset "interpolate on non-uniform grid" begin
-    @info "testing the interpolation on non-uniform grid"
-    n_atoms = 8
-    L = (100.0, 100.0, 100.0)
-
-    qs = rand(n_atoms)
-    qs .-= sum(qs) ./ n_atoms
-    poses = [tuple(L .* rand(3)...) for i in 1:n_atoms]
-
-    # using the FFCT
-    N_grid = (16, 16, 16)
-    uspara = USeriesPara(2)
-    Q = 64
-    soepara = SoePara16()
-
-    for M_mid in 2:length(uspara.sw) - 1
-        @testset "M_mid = $M_mid" begin
-            k_x, k_y, k_mat, r_z, us_mat, H_r, H_c, H_s, ivsm, b_l, b_u, phase_x, phase_y, phase_xs, phase_ys, phase_xys, rhs, sol, sort_z, z, size_dict, temp_ijlk, temp_ijl, z_coef, exp_coef = FFCT_precompute(L, N_grid, USeriesPara(2), M_mid, n_atoms)
-
-            cheb_mat = ChebUSeries(k_x, k_y, L[3], uspara, M_mid, Q)
-
-            H_r_einsum = copy(interpolate_nu_einsum!(copy(H_r), qs, poses, L, k_x, k_y, k_mat, phase_xs, phase_ys, phase_xys, z_coef, exp_coef, temp_ijlk, temp_ijl, r_z, us_mat, uspara, M_mid, size_dict))
-
-            H_r_einsum_direct = copy(FastSpecSoG.interpolate_nu_einsum_non_inplace!(copy(H_r), qs, poses, L, k_x, k_y, k_mat, phase_xs, phase_ys, phase_xys, z_coef, exp_coef, r_z, us_mat, uspara, M_mid))
-
-            H_r_loop = copy(interpolate_nu_loop!(copy(H_r), qs, poses, N_grid, L, k_x, k_y, phase_x, phase_y, r_z, us_mat, uspara, M_mid))
-
-            H_r_cheb = copy(interpolate_nu_cheb!(copy(H_r), qs, poses, L, k_x, k_y, phase_x, phase_y, r_z, cheb_mat))
-
-            @test isapprox(H_r_einsum, H_r_einsum_direct; atol = 1e-13)
-            @test isapprox(H_r_einsum_direct, H_r_loop; atol = 1e-13)
-            @test isapprox(H_r_einsum, H_r_loop; atol = 1e-13)
-            @test isapprox(H_r_einsum, H_r_cheb; atol=1e-8)
-        end
-    end
-end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 5c841fa..67520b9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,6 @@
 using FastSpecSoG
 using Test
-using ExTinyMD, EwaldSummations, TaylorSeries
+using ExTinyMD, EwaldSummations, TaylorSeries, FFTW, Polynomials
 using Random
 Random.seed!(1234)
 
@@ -9,7 +9,9 @@ Random.seed!(1234)
     include("energy.jl")
     include("energy_short.jl")
     include("energy_mid.jl")
-    include("interpolate_nu.jl")
+    include("interpolate.jl")
     include("energy_long.jl")
-    include("cheb_interpolate.jl")
+    include("Chebyshev.jl")
+    include("Taylor.jl")
+    include("scaling.jl")
 end
\ No newline at end of file
diff --git a/test/scaling.jl b/test/scaling.jl
new file mode 100644
index 0000000..c2ce1d6
--- /dev/null
+++ b/test/scaling.jl
@@ -0,0 +1,30 @@
+@testset "fft_Q2D" begin
+    N = 32
+    grid = rand(ComplexF64, N, N, N)
+    grid_copy = copy(grid)
+    grid = piecewise_fft!(grid)
+    for k in 1:N
+        @test isapprox(grid[:, :, k], fft(grid_copy[:, :, k]))
+    end
+end
+
+@testset "ifft_Q2D" begin
+    N = 32
+    grid = rand(ComplexF64, N, N, N)
+    grid_copy = copy(grid)
+    grid = piecewise_ifft!(grid)
+    for k in 1:N
+        @test isapprox(grid[:, :, k], ifft(grid_copy[:, :, k]))
+    end
+end
+
+@testset "scaling_Q2D" begin
+    N = 32
+    grid = rand(ComplexF64, N, N, N)
+    factor = rand(ComplexF64, N, N)
+    grid_copy = copy(grid)
+    grid = piecewise_mul!(grid, factor)
+    for k in 1:N
+        @test isapprox(grid[:, :, k], grid_copy[:, :, k] .* factor)
+    end
+end